From be0e5c097fc206b863ce9fe6b3cfd6974b0110f4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 26 Jan 2007 15:51:26 -0500 Subject: [PATCH 0001/2985] Btrfs: Initial checkin, basic working tree code Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 810 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 810 insertions(+) create mode 100644 fs/btrfs/ctree.c diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c new file mode 100644 index 000000000000..893fd56960a0 --- /dev/null +++ b/fs/btrfs/ctree.c @@ -0,0 +1,810 @@ +#include +#include +#include "kerncompat.h" + +#define BLOCKSIZE 4096 + +struct key { + u64 objectid; + u32 flags; + u64 offset; +} __attribute__ ((__packed__)); + +struct header { + u64 fsid[2]; /* FS specific uuid */ + u64 blocknum; + u64 parentid; + u32 csum; + u32 ham; + u16 nritems; + u16 flags; +} __attribute__ ((__packed__)); + +#define NODEPTRS_PER_BLOCK ((BLOCKSIZE - sizeof(struct header)) / \ + (sizeof(struct key) + sizeof(u64))) + +#define LEVEL_BITS 3 +#define MAX_LEVEL (1 << LEVEL_BITS) +#define node_level(f) ((f) & (MAX_LEVEL-1)) +#define is_leaf(f) (node_level(f) == 0) + +struct ctree_root { + struct node *node; +}; + +struct item { + struct key key; + u16 offset; + u16 size; +} __attribute__ ((__packed__)); + +#define LEAF_DATA_SIZE (BLOCKSIZE - sizeof(struct header)) +struct leaf { + struct header header; + union { + struct item items[LEAF_DATA_SIZE/sizeof(struct item)]; + u8 data[BLOCKSIZE-sizeof(struct header)]; + }; +} __attribute__ ((__packed__)); + +struct node { + struct header header; + struct key keys[NODEPTRS_PER_BLOCK]; + u64 blockptrs[NODEPTRS_PER_BLOCK]; +} __attribute__ ((__packed__)); + +struct ctree_path { + struct node *nodes[MAX_LEVEL]; + int slots[MAX_LEVEL]; +}; + +static inline void init_path(struct ctree_path *p) +{ + memset(p, 0, sizeof(*p)); +} + +static inline unsigned int leaf_data_end(struct leaf *leaf) +{ + unsigned int nr = leaf->header.nritems; + if (nr == 0) + return ARRAY_SIZE(leaf->data); + return leaf->items[nr-1].offset; +} + +static inline int leaf_free_space(struct leaf *leaf) +{ + int data_end = leaf_data_end(leaf); + int nritems = leaf->header.nritems; + char *items_end = (char *)(leaf->items + nritems + 1); + return (char *)(leaf->data + data_end) - (char *)items_end; +} + +int comp_keys(struct key *k1, struct key *k2) +{ + if (k1->objectid > k2->objectid) + return 1; + if (k1->objectid < k2->objectid) + return -1; + if (k1->flags > k2->flags) + return 1; + if (k1->flags < k2->flags) + return -1; + if (k1->offset > k2->offset) + return 1; + if (k1->offset < k2->offset) + return -1; + return 0; +} +int generic_bin_search(char *p, int item_size, struct key *key, + int max, int *slot) +{ + int low = 0; + int high = max; + int mid; + int ret; + struct key *tmp; + + while(low < high) { + mid = (low + high) / 2; + tmp = (struct key *)(p + mid * item_size); + ret = comp_keys(tmp, key); + + if (ret < 0) + low = mid + 1; + else if (ret > 0) + high = mid; + else { + *slot = mid; + return 0; + } + } + *slot = low; + return 1; +} + +int bin_search(struct node *c, struct key *key, int *slot) +{ + if (is_leaf(c->header.flags)) { + struct leaf *l = (struct leaf *)c; + return generic_bin_search((void *)l->items, sizeof(struct item), + key, c->header.nritems, slot); + } else { + return generic_bin_search((void *)c->keys, sizeof(struct key), + key, c->header.nritems, slot); + } + return -1; +} + +void *read_block(u64 blocknum) +{ + return (void *)blocknum; +} + +int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) +{ + struct node *c = root->node; + int slot; + int ret; + int level; + while (c) { + level = node_level(c->header.flags); + p->nodes[level] = c; + ret = bin_search(c, key, &slot); + if (!is_leaf(c->header.flags)) { + if (ret && slot > 0) + slot -= 1; + p->slots[level] = slot; + c = read_block(c->blockptrs[slot]); + continue; + } else { + p->slots[level] = slot; + return ret; + } + } + return -1; +} + +static void fixup_low_keys(struct ctree_path *path, struct key *key, + int level) +{ + int i; + /* adjust the pointers going up the tree */ + for (i = level; i < MAX_LEVEL; i++) { + struct node *t = path->nodes[i]; + int tslot = path->slots[i]; + if (!t) + break; + memcpy(t->keys + tslot, key, sizeof(*key)); + if (tslot != 0) + break; + } +} + +int __insert_ptr(struct ctree_root *root, + struct ctree_path *path, struct key *key, + u64 blocknr, int slot, int level) +{ + struct node *c; + struct node *lower; + struct key *lower_key; + int nritems; + /* need a new root */ + if (!path->nodes[level]) { + c = malloc(sizeof(struct node)); + memset(c, 0, sizeof(c)); + c->header.nritems = 2; + c->header.flags = node_level(level); + lower = path->nodes[level-1]; + if (is_leaf(lower->header.flags)) + lower_key = &((struct leaf *)lower)->items[0].key; + else + lower_key = lower->keys; + memcpy(c->keys, lower_key, sizeof(struct key)); + memcpy(c->keys + 1, key, sizeof(struct key)); + c->blockptrs[0] = (u64)lower; + c->blockptrs[1] = blocknr; + root->node = c; + path->nodes[level] = c; + path->slots[level] = 0; + if (c->keys[1].objectid == 0) + BUG(); + return 0; + } + lower = path->nodes[level]; + nritems = lower->header.nritems; + if (slot > nritems) + BUG(); + if (nritems == NODEPTRS_PER_BLOCK) + BUG(); + if (slot != nritems) { + memmove(lower->keys + slot + 1, lower->keys + slot, + (nritems - slot) * sizeof(struct key)); + memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot, + (nritems - slot) * sizeof(u64)); + } + memcpy(lower->keys + slot, key, sizeof(struct key)); + lower->blockptrs[slot] = blocknr; + lower->header.nritems++; + if (lower->keys[1].objectid == 0) + BUG(); + return 0; +} + +int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) +{ + int slot; + struct node *left; + struct node *right; + int push_items = 0; + int left_nritems; + int right_nritems; + + if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) + return 1; + slot = path->slots[level + 1]; + if (slot == 0) + return 1; + + left = read_block(path->nodes[level + 1]->blockptrs[slot - 1]); + right = path->nodes[level]; + left_nritems = left->header.nritems; + right_nritems = right->header.nritems; + push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1); + if (push_items <= 0) + return 1; + + if (right_nritems < push_items) + push_items = right_nritems; + memcpy(left->keys + left_nritems, right->keys, + push_items * sizeof(struct key)); + memcpy(left->blockptrs + left_nritems, right->blockptrs, + push_items * sizeof(u64)); + memmove(right->keys, right->keys + push_items, + (right_nritems - push_items) * sizeof(struct key)); + memmove(right->blockptrs, right->blockptrs + push_items, + (right_nritems - push_items) * sizeof(u64)); + right->header.nritems -= push_items; + left->header.nritems += push_items; + + /* adjust the pointers going up the tree */ + fixup_low_keys(path, right->keys, level + 1); + + /* then fixup the leaf pointer in the path */ + if (path->slots[level] < push_items) { + path->slots[level] += left_nritems; + path->nodes[level] = (struct node*)left; + path->slots[level + 1] -= 1; + } else { + path->slots[level] -= push_items; + } + return 0; +} + +int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) +{ + int slot; + struct node *dst; + struct node *src; + int push_items = 0; + int dst_nritems; + int src_nritems; + + if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) + return 1; + slot = path->slots[level + 1]; + if (slot == NODEPTRS_PER_BLOCK - 1) + return 1; + + if (slot >= path->nodes[level + 1]->header.nritems -1) + return 1; + + dst = read_block(path->nodes[level + 1]->blockptrs[slot + 1]); + src = path->nodes[level]; + dst_nritems = dst->header.nritems; + src_nritems = src->header.nritems; + push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1); + if (push_items <= 0) + return 1; + + if (src_nritems < push_items) + push_items = src_nritems; + memmove(dst->keys + push_items, dst->keys, + dst_nritems * sizeof(struct key)); + memcpy(dst->keys, src->keys + src_nritems - push_items, + push_items * sizeof(struct key)); + + memmove(dst->blockptrs + push_items, dst->blockptrs, + dst_nritems * sizeof(u64)); + memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items, + push_items * sizeof(u64)); + + src->header.nritems -= push_items; + dst->header.nritems += push_items; + + /* adjust the pointers going up the tree */ + memcpy(path->nodes[level + 1]->keys + path->slots[level + 1] + 1, + dst->keys, sizeof(struct key)); + /* then fixup the leaf pointer in the path */ + if (path->slots[level] >= src->header.nritems) { + path->slots[level] -= src->header.nritems; + path->nodes[level] = (struct node*)dst; + path->slots[level + 1] += 1; + } + return 0; +} + +int insert_ptr(struct ctree_root *root, + struct ctree_path *path, struct key *key, + u64 blocknr, int level) +{ + struct node *c = path->nodes[level]; + struct node *b; + struct node *bal[MAX_LEVEL]; + int bal_level = level; + int mid; + int bal_start = -1; + + memset(bal, 0, ARRAY_SIZE(bal)); + while(c && c->header.nritems == NODEPTRS_PER_BLOCK) { + if (push_node_left(root, path, + node_level(c->header.flags)) == 0) + break; + if (push_node_right(root, path, + node_level(c->header.flags)) == 0) + break; + bal_start = bal_level; + if (bal_level == MAX_LEVEL - 1) + BUG(); + b = malloc(sizeof(struct node)); + b->header.flags = c->header.flags; + mid = (c->header.nritems + 1) / 2; + memcpy(b->keys, c->keys + mid, + (c->header.nritems - mid) * sizeof(struct key)); + memcpy(b->blockptrs, c->blockptrs + mid, + (c->header.nritems - mid) * sizeof(u64)); + b->header.nritems = c->header.nritems - mid; + c->header.nritems = mid; + bal[bal_level] = b; + if (bal_level == MAX_LEVEL - 1) + break; + bal_level += 1; + c = path->nodes[bal_level]; + } + while(bal_start > 0) { + b = bal[bal_start]; + c = path->nodes[bal_start]; + __insert_ptr(root, path, b->keys, (u64)b, + path->slots[bal_start + 1] + 1, bal_start + 1); + if (path->slots[bal_start] >= c->header.nritems) { + path->slots[bal_start] -= c->header.nritems; + path->nodes[bal_start] = b; + path->slots[bal_start + 1] += 1; + } + bal_start--; + if (!bal[bal_start]) + break; + } + return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1, + level); +} + +int leaf_space_used(struct leaf *l, int start, int nr) +{ + int data_len; + int end = start + nr - 1; + + if (!nr) + return 0; + data_len = l->items[start].offset + l->items[start].size; + data_len = data_len - l->items[end].offset; + data_len += sizeof(struct item) * nr; + return data_len; +} + +int push_leaf_left(struct ctree_root *root, struct ctree_path *path, + int data_size) +{ + struct leaf *right = (struct leaf *)path->nodes[0]; + struct leaf *left; + int slot; + int i; + int free_space; + int push_space = 0; + int push_items = 0; + struct item *item; + int old_left_nritems; + + slot = path->slots[1]; + if (slot == 0) { + return 1; + } + if (!path->nodes[1]) { + return 1; + } + left = read_block(path->nodes[1]->blockptrs[slot - 1]); + free_space = leaf_free_space(left); + if (free_space < data_size + sizeof(struct item)) { + return 1; + } + for (i = 0; i < right->header.nritems; i++) { + item = right->items + i; + if (path->slots[0] == i) + push_space += data_size + sizeof(*item); + if (item->size + sizeof(*item) + push_space > free_space) + break; + push_items++; + push_space += item->size + sizeof(*item); + } + if (push_items == 0) { + return 1; + } + /* push data from right to left */ + memcpy(left->items + left->header.nritems, + right->items, push_items * sizeof(struct item)); + push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset; + memcpy(left->data + leaf_data_end(left) - push_space, + right->data + right->items[push_items - 1].offset, + push_space); + old_left_nritems = left->header.nritems; + for(i = old_left_nritems; i < old_left_nritems + push_items; i++) { + left->items[i].offset -= LEAF_DATA_SIZE - + left->items[old_left_nritems -1].offset; + } + left->header.nritems += push_items; + + /* fixup right node */ + push_space = right->items[push_items-1].offset - leaf_data_end(right); + memmove(right->data + LEAF_DATA_SIZE - push_space, right->data + + leaf_data_end(right), push_space); + memmove(right->items, right->items + push_items, + (right->header.nritems - push_items) * sizeof(struct item)); + right->header.nritems -= push_items; + push_space = LEAF_DATA_SIZE; + for (i = 0; i < right->header.nritems; i++) { + right->items[i].offset = push_space - right->items[i].size; + push_space = right->items[i].offset; + } + fixup_low_keys(path, &right->items[0].key, 1); + + /* then fixup the leaf pointer in the path */ + if (path->slots[0] < push_items) { + path->slots[0] += old_left_nritems; + path->nodes[0] = (struct node*)left; + path->slots[1] -= 1; + } else { + path->slots[0] -= push_items; + } + return 0; +} + +int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) +{ + struct leaf *l = (struct leaf *)path->nodes[0]; + int nritems = l->header.nritems; + int mid = (nritems + 1)/ 2; + int slot = path->slots[0]; + struct leaf *right; + int space_needed = data_size + sizeof(struct item); + int data_copy_size; + int rt_data_off; + int i; + int ret; + + if (push_leaf_left(root, path, data_size) == 0) { + return 0; + } + right = malloc(sizeof(struct leaf)); + memset(right, 0, sizeof(*right)); + if (mid <= slot) { + if (leaf_space_used(l, mid, nritems - mid) + space_needed > + LEAF_DATA_SIZE) + BUG(); + } else { + if (leaf_space_used(l, 0, mid + 1) + space_needed > + LEAF_DATA_SIZE) + BUG(); + } + right->header.nritems = nritems - mid; + data_copy_size = l->items[mid].offset + l->items[mid].size - + leaf_data_end(l); + memcpy(right->items, l->items + mid, + (nritems - mid) * sizeof(struct item)); + memcpy(right->data + LEAF_DATA_SIZE - data_copy_size, + l->data + leaf_data_end(l), data_copy_size); + rt_data_off = LEAF_DATA_SIZE - + (l->items[mid].offset + l->items[mid].size); + for (i = 0; i < right->header.nritems; i++) { + right->items[i].offset += rt_data_off; + } + l->header.nritems = mid; + ret = insert_ptr(root, path, &right->items[0].key, + (u64)right, 1); + if (mid <= slot) { + path->nodes[0] = (struct node *)right; + path->slots[0] -= mid; + path->slots[1] += 1; + } + return ret; +} + +int insert_item(struct ctree_root *root, struct key *key, + void *data, int data_size) +{ + int ret; + int slot; + struct leaf *leaf; + unsigned int nritems; + unsigned int data_end; + struct ctree_path path; + + init_path(&path); + ret = search_slot(root, key, &path); + if (ret == 0) + return -EEXIST; + + leaf = (struct leaf *)path.nodes[0]; + if (leaf_free_space(leaf) < sizeof(struct item) + data_size) + split_leaf(root, &path, data_size); + leaf = (struct leaf *)path.nodes[0]; + nritems = leaf->header.nritems; + data_end = leaf_data_end(leaf); + if (leaf_free_space(leaf) < sizeof(struct item) + data_size) + BUG(); + + slot = path.slots[0]; + if (slot == 0) + fixup_low_keys(&path, key, 1); + if (slot != nritems) { + int i; + unsigned int old_data = leaf->items[slot].offset + + leaf->items[slot].size; + + /* + * item0..itemN ... dataN.offset..dataN.size .. data0.size + */ + /* first correct the data pointers */ + for (i = slot; i < nritems; i++) + leaf->items[i].offset -= data_size; + + /* shift the items */ + memmove(leaf->items + slot + 1, leaf->items + slot, + (nritems - slot) * sizeof(struct item)); + + /* shift the data */ + memmove(leaf->data + data_end - data_size, leaf->data + + data_end, old_data - data_end); + data_end = old_data; + } + memcpy(&leaf->items[slot].key, key, sizeof(struct key)); + leaf->items[slot].offset = data_end - data_size; + leaf->items[slot].size = data_size; + memcpy(leaf->data + data_end - data_size, data, data_size); + leaf->header.nritems += 1; + if (leaf_free_space(leaf) < 0) + BUG(); + return 0; +} + +int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) +{ + int slot; + struct node *node; + int nritems; + + while(1) { + node = path->nodes[level]; + if (!node) + break; + slot = path->slots[level]; + nritems = node->header.nritems; + + if (slot != nritems -1) { + memmove(node->keys + slot, node->keys + slot + 1, + sizeof(struct key) * (nritems - slot - 1)); + memmove(node->blockptrs + slot, + node->blockptrs + slot + 1, + sizeof(u64) * (nritems - slot - 1)); + } + node->header.nritems--; + if (node->header.nritems != 0) { + int tslot; + if (slot == 0) + fixup_low_keys(path, node->keys, level + 1); + tslot = path->slots[level+1]; + push_node_left(root, path, level); + if (node->header.nritems) { + push_node_right(root, path, level); + } + path->slots[level+1] = tslot; + if (node->header.nritems) + break; + } + if (node == root->node) { + printf("root is now null!\n"); + root->node = NULL; + break; + } + level++; + if (!path->nodes[level]) + BUG(); + free(node); + } + return 0; +} + +int del_item(struct ctree_root *root, struct key *key) +{ + int ret; + int slot; + struct leaf *leaf; + struct ctree_path path; + int doff; + int dsize; + + init_path(&path); + ret = search_slot(root, key, &path); + if (ret != 0) + return -1; + + leaf = (struct leaf *)path.nodes[0]; + slot = path.slots[0]; + doff = leaf->items[slot].offset; + dsize = leaf->items[slot].size; + + if (slot != leaf->header.nritems - 1) { + int i; + int data_end = leaf_data_end(leaf); + memmove(leaf->data + data_end + dsize, + leaf->data + data_end, + doff - data_end); + for (i = slot + 1; i < leaf->header.nritems; i++) + leaf->items[i].offset += dsize; + memmove(leaf->items + slot, leaf->items + slot + 1, + sizeof(struct item) * + (leaf->header.nritems - slot - 1)); + } + leaf->header.nritems -= 1; + if (leaf->header.nritems == 0) { + free(leaf); + del_ptr(root, &path, 1); + } else { + if (slot == 0) + fixup_low_keys(&path, &leaf->items[0].key, 1); + if (leaf_space_used(leaf, 0, leaf->header.nritems) < + LEAF_DATA_SIZE / 4) { + /* push_leaf_left fixes the path. + * make sure the path still points to our leaf + * for possible call to del_ptr below + */ + slot = path.slots[1]; + push_leaf_left(root, &path, 1); + path.slots[1] = slot; + if (leaf->header.nritems == 0) { + free(leaf); + del_ptr(root, &path, 1); + } + } + } + return 0; +} + +void print_leaf(struct leaf *l) +{ + int i; + int nr = l->header.nritems; + struct item *item; + printf("leaf %p total ptrs %d free space %d\n", l, nr, + leaf_free_space(l)); + fflush(stdout); + for (i = 0 ; i < nr ; i++) { + item = l->items + i; + printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n", + i, + item->key.objectid, item->key.flags, item->key.offset, + item->offset, item->size); + fflush(stdout); + printf("\t\titem data %.*s\n", item->size, l->data+item->offset); + fflush(stdout); + } +} +void print_tree(struct node *c) +{ + int i; + int nr; + + if (!c) + return; + nr = c->header.nritems; + if (is_leaf(c->header.flags)) { + print_leaf((struct leaf *)c); + return; + } + printf("node %p level %d total ptrs %d free spc %lu\n", c, + node_level(c->header.flags), c->header.nritems, + NODEPTRS_PER_BLOCK - c->header.nritems); + fflush(stdout); + for (i = 0; i < nr; i++) { + printf("\tkey %d (%lu %u %lu) block %lx\n", + i, + c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, + c->blockptrs[i]); + fflush(stdout); + } + for (i = 0; i < nr; i++) { + struct node *next = read_block(c->blockptrs[i]); + if (is_leaf(next->header.flags) && + node_level(c->header.flags) != 1) + BUG(); + if (node_level(next->header.flags) != + node_level(c->header.flags) - 1) + BUG(); + print_tree(next); + } + +} + +/* for testing only */ +int next_key(int i, int max_key) { + return rand() % max_key; + // return i; +} + +int main() { + struct leaf *first_node = malloc(sizeof(struct leaf)); + struct ctree_root root; + struct key ins; + char *buf; + int i; + int num; + int ret; + int run_size = 10000000; + int max_key = 100000000; + int tree_size = 0; + struct ctree_path path; + + + srand(55); + root.node = (struct node *)first_node; + memset(first_node, 0, sizeof(*first_node)); + for (i = 0; i < run_size; i++) { + buf = malloc(64); + num = next_key(i, max_key); + // num = i; + sprintf(buf, "string-%d", num); + // printf("insert %d\n", num); + ins.objectid = num; + ins.offset = 0; + ins.flags = 0; + ret = insert_item(&root, &ins, buf, strlen(buf)); + if (!ret) + tree_size++; + } + srand(55); + for (i = 0; i < run_size; i++) { + num = next_key(i, max_key); + ins.objectid = num; + ins.offset = 0; + ins.flags = 0; + init_path(&path); + ret = search_slot(&root, &ins, &path); + if (ret) { + print_tree(root.node); + printf("unable to find %d\n", num); + exit(1); + } + } + printf("node %p level %d total ptrs %d free spc %lu\n", root.node, + node_level(root.node->header.flags), root.node->header.nritems, + NODEPTRS_PER_BLOCK - root.node->header.nritems); + // print_tree(root.node); + printf("all searches good\n"); + i = 0; + srand(55); + for (i = 0; i < run_size; i++) { + num = next_key(i, max_key); + ins.objectid = num; + del_item(&root, &ins); + } + print_tree(root.node); + return 0; +} From 4920c9ac9a4bbc6bf9acd8c614987ee6b378e78f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 26 Jan 2007 16:38:42 -0500 Subject: [PATCH 0002/2985] Btrfs: Faster deletes, add Makefile and kerncompat Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 7 ++++ fs/btrfs/ctree.c | 84 +++++++++++++++++++++++++++++++------------ fs/btrfs/kerncompat.h | 68 +++++++++++++++++++++++++++++++++++ 3 files changed, 137 insertions(+), 22 deletions(-) create mode 100644 fs/btrfs/Makefile create mode 100644 fs/btrfs/kerncompat.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile new file mode 100644 index 000000000000..9f84c08baab2 --- /dev/null +++ b/fs/btrfs/Makefile @@ -0,0 +1,7 @@ + +ctree: ctree.o + gcc -g -O2 -Wall -o ctree ctree.c + +clean: + rm ctree ctree.o + diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 893fd56960a0..4bf5e92584bd 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -615,9 +615,9 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) if (node->header.nritems) { push_node_right(root, path, level); } - path->slots[level+1] = tslot; if (node->header.nritems) break; + path->slots[level+1] = tslot; } if (node == root->node) { printf("root is now null!\n"); @@ -632,22 +632,15 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) return 0; } -int del_item(struct ctree_root *root, struct key *key) +int del_item(struct ctree_root *root, struct ctree_path *path) { - int ret; int slot; struct leaf *leaf; - struct ctree_path path; int doff; int dsize; - init_path(&path); - ret = search_slot(root, key, &path); - if (ret != 0) - return -1; - - leaf = (struct leaf *)path.nodes[0]; - slot = path.slots[0]; + leaf = (struct leaf *)path->nodes[0]; + slot = path->slots[0]; doff = leaf->items[slot].offset; dsize = leaf->items[slot].size; @@ -665,23 +658,26 @@ int del_item(struct ctree_root *root, struct key *key) } leaf->header.nritems -= 1; if (leaf->header.nritems == 0) { + if (leaf == (struct leaf *)root->node) + root->node = NULL; + else + del_ptr(root, path, 1); free(leaf); - del_ptr(root, &path, 1); } else { if (slot == 0) - fixup_low_keys(&path, &leaf->items[0].key, 1); + fixup_low_keys(path, &leaf->items[0].key, 1); if (leaf_space_used(leaf, 0, leaf->header.nritems) < LEAF_DATA_SIZE / 4) { /* push_leaf_left fixes the path. * make sure the path still points to our leaf * for possible call to del_ptr below */ - slot = path.slots[1]; - push_leaf_left(root, &path, 1); - path.slots[1] = slot; + slot = path->slots[1]; + push_leaf_left(root, path, 1); if (leaf->header.nritems == 0) { free(leaf); - del_ptr(root, &path, 1); + path->slots[1] = slot; + del_ptr(root, path, 1); } } } @@ -753,11 +749,12 @@ int main() { struct leaf *first_node = malloc(sizeof(struct leaf)); struct ctree_root root; struct key ins; + struct key last = { (u64)-1, 0, 0}; char *buf; int i; int num; int ret; - int run_size = 10000000; + int run_size = 100000; int max_key = 100000000; int tree_size = 0; struct ctree_path path; @@ -783,8 +780,6 @@ int main() { for (i = 0; i < run_size; i++) { num = next_key(i, max_key); ins.objectid = num; - ins.offset = 0; - ins.flags = 0; init_path(&path); ret = search_slot(&root, &ins, &path); if (ret) { @@ -800,11 +795,56 @@ int main() { printf("all searches good\n"); i = 0; srand(55); - for (i = 0; i < run_size; i++) { + for (i = 0 ; i < run_size/4; i++) { num = next_key(i, max_key); ins.objectid = num; - del_item(&root, &ins); + init_path(&path); + ret = search_slot(&root, &ins, &path); + if (ret) + continue; + ret = del_item(&root, &path); + if (ret != 0) + BUG(); + tree_size--; + } + srand(128); + for (i = 0; i < run_size; i++) { + buf = malloc(64); + num = next_key(i, max_key); + sprintf(buf, "string-%d", num); + ins.objectid = num; + ret = insert_item(&root, &ins, buf, strlen(buf)); + if (!ret) + tree_size++; + } + while(root.node) { + struct leaf *leaf; + int slot; + ins.objectid = (u64)-1; + init_path(&path); + ret = search_slot(&root, &ins, &path); + if (ret == 0) + BUG(); + + leaf = (struct leaf *)(path.nodes[0]); + slot = path.slots[0]; + if (slot != leaf->header.nritems) + BUG(); + while(path.slots[0] > 0) { + path.slots[0] -= 1; + slot = path.slots[0]; + leaf = (struct leaf *)(path.nodes[0]); + + if (comp_keys(&last, &leaf->items[slot].key) <= 0) + BUG(); + memcpy(&last, &leaf->items[slot].key, sizeof(last)); + ret = del_item(&root, &path); + if (ret != 0) + BUG(); + tree_size--; + } } print_tree(root.node); + printf("tree size is now %d\n", tree_size); return 0; } diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h new file mode 100644 index 000000000000..3a4bb4d661f9 --- /dev/null +++ b/fs/btrfs/kerncompat.h @@ -0,0 +1,68 @@ +#ifndef __KERNCOMPAT +#define __KERNCOMPAT +#define gfp_t int +#define get_cpu_var(p) (p) +#define __get_cpu_var(p) (p) +#define BITS_PER_LONG 64 +#define __GFP_BITS_SHIFT 20 +#define __GFP_BITS_MASK ((int)((1 << __GFP_BITS_SHIFT) - 1)) +#define __read_mostly +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define __force +#define PAGE_SHIFT 12 +#define ULONG_MAX (~0UL) +#define BUG() abort() + +typedef unsigned int u32; +typedef unsigned long u64; +typedef unsigned char u8; +typedef unsigned short u16; + +typedef unsigned long pgoff_t; + +#include +#include +#include + +struct vma_shared { int prio_tree_node; }; +struct vm_area_struct { + unsigned long vm_pgoff; + unsigned long vm_start; + unsigned long vm_end; + struct vma_shared shared; +}; + +struct page { + unsigned long index; +}; + +static inline void preempt_enable(void) { do {; } while(0);} +static inline void preempt_disable(void) { do {; } while(0);} + +static inline void __set_bit(int bit, unsigned long *map) { + unsigned long *p = map + bit / BITS_PER_LONG; + bit = bit & (BITS_PER_LONG -1); + *p |= 1UL << bit; +} + +static inline int test_bit(int bit, unsigned long *map) { + unsigned long *p = map + bit / BITS_PER_LONG; + bit = bit & (BITS_PER_LONG -1); + return *p & (1UL << bit) ? 1 : 0; +} + +static inline void __clear_bit(int bit, unsigned long *map) { + unsigned long *p = map + bit / BITS_PER_LONG; + bit = bit & (BITS_PER_LONG -1); + *p &= ~(1UL << bit); +} +#define BUG_ON(c) do { if (c) abort(); } while (0) + +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - __builtin_offsetof(type,member) );}) + +#endif + +#define ENOMEM 5 +#define EEXIST 6 From eb60ceac0ebd11e9896ed1ca84fcad71a4b23601 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Feb 2007 09:18:22 -0500 Subject: [PATCH 0003/2985] Btrfs: Add backing store, memory management Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 11 +- fs/btrfs/ctree.c | 430 ++++++++++++++-------- fs/btrfs/ctree.h | 62 ++++ fs/btrfs/disk-io.c | 174 +++++++++ fs/btrfs/disk-io.h | 21 ++ fs/btrfs/kerncompat.h | 1 + fs/btrfs/radix-tree.c | 836 ++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/radix-tree.h | 73 ++++ 8 files changed, 1446 insertions(+), 162 deletions(-) create mode 100644 fs/btrfs/ctree.h create mode 100644 fs/btrfs/disk-io.c create mode 100644 fs/btrfs/disk-io.h create mode 100644 fs/btrfs/radix-tree.c create mode 100644 fs/btrfs/radix-tree.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9f84c08baab2..63360212a075 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,7 +1,12 @@ -ctree: ctree.o - gcc -g -O2 -Wall -o ctree ctree.c +CFLAGS= -g -Wall + +.c.o: + $(CC) $(CFLAGS) -c $< + +ctree: ctree.o disk-io.h ctree.h disk-io.o radix-tree.o radix-tree.h + gcc $(CFLAGS) -o ctree ctree.o disk-io.o radix-tree.o clean: - rm ctree ctree.o + rm ctree *.o diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4bf5e92584bd..6f0522f21082 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,68 +1,25 @@ #include #include #include "kerncompat.h" - -#define BLOCKSIZE 4096 - -struct key { - u64 objectid; - u32 flags; - u64 offset; -} __attribute__ ((__packed__)); - -struct header { - u64 fsid[2]; /* FS specific uuid */ - u64 blocknum; - u64 parentid; - u32 csum; - u32 ham; - u16 nritems; - u16 flags; -} __attribute__ ((__packed__)); - -#define NODEPTRS_PER_BLOCK ((BLOCKSIZE - sizeof(struct header)) / \ - (sizeof(struct key) + sizeof(u64))) - -#define LEVEL_BITS 3 -#define MAX_LEVEL (1 << LEVEL_BITS) -#define node_level(f) ((f) & (MAX_LEVEL-1)) -#define is_leaf(f) (node_level(f) == 0) - -struct ctree_root { - struct node *node; -}; - -struct item { - struct key key; - u16 offset; - u16 size; -} __attribute__ ((__packed__)); - -#define LEAF_DATA_SIZE (BLOCKSIZE - sizeof(struct header)) -struct leaf { - struct header header; - union { - struct item items[LEAF_DATA_SIZE/sizeof(struct item)]; - u8 data[BLOCKSIZE-sizeof(struct header)]; - }; -} __attribute__ ((__packed__)); - -struct node { - struct header header; - struct key keys[NODEPTRS_PER_BLOCK]; - u64 blockptrs[NODEPTRS_PER_BLOCK]; -} __attribute__ ((__packed__)); - -struct ctree_path { - struct node *nodes[MAX_LEVEL]; - int slots[MAX_LEVEL]; -}; +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" static inline void init_path(struct ctree_path *p) { memset(p, 0, sizeof(*p)); } +static void release_path(struct ctree_root *root, struct ctree_path *p) +{ + int i; + for (i = 0; i < MAX_LEVEL; i++) { + if (!p->nodes[i]) + break; + tree_block_release(root, p->nodes[i]); + } +} + static inline unsigned int leaf_data_end(struct leaf *leaf) { unsigned int nr = leaf->header.nritems; @@ -135,26 +92,25 @@ int bin_search(struct node *c, struct key *key, int *slot) return -1; } -void *read_block(u64 blocknum) -{ - return (void *)blocknum; -} - int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) { - struct node *c = root->node; + struct tree_buffer *b = root->node; + struct node *c; + int slot; int ret; int level; - while (c) { + b->count++; + while (b) { + c = &b->node; level = node_level(c->header.flags); - p->nodes[level] = c; + p->nodes[level] = b; ret = bin_search(c, key, &slot); if (!is_leaf(c->header.flags)) { if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - c = read_block(c->blockptrs[slot]); + b = read_tree_block(root, c->blockptrs[slot]); continue; } else { p->slots[level] = slot; @@ -164,17 +120,20 @@ int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) return -1; } -static void fixup_low_keys(struct ctree_path *path, struct key *key, - int level) +static void fixup_low_keys(struct ctree_root *root, + struct ctree_path *path, struct key *key, + int level) { int i; /* adjust the pointers going up the tree */ for (i = level; i < MAX_LEVEL; i++) { - struct node *t = path->nodes[i]; + struct node *t; int tslot = path->slots[i]; - if (!t) + if (!path->nodes[i]) break; + t = &path->nodes[i]->node; memcpy(t->keys + tslot, key, sizeof(*key)); + write_tree_block(root, path->nodes[i]); if (tslot != 0) break; } @@ -190,27 +149,34 @@ int __insert_ptr(struct ctree_root *root, int nritems; /* need a new root */ if (!path->nodes[level]) { - c = malloc(sizeof(struct node)); + struct tree_buffer *t; + t = alloc_free_block(root); + c = &t->node; memset(c, 0, sizeof(c)); c->header.nritems = 2; c->header.flags = node_level(level); - lower = path->nodes[level-1]; + c->header.blocknr = t->blocknr; + lower = &path->nodes[level-1]->node; if (is_leaf(lower->header.flags)) lower_key = &((struct leaf *)lower)->items[0].key; else lower_key = lower->keys; memcpy(c->keys, lower_key, sizeof(struct key)); memcpy(c->keys + 1, key, sizeof(struct key)); - c->blockptrs[0] = (u64)lower; + c->blockptrs[0] = path->nodes[level-1]->blocknr; c->blockptrs[1] = blocknr; - root->node = c; - path->nodes[level] = c; + /* the path has an extra ref to root->node */ + tree_block_release(root, root->node); + root->node = t; + t->count++; + write_tree_block(root, t); + path->nodes[level] = t; path->slots[level] = 0; if (c->keys[1].objectid == 0) BUG(); return 0; } - lower = path->nodes[level]; + lower = &path->nodes[level]->node; nritems = lower->header.nritems; if (slot > nritems) BUG(); @@ -227,6 +193,7 @@ int __insert_ptr(struct ctree_root *root, lower->header.nritems++; if (lower->keys[1].objectid == 0) BUG(); + write_tree_block(root, path->nodes[level]); return 0; } @@ -238,6 +205,8 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) int push_items = 0; int left_nritems; int right_nritems; + struct tree_buffer *t; + struct tree_buffer *right_buf; if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) return 1; @@ -245,13 +214,18 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) if (slot == 0) return 1; - left = read_block(path->nodes[level + 1]->blockptrs[slot - 1]); - right = path->nodes[level]; + t = read_tree_block(root, + path->nodes[level + 1]->node.blockptrs[slot - 1]); + left = &t->node; + right_buf = path->nodes[level]; + right = &right_buf->node; left_nritems = left->header.nritems; right_nritems = right->header.nritems; push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1); - if (push_items <= 0) + if (push_items <= 0) { + tree_block_release(root, t); return 1; + } if (right_nritems < push_items) push_items = right_nritems; @@ -267,15 +241,20 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) left->header.nritems += push_items; /* adjust the pointers going up the tree */ - fixup_low_keys(path, right->keys, level + 1); + fixup_low_keys(root, path, right->keys, level + 1); + + write_tree_block(root, t); + write_tree_block(root, right_buf); /* then fixup the leaf pointer in the path */ if (path->slots[level] < push_items) { path->slots[level] += left_nritems; - path->nodes[level] = (struct node*)left; + tree_block_release(root, path->nodes[level]); + path->nodes[level] = t; path->slots[level + 1] -= 1; } else { path->slots[level] -= push_items; + tree_block_release(root, t); } return 0; } @@ -283,6 +262,8 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) { int slot; + struct tree_buffer *t; + struct tree_buffer *src_buffer; struct node *dst; struct node *src; int push_items = 0; @@ -295,16 +276,21 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) if (slot == NODEPTRS_PER_BLOCK - 1) return 1; - if (slot >= path->nodes[level + 1]->header.nritems -1) + if (slot >= path->nodes[level + 1]->node.header.nritems -1) return 1; - dst = read_block(path->nodes[level + 1]->blockptrs[slot + 1]); - src = path->nodes[level]; + t = read_tree_block(root, + path->nodes[level + 1]->node.blockptrs[slot + 1]); + dst = &t->node; + src_buffer = path->nodes[level]; + src = &src_buffer->node; dst_nritems = dst->header.nritems; src_nritems = src->header.nritems; push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1); - if (push_items <= 0) + if (push_items <= 0) { + tree_block_release(root, t); return 1; + } if (src_nritems < push_items) push_items = src_nritems; @@ -322,13 +308,21 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) dst->header.nritems += push_items; /* adjust the pointers going up the tree */ - memcpy(path->nodes[level + 1]->keys + path->slots[level + 1] + 1, + memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1, dst->keys, sizeof(struct key)); + + write_tree_block(root, path->nodes[level + 1]); + write_tree_block(root, t); + write_tree_block(root, src_buffer); + /* then fixup the leaf pointer in the path */ if (path->slots[level] >= src->header.nritems) { path->slots[level] -= src->header.nritems; - path->nodes[level] = (struct node*)dst; + tree_block_release(root, path->nodes[level]); + path->nodes[level] = t; path->slots[level + 1] += 1; + } else { + tree_block_release(root, t); } return 0; } @@ -337,15 +331,18 @@ int insert_ptr(struct ctree_root *root, struct ctree_path *path, struct key *key, u64 blocknr, int level) { - struct node *c = path->nodes[level]; + struct tree_buffer *t = path->nodes[level]; + struct node *c = &path->nodes[level]->node; struct node *b; - struct node *bal[MAX_LEVEL]; + struct tree_buffer *b_buffer; + struct tree_buffer *bal[MAX_LEVEL]; int bal_level = level; int mid; int bal_start = -1; memset(bal, 0, ARRAY_SIZE(bal)); - while(c && c->header.nritems == NODEPTRS_PER_BLOCK) { + while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) { + c = &t->node; if (push_node_left(root, path, node_level(c->header.flags)) == 0) break; @@ -355,8 +352,10 @@ int insert_ptr(struct ctree_root *root, bal_start = bal_level; if (bal_level == MAX_LEVEL - 1) BUG(); - b = malloc(sizeof(struct node)); + b_buffer = alloc_free_block(root); + b = &b_buffer->node; b->header.flags = c->header.flags; + b->header.blocknr = b_buffer->blocknr; mid = (c->header.nritems + 1) / 2; memcpy(b->keys, c->keys + mid, (c->header.nritems - mid) * sizeof(struct key)); @@ -364,21 +363,28 @@ int insert_ptr(struct ctree_root *root, (c->header.nritems - mid) * sizeof(u64)); b->header.nritems = c->header.nritems - mid; c->header.nritems = mid; - bal[bal_level] = b; + + write_tree_block(root, t); + write_tree_block(root, b_buffer); + + bal[bal_level] = b_buffer; if (bal_level == MAX_LEVEL - 1) break; bal_level += 1; - c = path->nodes[bal_level]; + t = path->nodes[bal_level]; } while(bal_start > 0) { - b = bal[bal_start]; - c = path->nodes[bal_start]; - __insert_ptr(root, path, b->keys, (u64)b, + b_buffer = bal[bal_start]; + c = &path->nodes[bal_start]->node; + __insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr, path->slots[bal_start + 1] + 1, bal_start + 1); if (path->slots[bal_start] >= c->header.nritems) { path->slots[bal_start] -= c->header.nritems; - path->nodes[bal_start] = b; + tree_block_release(root, path->nodes[bal_start]); + path->nodes[bal_start] = b_buffer; path->slots[bal_start + 1] += 1; + } else { + tree_block_release(root, b_buffer); } bal_start--; if (!bal[bal_start]) @@ -404,7 +410,9 @@ int leaf_space_used(struct leaf *l, int start, int nr) int push_leaf_left(struct ctree_root *root, struct ctree_path *path, int data_size) { - struct leaf *right = (struct leaf *)path->nodes[0]; + struct tree_buffer *right_buf = path->nodes[0]; + struct leaf *right = &right_buf->leaf; + struct tree_buffer *t; struct leaf *left; int slot; int i; @@ -421,9 +429,11 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, if (!path->nodes[1]) { return 1; } - left = read_block(path->nodes[1]->blockptrs[slot - 1]); + t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]); + left = &t->leaf; free_space = leaf_free_space(left); if (free_space < data_size + sizeof(struct item)) { + tree_block_release(root, t); return 1; } for (i = 0; i < right->header.nritems; i++) { @@ -436,6 +446,7 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, push_space += item->size + sizeof(*item); } if (push_items == 0) { + tree_block_release(root, t); return 1; } /* push data from right to left */ @@ -446,6 +457,8 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, right->data + right->items[push_items - 1].offset, push_space); old_left_nritems = left->header.nritems; + BUG_ON(old_left_nritems < 0); + for(i = old_left_nritems; i < old_left_nritems + push_items; i++) { left->items[i].offset -= LEAF_DATA_SIZE - left->items[old_left_nritems -1].offset; @@ -460,30 +473,40 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, (right->header.nritems - push_items) * sizeof(struct item)); right->header.nritems -= push_items; push_space = LEAF_DATA_SIZE; + for (i = 0; i < right->header.nritems; i++) { right->items[i].offset = push_space - right->items[i].size; push_space = right->items[i].offset; } - fixup_low_keys(path, &right->items[0].key, 1); + + write_tree_block(root, t); + write_tree_block(root, right_buf); + + fixup_low_keys(root, path, &right->items[0].key, 1); /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { path->slots[0] += old_left_nritems; - path->nodes[0] = (struct node*)left; + tree_block_release(root, path->nodes[0]); + path->nodes[0] = t; path->slots[1] -= 1; } else { + tree_block_release(root, t); path->slots[0] -= push_items; } + BUG_ON(path->slots[0] < 0); return 0; } int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) { - struct leaf *l = (struct leaf *)path->nodes[0]; - int nritems = l->header.nritems; - int mid = (nritems + 1)/ 2; - int slot = path->slots[0]; + struct tree_buffer *l_buf = path->nodes[0]; + struct leaf *l = &l_buf->leaf; + int nritems; + int mid; + int slot; struct leaf *right; + struct tree_buffer *right_buffer; int space_needed = data_size + sizeof(struct item); int data_copy_size; int rt_data_off; @@ -491,9 +514,19 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) int ret; if (push_leaf_left(root, path, data_size) == 0) { - return 0; + l_buf = path->nodes[0]; + l = &l_buf->leaf; + if (leaf_free_space(l) >= sizeof(struct item) + data_size) + return 0; } - right = malloc(sizeof(struct leaf)); + slot = path->slots[0]; + nritems = l->header.nritems; + mid = (nritems + 1)/ 2; + + right_buffer = alloc_free_block(root); + BUG_ON(!right_buffer); + BUG_ON(mid == nritems); + right = &right_buffer->leaf; memset(right, 0, sizeof(*right)); if (mid <= slot) { if (leaf_space_used(l, mid, nritems - mid) + space_needed > @@ -505,6 +538,8 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) BUG(); } right->header.nritems = nritems - mid; + right->header.blocknr = right_buffer->blocknr; + right->header.flags = node_level(0); data_copy_size = l->items[mid].offset + l->items[mid].size - leaf_data_end(l); memcpy(right->items, l->items + mid, @@ -518,12 +553,20 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) } l->header.nritems = mid; ret = insert_ptr(root, path, &right->items[0].key, - (u64)right, 1); + right_buffer->blocknr, 1); + + write_tree_block(root, right_buffer); + write_tree_block(root, l_buf); + + BUG_ON(path->slots[0] != slot); if (mid <= slot) { - path->nodes[0] = (struct node *)right; + tree_block_release(root, path->nodes[0]); + path->nodes[0] = right_buffer; path->slots[0] -= mid; path->slots[1] += 1; - } + } else + tree_block_release(root, right_buffer); + BUG_ON(path->slots[0] < 0); return ret; } @@ -532,28 +575,48 @@ int insert_item(struct ctree_root *root, struct key *key, { int ret; int slot; + int slot_orig; struct leaf *leaf; + struct tree_buffer *leaf_buf; unsigned int nritems; unsigned int data_end; struct ctree_path path; + if (!root->node) { + struct tree_buffer *t; + t = alloc_free_block(root); + BUG_ON(!t); + t->node.header.nritems = 0; + t->node.header.flags = node_level(0); + t->node.header.blocknr = t->blocknr; + root->node = t; + write_tree_block(root, t); + } init_path(&path); ret = search_slot(root, key, &path); - if (ret == 0) + if (ret == 0) { + release_path(root, &path); return -EEXIST; + } - leaf = (struct leaf *)path.nodes[0]; - if (leaf_free_space(leaf) < sizeof(struct item) + data_size) + slot_orig = path.slots[0]; + leaf_buf = path.nodes[0]; + leaf = &leaf_buf->leaf; + if (leaf_free_space(leaf) < sizeof(struct item) + data_size) { split_leaf(root, &path, data_size); - leaf = (struct leaf *)path.nodes[0]; + leaf_buf = path.nodes[0]; + leaf = &path.nodes[0]->leaf; + } nritems = leaf->header.nritems; data_end = leaf_data_end(leaf); + if (leaf_free_space(leaf) < sizeof(struct item) + data_size) BUG(); slot = path.slots[0]; + BUG_ON(slot < 0); if (slot == 0) - fixup_low_keys(&path, key, 1); + fixup_low_keys(root, &path, key, 1); if (slot != nritems) { int i; unsigned int old_data = leaf->items[slot].offset + @@ -580,21 +643,25 @@ int insert_item(struct ctree_root *root, struct key *key, leaf->items[slot].size = data_size; memcpy(leaf->data + data_end - data_size, data, data_size); leaf->header.nritems += 1; + write_tree_block(root, leaf_buf); if (leaf_free_space(leaf) < 0) BUG(); + release_path(root, &path); return 0; } int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) { int slot; + struct tree_buffer *t; struct node *node; int nritems; while(1) { - node = path->nodes[level]; - if (!node) + t = path->nodes[level]; + if (!t) break; + node = &t->node; slot = path->slots[level]; nritems = node->header.nritems; @@ -606,28 +673,34 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) sizeof(u64) * (nritems - slot - 1)); } node->header.nritems--; + write_tree_block(root, t); if (node->header.nritems != 0) { int tslot; if (slot == 0) - fixup_low_keys(path, node->keys, level + 1); + fixup_low_keys(root, path, node->keys, + level + 1); tslot = path->slots[level+1]; + t->count++; push_node_left(root, path, level); if (node->header.nritems) { push_node_right(root, path, level); } - if (node->header.nritems) + if (node->header.nritems) { + tree_block_release(root, t); break; + } + tree_block_release(root, t); path->slots[level+1] = tslot; } - if (node == root->node) { - printf("root is now null!\n"); - root->node = NULL; + if (t == root->node) { + /* just turn the root into a leaf and break */ + root->node->node.header.flags = node_level(0); + write_tree_block(root, t); break; } level++; if (!path->nodes[level]) BUG(); - free(node); } return 0; } @@ -636,10 +709,12 @@ int del_item(struct ctree_root *root, struct ctree_path *path) { int slot; struct leaf *leaf; + struct tree_buffer *leaf_buf; int doff; int dsize; - leaf = (struct leaf *)path->nodes[0]; + leaf_buf = path->nodes[0]; + leaf = &leaf_buf->leaf; slot = path->slots[0]; doff = leaf->items[slot].offset; dsize = leaf->items[slot].size; @@ -658,14 +733,15 @@ int del_item(struct ctree_root *root, struct ctree_path *path) } leaf->header.nritems -= 1; if (leaf->header.nritems == 0) { - if (leaf == (struct leaf *)root->node) - root->node = NULL; - else + if (leaf_buf == root->node) { + leaf->header.flags = node_level(0); + write_tree_block(root, leaf_buf); + } else del_ptr(root, path, 1); - free(leaf); } else { if (slot == 0) - fixup_low_keys(path, &leaf->items[0].key, 1); + fixup_low_keys(root, path, &leaf->items[0].key, 1); + write_tree_block(root, leaf_buf); if (leaf_space_used(leaf, 0, leaf->header.nritems) < LEAF_DATA_SIZE / 4) { /* push_leaf_left fixes the path. @@ -673,12 +749,13 @@ int del_item(struct ctree_root *root, struct ctree_path *path) * for possible call to del_ptr below */ slot = path->slots[1]; + leaf_buf->count++; push_leaf_left(root, path, 1); if (leaf->header.nritems == 0) { - free(leaf); path->slots[1] = slot; del_ptr(root, path, 1); } + tree_block_release(root, leaf_buf); } } return 0; @@ -689,7 +766,7 @@ void print_leaf(struct leaf *l) int i; int nr = l->header.nritems; struct item *item; - printf("leaf %p total ptrs %d free space %d\n", l, nr, + printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr, leaf_free_space(l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { @@ -703,38 +780,45 @@ void print_leaf(struct leaf *l) fflush(stdout); } } -void print_tree(struct node *c) +void print_tree(struct ctree_root *root, struct tree_buffer *t) { int i; int nr; + struct node *c; - if (!c) + if (!t) return; + c = &t->node; nr = c->header.nritems; + if (c->header.blocknr != t->blocknr) + BUG(); if (is_leaf(c->header.flags)) { print_leaf((struct leaf *)c); return; } - printf("node %p level %d total ptrs %d free spc %lu\n", c, + printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr, node_level(c->header.flags), c->header.nritems, NODEPTRS_PER_BLOCK - c->header.nritems); fflush(stdout); for (i = 0; i < nr; i++) { - printf("\tkey %d (%lu %u %lu) block %lx\n", + printf("\tkey %d (%lu %u %lu) block %lu\n", i, c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, c->blockptrs[i]); fflush(stdout); } for (i = 0; i < nr; i++) { - struct node *next = read_block(c->blockptrs[i]); + struct tree_buffer *next_buf = read_tree_block(root, + c->blockptrs[i]); + struct node *next = &next_buf->node; if (is_leaf(next->header.flags) && node_level(c->header.flags) != 1) BUG(); if (node_level(next->header.flags) != node_level(c->header.flags) - 1) BUG(); - print_tree(next); + print_tree(root, next_buf); + tree_block_release(root, next_buf); } } @@ -746,23 +830,24 @@ int next_key(int i, int max_key) { } int main() { - struct leaf *first_node = malloc(sizeof(struct leaf)); - struct ctree_root root; + struct ctree_root *root; struct key ins; struct key last = { (u64)-1, 0, 0}; char *buf; int i; int num; int ret; - int run_size = 100000; + int run_size = 1000000; int max_key = 100000000; int tree_size = 0; struct ctree_path path; + radix_tree_init(); + + + root = open_ctree("dbfile"); srand(55); - root.node = (struct node *)first_node; - memset(first_node, 0, sizeof(*first_node)); for (i = 0; i < run_size; i++) { buf = malloc(64); num = next_key(i, max_key); @@ -772,39 +857,46 @@ int main() { ins.objectid = num; ins.offset = 0; ins.flags = 0; - ret = insert_item(&root, &ins, buf, strlen(buf)); + ret = insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; } + close_ctree(root); + root = open_ctree("dbfile"); + printf("starting search\n"); srand(55); for (i = 0; i < run_size; i++) { num = next_key(i, max_key); ins.objectid = num; init_path(&path); - ret = search_slot(&root, &ins, &path); + ret = search_slot(root, &ins, &path); if (ret) { - print_tree(root.node); + print_tree(root, root->node); printf("unable to find %d\n", num); exit(1); } + release_path(root, &path); } - printf("node %p level %d total ptrs %d free spc %lu\n", root.node, - node_level(root.node->header.flags), root.node->header.nritems, - NODEPTRS_PER_BLOCK - root.node->header.nritems); - // print_tree(root.node); - printf("all searches good\n"); + close_ctree(root); + root = open_ctree("dbfile"); + printf("node %p level %d total ptrs %d free spc %lu\n", root->node, + node_level(root->node->node.header.flags), + root->node->node.header.nritems, + NODEPTRS_PER_BLOCK - root->node->node.header.nritems); + printf("all searches good, deleting some items\n"); i = 0; srand(55); for (i = 0 ; i < run_size/4; i++) { num = next_key(i, max_key); ins.objectid = num; init_path(&path); - ret = search_slot(&root, &ins, &path); + ret = search_slot(root, &ins, &path); if (ret) continue; - ret = del_item(&root, &path); + ret = del_item(root, &path); if (ret != 0) BUG(); + release_path(root, &path); tree_size--; } srand(128); @@ -813,38 +905,58 @@ int main() { num = next_key(i, max_key); sprintf(buf, "string-%d", num); ins.objectid = num; - ret = insert_item(&root, &ins, buf, strlen(buf)); + ret = insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; } - while(root.node) { + close_ctree(root); + root = open_ctree("dbfile"); + printf("starting search2\n"); + srand(128); + for (i = 0; i < run_size; i++) { + num = next_key(i, max_key); + ins.objectid = num; + init_path(&path); + ret = search_slot(root, &ins, &path); + if (ret) { + print_tree(root, root->node); + printf("unable to find %d\n", num); + exit(1); + } + release_path(root, &path); + } + printf("starting big long delete run\n"); + while(root->node && root->node->node.header.nritems > 0) { struct leaf *leaf; int slot; ins.objectid = (u64)-1; init_path(&path); - ret = search_slot(&root, &ins, &path); + ret = search_slot(root, &ins, &path); if (ret == 0) BUG(); - leaf = (struct leaf *)(path.nodes[0]); + leaf = &path.nodes[0]->leaf; slot = path.slots[0]; if (slot != leaf->header.nritems) BUG(); while(path.slots[0] > 0) { path.slots[0] -= 1; slot = path.slots[0]; - leaf = (struct leaf *)(path.nodes[0]); + leaf = &path.nodes[0]->leaf; if (comp_keys(&last, &leaf->items[slot].key) <= 0) BUG(); memcpy(&last, &leaf->items[slot].key, sizeof(last)); - ret = del_item(&root, &path); - if (ret != 0) + ret = del_item(root, &path); + if (ret != 0) { + printf("del_item returned %d\n", ret); BUG(); + } tree_size--; } + release_path(root, &path); } - print_tree(root.node); + close_ctree(root); printf("tree size is now %d\n", tree_size); return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h new file mode 100644 index 000000000000..586bf1866042 --- /dev/null +++ b/fs/btrfs/ctree.h @@ -0,0 +1,62 @@ +#ifndef __CTREE__ +#define __CTREE__ + +#define CTREE_BLOCKSIZE 4096 + +struct key { + u64 objectid; + u32 flags; + u64 offset; +} __attribute__ ((__packed__)); + +struct header { + u64 fsid[2]; /* FS specific uuid */ + u64 blocknr; + u64 parentid; + u32 csum; + u32 ham; + u16 nritems; + u16 flags; +} __attribute__ ((__packed__)); + +#define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct header)) / \ + (sizeof(struct key) + sizeof(u64))) + +#define LEVEL_BITS 3 +#define MAX_LEVEL (1 << LEVEL_BITS) +#define node_level(f) ((f) & (MAX_LEVEL-1)) +#define is_leaf(f) (node_level(f) == 0) + +struct tree_buffer; +struct ctree_root { + struct tree_buffer *node; + int fp; + struct radix_tree_root cache_radix; +}; + +struct item { + struct key key; + u16 offset; + u16 size; +} __attribute__ ((__packed__)); + +#define LEAF_DATA_SIZE (CTREE_BLOCKSIZE - sizeof(struct header)) +struct leaf { + struct header header; + union { + struct item items[LEAF_DATA_SIZE/sizeof(struct item)]; + u8 data[CTREE_BLOCKSIZE-sizeof(struct header)]; + }; +} __attribute__ ((__packed__)); + +struct node { + struct header header; + struct key keys[NODEPTRS_PER_BLOCK]; + u64 blockptrs[NODEPTRS_PER_BLOCK]; +} __attribute__ ((__packed__)); + +struct ctree_path { + struct tree_buffer *nodes[MAX_LEVEL]; + int slots[MAX_LEVEL]; +}; +#endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c new file mode 100644 index 000000000000..8d51a07051d9 --- /dev/null +++ b/fs/btrfs/disk-io.c @@ -0,0 +1,174 @@ +#define _XOPEN_SOURCE 500 +#include +#include +#include +#include +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" + +static int allocated_blocks = 0; + +struct ctree_header { + u64 root_block; +} __attribute__ ((__packed__)); + +static int get_free_block(struct ctree_root *root, u64 *block) +{ + struct stat st; + int ret; + + st.st_size = 0; + ret = fstat(root->fp, &st); + if (st.st_size > sizeof(struct ctree_header)) { + *block = (st.st_size - + sizeof(struct ctree_header)) / CTREE_BLOCKSIZE; + } else { + *block = 0; + } + ret = ftruncate(root->fp, sizeof(struct ctree_header) + (*block + 1) * + CTREE_BLOCKSIZE); + return ret; +} + +struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) +{ + struct tree_buffer *buf; + int ret; + buf = malloc(sizeof(struct tree_buffer)); + if (!buf) + return buf; + allocated_blocks++; + buf->blocknr = blocknr; + buf->count = 1; + radix_tree_preload(GFP_KERNEL); + ret = radix_tree_insert(&root->cache_radix, blocknr, buf); + radix_tree_preload_end(); + if (ret) { + free(buf); + return NULL; + } + return buf; +} + +struct tree_buffer *alloc_free_block(struct ctree_root *root) +{ + u64 free_block; + int ret; + struct tree_buffer * buf; + ret = get_free_block(root, &free_block); + if (ret) { + BUG(); + return NULL; + } + buf = alloc_tree_block(root, free_block); + if (!buf) + BUG(); + return buf; +} + +struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) +{ + loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header); + struct tree_buffer *buf; + int ret; + + buf = radix_tree_lookup(&root->cache_radix, blocknr); + if (buf) { + buf->count++; + if (buf->blocknr != blocknr) + BUG(); + if (buf->blocknr != buf->node.header.blocknr) + BUG(); + return buf; + } + buf = alloc_tree_block(root, blocknr); + if (!buf) + return NULL; + ret = pread(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); + if (ret != CTREE_BLOCKSIZE) { + free(buf); + return NULL; + } + if (buf->blocknr != buf->node.header.blocknr) + BUG(); + return buf; +} + +int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) +{ + u64 blocknr = buf->blocknr; + loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header); + int ret; + + if (buf->blocknr != buf->node.header.blocknr) + BUG(); + ret = pwrite(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); + if (ret != CTREE_BLOCKSIZE) + return ret; + if (buf == root->node) + return update_root_block(root); + return 0; +} + +struct ctree_root *open_ctree(char *filename) +{ + struct ctree_root *root = malloc(sizeof(struct ctree_root)); + int fp; + u64 root_block; + int ret; + + fp = open(filename, O_CREAT | O_RDWR); + if (fp < 0) { + free(root); + return NULL; + } + root->fp = fp; + INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); + ret = pread(fp, &root_block, sizeof(u64), 0); + if (ret == sizeof(u64)) { + printf("reading root node at block %lu\n", root_block); + root->node = read_tree_block(root, root_block); + } else + root->node = NULL; + return root; +} + +int close_ctree(struct ctree_root *root) +{ + close(root->fp); + if (root->node) + tree_block_release(root, root->node); + free(root); + printf("on close %d blocks are allocated\n", allocated_blocks); + return 0; +} + +int update_root_block(struct ctree_root *root) +{ + int ret; + u64 root_block = root->node->blocknr; + + ret = pwrite(root->fp, &root_block, sizeof(u64), 0); + if (ret != sizeof(u64)) + return ret; + return 0; +} + +void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) +{ + buf->count--; + if (buf->count == 0) { + if (!radix_tree_lookup(&root->cache_radix, buf->blocknr)) + BUG(); + radix_tree_delete(&root->cache_radix, buf->blocknr); + memset(buf, 0, sizeof(*buf)); + free(buf); + BUG_ON(allocated_blocks == 0); + allocated_blocks--; + } +} + diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h new file mode 100644 index 000000000000..ee95fa05c4a3 --- /dev/null +++ b/fs/btrfs/disk-io.h @@ -0,0 +1,21 @@ +#ifndef __DISKIO__ +#define __DISKIO__ + +struct tree_buffer { + u64 blocknr; + int count; + union { + struct node node; + struct leaf leaf; + }; +}; + +struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr); +int write_tree_block(struct ctree_root *root, struct tree_buffer *buf); +struct ctree_root *open_ctree(char *filename); +int close_ctree(struct ctree_root *root); +void tree_block_release(struct ctree_root *root, struct tree_buffer *buf); +struct tree_buffer *alloc_free_block(struct ctree_root *root); +int update_root_block(struct ctree_root *root); + +#endif diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h index 3a4bb4d661f9..347ca06205c2 100644 --- a/fs/btrfs/kerncompat.h +++ b/fs/btrfs/kerncompat.h @@ -6,6 +6,7 @@ #define BITS_PER_LONG 64 #define __GFP_BITS_SHIFT 20 #define __GFP_BITS_MASK ((int)((1 << __GFP_BITS_SHIFT) - 1)) +#define GFP_KERNEL 0 #define __read_mostly #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define __force diff --git a/fs/btrfs/radix-tree.c b/fs/btrfs/radix-tree.c new file mode 100644 index 000000000000..baa25ca1c2ac --- /dev/null +++ b/fs/btrfs/radix-tree.c @@ -0,0 +1,836 @@ +/* + * Copyright (C) 2001 Momchil Velikov + * Portions Copyright (C) 2001 Christoph Hellwig + * Copyright (C) 2005 SGI, Christoph Lameter + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "kerncompat.h" +#include "radix-tree.h" +#ifdef __KERNEL__ +#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) +#else +#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ +#endif + +#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) +#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) + +#define RADIX_TREE_TAG_LONGS \ + ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) + +struct radix_tree_node { + unsigned int count; + void *slots[RADIX_TREE_MAP_SIZE]; + unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; +}; + +struct radix_tree_path { + struct radix_tree_node *node; + int offset; +}; + +#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) +#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2) + +static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly; + +/* + * Per-cpu pool of preloaded nodes + */ +struct radix_tree_preload { + int nr; + struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; +}; +struct radix_tree_preload radix_tree_preloads = { 0, }; + +static inline gfp_t root_gfp_mask(struct radix_tree_root *root) +{ + return root->gfp_mask & __GFP_BITS_MASK; +} + +static int internal_nodes = 0; +/* + * This assumes that the caller has performed appropriate preallocation, and + * that the caller has pinned this thread of control to the current CPU. + */ +static struct radix_tree_node * +radix_tree_node_alloc(struct radix_tree_root *root) +{ + struct radix_tree_node *ret; + ret = malloc(sizeof(struct radix_tree_node)); + if (ret) { + memset(ret, 0, sizeof(struct radix_tree_node)); + internal_nodes++; + } + return ret; +} + +static inline void +radix_tree_node_free(struct radix_tree_node *node) +{ + internal_nodes--; + free(node); +} + +/* + * Load up this CPU's radix_tree_node buffer with sufficient objects to + * ensure that the addition of a single element in the tree cannot fail. On + * success, return zero, with preemption disabled. On error, return -ENOMEM + * with preemption not disabled. + */ +int radix_tree_preload(gfp_t gfp_mask) +{ + struct radix_tree_preload *rtp; + struct radix_tree_node *node; + int ret = -ENOMEM; + + preempt_disable(); + rtp = &__get_cpu_var(radix_tree_preloads); + while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { + preempt_enable(); + node = radix_tree_node_alloc(NULL); + if (node == NULL) + goto out; + preempt_disable(); + rtp = &__get_cpu_var(radix_tree_preloads); + if (rtp->nr < ARRAY_SIZE(rtp->nodes)) + rtp->nodes[rtp->nr++] = node; + else + radix_tree_node_free(node); + } + ret = 0; +out: + return ret; +} + +static inline void tag_set(struct radix_tree_node *node, unsigned int tag, + int offset) +{ + __set_bit(offset, node->tags[tag]); +} + +static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, + int offset) +{ + __clear_bit(offset, node->tags[tag]); +} + +static inline int tag_get(struct radix_tree_node *node, unsigned int tag, + int offset) +{ + return test_bit(offset, node->tags[tag]); +} + +static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) +{ + root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); +} + + +static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) +{ + root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); +} + +static inline void root_tag_clear_all(struct radix_tree_root *root) +{ + root->gfp_mask &= __GFP_BITS_MASK; +} + +static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) +{ + return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); +} + +/* + * Returns 1 if any slot in the node has this tag set. + * Otherwise returns 0. + */ +static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) +{ + int idx; + for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { + if (node->tags[tag][idx]) + return 1; + } + return 0; +} + +/* + * Return the maximum key which can be store into a + * radix tree with height HEIGHT. + */ +static inline unsigned long radix_tree_maxindex(unsigned int height) +{ + return height_to_maxindex[height]; +} + +/* + * Extend a radix tree so it can store key @index. + */ +static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) +{ + struct radix_tree_node *node; + unsigned int height; + int tag; + + /* Figure out what the height should be. */ + height = root->height + 1; + while (index > radix_tree_maxindex(height)) + height++; + + if (root->rnode == NULL) { + root->height = height; + goto out; + } + + do { + if (!(node = radix_tree_node_alloc(root))) + return -ENOMEM; + + /* Increase the height. */ + node->slots[0] = root->rnode; + + /* Propagate the aggregated tag info into the new root */ + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { + if (root_tag_get(root, tag)) + tag_set(node, tag, 0); + } + + node->count = 1; + root->rnode = node; + root->height++; + } while (height > root->height); +out: + return 0; +} + +/** + * radix_tree_insert - insert into a radix tree + * @root: radix tree root + * @index: index key + * @item: item to insert + * + * Insert an item into the radix tree at position @index. + */ +int radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) +{ + struct radix_tree_node *node = NULL, *slot; + unsigned int height, shift; + int offset; + int error; + + /* Make sure the tree is high enough. */ + if (index > radix_tree_maxindex(root->height)) { + error = radix_tree_extend(root, index); + if (error) + return error; + } + + slot = root->rnode; + height = root->height; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + offset = 0; /* uninitialised var warning */ + while (height > 0) { + if (slot == NULL) { + /* Have to add a child node. */ + if (!(slot = radix_tree_node_alloc(root))) + return -ENOMEM; + if (node) { + node->slots[offset] = slot; + node->count++; + } else + root->rnode = slot; + } + + /* Go a level down */ + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + node = slot; + slot = node->slots[offset]; + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } + + if (slot != NULL) + return -EEXIST; + + if (node) { + node->count++; + node->slots[offset] = item; + BUG_ON(tag_get(node, 0, offset)); + BUG_ON(tag_get(node, 1, offset)); + } else { + root->rnode = item; + BUG_ON(root_tag_get(root, 0)); + BUG_ON(root_tag_get(root, 1)); + } + + return 0; +} + +static inline void **__lookup_slot(struct radix_tree_root *root, + unsigned long index) +{ + unsigned int height, shift; + struct radix_tree_node **slot; + + height = root->height; + + if (index > radix_tree_maxindex(height)) + return NULL; + + if (height == 0 && root->rnode) + return (void **)&root->rnode; + + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + slot = &root->rnode; + + while (height > 0) { + if (*slot == NULL) + return NULL; + + slot = (struct radix_tree_node **) + ((*slot)->slots + + ((index >> shift) & RADIX_TREE_MAP_MASK)); + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } + + return (void **)slot; +} + +/** + * radix_tree_lookup_slot - lookup a slot in a radix tree + * @root: radix tree root + * @index: index key + * + * Lookup the slot corresponding to the position @index in the radix tree + * @root. This is useful for update-if-exists operations. + */ +void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) +{ + return __lookup_slot(root, index); +} + +/** + * radix_tree_lookup - perform lookup operation on a radix tree + * @root: radix tree root + * @index: index key + * + * Lookup the item at the position @index in the radix tree @root. + */ +void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) +{ + void **slot; + + slot = __lookup_slot(root, index); + return slot != NULL ? *slot : NULL; +} + +/** + * radix_tree_tag_set - set a tag on a radix tree node + * @root: radix tree root + * @index: index key + * @tag: tag index + * + * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) + * corresponding to @index in the radix tree. From + * the root all the way down to the leaf node. + * + * Returns the address of the tagged item. Setting a tag on a not-present + * item is a bug. + */ +void *radix_tree_tag_set(struct radix_tree_root *root, + unsigned long index, unsigned int tag) +{ + unsigned int height, shift; + struct radix_tree_node *slot; + + height = root->height; + BUG_ON(index > radix_tree_maxindex(height)); + + slot = root->rnode; + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + + while (height > 0) { + int offset; + + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + if (!tag_get(slot, tag, offset)) + tag_set(slot, tag, offset); + slot = slot->slots[offset]; + BUG_ON(slot == NULL); + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } + + /* set the root's tag bit */ + if (slot && !root_tag_get(root, tag)) + root_tag_set(root, tag); + + return slot; +} + +/** + * radix_tree_tag_clear - clear a tag on a radix tree node + * @root: radix tree root + * @index: index key + * @tag: tag index + * + * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) + * corresponding to @index in the radix tree. If + * this causes the leaf node to have no tags set then clear the tag in the + * next-to-leaf node, etc. + * + * Returns the address of the tagged item on success, else NULL. ie: + * has the same return value and semantics as radix_tree_lookup(). + */ +void *radix_tree_tag_clear(struct radix_tree_root *root, + unsigned long index, unsigned int tag) +{ + struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; + struct radix_tree_node *slot = NULL; + unsigned int height, shift; + + height = root->height; + if (index > radix_tree_maxindex(height)) + goto out; + + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + pathp->node = NULL; + slot = root->rnode; + + while (height > 0) { + int offset; + + if (slot == NULL) + goto out; + + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + pathp[1].offset = offset; + pathp[1].node = slot; + slot = slot->slots[offset]; + pathp++; + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } + + if (slot == NULL) + goto out; + + while (pathp->node) { + if (!tag_get(pathp->node, tag, pathp->offset)) + goto out; + tag_clear(pathp->node, tag, pathp->offset); + if (any_tag_set(pathp->node, tag)) + goto out; + pathp--; + } + + /* clear the root's tag bit */ + if (root_tag_get(root, tag)) + root_tag_clear(root, tag); + +out: + return slot; +} + +#ifndef __KERNEL__ /* Only the test harness uses this at present */ +/** + * radix_tree_tag_get - get a tag on a radix tree node + * @root: radix tree root + * @index: index key + * @tag: tag index (< RADIX_TREE_MAX_TAGS) + * + * Return values: + * + * 0: tag not present or not set + * 1: tag set + */ +int radix_tree_tag_get(struct radix_tree_root *root, + unsigned long index, unsigned int tag) +{ + unsigned int height, shift; + struct radix_tree_node *slot; + int saw_unset_tag = 0; + + height = root->height; + if (index > radix_tree_maxindex(height)) + return 0; + + /* check the root's tag bit */ + if (!root_tag_get(root, tag)) + return 0; + + if (height == 0) + return 1; + + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + slot = root->rnode; + + for ( ; ; ) { + int offset; + + if (slot == NULL) + return 0; + + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + + /* + * This is just a debug check. Later, we can bale as soon as + * we see an unset tag. + */ + if (!tag_get(slot, tag, offset)) + saw_unset_tag = 1; + if (height == 1) { + int ret = tag_get(slot, tag, offset); + + BUG_ON(ret && saw_unset_tag); + return !!ret; + } + slot = slot->slots[offset]; + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } +} +#endif + +static unsigned int +__lookup(struct radix_tree_root *root, void **results, unsigned long index, + unsigned int max_items, unsigned long *next_index) +{ + unsigned int nr_found = 0; + unsigned int shift, height; + struct radix_tree_node *slot; + unsigned long i; + + height = root->height; + if (height == 0) { + if (root->rnode && index == 0) + results[nr_found++] = root->rnode; + goto out; + } + + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + slot = root->rnode; + + for ( ; height > 1; height--) { + + for (i = (index >> shift) & RADIX_TREE_MAP_MASK ; + i < RADIX_TREE_MAP_SIZE; i++) { + if (slot->slots[i] != NULL) + break; + index &= ~((1UL << shift) - 1); + index += 1UL << shift; + if (index == 0) + goto out; /* 32-bit wraparound */ + } + if (i == RADIX_TREE_MAP_SIZE) + goto out; + + shift -= RADIX_TREE_MAP_SHIFT; + slot = slot->slots[i]; + } + + /* Bottom level: grab some items */ + for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { + index++; + if (slot->slots[i]) { + results[nr_found++] = slot->slots[i]; + if (nr_found == max_items) + goto out; + } + } +out: + *next_index = index; + return nr_found; +} + +/** + * radix_tree_gang_lookup - perform multiple lookup on a radix tree + * @root: radix tree root + * @results: where the results of the lookup are placed + * @first_index: start the lookup from this key + * @max_items: place up to this many items at *results + * + * Performs an index-ascending scan of the tree for present items. Places + * them at *@results and returns the number of items which were placed at + * *@results. + * + * The implementation is naive. + */ +unsigned int +radix_tree_gang_lookup(struct radix_tree_root *root, void **results, + unsigned long first_index, unsigned int max_items) +{ + const unsigned long max_index = radix_tree_maxindex(root->height); + unsigned long cur_index = first_index; + unsigned int ret = 0; + + while (ret < max_items) { + unsigned int nr_found; + unsigned long next_index; /* Index of next search */ + + if (cur_index > max_index) + break; + nr_found = __lookup(root, results + ret, cur_index, + max_items - ret, &next_index); + ret += nr_found; + if (next_index == 0) + break; + cur_index = next_index; + } + return ret; +} + +/* + * FIXME: the two tag_get()s here should use find_next_bit() instead of + * open-coding the search. + */ +static unsigned int +__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index, + unsigned int max_items, unsigned long *next_index, unsigned int tag) +{ + unsigned int nr_found = 0; + unsigned int shift; + unsigned int height = root->height; + struct radix_tree_node *slot; + + if (height == 0) { + if (root->rnode && index == 0) + results[nr_found++] = root->rnode; + goto out; + } + + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + slot = root->rnode; + + do { + unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK; + + for ( ; i < RADIX_TREE_MAP_SIZE; i++) { + if (tag_get(slot, tag, i)) { + BUG_ON(slot->slots[i] == NULL); + break; + } + index &= ~((1UL << shift) - 1); + index += 1UL << shift; + if (index == 0) + goto out; /* 32-bit wraparound */ + } + if (i == RADIX_TREE_MAP_SIZE) + goto out; + height--; + if (height == 0) { /* Bottom level: grab some items */ + unsigned long j = index & RADIX_TREE_MAP_MASK; + + for ( ; j < RADIX_TREE_MAP_SIZE; j++) { + index++; + if (tag_get(slot, tag, j)) { + BUG_ON(slot->slots[j] == NULL); + results[nr_found++] = slot->slots[j]; + if (nr_found == max_items) + goto out; + } + } + } + shift -= RADIX_TREE_MAP_SHIFT; + slot = slot->slots[i]; + } while (height > 0); +out: + *next_index = index; + return nr_found; +} + +/** + * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree + * based on a tag + * @root: radix tree root + * @results: where the results of the lookup are placed + * @first_index: start the lookup from this key + * @max_items: place up to this many items at *results + * @tag: the tag index (< RADIX_TREE_MAX_TAGS) + * + * Performs an index-ascending scan of the tree for present items which + * have the tag indexed by @tag set. Places the items at *@results and + * returns the number of items which were placed at *@results. + */ +unsigned int +radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, + unsigned long first_index, unsigned int max_items, + unsigned int tag) +{ + const unsigned long max_index = radix_tree_maxindex(root->height); + unsigned long cur_index = first_index; + unsigned int ret = 0; + + /* check the root's tag bit */ + if (!root_tag_get(root, tag)) + return 0; + + while (ret < max_items) { + unsigned int nr_found; + unsigned long next_index; /* Index of next search */ + + if (cur_index > max_index) + break; + nr_found = __lookup_tag(root, results + ret, cur_index, + max_items - ret, &next_index, tag); + ret += nr_found; + if (next_index == 0) + break; + cur_index = next_index; + } + return ret; +} + +/** + * radix_tree_shrink - shrink height of a radix tree to minimal + * @root radix tree root + */ +static inline void radix_tree_shrink(struct radix_tree_root *root) +{ + /* try to shrink tree height */ + while (root->height > 0 && + root->rnode->count == 1 && + root->rnode->slots[0]) { + struct radix_tree_node *to_free = root->rnode; + + root->rnode = to_free->slots[0]; + root->height--; + /* must only free zeroed nodes into the slab */ + tag_clear(to_free, 0, 0); + tag_clear(to_free, 1, 0); + to_free->slots[0] = NULL; + to_free->count = 0; + radix_tree_node_free(to_free); + } +} + +/** + * radix_tree_delete - delete an item from a radix tree + * @root: radix tree root + * @index: index key + * + * Remove the item at @index from the radix tree rooted at @root. + * + * Returns the address of the deleted item, or NULL if it was not present. + */ +void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) +{ + struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; + struct radix_tree_node *slot = NULL; + unsigned int height, shift; + int tag; + int offset; + + height = root->height; + if (index > radix_tree_maxindex(height)) + goto out; + + slot = root->rnode; + if (height == 0 && root->rnode) { + root_tag_clear_all(root); + root->rnode = NULL; + goto out; + } + + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + pathp->node = NULL; + + do { + if (slot == NULL) + goto out; + + pathp++; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + pathp->offset = offset; + pathp->node = slot; + slot = slot->slots[offset]; + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } while (height > 0); + + if (slot == NULL) + goto out; + + /* + * Clear all tags associated with the just-deleted item + */ + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { + if (tag_get(pathp->node, tag, pathp->offset)) + radix_tree_tag_clear(root, index, tag); + } + + /* Now free the nodes we do not need anymore */ + while (pathp->node) { + pathp->node->slots[pathp->offset] = NULL; + pathp->node->count--; + + if (pathp->node->count) { + if (pathp->node == root->rnode) + radix_tree_shrink(root); + goto out; + } + + /* Node with zero slots in use so free it */ + radix_tree_node_free(pathp->node); + + pathp--; + } + root_tag_clear_all(root); + root->height = 0; + root->rnode = NULL; + +out: + return slot; +} + +/** + * radix_tree_tagged - test whether any items in the tree are tagged + * @root: radix tree root + * @tag: tag to test + */ +int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) +{ + return root_tag_get(root, tag); +} + +static unsigned long __maxindex(unsigned int height) +{ + unsigned int tmp = height * RADIX_TREE_MAP_SHIFT; + unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1; + + if (tmp >= RADIX_TREE_INDEX_BITS) + index = ~0UL; + return index; +} + +static void radix_tree_init_maxindex(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) + height_to_maxindex[i] = __maxindex(i); +} + +void radix_tree_init(void) +{ + radix_tree_init_maxindex(); +} diff --git a/fs/btrfs/radix-tree.h b/fs/btrfs/radix-tree.h new file mode 100644 index 000000000000..c3ce88137f77 --- /dev/null +++ b/fs/btrfs/radix-tree.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2001 Momchil Velikov + * Portions Copyright (C) 2001 Christoph Hellwig + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef _LINUX_RADIX_TREE_H +#define _LINUX_RADIX_TREE_H + +#define RADIX_TREE_MAX_TAGS 2 + +/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */ +struct radix_tree_root { + unsigned int height; + gfp_t gfp_mask; + struct radix_tree_node *rnode; +}; + +#define RADIX_TREE_INIT(mask) { \ + .height = 0, \ + .gfp_mask = (mask), \ + .rnode = NULL, \ +} + +#define RADIX_TREE(name, mask) \ + struct radix_tree_root name = RADIX_TREE_INIT(mask) + +#define INIT_RADIX_TREE(root, mask) \ +do { \ + (root)->height = 0; \ + (root)->gfp_mask = (mask); \ + (root)->rnode = NULL; \ +} while (0) + +int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); +void *radix_tree_lookup(struct radix_tree_root *, unsigned long); +void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); +void *radix_tree_delete(struct radix_tree_root *, unsigned long); +unsigned int +radix_tree_gang_lookup(struct radix_tree_root *root, void **results, + unsigned long first_index, unsigned int max_items); +int radix_tree_preload(gfp_t gfp_mask); +void radix_tree_init(void); +void *radix_tree_tag_set(struct radix_tree_root *root, + unsigned long index, unsigned int tag); +void *radix_tree_tag_clear(struct radix_tree_root *root, + unsigned long index, unsigned int tag); +int radix_tree_tag_get(struct radix_tree_root *root, + unsigned long index, unsigned int tag); +unsigned int +radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, + unsigned long first_index, unsigned int max_items, + unsigned int tag); +int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); + +static inline void radix_tree_preload_end(void) +{ + preempt_enable(); +} + +#endif /* _LINUX_RADIX_TREE_H */ From 74123bd72a4e34c66e5ad95ce233dd352bebc572 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Feb 2007 11:05:29 -0500 Subject: [PATCH 0004/2985] Btrfs: Commenting/cleanup Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 249 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 186 insertions(+), 63 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6f0522f21082..6b64f49a0279 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -20,6 +20,11 @@ static void release_path(struct ctree_root *root, struct ctree_path *p) } } +/* + * The leaf data grows from end-to-front in the node. + * this returns the address of the start of the last item, + * which is the stop of the leaf data stack + */ static inline unsigned int leaf_data_end(struct leaf *leaf) { unsigned int nr = leaf->header.nritems; @@ -28,6 +33,11 @@ static inline unsigned int leaf_data_end(struct leaf *leaf) return leaf->items[nr-1].offset; } +/* + * The space between the end of the leaf items and + * the start of the leaf data. IOW, how much room + * the leaf has left for both items and data + */ static inline int leaf_free_space(struct leaf *leaf) { int data_end = leaf_data_end(leaf); @@ -36,6 +46,9 @@ static inline int leaf_free_space(struct leaf *leaf) return (char *)(leaf->data + data_end) - (char *)items_end; } +/* + * compare two keys in a memcmp fashion + */ int comp_keys(struct key *k1, struct key *k2) { if (k1->objectid > k2->objectid) @@ -52,6 +65,16 @@ int comp_keys(struct key *k1, struct key *k2) return -1; return 0; } + +/* + * search for key in the array p. items p are item_size apart + * and there are 'max' items in p + * the slot in the array is returned via slot, and it points to + * the place where you would insert key if it is not found in + * the array. + * + * slot may point to max if the key is bigger than all of the keys + */ int generic_bin_search(char *p, int item_size, struct key *key, int max, int *slot) { @@ -92,6 +115,14 @@ int bin_search(struct node *c, struct key *key, int *slot) return -1; } +/* + * look for key in the tree. path is filled in with nodes along the way + * if key is found, we return zero and you can find the item in the leaf + * level of the path (level 0) + * + * If the key isn't found, the path points to the slot where it should + * be inserted. + */ int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) { struct tree_buffer *b = root->node; @@ -120,12 +151,18 @@ int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) return -1; } +/* + * adjust the pointers going up the tree, starting at level + * making sure the right key of each node is points to 'key'. + * This is used after shifting pointers to the left, so it stops + * fixing up pointers when a given leaf/node is not in slot 0 of the + * higher levels + */ static void fixup_low_keys(struct ctree_root *root, struct ctree_path *path, struct key *key, int level) { int i; - /* adjust the pointers going up the tree */ for (i = level; i < MAX_LEVEL; i++) { struct node *t; int tslot = path->slots[i]; @@ -139,64 +176,16 @@ static void fixup_low_keys(struct ctree_root *root, } } -int __insert_ptr(struct ctree_root *root, - struct ctree_path *path, struct key *key, - u64 blocknr, int slot, int level) -{ - struct node *c; - struct node *lower; - struct key *lower_key; - int nritems; - /* need a new root */ - if (!path->nodes[level]) { - struct tree_buffer *t; - t = alloc_free_block(root); - c = &t->node; - memset(c, 0, sizeof(c)); - c->header.nritems = 2; - c->header.flags = node_level(level); - c->header.blocknr = t->blocknr; - lower = &path->nodes[level-1]->node; - if (is_leaf(lower->header.flags)) - lower_key = &((struct leaf *)lower)->items[0].key; - else - lower_key = lower->keys; - memcpy(c->keys, lower_key, sizeof(struct key)); - memcpy(c->keys + 1, key, sizeof(struct key)); - c->blockptrs[0] = path->nodes[level-1]->blocknr; - c->blockptrs[1] = blocknr; - /* the path has an extra ref to root->node */ - tree_block_release(root, root->node); - root->node = t; - t->count++; - write_tree_block(root, t); - path->nodes[level] = t; - path->slots[level] = 0; - if (c->keys[1].objectid == 0) - BUG(); - return 0; - } - lower = &path->nodes[level]->node; - nritems = lower->header.nritems; - if (slot > nritems) - BUG(); - if (nritems == NODEPTRS_PER_BLOCK) - BUG(); - if (slot != nritems) { - memmove(lower->keys + slot + 1, lower->keys + slot, - (nritems - slot) * sizeof(struct key)); - memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot, - (nritems - slot) * sizeof(u64)); - } - memcpy(lower->keys + slot, key, sizeof(struct key)); - lower->blockptrs[slot] = blocknr; - lower->header.nritems++; - if (lower->keys[1].objectid == 0) - BUG(); - write_tree_block(root, path->nodes[level]); - return 0; -} - +/* + * try to push data from one node into the next node left in the + * tree. The src node is found at specified level in the path. + * If some bytes were pushed, return 0, otherwise return 1. + * + * Lower nodes/leaves in the path are not touched, higher nodes may + * be modified to reflect the push. + * + * The path is altered to reflect the push. + */ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) { int slot; @@ -259,6 +248,16 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) return 0; } +/* + * try to push data from one node into the next node right in the + * tree. The src node is found at specified level in the path. + * If some bytes were pushed, return 0, otherwise return 1. + * + * Lower nodes/leaves in the path are not touched, higher nodes may + * be modified to reflect the push. + * + * The path is altered to reflect the push. + */ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) { int slot; @@ -270,8 +269,11 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) int dst_nritems; int src_nritems; + /* can't push from the root */ if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) return 1; + + /* only try to push inside the node higher up */ slot = path->slots[level + 1]; if (slot == NODEPTRS_PER_BLOCK - 1) return 1; @@ -315,7 +317,7 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) write_tree_block(root, t); write_tree_block(root, src_buffer); - /* then fixup the leaf pointer in the path */ + /* then fixup the pointers in the path */ if (path->slots[level] >= src->header.nritems) { path->slots[level] -= src->header.nritems; tree_block_release(root, path->nodes[level]); @@ -327,6 +329,76 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) return 0; } +/* + * worker function to insert a single pointer in a node. + * the node should have enough room for the pointer already + * slot and level indicate where you want the key to go, and + * blocknr is the block the key points to. + */ +int __insert_ptr(struct ctree_root *root, + struct ctree_path *path, struct key *key, + u64 blocknr, int slot, int level) +{ + struct node *c; + struct node *lower; + struct key *lower_key; + int nritems; + /* need a new root */ + if (!path->nodes[level]) { + struct tree_buffer *t; + t = alloc_free_block(root); + c = &t->node; + memset(c, 0, sizeof(c)); + c->header.nritems = 2; + c->header.flags = node_level(level); + c->header.blocknr = t->blocknr; + lower = &path->nodes[level-1]->node; + if (is_leaf(lower->header.flags)) + lower_key = &((struct leaf *)lower)->items[0].key; + else + lower_key = lower->keys; + memcpy(c->keys, lower_key, sizeof(struct key)); + memcpy(c->keys + 1, key, sizeof(struct key)); + c->blockptrs[0] = path->nodes[level-1]->blocknr; + c->blockptrs[1] = blocknr; + /* the path has an extra ref to root->node */ + tree_block_release(root, root->node); + root->node = t; + t->count++; + write_tree_block(root, t); + path->nodes[level] = t; + path->slots[level] = 0; + if (c->keys[1].objectid == 0) + BUG(); + return 0; + } + lower = &path->nodes[level]->node; + nritems = lower->header.nritems; + if (slot > nritems) + BUG(); + if (nritems == NODEPTRS_PER_BLOCK) + BUG(); + if (slot != nritems) { + memmove(lower->keys + slot + 1, lower->keys + slot, + (nritems - slot) * sizeof(struct key)); + memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot, + (nritems - slot) * sizeof(u64)); + } + memcpy(lower->keys + slot, key, sizeof(struct key)); + lower->blockptrs[slot] = blocknr; + lower->header.nritems++; + if (lower->keys[1].objectid == 0) + BUG(); + write_tree_block(root, path->nodes[level]); + return 0; +} + + +/* + * insert a key,blocknr pair into the tree at a given level + * If the node at that level in the path doesn't have room, + * it is split or shifted as appropriate. + */ int insert_ptr(struct ctree_root *root, struct ctree_path *path, struct key *key, u64 blocknr, int level) @@ -340,6 +412,15 @@ int insert_ptr(struct ctree_root *root, int mid; int bal_start = -1; + /* + * check to see if we need to make room in the node for this + * pointer. If we do, keep walking the tree, making sure there + * is enough room in each level for the required insertions. + * + * The bal array is filled in with any nodes to be inserted + * due to splitting. Once we've done all the splitting required + * do the inserts based on the data in the bal array. + */ memset(bal, 0, ARRAY_SIZE(bal)); while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) { c = &t->node; @@ -373,6 +454,11 @@ int insert_ptr(struct ctree_root *root, bal_level += 1; t = path->nodes[bal_level]; } + /* + * bal_start tells us the first level in the tree that needed to + * be split. Go through the bal array inserting the new nodes + * as needed. The path is fixed as we go. + */ while(bal_start > 0) { b_buffer = bal[bal_start]; c = &path->nodes[bal_start]->node; @@ -390,10 +476,16 @@ int insert_ptr(struct ctree_root *root, if (!bal[bal_start]) break; } + /* Now that the tree has room, insert the requested pointer */ return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1, level); } +/* + * how many bytes are required to store the items in a leaf. start + * and nr indicate which items in the leaf to check. This totals up the + * space used both by the item structs and the item data + */ int leaf_space_used(struct leaf *l, int start, int nr) { int data_len; @@ -407,6 +499,10 @@ int leaf_space_used(struct leaf *l, int start, int nr) return data_len; } +/* + * push some data in the path leaf to the left, trying to free up at + * least data_size bytes. returns zero if the push worked, nonzero otherwise + */ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, int data_size) { @@ -498,6 +594,10 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, return 0; } +/* + * split the path's leaf in two, making sure there is at least data_size + * available for the resulting leaf level of the path. + */ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) { struct tree_buffer *l_buf = path->nodes[0]; @@ -548,9 +648,10 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) l->data + leaf_data_end(l), data_copy_size); rt_data_off = LEAF_DATA_SIZE - (l->items[mid].offset + l->items[mid].size); - for (i = 0; i < right->header.nritems; i++) { + + for (i = 0; i < right->header.nritems; i++) right->items[i].offset += rt_data_off; - } + l->header.nritems = mid; ret = insert_ptr(root, path, &right->items[0].key, right_buffer->blocknr, 1); @@ -570,6 +671,10 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) return ret; } +/* + * Given a key and some data, insert an item into the tree. + * This does all the path init required, making room in the tree if needed. + */ int insert_item(struct ctree_root *root, struct key *key, void *data, int data_size) { @@ -582,6 +687,7 @@ int insert_item(struct ctree_root *root, struct key *key, unsigned int data_end; struct ctree_path path; + /* create a root if there isn't one */ if (!root->node) { struct tree_buffer *t; t = alloc_free_block(root); @@ -602,6 +708,8 @@ int insert_item(struct ctree_root *root, struct key *key, slot_orig = path.slots[0]; leaf_buf = path.nodes[0]; leaf = &leaf_buf->leaf; + + /* make room if needed */ if (leaf_free_space(leaf) < sizeof(struct item) + data_size) { split_leaf(root, &path, data_size); leaf_buf = path.nodes[0]; @@ -638,6 +746,7 @@ int insert_item(struct ctree_root *root, struct key *key, data_end, old_data - data_end); data_end = old_data; } + /* copy the new data in */ memcpy(&leaf->items[slot].key, key, sizeof(struct key)); leaf->items[slot].offset = data_end - data_size; leaf->items[slot].size = data_size; @@ -650,6 +759,14 @@ int insert_item(struct ctree_root *root, struct key *key, return 0; } +/* + * delete the pointer from a given level in the path. The path is not + * fixed up, so after calling this it is not valid at that level. + * + * If the delete empties a node, the node is removed from the tree, + * continuing all the way the root if required. The root is converted into + * a leaf if all the nodes are emptied. + */ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) { int slot; @@ -705,6 +822,10 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) return 0; } +/* + * delete the item at the leaf level in path. If that empties + * the leaf, remove it from the tree + */ int del_item(struct ctree_root *root, struct ctree_path *path) { int slot; @@ -732,6 +853,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) (leaf->header.nritems - slot - 1)); } leaf->header.nritems -= 1; + /* delete the leaf if we've emptied it */ if (leaf->header.nritems == 0) { if (leaf_buf == root->node) { leaf->header.flags = node_level(0); @@ -742,6 +864,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) if (slot == 0) fixup_low_keys(root, path, &leaf->items[0].key, 1); write_tree_block(root, leaf_buf); + /* delete the leaf if it is mostly empty */ if (leaf_space_used(leaf, 0, leaf->header.nritems) < LEAF_DATA_SIZE / 4) { /* push_leaf_left fixes the path. @@ -837,7 +960,7 @@ int main() { int i; int num; int ret; - int run_size = 1000000; + int run_size = 25000; int max_key = 100000000; int tree_size = 0; struct ctree_path path; From d97e63b69ef21c02b67e20e41d9968b0e503572e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Feb 2007 16:40:44 -0500 Subject: [PATCH 0005/2985] Btrfs: early extent mapping support Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 4 +- fs/btrfs/ctree.c | 152 +++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/ctree.h | 34 +++++++++- fs/btrfs/disk-io.c | 76 ++++++++++++++++------- fs/btrfs/disk-io.h | 3 + 5 files changed, 236 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 63360212a075..df065dd2dce7 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -4,8 +4,8 @@ CFLAGS= -g -Wall .c.o: $(CC) $(CFLAGS) -c $< -ctree: ctree.o disk-io.h ctree.h disk-io.o radix-tree.o radix-tree.h - gcc $(CFLAGS) -o ctree ctree.o disk-io.o radix-tree.o +ctree: ctree.o disk-io.h ctree.h disk-io.o radix-tree.o radix-tree.h mkfs.o + gcc $(CFLAGS) -o ctree ctree.o disk-io.o radix-tree.o mkfs.o clean: rm ctree *.o diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6b64f49a0279..2177744dedd3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5,6 +5,8 @@ #include "ctree.h" #include "disk-io.h" +static int refill_alloc_extent(struct ctree_root *root); + static inline void init_path(struct ctree_path *p) { memset(p, 0, sizeof(*p)); @@ -29,7 +31,7 @@ static inline unsigned int leaf_data_end(struct leaf *leaf) { unsigned int nr = leaf->header.nritems; if (nr == 0) - return ARRAY_SIZE(leaf->data); + return sizeof(leaf->data); return leaf->items[nr-1].offset; } @@ -421,7 +423,7 @@ int insert_ptr(struct ctree_root *root, * due to splitting. Once we've done all the splitting required * do the inserts based on the data in the bal array. */ - memset(bal, 0, ARRAY_SIZE(bal)); + memset(bal, 0, sizeof(bal)); while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) { c = &t->node; if (push_node_left(root, path, @@ -756,6 +758,7 @@ int insert_item(struct ctree_root *root, struct key *key, if (leaf_free_space(leaf) < 0) BUG(); release_path(root, &path); + refill_alloc_extent(root); return 0; } @@ -884,6 +887,135 @@ int del_item(struct ctree_root *root, struct ctree_path *path) return 0; } +int next_leaf(struct ctree_root *root, struct ctree_path *path) +{ + int slot; + int level = 1; + u64 blocknr; + struct tree_buffer *c; + struct tree_buffer *next; + + while(level < MAX_LEVEL) { + if (!path->nodes[level]) + return -1; + slot = path->slots[level] + 1; + c = path->nodes[level]; + if (slot >= c->node.header.nritems) { + level++; + continue; + } + blocknr = c->node.blockptrs[slot]; + next = read_tree_block(root, blocknr); + break; + } + path->slots[level] = slot; + while(1) { + level--; + c = path->nodes[level]; + tree_block_release(root, c); + path->nodes[level] = next; + path->slots[level] = 0; + if (!level) + break; + next = read_tree_block(root, next->node.blockptrs[0]); + } + return 0; +} + +int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, + u64 search_end, u64 owner, struct key *ins) +{ + struct ctree_path path; + struct key *key; + int ret; + u64 hole_size = 0; + int slot = 0; + u64 last_block; + int start_found = 0; + struct leaf *l; + struct extent_item extent_item; + + init_path(&path); + ins->objectid = search_start; + ins->offset = 0; + ins->flags = 0; + + ret = search_slot(root, ins, &path); + while (1) { + l = &path.nodes[0]->leaf; + slot = path.slots[0]; + if (!l) { + // FIXME allocate root + } + if (slot >= l->header.nritems) { + ret = next_leaf(root, &path); + if (ret == 0) + continue; + if (!start_found) { + ins->objectid = search_start; + ins->offset = num_blocks; + hole_size = search_end - search_start; + goto insert; + } + ins->objectid = last_block; + ins->offset = num_blocks; + hole_size = search_end - last_block; + goto insert; + } + key = &l->items[slot].key; + if (start_found) { + hole_size = key->objectid - last_block; + if (hole_size > num_blocks) { + ins->objectid = last_block; + ins->offset = num_blocks; + goto insert; + } + } else + start_found = 1; + last_block = key->objectid + key->offset; + path.slots[0]++; + printf("last block is not %lu\n", last_block); + } + // FIXME -ENOSPC +insert: + extent_item.refs = 1; + extent_item.owner = owner; + ret = insert_item(root, ins, &extent_item, sizeof(extent_item)); + return ret; +} + +static int refill_alloc_extent(struct ctree_root *root) +{ + struct alloc_extent *ae = root->alloc_extent; + struct key key; + int ret; + int min_blocks = MAX_LEVEL * 2; + + printf("refill alloc root %p, numused %lu total %lu\n", root, ae->num_used, ae->num_blocks); + if (ae->num_blocks > ae->num_used && ae->num_blocks - ae->num_used > + min_blocks) + return 0; + ae = root->reserve_extent; + if (ae->num_blocks > ae->num_used) { + if (root->alloc_extent->num_blocks == 0) { + /* we should swap reserve/alloc_extent when alloc + * fills up + */ + BUG(); + } + if (ae->num_blocks - ae->num_used < min_blocks) + BUG(); + return 0; + } + // FIXME, this recurses + ret = alloc_extent(root->extent_root, + min_blocks * 2, 0, (unsigned long)-1, 0, &key); + ae->blocknr = key.objectid; + ae->num_blocks = key.offset; + ae->num_used = 0; + return ret; +} + void print_leaf(struct leaf *l) { int i; @@ -948,8 +1080,8 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t) /* for testing only */ int next_key(int i, int max_key) { - return rand() % max_key; - // return i; + // return rand() % max_key; + return i; } int main() { @@ -960,7 +1092,7 @@ int main() { int i; int num; int ret; - int run_size = 25000; + int run_size = 256; int max_key = 100000000; int tree_size = 0; struct ctree_path path; @@ -980,10 +1112,20 @@ int main() { ins.objectid = num; ins.offset = 0; ins.flags = 0; + printf("insert %d\n", i); ret = insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; + printf("done insert %d\n", i); } + printf("root used: %lu\n", root->alloc_extent->num_used); + printf("root tree\n"); + print_tree(root, root->node); + printf("map tree\n"); + printf("map used: %lu\n", root->extent_root->alloc_extent->num_used); + print_tree(root->extent_root, root->extent_root->node); + exit(1); + close_ctree(root); root = open_ctree("dbfile"); printf("starting search\n"); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 586bf1866042..b737925be314 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,7 +1,7 @@ #ifndef __CTREE__ #define __CTREE__ -#define CTREE_BLOCKSIZE 4096 +#define CTREE_BLOCKSIZE 256 struct key { u64 objectid; @@ -22,18 +22,41 @@ struct header { #define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct header)) / \ (sizeof(struct key) + sizeof(u64))) -#define LEVEL_BITS 3 -#define MAX_LEVEL (1 << LEVEL_BITS) +#define MAX_LEVEL 8 #define node_level(f) ((f) & (MAX_LEVEL-1)) #define is_leaf(f) (node_level(f) == 0) struct tree_buffer; + +struct alloc_extent { + u64 blocknr; + u64 num_blocks; + u64 num_used; +} __attribute__ ((__packed__)); + struct ctree_root { struct tree_buffer *node; + struct ctree_root *extent_root; + struct alloc_extent *alloc_extent; + struct alloc_extent *reserve_extent; int fp; struct radix_tree_root cache_radix; + struct alloc_extent ai1; + struct alloc_extent ai2; }; +struct ctree_root_info { + u64 fsid[2]; /* FS specific uuid */ + u64 blocknr; /* blocknr of this block */ + u64 objectid; /* inode number of this root */ + u64 tree_root; /* the tree root */ + u32 csum; + u32 ham; + struct alloc_extent alloc_extent; + struct alloc_extent reserve_extent; + u64 snapuuid[2]; /* root specific uuid */ +} __attribute__ ((__packed__)); + struct item { struct key key; u16 offset; @@ -55,6 +78,11 @@ struct node { u64 blockptrs[NODEPTRS_PER_BLOCK]; } __attribute__ ((__packed__)); +struct extent_item { + u32 refs; + u64 owner; +} __attribute__ ((__packed__)); + struct ctree_path { struct tree_buffer *nodes[MAX_LEVEL]; int slots[MAX_LEVEL]; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8d51a07051d9..653f18aab330 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -12,25 +12,27 @@ static int allocated_blocks = 0; -struct ctree_header { - u64 root_block; -} __attribute__ ((__packed__)); - static int get_free_block(struct ctree_root *root, u64 *block) { struct stat st; int ret; + if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) + return -1; + + *block = root->alloc_extent->blocknr + root->alloc_extent->num_used; + root->alloc_extent->num_used += 1; + if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) { + struct alloc_extent *ae = root->alloc_extent; + root->alloc_extent = root->reserve_extent; + root->reserve_extent = ae; + ae->num_blocks = 0; + } st.st_size = 0; ret = fstat(root->fp, &st); - if (st.st_size > sizeof(struct ctree_header)) { - *block = (st.st_size - - sizeof(struct ctree_header)) / CTREE_BLOCKSIZE; - } else { - *block = 0; - } - ret = ftruncate(root->fp, sizeof(struct ctree_header) + (*block + 1) * - CTREE_BLOCKSIZE); + if (st.st_size < (*block + 1) * CTREE_BLOCKSIZE) + ret = ftruncate(root->fp, + (*block + 1) * CTREE_BLOCKSIZE); return ret; } @@ -72,7 +74,7 @@ struct tree_buffer *alloc_free_block(struct ctree_root *root) struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) { - loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header); + loff_t offset = blocknr * CTREE_BLOCKSIZE; struct tree_buffer *buf; int ret; @@ -101,7 +103,7 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) { u64 blocknr = buf->blocknr; - loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header); + loff_t offset = blocknr * CTREE_BLOCKSIZE; int ret; if (buf->blocknr != buf->node.header.blocknr) @@ -114,11 +116,32 @@ int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) return 0; } +struct ctree_super_block { + struct ctree_root_info root_info; + struct ctree_root_info extent_info; +} __attribute__ ((__packed__)); + +static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, + struct ctree_root_info *info, int fp) +{ + root->fp = fp; + root->node = read_tree_block(root, info->tree_root); + root->extent_root = extent_root; + memcpy(&root->ai1, &info->alloc_extent, sizeof(info->alloc_extent)); + memcpy(&root->ai2, &info->reserve_extent, sizeof(info->reserve_extent)); + root->alloc_extent = &root->ai1; + root->reserve_extent = &root->ai2; + INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); + printf("setup done reading root %p, used %lu\n", root, root->alloc_extent->num_used); + return 0; +} + struct ctree_root *open_ctree(char *filename) { struct ctree_root *root = malloc(sizeof(struct ctree_root)); + struct ctree_root *extent_root = malloc(sizeof(struct ctree_root)); + struct ctree_super_block super; int fp; - u64 root_block; int ret; fp = open(filename, O_CREAT | O_RDWR); @@ -126,14 +149,20 @@ struct ctree_root *open_ctree(char *filename) free(root); return NULL; } - root->fp = fp; - INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); - ret = pread(fp, &root_block, sizeof(u64), 0); - if (ret == sizeof(u64)) { - printf("reading root node at block %lu\n", root_block); - root->node = read_tree_block(root, root_block); - } else - root->node = NULL; + ret = pread(fp, &super, sizeof(struct ctree_super_block), + CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + if (ret == 0) { + ret = mkfs(fp); + if (ret) + return NULL; + ret = pread(fp, &super, sizeof(struct ctree_super_block), + CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + if (ret != sizeof(struct ctree_super_block)) + return NULL; + } + BUG_ON(ret < 0); + __setup_root(root, extent_root, &super.root_info, fp); + __setup_root(extent_root, extent_root, &super.extent_info, fp); return root; } @@ -160,6 +189,7 @@ int update_root_block(struct ctree_root *root) void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) { + return; buf->count--; if (buf->count == 0) { if (!radix_tree_lookup(&root->cache_radix, buf->blocknr)) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index ee95fa05c4a3..41acb65eee19 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -17,5 +17,8 @@ int close_ctree(struct ctree_root *root); void tree_block_release(struct ctree_root *root, struct tree_buffer *buf); struct tree_buffer *alloc_free_block(struct ctree_root *root); int update_root_block(struct ctree_root *root); +int mkfs(int fd); + +#define CTREE_SUPER_INFO_OFFSET(bs) (16 * (bs)) #endif From 06ed4b316e8e24b6899ece7186c6a7a0129326ba Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Feb 2007 16:41:09 -0500 Subject: [PATCH 0006/2985] Btrfs: add mkfs.c Signed-off-by: Chris Mason --- fs/btrfs/mkfs.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 fs/btrfs/mkfs.c diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c new file mode 100644 index 000000000000..bab98c6d6fa7 --- /dev/null +++ b/fs/btrfs/mkfs.c @@ -0,0 +1,69 @@ +#define _XOPEN_SOURCE 500 +#include +#include +#include +#include +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" + +int mkfs(int fd) +{ + struct ctree_root_info info[2]; + struct leaf empty_leaf; + struct item item; + struct extent_item extent_item; + int ret; + + memset(info, 0, sizeof(info)); + info[0].blocknr = 16; + info[0].objectid = 1; + info[0].tree_root = 17; + info[0].alloc_extent.blocknr = 0; + info[0].alloc_extent.num_blocks = 20; + /* 0-17 are used (inclusive) */ + info[0].alloc_extent.num_used = 18; + + info[1].blocknr = 16; + info[1].objectid = 2; + info[1].tree_root = 64; + info[1].alloc_extent.blocknr = 64; + info[1].alloc_extent.num_blocks = 8; + info[1].alloc_extent.num_used = 1; + ret = pwrite(fd, info, sizeof(info), + CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + if (ret != sizeof(info)) + return -1; + memset(&empty_leaf, 0, sizeof(empty_leaf)); + empty_leaf.header.parentid = 1; + empty_leaf.header.blocknr = 17; + ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 17 * CTREE_BLOCKSIZE); + if (ret != sizeof(empty_leaf)) + return -1; + + empty_leaf.header.parentid = 2; + empty_leaf.header.blocknr = 64; + empty_leaf.header.nritems = 2; + item.key.objectid = 0; + item.key.offset = 64; + item.key.flags = 0; + item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item); + item.size = sizeof(struct extent_item); + extent_item.refs = 1; + extent_item.owner = 1; + memcpy(empty_leaf.items, &item, sizeof(item)); + memcpy(empty_leaf.data + item.offset, &extent_item, item.size); + item.key.objectid = 64; + item.key.offset = 64; + item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item) * 2; + extent_item.owner = 2; + memcpy(empty_leaf.items + 1, &item, sizeof(item)); + memcpy(empty_leaf.data + item.offset, &extent_item, item.size); + ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 64 * CTREE_BLOCKSIZE); + if (ret != sizeof(empty_leaf)) + return -1; + return 0; +} From cfaa72952fa7b44aa5d967cbc266110900552aef Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 21 Feb 2007 17:04:57 -0500 Subject: [PATCH 0007/2985] Btrfs: extent fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 69 ++++++++++++++++++++++++++-------------- fs/btrfs/ctree.h | 5 +++ fs/btrfs/disk-io.c | 79 ++++++++++++++++++++++++++-------------------- fs/btrfs/disk-io.h | 4 +-- fs/btrfs/mkfs.c | 7 ++-- 5 files changed, 103 insertions(+), 61 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2177744dedd3..2891b582e26f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -354,6 +354,7 @@ int __insert_ptr(struct ctree_root *root, c->header.nritems = 2; c->header.flags = node_level(level); c->header.blocknr = t->blocknr; + c->header.parentid = root->node->node.header.parentid; lower = &path->nodes[level-1]->node; if (is_leaf(lower->header.flags)) lower_key = &((struct leaf *)lower)->items[0].key; @@ -363,7 +364,7 @@ int __insert_ptr(struct ctree_root *root, memcpy(c->keys + 1, key, sizeof(struct key)); c->blockptrs[0] = path->nodes[level-1]->blocknr; c->blockptrs[1] = blocknr; - /* the path has an extra ref to root->node */ + /* the super has an extra ref to root->node */ tree_block_release(root, root->node); root->node = t; t->count++; @@ -439,6 +440,7 @@ int insert_ptr(struct ctree_root *root, b = &b_buffer->node; b->header.flags = c->header.flags; b->header.blocknr = b_buffer->blocknr; + b->header.parentid = root->node->node.header.parentid; mid = (c->header.nritems + 1) / 2; memcpy(b->keys, c->keys + mid, (c->header.nritems - mid) * sizeof(struct key)); @@ -642,6 +644,7 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) right->header.nritems = nritems - mid; right->header.blocknr = right_buffer->blocknr; right->header.flags = node_level(0); + right->header.parentid = root->node->node.header.parentid; data_copy_size = l->items[mid].offset + l->items[mid].size - leaf_data_end(l); memcpy(right->items, l->items + mid, @@ -689,8 +692,12 @@ int insert_item(struct ctree_root *root, struct key *key, unsigned int data_end; struct ctree_path path; + refill_alloc_extent(root); + /* create a root if there isn't one */ if (!root->node) { + BUG(); +#if 0 struct tree_buffer *t; t = alloc_free_block(root); BUG_ON(!t); @@ -699,6 +706,7 @@ int insert_item(struct ctree_root *root, struct key *key, t->node.header.blocknr = t->blocknr; root->node = t; write_tree_block(root, t); +#endif } init_path(&path); ret = search_slot(root, key, &path); @@ -758,7 +766,6 @@ int insert_item(struct ctree_root *root, struct key *key, if (leaf_free_space(leaf) < 0) BUG(); release_path(root, &path); - refill_alloc_extent(root); return 0; } @@ -893,7 +900,7 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) int level = 1; u64 blocknr; struct tree_buffer *c; - struct tree_buffer *next; + struct tree_buffer *next = NULL; while(level < MAX_LEVEL) { if (!path->nodes[level]) @@ -905,6 +912,8 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) continue; } blocknr = c->node.blockptrs[slot]; + if (next) + tree_block_release(root, next); next = read_tree_block(root, blocknr); break; } @@ -922,7 +931,7 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) return 0; } -int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, +int alloc_extent(struct ctree_root *orig_root, u64 num_blocks, u64 search_start, u64 search_end, u64 owner, struct key *ins) { struct ctree_path path; @@ -934,6 +943,7 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, int start_found = 0; struct leaf *l; struct extent_item extent_item; + struct ctree_root * root = orig_root->extent_root; init_path(&path); ins->objectid = search_start; @@ -974,13 +984,18 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, start_found = 1; last_block = key->objectid + key->offset; path.slots[0]++; - printf("last block is not %lu\n", last_block); } // FIXME -ENOSPC insert: + release_path(root, &path); extent_item.refs = 1; extent_item.owner = owner; - ret = insert_item(root, ins, &extent_item, sizeof(extent_item)); + if (root == orig_root && root->reserve_extent->num_blocks == 0) { + root->reserve_extent->blocknr = ins->objectid; + root->reserve_extent->num_blocks = ins->offset; + root->reserve_extent->num_used = 0; + } + ret = insert_item(root->extent_root, ins, &extent_item, sizeof(extent_item)); return ret; } @@ -991,7 +1006,6 @@ static int refill_alloc_extent(struct ctree_root *root) int ret; int min_blocks = MAX_LEVEL * 2; - printf("refill alloc root %p, numused %lu total %lu\n", root, ae->num_used, ae->num_blocks); if (ae->num_blocks > ae->num_used && ae->num_blocks - ae->num_used > min_blocks) return 0; @@ -1007,9 +1021,9 @@ static int refill_alloc_extent(struct ctree_root *root) BUG(); return 0; } - // FIXME, this recurses - ret = alloc_extent(root->extent_root, - min_blocks * 2, 0, (unsigned long)-1, 0, &key); + ret = alloc_extent(root, + min_blocks * 2, 0, (unsigned long)-1, + root->node->node.header.parentid, &key); ae->blocknr = key.objectid; ae->num_blocks = key.offset; ae->num_used = 0; @@ -1021,6 +1035,7 @@ void print_leaf(struct leaf *l) int i; int nr = l->header.nritems; struct item *item; + struct extent_item *ei; printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr, leaf_free_space(l)); fflush(stdout); @@ -1032,6 +1047,8 @@ void print_leaf(struct leaf *l) item->offset, item->size); fflush(stdout); printf("\t\titem data %.*s\n", item->size, l->data+item->offset); + ei = (struct extent_item *)(l->data + item->offset); + printf("\t\textent data %u %lu\n", ei->refs, ei->owner); fflush(stdout); } } @@ -1080,8 +1097,8 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t) /* for testing only */ int next_key(int i, int max_key) { - // return rand() % max_key; - return i; + return rand() % max_key; + // return i; } int main() { @@ -1092,15 +1109,20 @@ int main() { int i; int num; int ret; - int run_size = 256; + int run_size = 10000; int max_key = 100000000; int tree_size = 0; struct ctree_path path; + struct ctree_super_block super; radix_tree_init(); - root = open_ctree("dbfile"); + root = open_ctree("dbfile", &super); + printf("root tree\n"); + print_tree(root, root->node); + printf("map tree\n"); + print_tree(root->extent_root, root->extent_root->node); srand(55); for (i = 0; i < run_size; i++) { @@ -1112,22 +1134,20 @@ int main() { ins.objectid = num; ins.offset = 0; ins.flags = 0; - printf("insert %d\n", i); ret = insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; - printf("done insert %d\n", i); } printf("root used: %lu\n", root->alloc_extent->num_used); printf("root tree\n"); - print_tree(root, root->node); + // print_tree(root, root->node); printf("map tree\n"); printf("map used: %lu\n", root->extent_root->alloc_extent->num_used); - print_tree(root->extent_root, root->extent_root->node); - exit(1); - + // print_tree(root->extent_root, root->extent_root->node); + write_ctree_super(root, &super); close_ctree(root); - root = open_ctree("dbfile"); + + root = open_ctree("dbfile", &super); printf("starting search\n"); srand(55); for (i = 0; i < run_size; i++) { @@ -1142,8 +1162,9 @@ int main() { } release_path(root, &path); } + write_ctree_super(root, &super); close_ctree(root); - root = open_ctree("dbfile"); + root = open_ctree("dbfile", &super); printf("node %p level %d total ptrs %d free spc %lu\n", root->node, node_level(root->node->node.header.flags), root->node->node.header.nritems, @@ -1174,8 +1195,9 @@ int main() { if (!ret) tree_size++; } + write_ctree_super(root, &super); close_ctree(root); - root = open_ctree("dbfile"); + root = open_ctree("dbfile", &super); printf("starting search2\n"); srand(128); for (i = 0; i < run_size; i++) { @@ -1221,6 +1243,7 @@ int main() { } release_path(root, &path); } + write_ctree_super(root, &super); close_ctree(root); printf("tree size is now %d\n", tree_size); return 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b737925be314..78407d32db78 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -57,6 +57,11 @@ struct ctree_root_info { u64 snapuuid[2]; /* root specific uuid */ } __attribute__ ((__packed__)); +struct ctree_super_block { + struct ctree_root_info root_info; + struct ctree_root_info extent_info; +} __attribute__ ((__packed__)); + struct item { struct key key; u16 offset; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 653f18aab330..2f71ccf1c3cb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -15,7 +15,7 @@ static int allocated_blocks = 0; static int get_free_block(struct ctree_root *root, u64 *block) { struct stat st; - int ret; + int ret = 0; if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) return -1; @@ -30,9 +30,14 @@ static int get_free_block(struct ctree_root *root, u64 *block) } st.st_size = 0; ret = fstat(root->fp, &st); - if (st.st_size < (*block + 1) * CTREE_BLOCKSIZE) + if (st.st_size < (*block + 1) * CTREE_BLOCKSIZE) { ret = ftruncate(root->fp, (*block + 1) * CTREE_BLOCKSIZE); + if (ret) { + perror("ftruncate"); + exit(1); + } + } return ret; } @@ -81,11 +86,7 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) buf = radix_tree_lookup(&root->cache_radix, blocknr); if (buf) { buf->count++; - if (buf->blocknr != blocknr) - BUG(); - if (buf->blocknr != buf->node.header.blocknr) - BUG(); - return buf; + goto test; } buf = alloc_tree_block(root, blocknr); if (!buf) @@ -95,8 +96,11 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) free(buf); return NULL; } +test: if (buf->blocknr != buf->node.header.blocknr) BUG(); + if (root->node && buf->node.header.parentid != root->node->node.header.parentid) + BUG(); return buf; } @@ -111,36 +115,30 @@ int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) ret = pwrite(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); if (ret != CTREE_BLOCKSIZE) return ret; - if (buf == root->node) - return update_root_block(root); return 0; } -struct ctree_super_block { - struct ctree_root_info root_info; - struct ctree_root_info extent_info; -} __attribute__ ((__packed__)); - static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, struct ctree_root_info *info, int fp) { + INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); root->fp = fp; + root->node = NULL; root->node = read_tree_block(root, info->tree_root); root->extent_root = extent_root; memcpy(&root->ai1, &info->alloc_extent, sizeof(info->alloc_extent)); memcpy(&root->ai2, &info->reserve_extent, sizeof(info->reserve_extent)); root->alloc_extent = &root->ai1; root->reserve_extent = &root->ai2; - INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); - printf("setup done reading root %p, used %lu\n", root, root->alloc_extent->num_used); + printf("setup done reading root %p, used %lu available %lu\n", root, root->alloc_extent->num_used, root->alloc_extent->num_blocks); + printf("setup done reading root %p, reserve used %lu available %lu\n", root, root->reserve_extent->num_used, root->reserve_extent->num_blocks); return 0; } -struct ctree_root *open_ctree(char *filename) +struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) { struct ctree_root *root = malloc(sizeof(struct ctree_root)); struct ctree_root *extent_root = malloc(sizeof(struct ctree_root)); - struct ctree_super_block super; int fp; int ret; @@ -149,48 +147,61 @@ struct ctree_root *open_ctree(char *filename) free(root); return NULL; } - ret = pread(fp, &super, sizeof(struct ctree_super_block), + ret = pread(fp, super, sizeof(struct ctree_super_block), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); if (ret == 0) { ret = mkfs(fp); if (ret) return NULL; - ret = pread(fp, &super, sizeof(struct ctree_super_block), + ret = pread(fp, super, sizeof(struct ctree_super_block), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); if (ret != sizeof(struct ctree_super_block)) return NULL; } BUG_ON(ret < 0); - __setup_root(root, extent_root, &super.root_info, fp); - __setup_root(extent_root, extent_root, &super.extent_info, fp); + __setup_root(root, extent_root, &super->root_info, fp); + __setup_root(extent_root, extent_root, &super->extent_info, fp); return root; } +static int __update_root(struct ctree_root *root, struct ctree_root_info *info) +{ + info->tree_root = root->node->blocknr; + memcpy(&info->alloc_extent, root->alloc_extent, sizeof(struct alloc_extent)); + memcpy(&info->reserve_extent, root->reserve_extent, sizeof(struct alloc_extent)); + return 0; +} + +int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s) +{ + int ret; + __update_root(root, &s->root_info); + __update_root(root->extent_root, &s->extent_info); + ret = pwrite(root->fp, s, sizeof(*s), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + if (ret != sizeof(*s)) { + fprintf(stderr, "failed to write new super block err %d\n", ret); + return ret; + } + return 0; +} + int close_ctree(struct ctree_root *root) { close(root->fp); if (root->node) tree_block_release(root, root->node); + if (root->extent_root->node) + tree_block_release(root->extent_root, root->extent_root->node); free(root); printf("on close %d blocks are allocated\n", allocated_blocks); return 0; } -int update_root_block(struct ctree_root *root) -{ - int ret; - u64 root_block = root->node->blocknr; - - ret = pwrite(root->fp, &root_block, sizeof(u64), 0); - if (ret != sizeof(u64)) - return ret; - return 0; -} - void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) { - return; buf->count--; + if (buf->count < 0) + BUG(); if (buf->count == 0) { if (!radix_tree_lookup(&root->cache_radix, buf->blocknr)) BUG(); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 41acb65eee19..e288fe883b9a 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -12,11 +12,11 @@ struct tree_buffer { struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr); int write_tree_block(struct ctree_root *root, struct tree_buffer *buf); -struct ctree_root *open_ctree(char *filename); +struct ctree_root *open_ctree(char *filename, struct ctree_super_block *s); int close_ctree(struct ctree_root *root); void tree_block_release(struct ctree_root *root, struct tree_buffer *buf); struct tree_buffer *alloc_free_block(struct ctree_root *root); -int update_root_block(struct ctree_root *root); +int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s); int mkfs(int fd); #define CTREE_SUPER_INFO_OFFSET(bs) (16 * (bs)) diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index bab98c6d6fa7..584aba44e801 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -18,12 +18,13 @@ int mkfs(int fd) struct extent_item extent_item; int ret; + /* setup the super block area */ memset(info, 0, sizeof(info)); info[0].blocknr = 16; info[0].objectid = 1; info[0].tree_root = 17; info[0].alloc_extent.blocknr = 0; - info[0].alloc_extent.num_blocks = 20; + info[0].alloc_extent.num_blocks = 64; /* 0-17 are used (inclusive) */ info[0].alloc_extent.num_used = 18; @@ -31,12 +32,14 @@ int mkfs(int fd) info[1].objectid = 2; info[1].tree_root = 64; info[1].alloc_extent.blocknr = 64; - info[1].alloc_extent.num_blocks = 8; + info[1].alloc_extent.num_blocks = 64; info[1].alloc_extent.num_used = 1; ret = pwrite(fd, info, sizeof(info), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); if (ret != sizeof(info)) return -1; + + /* create leaves for the tree root and extent root */ memset(&empty_leaf, 0, sizeof(empty_leaf)); empty_leaf.header.parentid = 1; empty_leaf.header.blocknr = 17; From 5c680ed620c2b69cf751aecf1a5e03ce2c89c7f3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 22 Feb 2007 11:39:13 -0500 Subject: [PATCH 0008/2985] Btrfs: switch to early splits Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 263 ++++++++++++++++++++------------------------- fs/btrfs/disk-io.c | 3 +- 2 files changed, 120 insertions(+), 146 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2891b582e26f..1b4e82d8074d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5,7 +5,12 @@ #include "ctree.h" #include "disk-io.h" +#define SEARCH_READ 0 +#define SEARCH_WRITE 1 + static int refill_alloc_extent(struct ctree_root *root); +int split_node(struct ctree_root *root, struct ctree_path *path, int level); +int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size); static inline void init_path(struct ctree_path *p) { @@ -125,14 +130,14 @@ int bin_search(struct node *c, struct key *key, int *slot) * If the key isn't found, the path points to the slot where it should * be inserted. */ -int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) +int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len) { struct tree_buffer *b = root->node; struct node *c; - int slot; int ret; int level; + b->count++; while (b) { c = &b->node; @@ -143,10 +148,26 @@ int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) if (ret && slot > 0) slot -= 1; p->slots[level] = slot; + if (ins_len && c->header.nritems == NODEPTRS_PER_BLOCK) { + int sret = split_node(root, p, level); + BUG_ON(sret > 0); + if (sret) + return sret; + b = p->nodes[level]; + c = &b->node; + slot = p->slots[level]; + } b = read_tree_block(root, c->blockptrs[slot]); continue; } else { + struct leaf *l = (struct leaf *)c; p->slots[level] = slot; + if (ins_len && leaf_free_space(l) < sizeof(struct item) + ins_len) { + int sret = split_leaf(root, p, ins_len); + BUG_ON(sret > 0); + if (sret) + return sret; + } return ret; } } @@ -331,50 +352,54 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) return 0; } +static int insert_new_root(struct ctree_root *root, struct ctree_path *path, int level) +{ + struct tree_buffer *t; + struct node *lower; + struct node *c; + struct key *lower_key; + + BUG_ON(path->nodes[level]); + BUG_ON(path->nodes[level-1] != root->node); + + t = alloc_free_block(root); + c = &t->node; + memset(c, 0, sizeof(c)); + c->header.nritems = 1; + c->header.flags = node_level(level); + c->header.blocknr = t->blocknr; + c->header.parentid = root->node->node.header.parentid; + lower = &path->nodes[level-1]->node; + if (is_leaf(lower->header.flags)) + lower_key = &((struct leaf *)lower)->items[0].key; + else + lower_key = lower->keys; + memcpy(c->keys, lower_key, sizeof(struct key)); + c->blockptrs[0] = path->nodes[level-1]->blocknr; + /* the super has an extra ref to root->node */ + tree_block_release(root, root->node); + root->node = t; + t->count++; + write_tree_block(root, t); + path->nodes[level] = t; + path->slots[level] = 0; + return 0; +} + /* * worker function to insert a single pointer in a node. * the node should have enough room for the pointer already * slot and level indicate where you want the key to go, and * blocknr is the block the key points to. */ -int __insert_ptr(struct ctree_root *root, +int insert_ptr(struct ctree_root *root, struct ctree_path *path, struct key *key, u64 blocknr, int slot, int level) { - struct node *c; struct node *lower; - struct key *lower_key; int nritems; - /* need a new root */ - if (!path->nodes[level]) { - struct tree_buffer *t; - t = alloc_free_block(root); - c = &t->node; - memset(c, 0, sizeof(c)); - c->header.nritems = 2; - c->header.flags = node_level(level); - c->header.blocknr = t->blocknr; - c->header.parentid = root->node->node.header.parentid; - lower = &path->nodes[level-1]->node; - if (is_leaf(lower->header.flags)) - lower_key = &((struct leaf *)lower)->items[0].key; - else - lower_key = lower->keys; - memcpy(c->keys, lower_key, sizeof(struct key)); - memcpy(c->keys + 1, key, sizeof(struct key)); - c->blockptrs[0] = path->nodes[level-1]->blocknr; - c->blockptrs[1] = blocknr; - /* the super has an extra ref to root->node */ - tree_block_release(root, root->node); - root->node = t; - t->count++; - write_tree_block(root, t); - path->nodes[level] = t; - path->slots[level] = 0; - if (c->keys[1].objectid == 0) - BUG(); - return 0; - } + + BUG_ON(!path->nodes[level]); lower = &path->nodes[level]->node; nritems = lower->header.nritems; if (slot > nritems) @@ -396,93 +421,54 @@ int __insert_ptr(struct ctree_root *root, return 0; } - -/* - * insert a key,blocknr pair into the tree at a given level - * If the node at that level in the path doesn't have room, - * it is split or shifted as appropriate. - */ -int insert_ptr(struct ctree_root *root, - struct ctree_path *path, struct key *key, - u64 blocknr, int level) +int split_node(struct ctree_root *root, struct ctree_path *path, int level) { - struct tree_buffer *t = path->nodes[level]; - struct node *c = &path->nodes[level]->node; - struct node *b; - struct tree_buffer *b_buffer; - struct tree_buffer *bal[MAX_LEVEL]; - int bal_level = level; + struct tree_buffer *t; + struct node *c; + struct tree_buffer *split_buffer; + struct node *split; int mid; - int bal_start = -1; + int ret; - /* - * check to see if we need to make room in the node for this - * pointer. If we do, keep walking the tree, making sure there - * is enough room in each level for the required insertions. - * - * The bal array is filled in with any nodes to be inserted - * due to splitting. Once we've done all the splitting required - * do the inserts based on the data in the bal array. - */ - memset(bal, 0, sizeof(bal)); - while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) { - c = &t->node; - if (push_node_left(root, path, - node_level(c->header.flags)) == 0) - break; - if (push_node_right(root, path, - node_level(c->header.flags)) == 0) - break; - bal_start = bal_level; - if (bal_level == MAX_LEVEL - 1) - BUG(); - b_buffer = alloc_free_block(root); - b = &b_buffer->node; - b->header.flags = c->header.flags; - b->header.blocknr = b_buffer->blocknr; - b->header.parentid = root->node->node.header.parentid; - mid = (c->header.nritems + 1) / 2; - memcpy(b->keys, c->keys + mid, - (c->header.nritems - mid) * sizeof(struct key)); - memcpy(b->blockptrs, c->blockptrs + mid, - (c->header.nritems - mid) * sizeof(u64)); - b->header.nritems = c->header.nritems - mid; - c->header.nritems = mid; - - write_tree_block(root, t); - write_tree_block(root, b_buffer); - - bal[bal_level] = b_buffer; - if (bal_level == MAX_LEVEL - 1) - break; - bal_level += 1; - t = path->nodes[bal_level]; + ret = push_node_left(root, path, level); + if (!ret) + return 0; + ret = push_node_right(root, path, level); + if (!ret) + return 0; + t = path->nodes[level]; + c = &t->node; + if (t == root->node) { + /* trying to split the root, lets make a new one */ + ret = insert_new_root(root, path, level + 1); + if (ret) + return ret; } - /* - * bal_start tells us the first level in the tree that needed to - * be split. Go through the bal array inserting the new nodes - * as needed. The path is fixed as we go. - */ - while(bal_start > 0) { - b_buffer = bal[bal_start]; - c = &path->nodes[bal_start]->node; - __insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr, - path->slots[bal_start + 1] + 1, bal_start + 1); - if (path->slots[bal_start] >= c->header.nritems) { - path->slots[bal_start] -= c->header.nritems; - tree_block_release(root, path->nodes[bal_start]); - path->nodes[bal_start] = b_buffer; - path->slots[bal_start + 1] += 1; - } else { - tree_block_release(root, b_buffer); - } - bal_start--; - if (!bal[bal_start]) - break; + split_buffer = alloc_free_block(root); + split = &split_buffer->node; + split->header.flags = c->header.flags; + split->header.blocknr = split_buffer->blocknr; + split->header.parentid = root->node->node.header.parentid; + mid = (c->header.nritems + 1) / 2; + memcpy(split->keys, c->keys + mid, + (c->header.nritems - mid) * sizeof(struct key)); + memcpy(split->blockptrs, c->blockptrs + mid, + (c->header.nritems - mid) * sizeof(u64)); + split->header.nritems = c->header.nritems - mid; + c->header.nritems = mid; + write_tree_block(root, t); + write_tree_block(root, split_buffer); + insert_ptr(root, path, split->keys, split_buffer->blocknr, + path->slots[level + 1] + 1, level + 1); + if (path->slots[level] > mid) { + path->slots[level] -= mid; + tree_block_release(root, t); + path->nodes[level] = split_buffer; + path->slots[level + 1] += 1; + } else { + tree_block_release(root, split_buffer); } - /* Now that the tree has room, insert the requested pointer */ - return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1, - level); + return 0; } /* @@ -623,6 +609,11 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) if (leaf_free_space(l) >= sizeof(struct item) + data_size) return 0; } + if (!path->nodes[1]) { + ret = insert_new_root(root, path, 1); + if (ret) + return ret; + } slot = path->slots[0]; nritems = l->header.nritems; mid = (nritems + 1)/ 2; @@ -659,8 +650,7 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) l->header.nritems = mid; ret = insert_ptr(root, path, &right->items[0].key, - right_buffer->blocknr, 1); - + right_buffer->blocknr, path->slots[1] + 1, 1); write_tree_block(root, right_buffer); write_tree_block(root, l_buf); @@ -695,21 +685,10 @@ int insert_item(struct ctree_root *root, struct key *key, refill_alloc_extent(root); /* create a root if there isn't one */ - if (!root->node) { + if (!root->node) BUG(); -#if 0 - struct tree_buffer *t; - t = alloc_free_block(root); - BUG_ON(!t); - t->node.header.nritems = 0; - t->node.header.flags = node_level(0); - t->node.header.blocknr = t->blocknr; - root->node = t; - write_tree_block(root, t); -#endif - } init_path(&path); - ret = search_slot(root, key, &path); + ret = search_slot(root, key, &path, data_size); if (ret == 0) { release_path(root, &path); return -EEXIST; @@ -719,12 +698,6 @@ int insert_item(struct ctree_root *root, struct key *key, leaf_buf = path.nodes[0]; leaf = &leaf_buf->leaf; - /* make room if needed */ - if (leaf_free_space(leaf) < sizeof(struct item) + data_size) { - split_leaf(root, &path, data_size); - leaf_buf = path.nodes[0]; - leaf = &path.nodes[0]->leaf; - } nritems = leaf->header.nritems; data_end = leaf_data_end(leaf); @@ -950,7 +923,7 @@ int alloc_extent(struct ctree_root *orig_root, u64 num_blocks, u64 search_start, ins->offset = 0; ins->flags = 0; - ret = search_slot(root, ins, &path); + ret = search_slot(root, ins, &path, sizeof(struct extent_item)); while (1) { l = &path.nodes[0]->leaf; slot = path.slots[0]; @@ -1097,8 +1070,8 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t) /* for testing only */ int next_key(int i, int max_key) { - return rand() % max_key; - // return i; + // return rand() % max_key; + return i; } int main() { @@ -1154,7 +1127,7 @@ int main() { num = next_key(i, max_key); ins.objectid = num; init_path(&path); - ret = search_slot(root, &ins, &path); + ret = search_slot(root, &ins, &path, 0); if (ret) { print_tree(root, root->node); printf("unable to find %d\n", num); @@ -1176,7 +1149,7 @@ int main() { num = next_key(i, max_key); ins.objectid = num; init_path(&path); - ret = search_slot(root, &ins, &path); + ret = search_slot(root, &ins, &path, 0); if (ret) continue; ret = del_item(root, &path); @@ -1204,7 +1177,7 @@ int main() { num = next_key(i, max_key); ins.objectid = num; init_path(&path); - ret = search_slot(root, &ins, &path); + ret = search_slot(root, &ins, &path, 0); if (ret) { print_tree(root, root->node); printf("unable to find %d\n", num); @@ -1218,7 +1191,7 @@ int main() { int slot; ins.objectid = (u64)-1; init_path(&path); - ret = search_slot(root, &ins, &path); + ret = search_slot(root, &ins, &path, 0); if (ret == 0) BUG(); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2f71ccf1c3cb..a696a4278ac5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -149,7 +149,8 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) } ret = pread(fp, super, sizeof(struct ctree_super_block), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); - if (ret == 0) { + if (ret == 0 || super->root_info.tree_root == 0) { + printf("making new FS!\n"); ret = mkfs(fp); if (ret) return NULL; From 9a8dd1502de6aa683ae46cf0397e9b6e636416fb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Feb 2007 08:38:36 -0500 Subject: [PATCH 0009/2985] Btrfs: Block sized tree extents and extent deletion Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 247 ++++++++++++++++++++++++++++++++++++--------- fs/btrfs/ctree.h | 13 +-- fs/btrfs/disk-io.c | 90 ++++++----------- fs/btrfs/disk-io.h | 2 +- fs/btrfs/mkfs.c | 37 ++++--- 5 files changed, 253 insertions(+), 136 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1b4e82d8074d..f0abcf1f3939 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -8,9 +8,12 @@ #define SEARCH_READ 0 #define SEARCH_WRITE 1 -static int refill_alloc_extent(struct ctree_root *root); +#define CTREE_EXTENT_PENDING 0 + int split_node(struct ctree_root *root, struct ctree_path *path, int level); int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size); +struct tree_buffer *alloc_free_block(struct ctree_root *root); +int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks); static inline void init_path(struct ctree_path *p) { @@ -682,8 +685,6 @@ int insert_item(struct ctree_root *root, struct key *key, unsigned int data_end; struct ctree_path path; - refill_alloc_extent(root); - /* create a root if there isn't one */ if (!root->node) BUG(); @@ -756,6 +757,7 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) struct tree_buffer *t; struct node *node; int nritems; + u64 blocknr; while(1) { t = path->nodes[level]; @@ -774,6 +776,7 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) } node->header.nritems--; write_tree_block(root, t); + blocknr = t->blocknr; if (node->header.nritems != 0) { int tslot; if (slot == 0) @@ -799,6 +802,7 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) break; } level++; + free_extent(root, blocknr, 1); if (!path->nodes[level]) BUG(); } @@ -841,8 +845,10 @@ int del_item(struct ctree_root *root, struct ctree_path *path) if (leaf_buf == root->node) { leaf->header.flags = node_level(0); write_tree_block(root, leaf_buf); - } else + } else { del_ptr(root, path, 1); + free_extent(root, leaf_buf->blocknr, 1); + } } else { if (slot == 0) fixup_low_keys(root, path, &leaf->items[0].key, 1); @@ -867,6 +873,72 @@ int del_item(struct ctree_root *root, struct ctree_path *path) return 0; } +static int del_pending_extents(struct ctree_root *extent_root) +{ + int ret; + struct key key; + struct tree_buffer *gang[4]; + int i; + struct ctree_path path; + + while(1) { + ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, + (void **)gang, 0, ARRAY_SIZE(gang), + CTREE_EXTENT_PENDING); + if (!ret) + break; + for (i = 0; i < ret; i++) { + key.objectid = gang[i]->blocknr; + key.flags = 0; + key.offset = 1; + init_path(&path); + ret = search_slot(extent_root, &key, &path, 0); + if (ret) { + BUG(); + // FIXME undo it and return sane + return ret; + } + ret = del_item(extent_root, &path); + if (ret) { + BUG(); + return ret; + } + release_path(extent_root, &path); + radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, + CTREE_EXTENT_PENDING); + tree_block_release(extent_root, gang[i]); + } + } + return 0; +} + +int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) +{ + struct ctree_path path; + struct key key; + struct ctree_root *extent_root = root->extent_root; + struct tree_buffer *t; + int pending_ret; + int ret; + + key.objectid = blocknr; + key.flags = 0; + key.offset = num_blocks; + if (root == extent_root) { + t = read_tree_block(root, key.objectid); + radix_tree_tag_set(&root->cache_radix, key.objectid, CTREE_EXTENT_PENDING); + return 0; + } + init_path(&path); + ret = search_slot(extent_root, &key, &path, 0); + if (ret) + BUG(); + ret = del_item(extent_root, &path); + release_path(extent_root, &path); + pending_ret = del_pending_extents(root->extent_root); + return ret ? ret : pending_ret; +} + int next_leaf(struct ctree_root *root, struct ctree_path *path) { int slot; @@ -904,8 +976,8 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) return 0; } -int alloc_extent(struct ctree_root *orig_root, u64 num_blocks, u64 search_start, - u64 search_end, u64 owner, struct key *ins) +int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, u64 search_start, + u64 search_end, struct key *ins) { struct ctree_path path; struct key *key; @@ -915,15 +987,13 @@ int alloc_extent(struct ctree_root *orig_root, u64 num_blocks, u64 search_start, u64 last_block; int start_found = 0; struct leaf *l; - struct extent_item extent_item; struct ctree_root * root = orig_root->extent_root; init_path(&path); ins->objectid = search_start; ins->offset = 0; ins->flags = 0; - - ret = search_slot(root, ins, &path, sizeof(struct extent_item)); + ret = search_slot(root, ins, &path, 0); while (1) { l = &path.nodes[0]->leaf; slot = path.slots[0]; @@ -938,6 +1008,7 @@ int alloc_extent(struct ctree_root *orig_root, u64 num_blocks, u64 search_start, ins->objectid = search_start; ins->offset = num_blocks; hole_size = search_end - search_start; + start_found = 1; goto insert; } ins->objectid = last_block; @@ -956,51 +1027,119 @@ int alloc_extent(struct ctree_root *orig_root, u64 num_blocks, u64 search_start, } else start_found = 1; last_block = key->objectid + key->offset; +insert_failed: path.slots[0]++; } // FIXME -ENOSPC insert: - release_path(root, &path); - extent_item.refs = 1; - extent_item.owner = owner; - if (root == orig_root && root->reserve_extent->num_blocks == 0) { - root->reserve_extent->blocknr = ins->objectid; - root->reserve_extent->num_blocks = ins->offset; - root->reserve_extent->num_used = 0; + if (orig_root->extent_root == orig_root) { + BUG_ON(num_blocks != 1); + if ((root->current_insert.objectid <= ins->objectid && + root->current_insert.objectid + root->current_insert.offset > + ins->objectid) || + (root->current_insert.objectid > ins->objectid && + root->current_insert.objectid <= ins->objectid + ins->offset) || + radix_tree_tag_get(&root->cache_radix, ins->objectid, + CTREE_EXTENT_PENDING)) { + last_block = ins->objectid + 1; + search_start = last_block; + goto insert_failed; + } } - ret = insert_item(root->extent_root, ins, &extent_item, sizeof(extent_item)); - return ret; + release_path(root, &path); + if (ins->offset != 1) + BUG(); + return 0; } -static int refill_alloc_extent(struct ctree_root *root) +static int insert_pending_extents(struct ctree_root *extent_root) { - struct alloc_extent *ae = root->alloc_extent; - struct key key; int ret; - int min_blocks = MAX_LEVEL * 2; + struct key key; + struct extent_item item; + struct tree_buffer *gang[4]; + int i; - if (ae->num_blocks > ae->num_used && ae->num_blocks - ae->num_used > - min_blocks) - return 0; - ae = root->reserve_extent; - if (ae->num_blocks > ae->num_used) { - if (root->alloc_extent->num_blocks == 0) { - /* we should swap reserve/alloc_extent when alloc - * fills up - */ - BUG(); + // FIXME -ENOSPC + item.refs = 1; + item.owner = extent_root->node->node.header.parentid; + while(1) { + ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, + (void **)gang, 0, ARRAY_SIZE(gang), + CTREE_EXTENT_PENDING); + if (!ret) + break; + for (i = 0; i < ret; i++) { + key.objectid = gang[i]->blocknr; + key.flags = 0; + key.offset = 1; + ret = insert_item(extent_root, &key, &item, sizeof(item)); + if (ret) { + BUG(); + // FIXME undo it and return sane + return ret; + } + radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, + CTREE_EXTENT_PENDING); + tree_block_release(extent_root, gang[i]); } - if (ae->num_blocks - ae->num_used < min_blocks) - BUG(); + } + return 0; +} + +int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, + u64 search_end, u64 owner, struct key *ins, struct tree_buffer **buf) +{ + int ret; + int pending_ret; + struct extent_item extent_item; + + extent_item.refs = 1; + extent_item.owner = owner; + + ret = find_free_extent(root, num_blocks, search_start, search_end, ins); + if (ret) + return ret; + + if (root != root->extent_root) { + memcpy(&root->extent_root->current_insert, ins, sizeof(*ins)); + ret = insert_item(root->extent_root, ins, &extent_item, sizeof(extent_item)); + memset(&root->extent_root->current_insert, 0, sizeof(struct key)); + pending_ret = insert_pending_extents(root->extent_root); + if (ret) + return ret; + if (pending_ret) + return pending_ret; + *buf = find_tree_block(root, ins->objectid); return 0; } - ret = alloc_extent(root, - min_blocks * 2, 0, (unsigned long)-1, - root->node->node.header.parentid, &key); - ae->blocknr = key.objectid; - ae->num_blocks = key.offset; - ae->num_used = 0; - return ret; + /* we're allocating an extent for the extent tree, don't recurse */ + BUG_ON(ins->offset != 1); + *buf = find_tree_block(root, ins->objectid); + BUG_ON(!*buf); + radix_tree_tag_set(&root->cache_radix, ins->objectid, CTREE_EXTENT_PENDING); + (*buf)->count++; + return 0; + +} + +struct tree_buffer *alloc_free_block(struct ctree_root *root) +{ + struct key ins; + int ret; + struct tree_buffer *buf = NULL; + + ret = alloc_extent(root, 1, 0, (unsigned long)-1, root->node->node.header.parentid, + &ins, &buf); + + if (ret) { + BUG(); + return NULL; + } + if (root != root->extent_root) + BUG_ON(radix_tree_tag_get(&root->extent_root->cache_radix, buf->blocknr, + CTREE_EXTENT_PENDING)); + return buf; } void print_leaf(struct leaf *l) @@ -1096,6 +1235,7 @@ int main() { print_tree(root, root->node); printf("map tree\n"); print_tree(root->extent_root, root->extent_root->node); + fflush(stdout); srand(55); for (i = 0; i < run_size; i++) { @@ -1111,12 +1251,6 @@ int main() { if (!ret) tree_size++; } - printf("root used: %lu\n", root->alloc_extent->num_used); - printf("root tree\n"); - // print_tree(root, root->node); - printf("map tree\n"); - printf("map used: %lu\n", root->extent_root->alloc_extent->num_used); - // print_tree(root->extent_root, root->extent_root->node); write_ctree_super(root, &super); close_ctree(root); @@ -1167,12 +1301,27 @@ int main() { ret = insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; + if (i >= 5) { + struct key ugh; + ugh.objectid = 5; + ugh.flags = 0; + ugh.offset = 0; + init_path(&path); + ret = search_slot(root, &ugh, &path, 0); + if (ret) { + print_tree(root, root->node); + printf("unable to find 5 %d\n", num); + exit(1); + } + release_path(root, &path); + + } } write_ctree_super(root, &super); close_ctree(root); root = open_ctree("dbfile", &super); - printf("starting search2\n"); srand(128); + printf("starting search2\n"); for (i = 0; i < run_size; i++) { num = next_key(i, max_key); ins.objectid = num; @@ -1219,5 +1368,7 @@ int main() { write_ctree_super(root, &super); close_ctree(root); printf("tree size is now %d\n", tree_size); + printf("map tree\n"); + print_tree(root->extent_root, root->extent_root->node); return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 78407d32db78..8c32c0e9267d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -28,21 +28,12 @@ struct header { struct tree_buffer; -struct alloc_extent { - u64 blocknr; - u64 num_blocks; - u64 num_used; -} __attribute__ ((__packed__)); - struct ctree_root { struct tree_buffer *node; struct ctree_root *extent_root; - struct alloc_extent *alloc_extent; - struct alloc_extent *reserve_extent; + struct key current_insert; int fp; struct radix_tree_root cache_radix; - struct alloc_extent ai1; - struct alloc_extent ai2; }; struct ctree_root_info { @@ -52,8 +43,6 @@ struct ctree_root_info { u64 tree_root; /* the tree root */ u32 csum; u32 ham; - struct alloc_extent alloc_extent; - struct alloc_extent reserve_extent; u64 snapuuid[2]; /* root specific uuid */ } __attribute__ ((__packed__)); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a696a4278ac5..14955e440773 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -12,33 +12,13 @@ static int allocated_blocks = 0; -static int get_free_block(struct ctree_root *root, u64 *block) +static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) { - struct stat st; - int ret = 0; - - if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) - return -1; - - *block = root->alloc_extent->blocknr + root->alloc_extent->num_used; - root->alloc_extent->num_used += 1; - if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) { - struct alloc_extent *ae = root->alloc_extent; - root->alloc_extent = root->reserve_extent; - root->reserve_extent = ae; - ae->num_blocks = 0; - } - st.st_size = 0; - ret = fstat(root->fp, &st); - if (st.st_size < (*block + 1) * CTREE_BLOCKSIZE) { - ret = ftruncate(root->fp, - (*block + 1) * CTREE_BLOCKSIZE); - if (ret) { - perror("ftruncate"); - exit(1); - } - } - return ret; + if (buf->blocknr != buf->node.header.blocknr) + BUG(); + if (root->node && buf->node.header.parentid != root->node->node.header.parentid) + BUG(); + return 0; } struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) @@ -61,22 +41,23 @@ struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) return buf; } -struct tree_buffer *alloc_free_block(struct ctree_root *root) +struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr) { - u64 free_block; - int ret; - struct tree_buffer * buf; - ret = get_free_block(root, &free_block); - if (ret) { - BUG(); - return NULL; + struct tree_buffer *buf; + buf = radix_tree_lookup(&root->cache_radix, blocknr); + if (buf) { + buf->count++; + } else { + buf = alloc_tree_block(root, blocknr); + if (!buf) { + BUG(); + return NULL; + } } - buf = alloc_tree_block(root, free_block); - if (!buf) - BUG(); return buf; } + struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) { loff_t offset = blocknr * CTREE_BLOCKSIZE; @@ -86,20 +67,17 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) buf = radix_tree_lookup(&root->cache_radix, blocknr); if (buf) { buf->count++; - goto test; + } else { + buf = alloc_tree_block(root, blocknr); + if (!buf) + return NULL; + ret = pread(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); + if (ret != CTREE_BLOCKSIZE) { + free(buf); + return NULL; + } } - buf = alloc_tree_block(root, blocknr); - if (!buf) - return NULL; - ret = pread(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); - if (ret != CTREE_BLOCKSIZE) { - free(buf); - return NULL; - } -test: - if (buf->blocknr != buf->node.header.blocknr) - BUG(); - if (root->node && buf->node.header.parentid != root->node->node.header.parentid) + if (check_tree_block(root, buf)) BUG(); return buf; } @@ -121,17 +99,10 @@ int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, struct ctree_root_info *info, int fp) { - INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); root->fp = fp; root->node = NULL; root->node = read_tree_block(root, info->tree_root); root->extent_root = extent_root; - memcpy(&root->ai1, &info->alloc_extent, sizeof(info->alloc_extent)); - memcpy(&root->ai2, &info->reserve_extent, sizeof(info->reserve_extent)); - root->alloc_extent = &root->ai1; - root->reserve_extent = &root->ai2; - printf("setup done reading root %p, used %lu available %lu\n", root, root->alloc_extent->num_used, root->alloc_extent->num_blocks); - printf("setup done reading root %p, reserve used %lu available %lu\n", root, root->reserve_extent->num_used, root->reserve_extent->num_blocks); return 0; } @@ -147,6 +118,8 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) free(root); return NULL; } + INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); ret = pread(fp, super, sizeof(struct ctree_super_block), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); if (ret == 0 || super->root_info.tree_root == 0) { @@ -168,8 +141,6 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) static int __update_root(struct ctree_root *root, struct ctree_root_info *info) { info->tree_root = root->node->blocknr; - memcpy(&info->alloc_extent, root->alloc_extent, sizeof(struct alloc_extent)); - memcpy(&info->reserve_extent, root->reserve_extent, sizeof(struct alloc_extent)); return 0; } @@ -201,6 +172,7 @@ int close_ctree(struct ctree_root *root) void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) { buf->count--; + write_tree_block(root, buf); if (buf->count < 0) BUG(); if (buf->count == 0) { diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index e288fe883b9a..2729b757ddc1 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -11,11 +11,11 @@ struct tree_buffer { }; struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr); +struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr); int write_tree_block(struct ctree_root *root, struct tree_buffer *buf); struct ctree_root *open_ctree(char *filename, struct ctree_super_block *s); int close_ctree(struct ctree_root *root); void tree_block_release(struct ctree_root *root, struct tree_buffer *buf); -struct tree_buffer *alloc_free_block(struct ctree_root *root); int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s); int mkfs(int fd); diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index 584aba44e801..fd4e5dea7e1b 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -23,17 +23,10 @@ int mkfs(int fd) info[0].blocknr = 16; info[0].objectid = 1; info[0].tree_root = 17; - info[0].alloc_extent.blocknr = 0; - info[0].alloc_extent.num_blocks = 64; - /* 0-17 are used (inclusive) */ - info[0].alloc_extent.num_used = 18; info[1].blocknr = 16; info[1].objectid = 2; - info[1].tree_root = 64; - info[1].alloc_extent.blocknr = 64; - info[1].alloc_extent.num_blocks = 64; - info[1].alloc_extent.num_used = 1; + info[1].tree_root = 18; ret = pwrite(fd, info, sizeof(info), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); if (ret != sizeof(info)) @@ -48,24 +41,36 @@ int mkfs(int fd) return -1; empty_leaf.header.parentid = 2; - empty_leaf.header.blocknr = 64; - empty_leaf.header.nritems = 2; + empty_leaf.header.blocknr = 18; + empty_leaf.header.nritems = 3; + + /* item1, reserve blocks 0-16 */ item.key.objectid = 0; - item.key.offset = 64; + item.key.offset = 17; item.key.flags = 0; item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item); item.size = sizeof(struct extent_item); extent_item.refs = 1; - extent_item.owner = 1; + extent_item.owner = 0; memcpy(empty_leaf.items, &item, sizeof(item)); memcpy(empty_leaf.data + item.offset, &extent_item, item.size); - item.key.objectid = 64; - item.key.offset = 64; + + /* item2, give block 17 to the root */ + item.key.objectid = 17; + item.key.offset = 1; item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item) * 2; - extent_item.owner = 2; + extent_item.owner = 1; memcpy(empty_leaf.items + 1, &item, sizeof(item)); memcpy(empty_leaf.data + item.offset, &extent_item, item.size); - ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 64 * CTREE_BLOCKSIZE); + + /* item3, give block 18 for the extent root */ + item.key.objectid = 18; + item.key.offset = 1; + item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item) * 3; + extent_item.owner = 2; + memcpy(empty_leaf.items + 2, &item, sizeof(item)); + memcpy(empty_leaf.data + item.offset, &extent_item, item.size); + ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 18 * CTREE_BLOCKSIZE); if (ret != sizeof(empty_leaf)) return -1; return 0; From 5de08d7d50ba535b968d97101ed2ab07c2b8eb7d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 24 Feb 2007 06:24:44 -0500 Subject: [PATCH 0010/2985] Btrfs: Break up ctree.c a little Extent fixes Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 16 +- fs/btrfs/ctree.c | 446 ++++++++---------------------------------- fs/btrfs/ctree.h | 12 +- fs/btrfs/disk-io.c | 1 - fs/btrfs/print-tree.c | 72 +++++++ fs/btrfs/print-tree.h | 3 + 6 files changed, 176 insertions(+), 374 deletions(-) create mode 100644 fs/btrfs/print-tree.c create mode 100644 fs/btrfs/print-tree.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index df065dd2dce7..fe73ab9d81d6 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,12 +1,16 @@ -CFLAGS= -g -Wall +CFLAGS = -g -Wall +headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h +objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o -.c.o: - $(CC) $(CFLAGS) -c $< +#.c.o: +# $(CC) $(CFLAGS) -c $< -ctree: ctree.o disk-io.h ctree.h disk-io.o radix-tree.o radix-tree.h mkfs.o - gcc $(CFLAGS) -o ctree ctree.o disk-io.o radix-tree.o mkfs.o +ctree : $(objects) + gcc $(CFLAGS) -o ctree $(objects) -clean: +$(objects) : $(headers) + +clean : rm ctree *.o diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f0abcf1f3939..e497fd963118 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -4,23 +4,21 @@ #include "radix-tree.h" #include "ctree.h" #include "disk-io.h" - -#define SEARCH_READ 0 -#define SEARCH_WRITE 1 - -#define CTREE_EXTENT_PENDING 0 +#include "print-tree.h" int split_node(struct ctree_root *root, struct ctree_path *path, int level); int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size); -struct tree_buffer *alloc_free_block(struct ctree_root *root); -int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks); +int push_node_left(struct ctree_root *root, struct ctree_path *path, int level); +int push_node_right(struct ctree_root *root, + struct ctree_path *path, int level); +int del_ptr(struct ctree_root *root, struct ctree_path *path, int level); -static inline void init_path(struct ctree_path *p) +inline void init_path(struct ctree_path *p) { memset(p, 0, sizeof(*p)); } -static void release_path(struct ctree_root *root, struct ctree_path *p) +void release_path(struct ctree_root *root, struct ctree_path *p) { int i; for (i = 0; i < MAX_LEVEL; i++) { @@ -48,7 +46,7 @@ static inline unsigned int leaf_data_end(struct leaf *leaf) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -static inline int leaf_free_space(struct leaf *leaf) +int leaf_free_space(struct leaf *leaf) { int data_end = leaf_data_end(leaf); int nritems = leaf->header.nritems; @@ -133,7 +131,8 @@ int bin_search(struct node *c, struct key *key, int *slot) * If the key isn't found, the path points to the slot where it should * be inserted. */ -int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len) +int search_slot(struct ctree_root *root, struct key *key, + struct ctree_path *p, int ins_len) { struct tree_buffer *b = root->node; struct node *c; @@ -151,7 +150,8 @@ int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len && c->header.nritems == NODEPTRS_PER_BLOCK) { + if (ins_len > 0 && + c->header.nritems == NODEPTRS_PER_BLOCK) { int sret = split_node(root, p, level); BUG_ON(sret > 0); if (sret) @@ -159,13 +159,37 @@ int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, b = p->nodes[level]; c = &b->node; slot = p->slots[level]; + } else if (ins_len < 0 && + c->header.nritems <= NODEPTRS_PER_BLOCK/4) { + u64 blocknr = b->blocknr; + slot = p->slots[level +1]; + b->count++; + if (push_node_left(root, p, level)) + push_node_right(root, p, level); + if (c->header.nritems == 0 && + level < MAX_LEVEL - 1 && + p->nodes[level + 1]) { + int tslot = p->slots[level + 1]; + + p->slots[level + 1] = slot; + del_ptr(root, p, level + 1); + p->slots[level + 1] = tslot; + tree_block_release(root, b); + free_extent(root, blocknr, 1); + } else { + tree_block_release(root, b); + } + b = p->nodes[level]; + c = &b->node; + slot = p->slots[level]; } b = read_tree_block(root, c->blockptrs[slot]); continue; } else { struct leaf *l = (struct leaf *)c; p->slots[level] = slot; - if (ins_len && leaf_free_space(l) < sizeof(struct item) + ins_len) { + if (ins_len > 0 && leaf_free_space(l) < + sizeof(struct item) + ins_len) { int sret = split_leaf(root, p, ins_len); BUG_ON(sret > 0); if (sret) @@ -355,7 +379,8 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) return 0; } -static int insert_new_root(struct ctree_root *root, struct ctree_path *path, int level) +static int insert_new_root(struct ctree_root *root, + struct ctree_path *path, int level) { struct tree_buffer *t; struct node *lower; @@ -463,7 +488,7 @@ int split_node(struct ctree_root *root, struct ctree_path *path, int level) write_tree_block(root, split_buffer); insert_ptr(root, path, split->keys, split_buffer->blocknr, path->slots[level + 1] + 1, level + 1); - if (path->slots[level] > mid) { + if (path->slots[level] >= mid) { path->slots[level] -= mid; tree_block_release(root, t); path->nodes[level] = split_buffer; @@ -744,8 +769,7 @@ int insert_item(struct ctree_root *root, struct key *key, } /* - * delete the pointer from a given level in the path. The path is not - * fixed up, so after calling this it is not valid at that level. + * delete the pointer from a given node. * * If the delete empties a node, the node is removed from the tree, * continuing all the way the root if required. The root is converted into @@ -778,22 +802,10 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) write_tree_block(root, t); blocknr = t->blocknr; if (node->header.nritems != 0) { - int tslot; if (slot == 0) fixup_low_keys(root, path, node->keys, level + 1); - tslot = path->slots[level+1]; - t->count++; - push_node_left(root, path, level); - if (node->header.nritems) { - push_node_right(root, path, level); - } - if (node->header.nritems) { - tree_block_release(root, t); - break; - } - tree_block_release(root, t); - path->slots[level+1] = tslot; + break; } if (t == root->node) { /* just turn the root into a leaf and break */ @@ -850,12 +862,12 @@ int del_item(struct ctree_root *root, struct ctree_path *path) free_extent(root, leaf_buf->blocknr, 1); } } else { + int used = leaf_space_used(leaf, 0, leaf->header.nritems); if (slot == 0) fixup_low_keys(root, path, &leaf->items[0].key, 1); write_tree_block(root, leaf_buf); /* delete the leaf if it is mostly empty */ - if (leaf_space_used(leaf, 0, leaf->header.nritems) < - LEAF_DATA_SIZE / 4) { + if (used < LEAF_DATA_SIZE / 3) { /* push_leaf_left fixes the path. * make sure the path still points to our leaf * for possible call to del_ptr below @@ -864,81 +876,19 @@ int del_item(struct ctree_root *root, struct ctree_path *path) leaf_buf->count++; push_leaf_left(root, path, 1); if (leaf->header.nritems == 0) { + u64 blocknr = leaf_buf->blocknr; path->slots[1] = slot; del_ptr(root, path, 1); + tree_block_release(root, leaf_buf); + free_extent(root, blocknr, 1); + } else { + tree_block_release(root, leaf_buf); } - tree_block_release(root, leaf_buf); } } return 0; } -static int del_pending_extents(struct ctree_root *extent_root) -{ - int ret; - struct key key; - struct tree_buffer *gang[4]; - int i; - struct ctree_path path; - - while(1) { - ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, - (void **)gang, 0, ARRAY_SIZE(gang), - CTREE_EXTENT_PENDING); - if (!ret) - break; - for (i = 0; i < ret; i++) { - key.objectid = gang[i]->blocknr; - key.flags = 0; - key.offset = 1; - init_path(&path); - ret = search_slot(extent_root, &key, &path, 0); - if (ret) { - BUG(); - // FIXME undo it and return sane - return ret; - } - ret = del_item(extent_root, &path); - if (ret) { - BUG(); - return ret; - } - release_path(extent_root, &path); - radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, - CTREE_EXTENT_PENDING); - tree_block_release(extent_root, gang[i]); - } - } - return 0; -} - -int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) -{ - struct ctree_path path; - struct key key; - struct ctree_root *extent_root = root->extent_root; - struct tree_buffer *t; - int pending_ret; - int ret; - - key.objectid = blocknr; - key.flags = 0; - key.offset = num_blocks; - if (root == extent_root) { - t = read_tree_block(root, key.objectid); - radix_tree_tag_set(&root->cache_radix, key.objectid, CTREE_EXTENT_PENDING); - return 0; - } - init_path(&path); - ret = search_slot(extent_root, &key, &path, 0); - if (ret) - BUG(); - ret = del_item(extent_root, &path); - release_path(extent_root, &path); - pending_ret = del_pending_extents(root->extent_root); - return ret ? ret : pending_ret; -} - int next_leaf(struct ctree_root *root, struct ctree_path *path) { int slot; @@ -976,241 +926,10 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) return 0; } -int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, u64 search_start, - u64 search_end, struct key *ins) -{ - struct ctree_path path; - struct key *key; - int ret; - u64 hole_size = 0; - int slot = 0; - u64 last_block; - int start_found = 0; - struct leaf *l; - struct ctree_root * root = orig_root->extent_root; - - init_path(&path); - ins->objectid = search_start; - ins->offset = 0; - ins->flags = 0; - ret = search_slot(root, ins, &path, 0); - while (1) { - l = &path.nodes[0]->leaf; - slot = path.slots[0]; - if (!l) { - // FIXME allocate root - } - if (slot >= l->header.nritems) { - ret = next_leaf(root, &path); - if (ret == 0) - continue; - if (!start_found) { - ins->objectid = search_start; - ins->offset = num_blocks; - hole_size = search_end - search_start; - start_found = 1; - goto insert; - } - ins->objectid = last_block; - ins->offset = num_blocks; - hole_size = search_end - last_block; - goto insert; - } - key = &l->items[slot].key; - if (start_found) { - hole_size = key->objectid - last_block; - if (hole_size > num_blocks) { - ins->objectid = last_block; - ins->offset = num_blocks; - goto insert; - } - } else - start_found = 1; - last_block = key->objectid + key->offset; -insert_failed: - path.slots[0]++; - } - // FIXME -ENOSPC -insert: - if (orig_root->extent_root == orig_root) { - BUG_ON(num_blocks != 1); - if ((root->current_insert.objectid <= ins->objectid && - root->current_insert.objectid + root->current_insert.offset > - ins->objectid) || - (root->current_insert.objectid > ins->objectid && - root->current_insert.objectid <= ins->objectid + ins->offset) || - radix_tree_tag_get(&root->cache_radix, ins->objectid, - CTREE_EXTENT_PENDING)) { - last_block = ins->objectid + 1; - search_start = last_block; - goto insert_failed; - } - } - release_path(root, &path); - if (ins->offset != 1) - BUG(); - return 0; -} - -static int insert_pending_extents(struct ctree_root *extent_root) -{ - int ret; - struct key key; - struct extent_item item; - struct tree_buffer *gang[4]; - int i; - - // FIXME -ENOSPC - item.refs = 1; - item.owner = extent_root->node->node.header.parentid; - while(1) { - ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, - (void **)gang, 0, ARRAY_SIZE(gang), - CTREE_EXTENT_PENDING); - if (!ret) - break; - for (i = 0; i < ret; i++) { - key.objectid = gang[i]->blocknr; - key.flags = 0; - key.offset = 1; - ret = insert_item(extent_root, &key, &item, sizeof(item)); - if (ret) { - BUG(); - // FIXME undo it and return sane - return ret; - } - radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, - CTREE_EXTENT_PENDING); - tree_block_release(extent_root, gang[i]); - } - } - return 0; -} - -int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, - u64 search_end, u64 owner, struct key *ins, struct tree_buffer **buf) -{ - int ret; - int pending_ret; - struct extent_item extent_item; - - extent_item.refs = 1; - extent_item.owner = owner; - - ret = find_free_extent(root, num_blocks, search_start, search_end, ins); - if (ret) - return ret; - - if (root != root->extent_root) { - memcpy(&root->extent_root->current_insert, ins, sizeof(*ins)); - ret = insert_item(root->extent_root, ins, &extent_item, sizeof(extent_item)); - memset(&root->extent_root->current_insert, 0, sizeof(struct key)); - pending_ret = insert_pending_extents(root->extent_root); - if (ret) - return ret; - if (pending_ret) - return pending_ret; - *buf = find_tree_block(root, ins->objectid); - return 0; - } - /* we're allocating an extent for the extent tree, don't recurse */ - BUG_ON(ins->offset != 1); - *buf = find_tree_block(root, ins->objectid); - BUG_ON(!*buf); - radix_tree_tag_set(&root->cache_radix, ins->objectid, CTREE_EXTENT_PENDING); - (*buf)->count++; - return 0; - -} - -struct tree_buffer *alloc_free_block(struct ctree_root *root) -{ - struct key ins; - int ret; - struct tree_buffer *buf = NULL; - - ret = alloc_extent(root, 1, 0, (unsigned long)-1, root->node->node.header.parentid, - &ins, &buf); - - if (ret) { - BUG(); - return NULL; - } - if (root != root->extent_root) - BUG_ON(radix_tree_tag_get(&root->extent_root->cache_radix, buf->blocknr, - CTREE_EXTENT_PENDING)); - return buf; -} - -void print_leaf(struct leaf *l) -{ - int i; - int nr = l->header.nritems; - struct item *item; - struct extent_item *ei; - printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr, - leaf_free_space(l)); - fflush(stdout); - for (i = 0 ; i < nr ; i++) { - item = l->items + i; - printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n", - i, - item->key.objectid, item->key.flags, item->key.offset, - item->offset, item->size); - fflush(stdout); - printf("\t\titem data %.*s\n", item->size, l->data+item->offset); - ei = (struct extent_item *)(l->data + item->offset); - printf("\t\textent data %u %lu\n", ei->refs, ei->owner); - fflush(stdout); - } -} -void print_tree(struct ctree_root *root, struct tree_buffer *t) -{ - int i; - int nr; - struct node *c; - - if (!t) - return; - c = &t->node; - nr = c->header.nritems; - if (c->header.blocknr != t->blocknr) - BUG(); - if (is_leaf(c->header.flags)) { - print_leaf((struct leaf *)c); - return; - } - printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr, - node_level(c->header.flags), c->header.nritems, - NODEPTRS_PER_BLOCK - c->header.nritems); - fflush(stdout); - for (i = 0; i < nr; i++) { - printf("\tkey %d (%lu %u %lu) block %lu\n", - i, - c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, - c->blockptrs[i]); - fflush(stdout); - } - for (i = 0; i < nr; i++) { - struct tree_buffer *next_buf = read_tree_block(root, - c->blockptrs[i]); - struct node *next = &next_buf->node; - if (is_leaf(next->header.flags) && - node_level(c->header.flags) != 1) - BUG(); - if (node_level(next->header.flags) != - node_level(c->header.flags) - 1) - BUG(); - print_tree(root, next_buf); - tree_block_release(root, next_buf); - } - -} - /* for testing only */ int next_key(int i, int max_key) { - // return rand() % max_key; - return i; + return rand() % max_key; + // return i; } int main() { @@ -1221,8 +940,8 @@ int main() { int i; int num; int ret; - int run_size = 10000; - int max_key = 100000000; + int run_size = 20000000; + int max_key = 100000000; int tree_size = 0; struct ctree_path path; struct ctree_super_block super; @@ -1231,11 +950,6 @@ int main() { root = open_ctree("dbfile", &super); - printf("root tree\n"); - print_tree(root, root->node); - printf("map tree\n"); - print_tree(root->extent_root, root->extent_root->node); - fflush(stdout); srand(55); for (i = 0; i < run_size; i++) { @@ -1243,13 +957,15 @@ int main() { num = next_key(i, max_key); // num = i; sprintf(buf, "string-%d", num); - // printf("insert %d\n", num); + if (i % 10000 == 0) + printf("insert %d:%d\n", num, i); ins.objectid = num; ins.offset = 0; ins.flags = 0; ret = insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; + free(buf); } write_ctree_super(root, &super); close_ctree(root); @@ -1261,6 +977,8 @@ int main() { num = next_key(i, max_key); ins.objectid = num; init_path(&path); + if (i % 10000 == 0) + printf("search %d:%d\n", num, i); ret = search_slot(root, &ins, &path, 0); if (ret) { print_tree(root, root->node); @@ -1283,39 +1001,32 @@ int main() { num = next_key(i, max_key); ins.objectid = num; init_path(&path); - ret = search_slot(root, &ins, &path, 0); - if (ret) - continue; - ret = del_item(root, &path); - if (ret != 0) - BUG(); + ret = search_slot(root, &ins, &path, -1); + if (!ret) { + if (i % 10000 == 0) + printf("del %d:%d\n", num, i); + ret = del_item(root, &path); + if (ret != 0) + BUG(); + tree_size--; + } release_path(root, &path); - tree_size--; } + write_ctree_super(root, &super); + close_ctree(root); + root = open_ctree("dbfile", &super); srand(128); for (i = 0; i < run_size; i++) { buf = malloc(64); num = next_key(i, max_key); sprintf(buf, "string-%d", num); ins.objectid = num; + if (i % 10000 == 0) + printf("insert %d:%d\n", num, i); ret = insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; - if (i >= 5) { - struct key ugh; - ugh.objectid = 5; - ugh.flags = 0; - ugh.offset = 0; - init_path(&path); - ret = search_slot(root, &ugh, &path, 0); - if (ret) { - print_tree(root, root->node); - printf("unable to find 5 %d\n", num); - exit(1); - } - release_path(root, &path); - - } + free(buf); } write_ctree_super(root, &super); close_ctree(root); @@ -1326,6 +1037,8 @@ int main() { num = next_key(i, max_key); ins.objectid = num; init_path(&path); + if (i % 10000 == 0) + printf("search %d:%d\n", num, i); ret = search_slot(root, &ins, &path, 0); if (ret) { print_tree(root, root->node); @@ -1340,7 +1053,7 @@ int main() { int slot; ins.objectid = (u64)-1; init_path(&path); - ret = search_slot(root, &ins, &path, 0); + ret = search_slot(root, &ins, &path, -1); if (ret == 0) BUG(); @@ -1356,6 +1069,8 @@ int main() { if (comp_keys(&last, &leaf->items[slot].key) <= 0) BUG(); memcpy(&last, &leaf->items[slot].key, sizeof(last)); + if (tree_size % 10000 == 0) + printf("big del %d:%d\n", tree_size, i); ret = del_item(root, &path); if (ret != 0) { printf("del_item returned %d\n", ret); @@ -1365,10 +1080,9 @@ int main() { } release_path(root, &path); } - write_ctree_super(root, &super); - close_ctree(root); printf("tree size is now %d\n", tree_size); printf("map tree\n"); - print_tree(root->extent_root, root->extent_root->node); + write_ctree_super(root, &super); + close_ctree(root); return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8c32c0e9267d..b92fbbb5ecd7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,7 +1,7 @@ #ifndef __CTREE__ #define __CTREE__ -#define CTREE_BLOCKSIZE 256 +#define CTREE_BLOCKSIZE 4096 struct key { u64 objectid; @@ -81,4 +81,14 @@ struct ctree_path { struct tree_buffer *nodes[MAX_LEVEL]; int slots[MAX_LEVEL]; }; + +struct tree_buffer *alloc_free_block(struct ctree_root *root); +int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks); +int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len); +void release_path(struct ctree_root *root, struct ctree_path *p); +void init_path(struct ctree_path *p); +int del_item(struct ctree_root *root, struct ctree_path *path); +int insert_item(struct ctree_root *root, struct key *key, void *data, int data_size); +int next_leaf(struct ctree_root *root, struct ctree_path *path); +int leaf_free_space(struct leaf *leaf); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 14955e440773..f4c6ff202ba9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -172,7 +172,6 @@ int close_ctree(struct ctree_root *root) void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) { buf->count--; - write_tree_block(root, buf); if (buf->count < 0) BUG(); if (buf->count == 0) { diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c new file mode 100644 index 000000000000..594d23b5b24c --- /dev/null +++ b/fs/btrfs/print-tree.c @@ -0,0 +1,72 @@ +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" + +void print_leaf(struct leaf *l) +{ + int i; + int nr = l->header.nritems; + struct item *item; + struct extent_item *ei; + printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr, + leaf_free_space(l)); + fflush(stdout); + for (i = 0 ; i < nr ; i++) { + item = l->items + i; + printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n", + i, + item->key.objectid, item->key.flags, item->key.offset, + item->offset, item->size); + fflush(stdout); + printf("\t\titem data %.*s\n", item->size, l->data+item->offset); + ei = (struct extent_item *)(l->data + item->offset); + printf("\t\textent data %u %lu\n", ei->refs, ei->owner); + fflush(stdout); + } +} +void print_tree(struct ctree_root *root, struct tree_buffer *t) +{ + int i; + int nr; + struct node *c; + + if (!t) + return; + c = &t->node; + nr = c->header.nritems; + if (c->header.blocknr != t->blocknr) + BUG(); + if (is_leaf(c->header.flags)) { + print_leaf((struct leaf *)c); + return; + } + printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr, + node_level(c->header.flags), c->header.nritems, + NODEPTRS_PER_BLOCK - c->header.nritems); + fflush(stdout); + for (i = 0; i < nr; i++) { + printf("\tkey %d (%lu %u %lu) block %lu\n", + i, + c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, + c->blockptrs[i]); + fflush(stdout); + } + for (i = 0; i < nr; i++) { + struct tree_buffer *next_buf = read_tree_block(root, + c->blockptrs[i]); + struct node *next = &next_buf->node; + if (is_leaf(next->header.flags) && + node_level(c->header.flags) != 1) + BUG(); + if (node_level(next->header.flags) != + node_level(c->header.flags) - 1) + BUG(); + print_tree(root, next_buf); + tree_block_release(root, next_buf); + } + +} + diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h new file mode 100644 index 000000000000..3c1e9a3e0260 --- /dev/null +++ b/fs/btrfs/print-tree.h @@ -0,0 +1,3 @@ + +void print_leaf(struct leaf *l); +void print_tree(struct ctree_root *root, struct tree_buffer *t); From 00ec4c5161e0adcf8be3cd844cb40239dc393d70 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 24 Feb 2007 12:47:20 -0500 Subject: [PATCH 0011/2985] Btrfs: push_leaf_right Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 109 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 102 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e497fd963118..ef8bfa837532 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -517,6 +517,97 @@ int leaf_space_used(struct leaf *l, int start, int nr) return data_len; } +/* + * push some data in the path leaf to the right, trying to free up at + * least data_size bytes. returns zero if the push worked, nonzero otherwise + */ +int push_leaf_right(struct ctree_root *root, struct ctree_path *path, + int data_size) +{ + struct tree_buffer *left_buf = path->nodes[0]; + struct leaf *left = &left_buf->leaf; + struct leaf *right; + struct tree_buffer *right_buf; + struct tree_buffer *upper; + int slot; + int i; + int free_space; + int push_space = 0; + int push_items = 0; + struct item *item; + + slot = path->slots[1]; + if (!path->nodes[1]) { + return 1; + } + upper = path->nodes[1]; + if (slot >= upper->node.header.nritems - 1) { + return 1; + } + right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]); + right = &right_buf->leaf; + free_space = leaf_free_space(right); + if (free_space < data_size + sizeof(struct item)) { + tree_block_release(root, right_buf); + return 1; + } + for (i = left->header.nritems - 1; i >= 0; i--) { + item = left->items + i; + if (path->slots[0] == i) + push_space += data_size + sizeof(*item); + if (item->size + sizeof(*item) + push_space > free_space) + break; + push_items++; + push_space += item->size + sizeof(*item); + } + if (push_items == 0) { + tree_block_release(root, right_buf); + return 1; + } + /* push left to right */ + push_space = left->items[left->header.nritems - push_items].offset + + left->items[left->header.nritems - push_items].size; + push_space -= leaf_data_end(left); + /* make room in the right data area */ + memmove(right->data + leaf_data_end(right) - push_space, + right->data + leaf_data_end(right), + LEAF_DATA_SIZE - leaf_data_end(right)); + /* copy from the left data area */ + memcpy(right->data + LEAF_DATA_SIZE - push_space, + left->data + leaf_data_end(left), + push_space); + memmove(right->items + push_items, right->items, + right->header.nritems * sizeof(struct item)); + /* copy the items from left to right */ + memcpy(right->items, left->items + left->header.nritems - push_items, + push_items * sizeof(struct item)); + + /* update the item pointers */ + right->header.nritems += push_items; + push_space = LEAF_DATA_SIZE; + for (i = 0; i < right->header.nritems; i++) { + right->items[i].offset = push_space - right->items[i].size; + push_space = right->items[i].offset; + } + left->header.nritems -= push_items; + + write_tree_block(root, left_buf); + write_tree_block(root, right_buf); + memcpy(upper->node.keys + slot + 1, + &right->items[0].key, sizeof(struct key)); + write_tree_block(root, upper); + /* then fixup the leaf pointer in the path */ + // FIXME use nritems in here somehow + if (path->slots[0] >= left->header.nritems) { + path->slots[0] -= left->header.nritems; + tree_block_release(root, path->nodes[0]); + path->nodes[0] = right_buf; + path->slots[1] += 1; + } else { + tree_block_release(root, right_buf); + } + return 0; +} /* * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise @@ -631,7 +722,8 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) int i; int ret; - if (push_leaf_left(root, path, data_size) == 0) { + if (push_leaf_left(root, path, data_size) == 0 || + push_leaf_right(root, path, data_size) == 0) { l_buf = path->nodes[0]; l = &l_buf->leaf; if (leaf_free_space(l) >= sizeof(struct item) + data_size) @@ -875,6 +967,8 @@ int del_item(struct ctree_root *root, struct ctree_path *path) slot = path->slots[1]; leaf_buf->count++; push_leaf_left(root, path, 1); + if (leaf->header.nritems) + push_leaf_right(root, path, 1); if (leaf->header.nritems == 0) { u64 blocknr = leaf_buf->blocknr; path->slots[1] = slot; @@ -929,7 +1023,7 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) /* for testing only */ int next_key(int i, int max_key) { return rand() % max_key; - // return i; + //return i; } int main() { @@ -958,7 +1052,7 @@ int main() { // num = i; sprintf(buf, "string-%d", num); if (i % 10000 == 0) - printf("insert %d:%d\n", num, i); + fprintf(stderr, "insert %d:%d\n", num, i); ins.objectid = num; ins.offset = 0; ins.flags = 0; @@ -978,7 +1072,7 @@ int main() { ins.objectid = num; init_path(&path); if (i % 10000 == 0) - printf("search %d:%d\n", num, i); + fprintf(stderr, "search %d:%d\n", num, i); ret = search_slot(root, &ins, &path, 0); if (ret) { print_tree(root, root->node); @@ -1004,7 +1098,7 @@ int main() { ret = search_slot(root, &ins, &path, -1); if (!ret) { if (i % 10000 == 0) - printf("del %d:%d\n", num, i); + fprintf(stderr, "del %d:%d\n", num, i); ret = del_item(root, &path); if (ret != 0) BUG(); @@ -1022,7 +1116,7 @@ int main() { sprintf(buf, "string-%d", num); ins.objectid = num; if (i % 10000 == 0) - printf("insert %d:%d\n", num, i); + fprintf(stderr, "insert %d:%d\n", num, i); ret = insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; @@ -1038,7 +1132,7 @@ int main() { ins.objectid = num; init_path(&path); if (i % 10000 == 0) - printf("search %d:%d\n", num, i); + fprintf(stderr, "search %d:%d\n", num, i); ret = search_slot(root, &ins, &path, 0); if (ret) { print_tree(root, root->node); @@ -1082,6 +1176,7 @@ int main() { } printf("tree size is now %d\n", tree_size); printf("map tree\n"); + print_tree(root->extent_root, root->extent_root->node); write_ctree_super(root, &super); close_ctree(root); return 0; From 97571fd0c939be8ae9cb57a8c57430a244ce13ae Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 24 Feb 2007 13:39:08 -0500 Subject: [PATCH 0012/2985] Btrfs: cleanup & comment Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ef8bfa837532..7645ab3259ea 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -110,6 +110,10 @@ int generic_bin_search(char *p, int item_size, struct key *key, return 1; } +/* + * simple bin_search frontend that does the right thing for + * leaves vs nodes + */ int bin_search(struct node *c, struct key *key, int *slot) { if (is_leaf(c->header.flags)) { @@ -130,6 +134,10 @@ int bin_search(struct node *c, struct key *key, int *slot) * * If the key isn't found, the path points to the slot where it should * be inserted. + * + * if ins_len > 0, nodes and leaves will be split as we walk down the + * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if + * possible) */ int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len) @@ -379,6 +387,11 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) return 0; } +/* + * helper function to insert a new root level in the tree. + * A new node is allocated, and a single item is inserted to + * point to the existing root + */ static int insert_new_root(struct ctree_root *root, struct ctree_path *path, int level) { @@ -417,6 +430,7 @@ static int insert_new_root(struct ctree_root *root, /* * worker function to insert a single pointer in a node. * the node should have enough room for the pointer already + * * slot and level indicate where you want the key to go, and * blocknr is the block the key points to. */ @@ -449,6 +463,13 @@ int insert_ptr(struct ctree_root *root, return 0; } +/* + * split the node at the specified level in path in two. + * The path is corrected to point to the appropriate node after the split + * + * Before splitting this tries to make some room in the node by pushing + * left and right, if either one works, it returns right away. + */ int split_node(struct ctree_root *root, struct ctree_path *path, int level) { struct tree_buffer *t; @@ -744,10 +765,12 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) right = &right_buffer->leaf; memset(right, 0, sizeof(*right)); if (mid <= slot) { + /* FIXME, just alloc a new leaf here */ if (leaf_space_used(l, mid, nritems - mid) + space_needed > LEAF_DATA_SIZE) BUG(); } else { + /* FIXME, just alloc a new leaf here */ if (leaf_space_used(l, 0, mid + 1) + space_needed > LEAF_DATA_SIZE) BUG(); @@ -983,6 +1006,10 @@ int del_item(struct ctree_root *root, struct ctree_path *path) return 0; } +/* + * walk up the tree as far as required to find the next leaf. + * returns 0 if it found something or -1 if there are no greater leaves. + */ int next_leaf(struct ctree_root *root, struct ctree_path *path) { int slot; @@ -1044,7 +1071,6 @@ int main() { root = open_ctree("dbfile", &super); - srand(55); for (i = 0; i < run_size; i++) { buf = malloc(64); From fec577fb7f516e0d12ff821b1af272fd754e120a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Feb 2007 10:40:21 -0500 Subject: [PATCH 0013/2985] Btrfs: Add fsx-style randomized tree tester Add debug-tree command to print the tree Add extent-tree.c to the repo Comment ctree.h Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 10 +- fs/btrfs/ctree.c | 6 +- fs/btrfs/ctree.h | 62 ++++++++- fs/btrfs/debug-tree.c | 19 +++ fs/btrfs/extent-tree.c | 296 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/random-test.c | 295 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 680 insertions(+), 8 deletions(-) create mode 100644 fs/btrfs/debug-tree.c create mode 100644 fs/btrfs/extent-tree.c create mode 100644 fs/btrfs/random-test.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index fe73ab9d81d6..855e8f499e37 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,11 +6,17 @@ objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o #.c.o: # $(CC) $(CFLAGS) -c $< -ctree : $(objects) - gcc $(CFLAGS) -o ctree $(objects) +all: tester debug-tree + +debug-tree: $(objects) debug-tree.o + gcc $(CFLAGS) -o debug-tree $(objects) debug-tree.o + +tester: $(objects) random-test.o + gcc $(CFLAGS) -o tester $(objects) random-test.o $(objects) : $(headers) clean : rm ctree *.o + diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7645ab3259ea..25dc7b2f7426 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1047,14 +1047,14 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) return 0; } +/* some sample code to insert,search & delete items */ +#if 0 /* for testing only */ int next_key(int i, int max_key) { return rand() % max_key; //return i; } - int main() { - struct ctree_root *root; struct key ins; struct key last = { (u64)-1, 0, 0}; char *buf; @@ -1066,6 +1066,7 @@ int main() { int tree_size = 0; struct ctree_path path; struct ctree_super_block super; + struct ctree_root *root; radix_tree_init(); @@ -1207,3 +1208,4 @@ int main() { close_ctree(root); return 0; } +#endif diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b92fbbb5ecd7..18daccd84535 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,22 +1,36 @@ #ifndef __CTREE__ #define __CTREE__ -#define CTREE_BLOCKSIZE 4096 +#define CTREE_BLOCKSIZE 1024 +/* + * the key defines the order in the tree, and so it also defines (optimal) + * block layout. objectid corresonds to the inode number. The flags + * tells us things about the object, and is a kind of stream selector. + * so for a given inode, keys with flags of 1 might refer to the inode + * data, flags of 2 may point to file data in the btree and flags == 3 + * may point to extents. + * + * offset is the starting byte offset for this key in the stream. + */ struct key { u64 objectid; u32 flags; u64 offset; } __attribute__ ((__packed__)); +/* + * every tree block (leaf or node) starts with this header. + */ struct header { u64 fsid[2]; /* FS specific uuid */ - u64 blocknr; - u64 parentid; + u64 blocknr; /* which block this node is supposed to live in */ + u64 parentid; /* objectid of the tree root */ u32 csum; u32 ham; u16 nritems; u16 flags; + /* generation flags to be added */ } __attribute__ ((__packed__)); #define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct header)) / \ @@ -28,6 +42,11 @@ struct header { struct tree_buffer; +/* + * in ram representation of the tree. extent_root is used for all allocations + * and for the extent tree extent_root root. current_insert is used + * only for the extent tree. + */ struct ctree_root { struct tree_buffer *node; struct ctree_root *extent_root; @@ -36,27 +55,46 @@ struct ctree_root { struct radix_tree_root cache_radix; }; +/* + * describes a tree on disk + */ struct ctree_root_info { u64 fsid[2]; /* FS specific uuid */ u64 blocknr; /* blocknr of this block */ u64 objectid; /* inode number of this root */ - u64 tree_root; /* the tree root */ + u64 tree_root; /* the tree root block */ u32 csum; u32 ham; u64 snapuuid[2]; /* root specific uuid */ } __attribute__ ((__packed__)); +/* + * the super block basically lists the main trees of the FS + * it currently lacks any block count etc etc + */ struct ctree_super_block { struct ctree_root_info root_info; struct ctree_root_info extent_info; } __attribute__ ((__packed__)); +/* + * A leaf is full of items. The exact type of item is defined by + * the key flags parameter. offset and size tell us where to find + * the item in the leaf (relative to the start of the data area) + */ struct item { struct key key; u16 offset; u16 size; } __attribute__ ((__packed__)); +/* + * leaves have an item area and a data area: + * [item0, item1....itemN] [free space] [dataN...data1, data0] + * + * The data is separate from the items to get the keys closer together + * during searches. + */ #define LEAF_DATA_SIZE (CTREE_BLOCKSIZE - sizeof(struct header)) struct leaf { struct header header; @@ -66,17 +104,33 @@ struct leaf { }; } __attribute__ ((__packed__)); +/* + * all non-leaf blocks are nodes, they hold only keys and pointers to + * other blocks + */ struct node { struct header header; struct key keys[NODEPTRS_PER_BLOCK]; u64 blockptrs[NODEPTRS_PER_BLOCK]; } __attribute__ ((__packed__)); +/* + * items in the extent btree are used to record the objectid of the + * owner of the block and the number of references + */ struct extent_item { u32 refs; u64 owner; } __attribute__ ((__packed__)); +/* + * ctree_paths remember the path taken from the root down to the leaf. + * level 0 is always the leaf, and nodes[1...MAX_LEVEL] will point + * to any other levels that are present. + * + * The slots array records the index of the item or block pointer + * used while walking the tree. + */ struct ctree_path { struct tree_buffer *nodes[MAX_LEVEL]; int slots[MAX_LEVEL]; diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c new file mode 100644 index 000000000000..b2c5abfdc74a --- /dev/null +++ b/fs/btrfs/debug-tree.c @@ -0,0 +1,19 @@ +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" + +int main() { + struct ctree_super_block super; + struct ctree_root *root; + radix_tree_init(); + root = open_ctree("dbfile", &super); + printf("root tree\n"); + print_tree(root, root->node); + printf("map tree\n"); + print_tree(root->extent_root, root->extent_root->node); + return 0; +} diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c new file mode 100644 index 000000000000..2b663ef84496 --- /dev/null +++ b/fs/btrfs/extent-tree.c @@ -0,0 +1,296 @@ +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" + +/* + * pending extents are blocks that we're trying to allocate in the extent + * map while trying to grow the map because of other allocations. To avoid + * recursing, they are tagged in the radix tree and cleaned up after + * other allocations are done. The pending tag is also used in the same + * manner for deletes. + */ +#define CTREE_EXTENT_PENDING 0 + +/* + * find all the blocks marked as pending in the radix tree and remove + * them from the extent map + */ +static int del_pending_extents(struct ctree_root *extent_root) +{ + int ret; + struct key key; + struct tree_buffer *gang[4]; + int i; + struct ctree_path path; + + while(1) { + ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, + (void **)gang, 0, + ARRAY_SIZE(gang), + CTREE_EXTENT_PENDING); + if (!ret) + break; + for (i = 0; i < ret; i++) { + key.objectid = gang[i]->blocknr; + key.flags = 0; + key.offset = 1; + init_path(&path); + ret = search_slot(extent_root, &key, &path, 0); + if (ret) { + print_tree(extent_root, extent_root->node); + printf("unable to find %lu\n", key.objectid); + BUG(); + // FIXME undo it and return sane + return ret; + } + ret = del_item(extent_root, &path); + if (ret) { + BUG(); + return ret; + } + release_path(extent_root, &path); + radix_tree_tag_clear(&extent_root->cache_radix, + gang[i]->blocknr, + CTREE_EXTENT_PENDING); + tree_block_release(extent_root, gang[i]); + } + } + return 0; +} + +/* + * remove an extent from the root, returns 0 on success + */ +int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) +{ + struct ctree_path path; + struct key key; + struct ctree_root *extent_root = root->extent_root; + struct tree_buffer *t; + int pending_ret; + int ret; + key.objectid = blocknr; + key.flags = 0; + key.offset = num_blocks; + if (root == extent_root) { + t = read_tree_block(root, key.objectid); + radix_tree_tag_set(&root->cache_radix, key.objectid, + CTREE_EXTENT_PENDING); + return 0; + } + init_path(&path); + ret = search_slot(extent_root, &key, &path, 0); + if (ret) { + print_tree(extent_root, extent_root->node); + printf("failed to find %lu\n", key.objectid); + BUG(); + } + ret = del_item(extent_root, &path); + if (ret) + BUG(); + release_path(extent_root, &path); + pending_ret = del_pending_extents(root->extent_root); + return ret ? ret : pending_ret; +} + +/* + * walks the btree of allocated extents and find a hole of a given size. + * The key ins is changed to record the hole: + * ins->objectid == block start + * ins->flags = 0 + * ins->offset == number of blocks + * Any available blocks before search_start are skipped. + */ +int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, + u64 search_start, u64 search_end, struct key *ins) +{ + struct ctree_path path; + struct key *key; + int ret; + u64 hole_size = 0; + int slot = 0; + u64 last_block; + int start_found; + struct leaf *l; + struct ctree_root * root = orig_root->extent_root; + +check_failed: + init_path(&path); + ins->objectid = search_start; + ins->offset = 0; + ins->flags = 0; + start_found = 0; + ret = search_slot(root, ins, &path, 0); + while (1) { + l = &path.nodes[0]->leaf; + slot = path.slots[0]; + if (slot >= l->header.nritems) { + ret = next_leaf(root, &path); + if (ret == 0) + continue; + if (!start_found) { + ins->objectid = search_start; + ins->offset = num_blocks; + start_found = 1; + goto check_pending; + } + ins->objectid = last_block > search_start ? + last_block : search_start; + ins->offset = num_blocks; + goto check_pending; + } + key = &l->items[slot].key; + if (key->objectid >= search_start) { + if (start_found) { + hole_size = key->objectid - last_block; + if (hole_size > num_blocks) { + ins->objectid = last_block; + ins->offset = num_blocks; + goto check_pending; + } + } else + start_found = 1; + last_block = key->objectid + key->offset; + } + path.slots[0]++; + } + // FIXME -ENOSPC +check_pending: + /* we have to make sure we didn't find an extent that has already + * been allocated by the map tree or the original allocation + */ + release_path(root, &path); + BUG_ON(ins->objectid < search_start); + if (orig_root->extent_root == orig_root) { + BUG_ON(num_blocks != 1); + if ((root->current_insert.objectid <= ins->objectid && + root->current_insert.objectid + + root->current_insert.offset > ins->objectid) || + (root->current_insert.objectid > ins->objectid && + root->current_insert.objectid <= ins->objectid + + ins->offset) || + radix_tree_tag_get(&root->cache_radix, ins->objectid, + CTREE_EXTENT_PENDING)) { + search_start = ins->objectid + 1; + goto check_failed; + } + } + if (ins->offset != 1) + BUG(); + return 0; +} + +/* + * insert all of the pending extents reserved during the original + * allocation. (CTREE_EXTENT_PENDING). Returns zero if it all worked out + */ +static int insert_pending_extents(struct ctree_root *extent_root) +{ + int ret; + struct key key; + struct extent_item item; + struct tree_buffer *gang[4]; + int i; + + // FIXME -ENOSPC + item.refs = 1; + item.owner = extent_root->node->node.header.parentid; + while(1) { + ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, + (void **)gang, 0, + ARRAY_SIZE(gang), + CTREE_EXTENT_PENDING); + if (!ret) + break; + for (i = 0; i < ret; i++) { + key.objectid = gang[i]->blocknr; + key.flags = 0; + key.offset = 1; + ret = insert_item(extent_root, &key, &item, + sizeof(item)); + if (ret) { + BUG(); + // FIXME undo it and return sane + return ret; + } + radix_tree_tag_clear(&extent_root->cache_radix, + gang[i]->blocknr, + CTREE_EXTENT_PENDING); + tree_block_release(extent_root, gang[i]); + } + } + return 0; +} + +/* + * finds a free extent and does all the dirty work required for allocation + * returns the key for the extent through ins, and a tree buffer for + * the first block of the extent through buf. + * + * returns 0 if everything worked, non-zero otherwise. + */ +int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, + u64 search_end, u64 owner, struct key *ins, + struct tree_buffer **buf) +{ + int ret; + int pending_ret; + struct extent_item extent_item; + extent_item.refs = 1; + extent_item.owner = owner; + + ret = find_free_extent(root, num_blocks, search_start, search_end, ins); + if (ret) + return ret; + if (root != root->extent_root) { + memcpy(&root->extent_root->current_insert, ins, sizeof(*ins)); + ret = insert_item(root->extent_root, ins, &extent_item, + sizeof(extent_item)); + memset(&root->extent_root->current_insert, 0, + sizeof(struct key)); + pending_ret = insert_pending_extents(root->extent_root); + if (ret) + return ret; + if (pending_ret) + return pending_ret; + *buf = find_tree_block(root, ins->objectid); + return 0; + } + /* we're allocating an extent for the extent tree, don't recurse */ + BUG_ON(ins->offset != 1); + *buf = find_tree_block(root, ins->objectid); + BUG_ON(!*buf); + radix_tree_tag_set(&root->cache_radix, ins->objectid, + CTREE_EXTENT_PENDING); + (*buf)->count++; + return 0; + +} + +/* + * helper function to allocate a block for a given tree + * returns the tree buffer or NULL. + */ +struct tree_buffer *alloc_free_block(struct ctree_root *root) +{ + struct key ins; + int ret; + struct tree_buffer *buf = NULL; + + ret = alloc_extent(root, 1, 0, (unsigned long)-1, + root->node->node.header.parentid, + &ins, &buf); + + if (ret) { + BUG(); + return NULL; + } + if (root != root->extent_root) + BUG_ON(radix_tree_tag_get(&root->extent_root->cache_radix, + buf->blocknr, CTREE_EXTENT_PENDING)); + return buf; +} diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c new file mode 100644 index 000000000000..3c8c68d55d2f --- /dev/null +++ b/fs/btrfs/random-test.c @@ -0,0 +1,295 @@ +#include +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" + +int keep_running = 1; + +static int setup_key(struct radix_tree_root *root, struct key *key, int exists) +{ + int num = rand(); + unsigned long res[2]; + int ret; + + key->flags = 0; + key->offset = 0; +again: + ret = radix_tree_gang_lookup(root, (void **)res, num, 2); + if (exists) { + if (ret == 0) + return -1; + num = res[0]; + } else if (ret != 0 && num == res[0]) { + num++; + if (ret > 1 && num == res[1]) { + num++; + goto again; + } + } + key->objectid = num; + return 0; +} + +static int ins_one(struct ctree_root *root, struct radix_tree_root *radix) +{ + struct ctree_path path; + struct key key; + int ret; + char buf[128]; + init_path(&path); + ret = setup_key(radix, &key, 0); + sprintf(buf, "str-%lu\n", key.objectid); + ret = insert_item(root, &key, buf, strlen(buf)); + if (ret) + goto error; + radix_tree_preload(GFP_KERNEL); + ret = radix_tree_insert(radix, key.objectid, + (void *)key.objectid); + radix_tree_preload_end(); + if (ret) + goto error; + return ret; +error: + printf("failed to insert %lu\n", key.objectid); + return -1; +} + +static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) +{ + struct ctree_path path; + struct key key; + int ret; + char buf[128]; + init_path(&path); + ret = setup_key(radix, &key, 1); + if (ret < 0) + return 0; + sprintf(buf, "str-%lu\n", key.objectid); + ret = insert_item(root, &key, buf, strlen(buf)); + if (ret != -EEXIST) { + printf("insert on %lu gave us %d\n", key.objectid, ret); + return 1; + } + return 0; +} + +static int del_one(struct ctree_root *root, struct radix_tree_root *radix) +{ + struct ctree_path path; + struct key key; + int ret; + unsigned long *ptr; + init_path(&path); + ret = setup_key(radix, &key, 1); + if (ret < 0) + return 0; + ret = search_slot(root, &key, &path, -1); + if (ret) + goto error; + ret = del_item(root, &path); + release_path(root, &path); + if (ret != 0) + goto error; + ptr = radix_tree_delete(radix, key.objectid); + if (!ptr) + goto error; + return 0; +error: + printf("failed to delete %lu\n", key.objectid); + return -1; +} + +static int lookup_item(struct ctree_root *root, struct radix_tree_root *radix) +{ + struct ctree_path path; + struct key key; + int ret; + init_path(&path); + ret = setup_key(radix, &key, 1); + if (ret < 0) + return 0; + ret = search_slot(root, &key, &path, 0); + release_path(root, &path); + if (ret) + goto error; + return 0; +error: + printf("unable to find key %lu\n", key.objectid); + return -1; +} + +static int lookup_enoent(struct ctree_root *root, struct radix_tree_root *radix) +{ + struct ctree_path path; + struct key key; + int ret; + init_path(&path); + ret = setup_key(radix, &key, 0); + if (ret < 0) + return ret; + ret = search_slot(root, &key, &path, 0); + release_path(root, &path); + if (ret == 0) + goto error; + return 0; +error: + printf("able to find key that should not exist %lu\n", key.objectid); + return -1; +} + +int (*ops[])(struct ctree_root *root, struct radix_tree_root *radix) = +{ ins_one, insert_dup, del_one, lookup_item, lookup_enoent }; + +static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) +{ + struct ctree_path path; + struct key key; + u64 found; + int ret; + int slot; + int i; + key.offset = 0; + key.flags = 0; + key.objectid = (unsigned long)-1; + while(1) { + init_path(&path); + ret = search_slot(root, &key, &path, 0); + slot = path.slots[0]; + if (ret != 0) { + if (slot == 0) { + release_path(root, &path); + break; + } + slot -= 1; + } + for (i = slot; i >= 0; i--) { + found = path.nodes[0]->leaf.items[i].key.objectid; + radix_tree_preload(GFP_KERNEL); + ret = radix_tree_insert(radix, found, (void *)found); + if (ret) { + fprintf(stderr, + "failed to insert %lu into radix\n", + found); + exit(1); + } + + radix_tree_preload_end(); + } + release_path(root, &path); + key.objectid = found - 1; + if (key.objectid > found) + break; + } + return 0; +} + +void sigstopper(int ignored) +{ + keep_running = 0; + fprintf(stderr, "caught exit signal, stopping\n"); +} + +int print_usage(void) +{ + printf("usage: tester [-ih] [-c count] [-f count]\n"); + printf("\t -c count -- iteration count after filling\n"); + printf("\t -f count -- run this many random inserts before starting\n"); + printf("\t -i -- only do initial fill\n"); + printf("\t -h -- this help text\n"); + exit(1); +} +int main(int ac, char **av) +{ + RADIX_TREE(radix, GFP_KERNEL); + struct ctree_super_block super; + struct ctree_root *root; + int i; + int ret; + int count; + int op; + int iterations = 20000; + int init_fill_count = 800000; + int err = 0; + int initial_only = 0; + radix_tree_init(); + root = open_ctree("dbfile", &super); + fill_radix(root, &radix); + + signal(SIGTERM, sigstopper); + signal(SIGINT, sigstopper); + + for (i = 1 ; i < ac ; i++) { + if (strcmp(av[i], "-i") == 0) { + initial_only = 1; + } else if (strcmp(av[i], "-c") == 0) { + iterations = atoi(av[i+1]); + i++; + } else if (strcmp(av[i], "-f") == 0) { + init_fill_count = atoi(av[i+1]); + i++; + } else { + print_usage(); + } + } + for (i = 0; i < init_fill_count; i++) { + ret = ins_one(root, &radix); + if (ret) { + printf("initial fill failed\n"); + err = ret; + goto out; + } + if (i % 10000 == 0) { + printf("initial fill %d level %d count %d\n", i, + node_level(root->node->node.header.flags), + root->node->node.header.nritems); + } + if (keep_running == 0) { + err = 0; + goto out; + } + } + if (initial_only == 1) { + goto out; + } + for (i = 0; i < iterations; i++) { + op = rand() % ARRAY_SIZE(ops); + count = rand() % 128; + if (i % 2000 == 0) { + printf("%d\n", i); + fflush(stdout); + } + if (i && i % 5000 == 0) { + printf("open & close, root level %d nritems %d\n", + node_level(root->node->node.header.flags), + root->node->node.header.nritems); + write_ctree_super(root, &super); + close_ctree(root); + root = open_ctree("dbfile", &super); + } + while(count--) { + ret = ops[op](root, &radix); + if (ret) { + fprintf(stderr, "op %d failed %d:%d\n", + op, i, iterations); + print_tree(root, root->node); + fprintf(stderr, "op %d failed %d:%d\n", + op, i, iterations); + err = ret; + goto out; + } + if (keep_running == 0) { + err = 0; + goto out; + } + } + } +out: + write_ctree_super(root, &super); + close_ctree(root); + return err; +} + From c673024aba596e57c07196cb3400cdcc9d28f3aa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Feb 2007 10:46:55 -0500 Subject: [PATCH 0014/2985] Btrfs: fixup dbfile perms Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f4c6ff202ba9..c42dc72706bf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -113,7 +113,7 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) int fp; int ret; - fp = open(filename, O_CREAT | O_RDWR); + fp = open(filename, O_CREAT | O_RDWR, 0600); if (fp < 0) { free(root); return NULL; From 7cf75962acd27ed0f819df791486e7b803c7109c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Feb 2007 10:55:01 -0500 Subject: [PATCH 0015/2985] Btrfs: u64 cleanups Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/kerncompat.h | 2 +- fs/btrfs/print-tree.c | 10 +++++----- fs/btrfs/random-test.c | 16 ++++++++-------- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 855e8f499e37..ea9b6665da34 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -17,6 +17,6 @@ tester: $(objects) random-test.o $(objects) : $(headers) clean : - rm ctree *.o + rm debug-tree tester *.o diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2b663ef84496..26321524c186 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -42,7 +42,7 @@ static int del_pending_extents(struct ctree_root *extent_root) ret = search_slot(extent_root, &key, &path, 0); if (ret) { print_tree(extent_root, extent_root->node); - printf("unable to find %lu\n", key.objectid); + printf("unable to find %Lu\n", key.objectid); BUG(); // FIXME undo it and return sane return ret; @@ -86,7 +86,7 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) ret = search_slot(extent_root, &key, &path, 0); if (ret) { print_tree(extent_root, extent_root->node); - printf("failed to find %lu\n", key.objectid); + printf("failed to find %Lu\n", key.objectid); BUG(); } ret = del_item(extent_root, &path); diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h index 347ca06205c2..f5efc5f0fff0 100644 --- a/fs/btrfs/kerncompat.h +++ b/fs/btrfs/kerncompat.h @@ -15,7 +15,7 @@ #define BUG() abort() typedef unsigned int u32; -typedef unsigned long u64; +typedef unsigned long long u64; typedef unsigned char u8; typedef unsigned short u16; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 594d23b5b24c..1d591270f4c4 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -11,19 +11,19 @@ void print_leaf(struct leaf *l) int nr = l->header.nritems; struct item *item; struct extent_item *ei; - printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr, + printf("leaf %Lu total ptrs %d free space %d\n", l->header.blocknr, nr, leaf_free_space(l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; - printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n", + printf("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n", i, item->key.objectid, item->key.flags, item->key.offset, item->offset, item->size); fflush(stdout); printf("\t\titem data %.*s\n", item->size, l->data+item->offset); ei = (struct extent_item *)(l->data + item->offset); - printf("\t\textent data %u %lu\n", ei->refs, ei->owner); + printf("\t\textent data %u %Lu\n", ei->refs, ei->owner); fflush(stdout); } } @@ -43,12 +43,12 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t) print_leaf((struct leaf *)c); return; } - printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr, + printf("node %Lu level %d total ptrs %d free spc %lu\n", t->blocknr, node_level(c->header.flags), c->header.nritems, NODEPTRS_PER_BLOCK - c->header.nritems); fflush(stdout); for (i = 0; i < nr; i++) { - printf("\tkey %d (%lu %u %lu) block %lu\n", + printf("\tkey %d (%Lu %u %Lu) block %Lu\n", i, c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, c->blockptrs[i]); diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 3c8c68d55d2f..111a248bd5c2 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -42,7 +42,7 @@ static int ins_one(struct ctree_root *root, struct radix_tree_root *radix) char buf[128]; init_path(&path); ret = setup_key(radix, &key, 0); - sprintf(buf, "str-%lu\n", key.objectid); + sprintf(buf, "str-%Lu\n", key.objectid); ret = insert_item(root, &key, buf, strlen(buf)); if (ret) goto error; @@ -54,7 +54,7 @@ static int ins_one(struct ctree_root *root, struct radix_tree_root *radix) goto error; return ret; error: - printf("failed to insert %lu\n", key.objectid); + printf("failed to insert %Lu\n", key.objectid); return -1; } @@ -68,10 +68,10 @@ static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - sprintf(buf, "str-%lu\n", key.objectid); + sprintf(buf, "str-%Lu\n", key.objectid); ret = insert_item(root, &key, buf, strlen(buf)); if (ret != -EEXIST) { - printf("insert on %lu gave us %d\n", key.objectid, ret); + printf("insert on %Lu gave us %d\n", key.objectid, ret); return 1; } return 0; @@ -99,7 +99,7 @@ static int del_one(struct ctree_root *root, struct radix_tree_root *radix) goto error; return 0; error: - printf("failed to delete %lu\n", key.objectid); + printf("failed to delete %Lu\n", key.objectid); return -1; } @@ -118,7 +118,7 @@ static int lookup_item(struct ctree_root *root, struct radix_tree_root *radix) goto error; return 0; error: - printf("unable to find key %lu\n", key.objectid); + printf("unable to find key %Lu\n", key.objectid); return -1; } @@ -137,7 +137,7 @@ static int lookup_enoent(struct ctree_root *root, struct radix_tree_root *radix) goto error; return 0; error: - printf("able to find key that should not exist %lu\n", key.objectid); + printf("able to find key that should not exist %Lu\n", key.objectid); return -1; } @@ -148,7 +148,7 @@ static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; struct key key; - u64 found; + unsigned long found; int ret; int slot; int i; From 41903fe666ff8e7b1d47ac7b2f5f9a91c7749498 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Feb 2007 10:55:42 -0500 Subject: [PATCH 0016/2985] Btrfs: 32bit cleanups Signed-off-by: Chris Mason --- fs/btrfs/print-tree.c | 2 +- fs/btrfs/random-test.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 1d591270f4c4..a9a14223e186 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -43,7 +43,7 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t) print_leaf((struct leaf *)c); return; } - printf("node %Lu level %d total ptrs %d free spc %lu\n", t->blocknr, + printf("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, node_level(c->header.flags), c->header.nritems, NODEPTRS_PER_BLOCK - c->header.nritems); fflush(stdout); diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 111a248bd5c2..7078263acc3d 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -40,15 +40,16 @@ static int ins_one(struct ctree_root *root, struct radix_tree_root *radix) struct key key; int ret; char buf[128]; + unsigned long oid; init_path(&path); ret = setup_key(radix, &key, 0); sprintf(buf, "str-%Lu\n", key.objectid); ret = insert_item(root, &key, buf, strlen(buf)); if (ret) goto error; + oid = (unsigned long)key.objectid; radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(radix, key.objectid, - (void *)key.objectid); + ret = radix_tree_insert(radix, oid, (void *)oid); radix_tree_preload_end(); if (ret) goto error; From d16302ab0ed26e236e9bde8cd54fd604d7d97297 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Feb 2007 14:26:58 -0500 Subject: [PATCH 0017/2985] Btrfs: more 32 bit cleanups Signed-off-by: Chris Mason --- fs/btrfs/print-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index a9a14223e186..dda08f32c154 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -45,7 +45,7 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t) } printf("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, node_level(c->header.flags), c->header.nritems, - NODEPTRS_PER_BLOCK - c->header.nritems); + (u32)NODEPTRS_PER_BLOCK - c->header.nritems); fflush(stdout); for (i = 0; i < nr; i++) { printf("\tkey %d (%Lu %u %Lu) block %Lu\n", From 8e19f2cd452853d1ca0895c2488c24d40de3d255 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Feb 2007 09:27:02 -0500 Subject: [PATCH 0018/2985] Btrfs: Take out the merge-during-search-on-delete code, it is buggy. Signed-off-by: Chris Mason --- fs/btrfs/TODO | 25 +++++++++++++++++++++++++ fs/btrfs/ctree.c | 42 ++++++++++++++---------------------------- 2 files changed, 39 insertions(+), 28 deletions(-) create mode 100644 fs/btrfs/TODO diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO new file mode 100644 index 000000000000..5c4395c3784a --- /dev/null +++ b/fs/btrfs/TODO @@ -0,0 +1,25 @@ +* cleanup, add more error checking, get rid of BUG_ONs +* Make IO functions look more like the page cache +* Fix ENOSPC handling +* make blocksize a mkfs parameter instead of #define +* make a real mkfs and superblock +* Add shadowing and transactions +* Do checksumming +* Define FS objects in terms of different item types +* Add block mapping tree (simple dm layer) +* Add simple tree locking (semaphore per tree) +* Make allocator smarter +* Port into the kernel +* Add virtual filesystems, mountable snapshots +* Get rid of struct ctree_path, limiting tree levels held at one time +* Release +* Do real tree locking +* Add extent mirroring (backup copies of blocks) +* Add fancy interface to get access to incremental backups +* Add fancy striped extents to make big reads faster +* Use relocation to try and fix write errors +* Make allocator much smarter +* xattrs (directory streams for regular files) +* fsck +* Scrub & defrag + diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 25dc7b2f7426..0aea94224ba3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -167,29 +167,6 @@ int search_slot(struct ctree_root *root, struct key *key, b = p->nodes[level]; c = &b->node; slot = p->slots[level]; - } else if (ins_len < 0 && - c->header.nritems <= NODEPTRS_PER_BLOCK/4) { - u64 blocknr = b->blocknr; - slot = p->slots[level +1]; - b->count++; - if (push_node_left(root, p, level)) - push_node_right(root, p, level); - if (c->header.nritems == 0 && - level < MAX_LEVEL - 1 && - p->nodes[level + 1]) { - int tslot = p->slots[level + 1]; - - p->slots[level + 1] = slot; - del_ptr(root, p, level + 1); - p->slots[level + 1] = tslot; - tree_block_release(root, b); - free_extent(root, blocknr, 1); - } else { - tree_block_release(root, b); - } - b = p->nodes[level]; - c = &b->node; - slot = p->slots[level]; } b = read_tree_block(root, c->blockptrs[slot]); continue; @@ -618,7 +595,6 @@ int push_leaf_right(struct ctree_root *root, struct ctree_path *path, &right->items[0].key, sizeof(struct key)); write_tree_block(root, upper); /* then fixup the leaf pointer in the path */ - // FIXME use nritems in here somehow if (path->slots[0] >= left->header.nritems) { path->slots[0] -= left->header.nritems; tree_block_release(root, path->nodes[0]); @@ -847,8 +823,6 @@ int insert_item(struct ctree_root *root, struct key *key, slot = path.slots[0]; BUG_ON(slot < 0); - if (slot == 0) - fixup_low_keys(root, &path, key, 1); if (slot != nritems) { int i; unsigned int old_data = leaf->items[slot].offset + @@ -877,6 +851,8 @@ int insert_item(struct ctree_root *root, struct key *key, memcpy(leaf->data + data_end - data_size, data, data_size); leaf->header.nritems += 1; write_tree_block(root, leaf_buf); + if (slot == 0) + fixup_low_keys(root, &path, key, 1); if (leaf_free_space(leaf) < 0) BUG(); release_path(root, &path); @@ -914,13 +890,23 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) sizeof(u64) * (nritems - slot - 1)); } node->header.nritems--; - write_tree_block(root, t); blocknr = t->blocknr; + write_tree_block(root, t); if (node->header.nritems != 0) { + int tslot; if (slot == 0) fixup_low_keys(root, path, node->keys, level + 1); - break; + tslot = path->slots[level + 1]; + t->count++; + if (push_node_left(root, path, level)) + push_node_right(root, path, level); + path->slots[level + 1] = tslot; + if (node->header.nritems != 0) { + tree_block_release(root, t); + break; + } + tree_block_release(root, t); } if (t == root->node) { /* just turn the root into a leaf and break */ From af86d07efe83e2f2da3658702a720bc48b1a6d78 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Feb 2007 09:40:58 -0500 Subject: [PATCH 0019/2985] Btrfs: Add sparse checking to Makefile Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 9 +++++++-- fs/btrfs/debug-tree.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index ea9b6665da34..078061c321b0 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -3,8 +3,13 @@ CFLAGS = -g -Wall headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o -#.c.o: -# $(CC) $(CFLAGS) -c $< +# if you don't have sparse installed, use ls instead +check=sparse +#check=ls + +.c.o: + $(check) $< + $(CC) $(CFLAGS) -c $< all: tester debug-tree diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c index b2c5abfdc74a..21f607d8b050 100644 --- a/fs/btrfs/debug-tree.c +++ b/fs/btrfs/debug-tree.c @@ -6,7 +6,7 @@ #include "disk-io.h" #include "print-tree.h" -int main() { +int main(int ac, char **av) { struct ctree_super_block super; struct ctree_root *root; radix_tree_init(); From aa5d6bed255d7f8c655a8f10d760f4247bc8385c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Feb 2007 16:35:06 -0500 Subject: [PATCH 0020/2985] Btrfs: return code checking Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 5 +- fs/btrfs/ctree.c | 478 ++++++++++++++++++++++------------------- fs/btrfs/extent-tree.c | 5 + fs/btrfs/random-test.c | 7 +- 4 files changed, 268 insertions(+), 227 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 078061c321b0..58d4260a04b2 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,10 +1,13 @@ +CC=gcc CFLAGS = -g -Wall headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o # if you don't have sparse installed, use ls instead -check=sparse +CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ + -Wcontext -Wcast-truncate -Wuninitialized -Wshadow -Wundef +check=sparse $(CHECKFLAGS) #check=ls .c.o: diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0aea94224ba3..be2be0272513 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -6,12 +6,15 @@ #include "disk-io.h" #include "print-tree.h" -int split_node(struct ctree_root *root, struct ctree_path *path, int level); -int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size); -int push_node_left(struct ctree_root *root, struct ctree_path *path, int level); -int push_node_right(struct ctree_root *root, +static int split_node(struct ctree_root *root, struct ctree_path *path, + int level); +static int split_leaf(struct ctree_root *root, struct ctree_path *path, + int data_size); +static int push_node_left(struct ctree_root *root, struct ctree_path *path, + int level); +static int push_node_right(struct ctree_root *root, struct ctree_path *path, int level); -int del_ptr(struct ctree_root *root, struct ctree_path *path, int level); +static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level); inline void init_path(struct ctree_path *p) { @@ -26,6 +29,7 @@ void release_path(struct ctree_root *root, struct ctree_path *p) break; tree_block_release(root, p->nodes[i]); } + memset(p, 0, sizeof(*p)); } /* @@ -74,6 +78,67 @@ int comp_keys(struct key *k1, struct key *k2) return 0; } +int check_node(struct ctree_path *path, int level) +{ + int i; + struct node *parent = NULL; + struct node *node = &path->nodes[level]->node; + int parent_slot; + + if (path->nodes[level + 1]) + parent = &path->nodes[level + 1]->node; + parent_slot = path->slots[level + 1]; + if (parent && node->header.nritems > 0) { + struct key *parent_key; + parent_key = &parent->keys[parent_slot]; + BUG_ON(memcmp(parent_key, node->keys, sizeof(struct key))); + BUG_ON(parent->blockptrs[parent_slot] != node->header.blocknr); + } + BUG_ON(node->header.nritems > NODEPTRS_PER_BLOCK); + for (i = 0; i < node->header.nritems - 2; i++) { + BUG_ON(comp_keys(&node->keys[i], &node->keys[i+1]) >= 0); + } + return 0; +} + +int check_leaf(struct ctree_path *path, int level) +{ + int i; + struct leaf *leaf = &path->nodes[level]->leaf; + struct node *parent = NULL; + int parent_slot; + + if (path->nodes[level + 1]) + parent = &path->nodes[level + 1]->node; + parent_slot = path->slots[level + 1]; + if (parent && leaf->header.nritems > 0) { + struct key *parent_key; + parent_key = &parent->keys[parent_slot]; + BUG_ON(memcmp(parent_key, &leaf->items[0].key, + sizeof(struct key))); + BUG_ON(parent->blockptrs[parent_slot] != leaf->header.blocknr); + } + for (i = 0; i < leaf->header.nritems - 2; i++) { + BUG_ON(comp_keys(&leaf->items[i].key, + &leaf->items[i+1].key) >= 0); + BUG_ON(leaf->items[i].offset != leaf->items[i + 1].offset + + leaf->items[i + 1].size); + if (i == 0) { + BUG_ON(leaf->items[i].offset + leaf->items[i].size != + LEAF_DATA_SIZE); + } + } + BUG_ON(leaf_free_space(leaf) < 0); + return 0; +} + +int check_block(struct ctree_path *path, int level) +{ + if (level == 0) + return check_leaf(path, level); + return check_node(path, level); +} + /* * search for key in the array p. items p are item_size apart * and there are 'max' items in p @@ -133,7 +198,8 @@ int bin_search(struct node *c, struct key *key, int *slot) * level of the path (level 0) * * If the key isn't found, the path points to the slot where it should - * be inserted. + * be inserted, and 1 is returned. If there are other errors during the + * search a negative error number is returned. * * if ins_len > 0, nodes and leaves will be split as we walk down the * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if @@ -153,6 +219,9 @@ int search_slot(struct ctree_root *root, struct key *key, c = &b->node; level = node_level(c->header.flags); p->nodes[level] = b; + ret = check_block(p, level); + if (ret) + return -1; ret = bin_search(c, key, &slot); if (!is_leaf(c->header.flags)) { if (ret && slot > 0) @@ -183,7 +252,7 @@ int search_slot(struct ctree_root *root, struct key *key, return ret; } } - return -1; + return 1; } /* @@ -192,12 +261,17 @@ int search_slot(struct ctree_root *root, struct key *key, * This is used after shifting pointers to the left, so it stops * fixing up pointers when a given leaf/node is not in slot 0 of the * higher levels + * + * If this fails to write a tree block, it returns -1, but continues + * fixing up the blocks in ram so the tree is consistent. */ -static void fixup_low_keys(struct ctree_root *root, +static int fixup_low_keys(struct ctree_root *root, struct ctree_path *path, struct key *key, int level) { int i; + int ret = 0; + int wret; for (i = level; i < MAX_LEVEL; i++) { struct node *t; int tslot = path->slots[i]; @@ -205,10 +279,13 @@ static void fixup_low_keys(struct ctree_root *root, break; t = &path->nodes[i]->node; memcpy(t->keys + tslot, key, sizeof(*key)); - write_tree_block(root, path->nodes[i]); + wret = write_tree_block(root, path->nodes[i]); + if (wret) + ret = wret; if (tslot != 0) break; } + return ret; } /* @@ -220,8 +297,12 @@ static void fixup_low_keys(struct ctree_root *root, * be modified to reflect the push. * * The path is altered to reflect the push. + * + * returns 0 if some ptrs were pushed left, < 0 if there was some horrible + * error, and > 0 if there was no room in the left hand block. */ -int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) +static int push_node_left(struct ctree_root *root, struct ctree_path *path, + int level) { int slot; struct node *left; @@ -231,6 +312,8 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) int right_nritems; struct tree_buffer *t; struct tree_buffer *right_buf; + int ret = 0; + int wret; if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) return 1; @@ -265,10 +348,17 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) left->header.nritems += push_items; /* adjust the pointers going up the tree */ - fixup_low_keys(root, path, right->keys, level + 1); + wret = fixup_low_keys(root, path, right->keys, level + 1); + if (wret < 0) + ret = wret; - write_tree_block(root, t); - write_tree_block(root, right_buf); + wret = write_tree_block(root, t); + if (wret < 0) + ret = wret; + + wret = write_tree_block(root, right_buf); + if (wret < 0) + ret = wret; /* then fixup the leaf pointer in the path */ if (path->slots[level] < push_items) { @@ -280,7 +370,7 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) path->slots[level] -= push_items; tree_block_release(root, t); } - return 0; + return ret; } /* @@ -292,8 +382,12 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) * be modified to reflect the push. * * The path is altered to reflect the push. + * + * returns 0 if some ptrs were pushed, < 0 if there was some horrible + * error, and > 0 if there was no room in the right hand block. */ -int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) +static int push_node_right(struct ctree_root *root, struct ctree_path *path, + int level) { int slot; struct tree_buffer *t; @@ -368,6 +462,8 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) * helper function to insert a new root level in the tree. * A new node is allocated, and a single item is inserted to * point to the existing root + * + * returns zero on success or < 0 on failure. */ static int insert_new_root(struct ctree_root *root, struct ctree_path *path, int level) @@ -410,8 +506,10 @@ static int insert_new_root(struct ctree_root *root, * * slot and level indicate where you want the key to go, and * blocknr is the block the key points to. + * + * returns zero on success and < 0 on any error */ -int insert_ptr(struct ctree_root *root, +static int insert_ptr(struct ctree_root *root, struct ctree_path *path, struct key *key, u64 blocknr, int slot, int level) { @@ -446,8 +544,11 @@ int insert_ptr(struct ctree_root *root, * * Before splitting this tries to make some room in the node by pushing * left and right, if either one works, it returns right away. + * + * returns 0 on success and < 0 on failure */ -int split_node(struct ctree_root *root, struct ctree_path *path, int level) +static int split_node(struct ctree_root *root, struct ctree_path *path, + int level) { struct tree_buffer *t; struct node *c; @@ -455,13 +556,18 @@ int split_node(struct ctree_root *root, struct ctree_path *path, int level) struct node *split; int mid; int ret; + int wret; ret = push_node_left(root, path, level); if (!ret) return 0; + if (ret < 0) + return ret; ret = push_node_right(root, path, level); if (!ret) return 0; + if (ret < 0) + return ret; t = path->nodes[level]; c = &t->node; if (t == root->node) { @@ -482,10 +588,19 @@ int split_node(struct ctree_root *root, struct ctree_path *path, int level) (c->header.nritems - mid) * sizeof(u64)); split->header.nritems = c->header.nritems - mid; c->header.nritems = mid; - write_tree_block(root, t); - write_tree_block(root, split_buffer); - insert_ptr(root, path, split->keys, split_buffer->blocknr, - path->slots[level + 1] + 1, level + 1); + ret = 0; + + wret = write_tree_block(root, t); + if (wret) + ret = wret; + wret = write_tree_block(root, split_buffer); + if (wret) + ret = wret; + wret = insert_ptr(root, path, split->keys, split_buffer->blocknr, + path->slots[level + 1] + 1, level + 1); + if (wret) + ret = wret; + if (path->slots[level] >= mid) { path->slots[level] -= mid; tree_block_release(root, t); @@ -494,7 +609,7 @@ int split_node(struct ctree_root *root, struct ctree_path *path, int level) } else { tree_block_release(root, split_buffer); } - return 0; + return ret; } /* @@ -502,7 +617,7 @@ int split_node(struct ctree_root *root, struct ctree_path *path, int level) * and nr indicate which items in the leaf to check. This totals up the * space used both by the item structs and the item data */ -int leaf_space_used(struct leaf *l, int start, int nr) +static int leaf_space_used(struct leaf *l, int start, int nr) { int data_len; int end = start + nr - 1; @@ -518,9 +633,12 @@ int leaf_space_used(struct leaf *l, int start, int nr) /* * push some data in the path leaf to the right, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise + * + * returns 1 if the push failed because the other node didn't have enough + * room, 0 if everything worked out and < 0 if there were major errors. */ -int push_leaf_right(struct ctree_root *root, struct ctree_path *path, - int data_size) +static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, + int data_size) { struct tree_buffer *left_buf = path->nodes[0]; struct leaf *left = &left_buf->leaf; @@ -609,8 +727,8 @@ int push_leaf_right(struct ctree_root *root, struct ctree_path *path, * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise */ -int push_leaf_left(struct ctree_root *root, struct ctree_path *path, - int data_size) +static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, + int data_size) { struct tree_buffer *right_buf = path->nodes[0]; struct leaf *right = &right_buf->leaf; @@ -623,6 +741,8 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, int push_items = 0; struct item *item; int old_left_nritems; + int ret = 0; + int wret; slot = path->slots[1]; if (slot == 0) { @@ -681,10 +801,16 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, push_space = right->items[i].offset; } - write_tree_block(root, t); - write_tree_block(root, right_buf); + wret = write_tree_block(root, t); + if (wret) + ret = wret; + wret = write_tree_block(root, right_buf); + if (wret) + ret = wret; - fixup_low_keys(root, path, &right->items[0].key, 1); + wret = fixup_low_keys(root, path, &right->items[0].key, 1); + if (wret) + ret = wret; /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { @@ -697,17 +823,20 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path, path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); - return 0; + return ret; } /* * split the path's leaf in two, making sure there is at least data_size * available for the resulting leaf level of the path. + * + * returns 0 if all went well and < 0 on failure. */ -int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) +static int split_leaf(struct ctree_root *root, struct ctree_path *path, + int data_size) { - struct tree_buffer *l_buf = path->nodes[0]; - struct leaf *l = &l_buf->leaf; + struct tree_buffer *l_buf; + struct leaf *l; int nritems; int mid; int slot; @@ -718,14 +847,23 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) int rt_data_off; int i; int ret; + int wret; - if (push_leaf_left(root, path, data_size) == 0 || - push_leaf_right(root, path, data_size) == 0) { - l_buf = path->nodes[0]; - l = &l_buf->leaf; - if (leaf_free_space(l) >= sizeof(struct item) + data_size) - return 0; + wret = push_leaf_left(root, path, data_size); + if (wret < 0) + return wret; + if (wret) { + wret = push_leaf_right(root, path, data_size); + if (wret < 0) + return wret; } + l_buf = path->nodes[0]; + l = &l_buf->leaf; + + /* did the pushes work? */ + if (leaf_free_space(l) >= sizeof(struct item) + data_size) + return 0; + if (!path->nodes[1]) { ret = insert_new_root(root, path, 1); if (ret) @@ -768,10 +906,17 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) right->items[i].offset += rt_data_off; l->header.nritems = mid; - ret = insert_ptr(root, path, &right->items[0].key, + ret = 0; + wret = insert_ptr(root, path, &right->items[0].key, right_buffer->blocknr, path->slots[1] + 1, 1); - write_tree_block(root, right_buffer); - write_tree_block(root, l_buf); + if (wret) + ret = wret; + wret = write_tree_block(root, right_buffer); + if (wret) + ret = wret; + wret = write_tree_block(root, l_buf); + if (wret) + ret = wret; BUG_ON(path->slots[0] != slot); if (mid <= slot) { @@ -792,7 +937,8 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) int insert_item(struct ctree_root *root, struct key *key, void *data, int data_size) { - int ret; + int ret = 0; + int wret; int slot; int slot_orig; struct leaf *leaf; @@ -810,6 +956,10 @@ int insert_item(struct ctree_root *root, struct key *key, release_path(root, &path); return -EEXIST; } + if (ret < 0) { + release_path(root, &path); + return ret; + } slot_orig = path.slots[0]; leaf_buf = path.nodes[0]; @@ -850,13 +1000,19 @@ int insert_item(struct ctree_root *root, struct key *key, leaf->items[slot].size = data_size; memcpy(leaf->data + data_end - data_size, data, data_size); leaf->header.nritems += 1; - write_tree_block(root, leaf_buf); + + ret = 0; if (slot == 0) - fixup_low_keys(root, &path, key, 1); + ret = fixup_low_keys(root, &path, key, 1); + + wret = write_tree_block(root, leaf_buf); + if (wret) + ret = wret; + if (leaf_free_space(leaf) < 0) BUG(); release_path(root, &path); - return 0; + return ret; } /* @@ -866,13 +1022,15 @@ int insert_item(struct ctree_root *root, struct key *key, * continuing all the way the root if required. The root is converted into * a leaf if all the nodes are emptied. */ -int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) +static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) { int slot; struct tree_buffer *t; struct node *node; int nritems; u64 blocknr; + int wret; + int ret = 0; while(1) { t = path->nodes[level]; @@ -894,13 +1052,27 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) write_tree_block(root, t); if (node->header.nritems != 0) { int tslot; - if (slot == 0) - fixup_low_keys(root, path, node->keys, - level + 1); + if (slot == 0) { + wret = fixup_low_keys(root, path, + node->keys, + level + 1); + if (wret) + ret = wret; + } tslot = path->slots[level + 1]; t->count++; - if (push_node_left(root, path, level)) - push_node_right(root, path, level); + wret = push_node_left(root, path, level); + if (wret < 0) { + ret = wret; + break; + } + if (node->header.nritems != 0) { + wret = push_node_right(root, path, level); + if (wret < 0) { + ret = wret; + break; + } + } path->slots[level + 1] = tslot; if (node->header.nritems != 0) { tree_block_release(root, t); @@ -919,7 +1091,7 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) if (!path->nodes[level]) BUG(); } - return 0; + return ret; } /* @@ -933,6 +1105,8 @@ int del_item(struct ctree_root *root, struct ctree_path *path) struct tree_buffer *leaf_buf; int doff; int dsize; + int ret = 0; + int wret; leaf_buf = path->nodes[0]; leaf = &leaf_buf->leaf; @@ -959,14 +1133,23 @@ int del_item(struct ctree_root *root, struct ctree_path *path) leaf->header.flags = node_level(0); write_tree_block(root, leaf_buf); } else { - del_ptr(root, path, 1); + wret = del_ptr(root, path, 1); + if (wret) + ret = wret; free_extent(root, leaf_buf->blocknr, 1); } } else { int used = leaf_space_used(leaf, 0, leaf->header.nritems); - if (slot == 0) - fixup_low_keys(root, path, &leaf->items[0].key, 1); - write_tree_block(root, leaf_buf); + if (slot == 0) { + wret = fixup_low_keys(root, path, + &leaf->items[0].key, 1); + if (wret) + ret = wret; + } + wret = write_tree_block(root, leaf_buf); + if (wret) + ret = wret; + /* delete the leaf if it is mostly empty */ if (used < LEAF_DATA_SIZE / 3) { /* push_leaf_left fixes the path. @@ -975,13 +1158,20 @@ int del_item(struct ctree_root *root, struct ctree_path *path) */ slot = path->slots[1]; leaf_buf->count++; - push_leaf_left(root, path, 1); - if (leaf->header.nritems) - push_leaf_right(root, path, 1); + wret = push_leaf_left(root, path, 1); + if (wret < 0) + ret = wret; + if (leaf->header.nritems) { + wret = push_leaf_right(root, path, 1); + if (wret < 0) + ret = wret; + } if (leaf->header.nritems == 0) { u64 blocknr = leaf_buf->blocknr; path->slots[1] = slot; - del_ptr(root, path, 1); + wret = del_ptr(root, path, 1); + if (wret) + ret = wret; tree_block_release(root, leaf_buf); free_extent(root, blocknr, 1); } else { @@ -989,7 +1179,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) } } } - return 0; + return ret; } /* @@ -1033,165 +1223,3 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) return 0; } -/* some sample code to insert,search & delete items */ -#if 0 -/* for testing only */ -int next_key(int i, int max_key) { - return rand() % max_key; - //return i; -} -int main() { - struct key ins; - struct key last = { (u64)-1, 0, 0}; - char *buf; - int i; - int num; - int ret; - int run_size = 20000000; - int max_key = 100000000; - int tree_size = 0; - struct ctree_path path; - struct ctree_super_block super; - struct ctree_root *root; - - radix_tree_init(); - - - root = open_ctree("dbfile", &super); - srand(55); - for (i = 0; i < run_size; i++) { - buf = malloc(64); - num = next_key(i, max_key); - // num = i; - sprintf(buf, "string-%d", num); - if (i % 10000 == 0) - fprintf(stderr, "insert %d:%d\n", num, i); - ins.objectid = num; - ins.offset = 0; - ins.flags = 0; - ret = insert_item(root, &ins, buf, strlen(buf)); - if (!ret) - tree_size++; - free(buf); - } - write_ctree_super(root, &super); - close_ctree(root); - - root = open_ctree("dbfile", &super); - printf("starting search\n"); - srand(55); - for (i = 0; i < run_size; i++) { - num = next_key(i, max_key); - ins.objectid = num; - init_path(&path); - if (i % 10000 == 0) - fprintf(stderr, "search %d:%d\n", num, i); - ret = search_slot(root, &ins, &path, 0); - if (ret) { - print_tree(root, root->node); - printf("unable to find %d\n", num); - exit(1); - } - release_path(root, &path); - } - write_ctree_super(root, &super); - close_ctree(root); - root = open_ctree("dbfile", &super); - printf("node %p level %d total ptrs %d free spc %lu\n", root->node, - node_level(root->node->node.header.flags), - root->node->node.header.nritems, - NODEPTRS_PER_BLOCK - root->node->node.header.nritems); - printf("all searches good, deleting some items\n"); - i = 0; - srand(55); - for (i = 0 ; i < run_size/4; i++) { - num = next_key(i, max_key); - ins.objectid = num; - init_path(&path); - ret = search_slot(root, &ins, &path, -1); - if (!ret) { - if (i % 10000 == 0) - fprintf(stderr, "del %d:%d\n", num, i); - ret = del_item(root, &path); - if (ret != 0) - BUG(); - tree_size--; - } - release_path(root, &path); - } - write_ctree_super(root, &super); - close_ctree(root); - root = open_ctree("dbfile", &super); - srand(128); - for (i = 0; i < run_size; i++) { - buf = malloc(64); - num = next_key(i, max_key); - sprintf(buf, "string-%d", num); - ins.objectid = num; - if (i % 10000 == 0) - fprintf(stderr, "insert %d:%d\n", num, i); - ret = insert_item(root, &ins, buf, strlen(buf)); - if (!ret) - tree_size++; - free(buf); - } - write_ctree_super(root, &super); - close_ctree(root); - root = open_ctree("dbfile", &super); - srand(128); - printf("starting search2\n"); - for (i = 0; i < run_size; i++) { - num = next_key(i, max_key); - ins.objectid = num; - init_path(&path); - if (i % 10000 == 0) - fprintf(stderr, "search %d:%d\n", num, i); - ret = search_slot(root, &ins, &path, 0); - if (ret) { - print_tree(root, root->node); - printf("unable to find %d\n", num); - exit(1); - } - release_path(root, &path); - } - printf("starting big long delete run\n"); - while(root->node && root->node->node.header.nritems > 0) { - struct leaf *leaf; - int slot; - ins.objectid = (u64)-1; - init_path(&path); - ret = search_slot(root, &ins, &path, -1); - if (ret == 0) - BUG(); - - leaf = &path.nodes[0]->leaf; - slot = path.slots[0]; - if (slot != leaf->header.nritems) - BUG(); - while(path.slots[0] > 0) { - path.slots[0] -= 1; - slot = path.slots[0]; - leaf = &path.nodes[0]->leaf; - - if (comp_keys(&last, &leaf->items[slot].key) <= 0) - BUG(); - memcpy(&last, &leaf->items[slot].key, sizeof(last)); - if (tree_size % 10000 == 0) - printf("big del %d:%d\n", tree_size, i); - ret = del_item(root, &path); - if (ret != 0) { - printf("del_item returned %d\n", ret); - BUG(); - } - tree_size--; - } - release_path(root, &path); - } - printf("tree size is now %d\n", tree_size); - printf("map tree\n"); - print_tree(root->extent_root, root->extent_root->node); - write_ctree_super(root, &super); - close_ctree(root); - return 0; -} -#endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 26321524c186..97715bae7847 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -125,6 +125,11 @@ check_failed: ins->flags = 0; start_found = 0; ret = search_slot(root, ins, &path, 0); + if (ret < 0) { + release_path(root, &path); + return ret; + } + while (1) { l = &path.nodes[0]->leaf; slot = path.slots[0]; diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 7078263acc3d..cebaf6483927 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -134,7 +134,7 @@ static int lookup_enoent(struct ctree_root *root, struct radix_tree_root *radix) return ret; ret = search_slot(root, &key, &path, 0); release_path(root, &path); - if (ret == 0) + if (ret <= 0) goto error; return 0; error: @@ -153,12 +153,17 @@ static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) int ret; int slot; int i; + key.offset = 0; key.flags = 0; key.objectid = (unsigned long)-1; while(1) { init_path(&path); ret = search_slot(root, &key, &path, 0); + if (ret < 0) { + release_path(root, &path); + return ret; + } slot = path.slots[0]; if (ret != 0) { if (slot == 0) { From 0f70abe2b39d19171d4133d2ffdf77fb9113106a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Feb 2007 16:46:22 -0500 Subject: [PATCH 0021/2985] Btrfs: more return code checking Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 17 ++++++++++++----- fs/btrfs/extent-tree.c | 15 +++++++++------ 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index be2be0272513..273239932255 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1087,7 +1087,9 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) break; } level++; - free_extent(root, blocknr, 1); + wret = free_extent(root, blocknr, 1); + if (wret) + ret = wret; if (!path->nodes[level]) BUG(); } @@ -1136,7 +1138,9 @@ int del_item(struct ctree_root *root, struct ctree_path *path) wret = del_ptr(root, path, 1); if (wret) ret = wret; - free_extent(root, leaf_buf->blocknr, 1); + wret = free_extent(root, leaf_buf->blocknr, 1); + if (wret) + ret = wret; } } else { int used = leaf_space_used(leaf, 0, leaf->header.nritems); @@ -1173,7 +1177,9 @@ int del_item(struct ctree_root *root, struct ctree_path *path) if (wret) ret = wret; tree_block_release(root, leaf_buf); - free_extent(root, blocknr, 1); + wret = free_extent(root, blocknr, 1); + if (wret) + ret = wret; } else { tree_block_release(root, leaf_buf); } @@ -1184,7 +1190,8 @@ int del_item(struct ctree_root *root, struct ctree_path *path) /* * walk up the tree as far as required to find the next leaf. - * returns 0 if it found something or -1 if there are no greater leaves. + * returns 0 if it found something or 1 if there are no greater leaves. + * returns < 0 on io errors. */ int next_leaf(struct ctree_root *root, struct ctree_path *path) { @@ -1196,7 +1203,7 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) while(level < MAX_LEVEL) { if (!path->nodes[level]) - return -1; + return 1; slot = path->slots[level] + 1; c = path->nodes[level]; if (slot >= c->node.header.nritems) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 97715bae7847..074f4b182f16 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -105,8 +105,8 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ -int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, - u64 search_start, u64 search_end, struct key *ins) +static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, + u64 search_start, u64 search_end, struct key *ins) { struct ctree_path path; struct key *key; @@ -125,10 +125,8 @@ check_failed: ins->flags = 0; start_found = 0; ret = search_slot(root, ins, &path, 0); - if (ret < 0) { - release_path(root, &path); - return ret; - } + if (ret < 0) + goto error; while (1) { l = &path.nodes[0]->leaf; @@ -137,6 +135,8 @@ check_failed: ret = next_leaf(root, &path); if (ret == 0) continue; + if (ret < 0) + goto error; if (!start_found) { ins->objectid = search_start; ins->offset = num_blocks; @@ -187,6 +187,9 @@ check_pending: if (ins->offset != 1) BUG(); return 0; +error: + release_path(root, &path); + return ret; } /* From bb8039515d7c1b521ea22f095b43618ccc771885 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 1 Mar 2007 12:04:21 -0500 Subject: [PATCH 0022/2985] Btrfs: merge on the way down during deletes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 420 ++++++++++++++++++++++------------------------- 1 file changed, 200 insertions(+), 220 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 273239932255..df4a19d65206 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -10,11 +10,10 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, int level); static int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size); -static int push_node_left(struct ctree_root *root, struct ctree_path *path, - int level); -static int push_node_right(struct ctree_root *root, - struct ctree_path *path, int level); -static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level); +static int push_node_left(struct ctree_root *root, struct tree_buffer *dst, + struct tree_buffer *src); +static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, + int slot); inline void init_path(struct ctree_path *p) { @@ -192,6 +191,138 @@ int bin_search(struct node *c, struct key *key, int *slot) return -1; } +struct tree_buffer *read_node_slot(struct ctree_root *root, + struct tree_buffer *parent_buf, + int slot) +{ + struct node *node = &parent_buf->node; + if (slot < 0) + return NULL; + if (slot >= node->header.nritems) + return NULL; + return read_tree_block(root, node->blockptrs[slot]); +} + +static int balance_level(struct ctree_root *root, struct ctree_path *path, + int level) +{ + struct tree_buffer *right_buf; + struct tree_buffer *mid_buf; + struct tree_buffer *left_buf; + struct tree_buffer *parent_buf = NULL; + struct node *right = NULL; + struct node *mid; + struct node *left = NULL; + struct node *parent = NULL; + int ret = 0; + int wret; + int pslot; + int used = 0; + int count; + int orig_slot = path->slots[level]; + + if (level == 0) + return 0; + + mid_buf = path->nodes[level]; + mid = &mid_buf->node; + if (level < MAX_LEVEL - 1) + parent_buf = path->nodes[level + 1]; + pslot = path->slots[level + 1]; + + if (!parent_buf) { + struct tree_buffer *child; + u64 blocknr = mid_buf->blocknr; + + if (mid->header.nritems != 1) + return 0; + + /* promote the child to a root */ + child = read_node_slot(root, mid_buf, 0); + BUG_ON(!child); + root->node = child; + path->nodes[level] = NULL; + /* once for the path */ + tree_block_release(root, mid_buf); + /* once for the root ptr */ + tree_block_release(root, mid_buf); + return free_extent(root, blocknr, 1); + } + parent = &parent_buf->node; + + if (mid->header.nritems > NODEPTRS_PER_BLOCK / 4) + return 0; + + // print_tree(root, root->node); + left_buf = read_node_slot(root, parent_buf, pslot - 1); + right_buf = read_node_slot(root, parent_buf, pslot + 1); + if (right_buf) { + right = &right_buf->node; + used = right->header.nritems; + count = 1; + } + if (left_buf) { + left = &left_buf->node; + used += left->header.nritems; + orig_slot += left->header.nritems; + count++; + } + if (left_buf) + push_node_left(root, left_buf, mid_buf); + if (right_buf) { + push_node_left(root, mid_buf, right_buf); + if (right->header.nritems == 0) { + u64 blocknr = right_buf->blocknr; + tree_block_release(root, right_buf); + right_buf = NULL; + right = NULL; + wret = del_ptr(root, path, level + 1, pslot + 1); + if (wret) + ret = wret; + wret = free_extent(root, blocknr, 1); + if (wret) + ret = wret; + } else { + memcpy(parent->keys + pslot + 1, right->keys, + sizeof(struct key)); + } + } + if (mid->header.nritems == 0) { + u64 blocknr = mid_buf->blocknr; + tree_block_release(root, mid_buf); + mid_buf = NULL; + mid = NULL; + wret = del_ptr(root, path, level + 1, pslot); + if (wret) + ret = wret; + wret = free_extent(root, blocknr, 1); + if (wret) + ret = wret; + } else + memcpy(parent->keys + pslot, mid->keys, sizeof(struct key)); + + if (left_buf) { + if (left->header.nritems >= orig_slot) { + left_buf->count++; // released below + path->nodes[level] = left_buf; + path->slots[level + 1] -= 1; + path->slots[level] = orig_slot; + if (mid_buf) + tree_block_release(root, mid_buf); + } else { + orig_slot -= left->header.nritems; + path->slots[level] = orig_slot; + } + } + + if (right_buf) + tree_block_release(root, right_buf); + if (left_buf) + tree_block_release(root, left_buf); + + return ret; +} + /* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -208,12 +339,14 @@ int bin_search(struct node *c, struct key *key, int *slot) int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len) { - struct tree_buffer *b = root->node; + struct tree_buffer *b; struct node *c; int slot; int ret; int level; +again: + b = root->node; b->count++; while (b) { c = &b->node; @@ -236,9 +369,17 @@ int search_slot(struct ctree_root *root, struct key *key, b = p->nodes[level]; c = &b->node; slot = p->slots[level]; + } else if (ins_len < 0) { + int sret = balance_level(root, p, level); + if (sret) + return sret; + b = p->nodes[level]; + if (!b) + goto again; + c = &b->node; + slot = p->slots[level]; } b = read_tree_block(root, c->blockptrs[slot]); - continue; } else { struct leaf *l = (struct leaf *)c; p->slots[level] = slot; @@ -249,9 +390,11 @@ int search_slot(struct ctree_root *root, struct key *key, if (sret) return sret; } + BUG_ON(root->node->count == 1); return ret; } } + BUG_ON(root->node->count == 1); return 1; } @@ -301,161 +444,47 @@ static int fixup_low_keys(struct ctree_root *root, * returns 0 if some ptrs were pushed left, < 0 if there was some horrible * error, and > 0 if there was no room in the left hand block. */ -static int push_node_left(struct ctree_root *root, struct ctree_path *path, - int level) +static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, + struct tree_buffer *src_buf) { - int slot; - struct node *left; - struct node *right; + struct node *src = &src_buf->node; + struct node *dst = &dst_buf->node; int push_items = 0; - int left_nritems; - int right_nritems; - struct tree_buffer *t; - struct tree_buffer *right_buf; + int src_nritems; + int dst_nritems; int ret = 0; int wret; - if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) - return 1; - slot = path->slots[level + 1]; - if (slot == 0) - return 1; - - t = read_tree_block(root, - path->nodes[level + 1]->node.blockptrs[slot - 1]); - left = &t->node; - right_buf = path->nodes[level]; - right = &right_buf->node; - left_nritems = left->header.nritems; - right_nritems = right->header.nritems; - push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1); - if (push_items <= 0) { - tree_block_release(root, t); - return 1; - } - - if (right_nritems < push_items) - push_items = right_nritems; - memcpy(left->keys + left_nritems, right->keys, - push_items * sizeof(struct key)); - memcpy(left->blockptrs + left_nritems, right->blockptrs, - push_items * sizeof(u64)); - memmove(right->keys, right->keys + push_items, - (right_nritems - push_items) * sizeof(struct key)); - memmove(right->blockptrs, right->blockptrs + push_items, - (right_nritems - push_items) * sizeof(u64)); - right->header.nritems -= push_items; - left->header.nritems += push_items; - - /* adjust the pointers going up the tree */ - wret = fixup_low_keys(root, path, right->keys, level + 1); - if (wret < 0) - ret = wret; - - wret = write_tree_block(root, t); - if (wret < 0) - ret = wret; - - wret = write_tree_block(root, right_buf); - if (wret < 0) - ret = wret; - - /* then fixup the leaf pointer in the path */ - if (path->slots[level] < push_items) { - path->slots[level] += left_nritems; - tree_block_release(root, path->nodes[level]); - path->nodes[level] = t; - path->slots[level + 1] -= 1; - } else { - path->slots[level] -= push_items; - tree_block_release(root, t); - } - return ret; -} - -/* - * try to push data from one node into the next node right in the - * tree. The src node is found at specified level in the path. - * If some bytes were pushed, return 0, otherwise return 1. - * - * Lower nodes/leaves in the path are not touched, higher nodes may - * be modified to reflect the push. - * - * The path is altered to reflect the push. - * - * returns 0 if some ptrs were pushed, < 0 if there was some horrible - * error, and > 0 if there was no room in the right hand block. - */ -static int push_node_right(struct ctree_root *root, struct ctree_path *path, - int level) -{ - int slot; - struct tree_buffer *t; - struct tree_buffer *src_buffer; - struct node *dst; - struct node *src; - int push_items = 0; - int dst_nritems; - int src_nritems; - - /* can't push from the root */ - if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) - return 1; - - /* only try to push inside the node higher up */ - slot = path->slots[level + 1]; - if (slot == NODEPTRS_PER_BLOCK - 1) - return 1; - - if (slot >= path->nodes[level + 1]->node.header.nritems -1) - return 1; - - t = read_tree_block(root, - path->nodes[level + 1]->node.blockptrs[slot + 1]); - dst = &t->node; - src_buffer = path->nodes[level]; - src = &src_buffer->node; - dst_nritems = dst->header.nritems; src_nritems = src->header.nritems; - push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1); + dst_nritems = dst->header.nritems; + push_items = NODEPTRS_PER_BLOCK - dst_nritems; if (push_items <= 0) { - tree_block_release(root, t); return 1; } if (src_nritems < push_items) - push_items = src_nritems; - memmove(dst->keys + push_items, dst->keys, - dst_nritems * sizeof(struct key)); - memcpy(dst->keys, src->keys + src_nritems - push_items, + push_items =src_nritems; + memcpy(dst->keys + dst_nritems, src->keys, push_items * sizeof(struct key)); - - memmove(dst->blockptrs + push_items, dst->blockptrs, - dst_nritems * sizeof(u64)); - memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items, + memcpy(dst->blockptrs + dst_nritems, src->blockptrs, push_items * sizeof(u64)); - + if (push_items < src_nritems) { + memmove(src->keys, src->keys + push_items, + (src_nritems - push_items) * sizeof(struct key)); + memmove(src->blockptrs, src->blockptrs + push_items, + (src_nritems - push_items) * sizeof(u64)); + } src->header.nritems -= push_items; dst->header.nritems += push_items; - /* adjust the pointers going up the tree */ - memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1, - dst->keys, sizeof(struct key)); + wret = write_tree_block(root, src_buf); + if (wret < 0) + ret = wret; - write_tree_block(root, path->nodes[level + 1]); - write_tree_block(root, t); - write_tree_block(root, src_buffer); - - /* then fixup the pointers in the path */ - if (path->slots[level] >= src->header.nritems) { - path->slots[level] -= src->header.nritems; - tree_block_release(root, path->nodes[level]); - path->nodes[level] = t; - path->slots[level + 1] += 1; - } else { - tree_block_release(root, t); - } - return 0; + wret = write_tree_block(root, dst_buf); + if (wret < 0) + ret = wret; + return ret; } /* @@ -558,16 +587,6 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, int ret; int wret; - ret = push_node_left(root, path, level); - if (!ret) - return 0; - if (ret < 0) - return ret; - ret = push_node_right(root, path, level); - if (!ret) - return 0; - if (ret < 0) - return ret; t = path->nodes[level]; c = &t->node; if (t == root->node) { @@ -1011,6 +1030,7 @@ int insert_item(struct ctree_root *root, struct key *key, if (leaf_free_space(leaf) < 0) BUG(); + check_leaf(&path, 0); release_path(root, &path); return ret; } @@ -1022,77 +1042,38 @@ int insert_item(struct ctree_root *root, struct key *key, * continuing all the way the root if required. The root is converted into * a leaf if all the nodes are emptied. */ -static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) +static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, + int slot) { - int slot; - struct tree_buffer *t; struct node *node; + struct tree_buffer *parent = path->nodes[level]; int nritems; - u64 blocknr; - int wret; int ret = 0; + int wret; - while(1) { - t = path->nodes[level]; - if (!t) - break; - node = &t->node; - slot = path->slots[level]; - nritems = node->header.nritems; + node = &parent->node; + nritems = node->header.nritems; - if (slot != nritems -1) { - memmove(node->keys + slot, node->keys + slot + 1, - sizeof(struct key) * (nritems - slot - 1)); - memmove(node->blockptrs + slot, - node->blockptrs + slot + 1, - sizeof(u64) * (nritems - slot - 1)); - } - node->header.nritems--; - blocknr = t->blocknr; - write_tree_block(root, t); - if (node->header.nritems != 0) { - int tslot; - if (slot == 0) { - wret = fixup_low_keys(root, path, - node->keys, - level + 1); - if (wret) - ret = wret; - } - tslot = path->slots[level + 1]; - t->count++; - wret = push_node_left(root, path, level); - if (wret < 0) { - ret = wret; - break; - } - if (node->header.nritems != 0) { - wret = push_node_right(root, path, level); - if (wret < 0) { - ret = wret; - break; - } - } - path->slots[level + 1] = tslot; - if (node->header.nritems != 0) { - tree_block_release(root, t); - break; - } - tree_block_release(root, t); - } - if (t == root->node) { - /* just turn the root into a leaf and break */ - root->node->node.header.flags = node_level(0); - write_tree_block(root, t); - break; - } - level++; - wret = free_extent(root, blocknr, 1); + if (slot != nritems -1) { + memmove(node->keys + slot, node->keys + slot + 1, + sizeof(struct key) * (nritems - slot - 1)); + memmove(node->blockptrs + slot, + node->blockptrs + slot + 1, + sizeof(u64) * (nritems - slot - 1)); + } + node->header.nritems--; + if (node->header.nritems == 0 && parent == root->node) { + BUG_ON(node_level(root->node->node.header.flags) != 1); + /* just turn the root into a leaf and break */ + root->node->node.header.flags = node_level(0); + } else if (slot == 0) { + wret = fixup_low_keys(root, path, node->keys, level + 1); if (wret) ret = wret; - if (!path->nodes[level]) - BUG(); } + wret = write_tree_block(root, parent); + if (wret) + ret = wret; return ret; } @@ -1135,7 +1116,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) leaf->header.flags = node_level(0); write_tree_block(root, leaf_buf); } else { - wret = del_ptr(root, path, 1); + wret = del_ptr(root, path, 1, path->slots[1]); if (wret) ret = wret; wret = free_extent(root, leaf_buf->blocknr, 1); @@ -1172,8 +1153,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) } if (leaf->header.nritems == 0) { u64 blocknr = leaf_buf->blocknr; - path->slots[1] = slot; - wret = del_ptr(root, path, 1); + wret = del_ptr(root, path, 1, slot); if (wret) ret = wret; tree_block_release(root, leaf_buf); From 79f95c82dca7665f32bafd68b7cdf4a01fab0840 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 1 Mar 2007 15:16:26 -0500 Subject: [PATCH 0023/2985] Btrfs: Fixup the code to merge during path walks Add a bulk insert/remove test to random-test Add the quick-test code back as another regression test Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 5 +- fs/btrfs/ctree.c | 137 ++++++++++++++++++++++++++++------ fs/btrfs/quick-test.c | 165 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/random-test.c | 117 ++++++++++++++++++++++++----- 4 files changed, 381 insertions(+), 43 deletions(-) create mode 100644 fs/btrfs/quick-test.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 58d4260a04b2..756b1cd4cf4d 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -14,7 +14,7 @@ check=sparse $(CHECKFLAGS) $(check) $< $(CC) $(CFLAGS) -c $< -all: tester debug-tree +all: tester debug-tree quick-test debug-tree: $(objects) debug-tree.o gcc $(CFLAGS) -o debug-tree $(objects) debug-tree.o @@ -22,6 +22,9 @@ debug-tree: $(objects) debug-tree.o tester: $(objects) random-test.o gcc $(CFLAGS) -o tester $(objects) random-test.o +quick-test: $(objects) quick-test.o + gcc $(CFLAGS) -o quick-test $(objects) quick-test.o + $(objects) : $(headers) clean : diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index df4a19d65206..afa5bc5c7c1a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -12,6 +12,9 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size); static int push_node_left(struct ctree_root *root, struct tree_buffer *dst, struct tree_buffer *src); +static int balance_node_right(struct ctree_root *root, + struct tree_buffer *dst_buf, + struct tree_buffer *src_buf); static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, int slot); @@ -217,15 +220,16 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, int ret = 0; int wret; int pslot; - int used = 0; - int count; int orig_slot = path->slots[level]; + u64 orig_ptr; if (level == 0) return 0; mid_buf = path->nodes[level]; mid = &mid_buf->node; + orig_ptr = mid->blockptrs[orig_slot]; + if (level < MAX_LEVEL - 1) parent_buf = path->nodes[level + 1]; pslot = path->slots[level + 1]; @@ -253,24 +257,26 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, if (mid->header.nritems > NODEPTRS_PER_BLOCK / 4) return 0; - // print_tree(root, root->node); left_buf = read_node_slot(root, parent_buf, pslot - 1); right_buf = read_node_slot(root, parent_buf, pslot + 1); - if (right_buf) { - right = &right_buf->node; - used = right->header.nritems; - count = 1; - } + + /* first, try to make some room in the middle buffer */ if (left_buf) { left = &left_buf->node; - used += left->header.nritems; orig_slot += left->header.nritems; - count++; + wret = push_node_left(root, left_buf, mid_buf); + if (wret < 0) + ret = wret; } - if (left_buf) - push_node_left(root, left_buf, mid_buf); + + /* + * then try to empty the right most buffer into the middle + */ if (right_buf) { - push_node_left(root, mid_buf, right_buf); + right = &right_buf->node; + wret = push_node_left(root, mid_buf, right_buf); + if (wret < 0) + ret = wret; if (right->header.nritems == 0) { u64 blocknr = right_buf->blocknr; tree_block_release(root, right_buf); @@ -285,9 +291,29 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, } else { memcpy(parent->keys + pslot + 1, right->keys, sizeof(struct key)); + wret = write_tree_block(root, parent_buf); + if (wret) + ret = wret; } } + if (mid->header.nritems == 1) { + /* + * we're not allowed to leave a node with one item in the + * tree during a delete. A deletion from lower in the tree + * could try to delete the only pointer in this node. + * So, pull some keys from the left. + * There has to be a left pointer at this point because + * otherwise we would have pulled some pointers from the + * right + */ + BUG_ON(!left_buf); + wret = balance_node_right(root, mid_buf, left_buf); + if (wret < 0) + ret = wret; + BUG_ON(wret == 1); + } if (mid->header.nritems == 0) { + /* we've managed to empty the middle node, drop it */ u64 blocknr = mid_buf->blocknr; tree_block_release(root, mid_buf); mid_buf = NULL; @@ -298,11 +324,17 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, wret = free_extent(root, blocknr, 1); if (wret) ret = wret; - } else + } else { + /* update the parent key to reflect our changes */ memcpy(parent->keys + pslot, mid->keys, sizeof(struct key)); + wret = write_tree_block(root, parent_buf); + if (wret) + ret = wret; + } + /* update the path */ if (left_buf) { - if (left->header.nritems >= orig_slot) { + if (left->header.nritems > orig_slot) { left_buf->count++; // released below path->nodes[level] = left_buf; path->slots[level + 1] -= 1; @@ -314,12 +346,15 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, path->slots[level] = orig_slot; } } + /* double check we haven't messed things up */ + check_block(path, level); + if (orig_ptr != path->nodes[level]->node.blockptrs[path->slots[level]]) + BUG(); if (right_buf) tree_block_release(root, right_buf); if (left_buf) tree_block_release(root, left_buf); - return ret; } @@ -378,6 +413,7 @@ again: goto again; c = &b->node; slot = p->slots[level]; + BUG_ON(c->header.nritems == 1); } b = read_tree_block(root, c->blockptrs[slot]); } else { @@ -433,13 +469,7 @@ static int fixup_low_keys(struct ctree_root *root, /* * try to push data from one node into the next node left in the - * tree. The src node is found at specified level in the path. - * If some bytes were pushed, return 0, otherwise return 1. - * - * Lower nodes/leaves in the path are not touched, higher nodes may - * be modified to reflect the push. - * - * The path is altered to reflect the push. + * tree. * * returns 0 if some ptrs were pushed left, < 0 if there was some horrible * error, and > 0 if there was no room in the left hand block. @@ -463,7 +493,8 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, } if (src_nritems < push_items) - push_items =src_nritems; + push_items = src_nritems; + memcpy(dst->keys + dst_nritems, src->keys, push_items * sizeof(struct key)); memcpy(dst->blockptrs + dst_nritems, src->blockptrs, @@ -487,6 +518,64 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, return ret; } +/* + * try to push data from one node into the next node right in the + * tree. + * + * returns 0 if some ptrs were pushed, < 0 if there was some horrible + * error, and > 0 if there was no room in the right hand block. + * + * this will only push up to 1/2 the contents of the left node over + */ +static int balance_node_right(struct ctree_root *root, + struct tree_buffer *dst_buf, + struct tree_buffer *src_buf) +{ + struct node *src = &src_buf->node; + struct node *dst = &dst_buf->node; + int push_items = 0; + int max_push; + int src_nritems; + int dst_nritems; + int ret = 0; + int wret; + + src_nritems = src->header.nritems; + dst_nritems = dst->header.nritems; + push_items = NODEPTRS_PER_BLOCK - dst_nritems; + if (push_items <= 0) { + return 1; + } + + max_push = src_nritems / 2 + 1; + /* don't try to empty the node */ + if (max_push > src_nritems) + return 1; + if (max_push < push_items) + push_items = max_push; + + memmove(dst->keys + push_items, dst->keys, + dst_nritems * sizeof(struct key)); + memmove(dst->blockptrs + push_items, dst->blockptrs, + dst_nritems * sizeof(u64)); + memcpy(dst->keys, src->keys + src_nritems - push_items, + push_items * sizeof(struct key)); + memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items, + push_items * sizeof(u64)); + + src->header.nritems -= push_items; + dst->header.nritems += push_items; + + wret = write_tree_block(root, src_buf); + if (wret < 0) + ret = wret; + + wret = write_tree_block(root, dst_buf); + if (wret < 0) + ret = wret; + return ret; +} + /* * helper function to insert a new root level in the tree. * A new node is allocated, and a single item is inserted to diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c new file mode 100644 index 000000000000..dbd00c3b7ab4 --- /dev/null +++ b/fs/btrfs/quick-test.c @@ -0,0 +1,165 @@ +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" + +/* for testing only */ +int next_key(int i, int max_key) { + return rand() % max_key; + //return i; +} + +int main(int ac, char **av) { + struct key ins; + struct key last = { (u64)-1, 0, 0}; + char *buf; + int i; + int num; + int ret; + int run_size = 100000; + int max_key = 100000000; + int tree_size = 0; + struct ctree_path path; + struct ctree_super_block super; + struct ctree_root *root; + + radix_tree_init(); + + root = open_ctree("dbfile", &super); + srand(55); + for (i = 0; i < run_size; i++) { + buf = malloc(64); + num = next_key(i, max_key); + // num = i; + sprintf(buf, "string-%d", num); + if (i % 10000 == 0) + fprintf(stderr, "insert %d:%d\n", num, i); + ins.objectid = num; + ins.offset = 0; + ins.flags = 0; + ret = insert_item(root, &ins, buf, strlen(buf)); + if (!ret) + tree_size++; + free(buf); + } + write_ctree_super(root, &super); + close_ctree(root); + + root = open_ctree("dbfile", &super); + printf("starting search\n"); + srand(55); + for (i = 0; i < run_size; i++) { + num = next_key(i, max_key); + ins.objectid = num; + init_path(&path); + if (i % 10000 == 0) + fprintf(stderr, "search %d:%d\n", num, i); + ret = search_slot(root, &ins, &path, 0); + if (ret) { + print_tree(root, root->node); + printf("unable to find %d\n", num); + exit(1); + } + release_path(root, &path); + } + write_ctree_super(root, &super); + close_ctree(root); + root = open_ctree("dbfile", &super); + printf("node %p level %d total ptrs %d free spc %lu\n", root->node, + node_level(root->node->node.header.flags), + root->node->node.header.nritems, + NODEPTRS_PER_BLOCK - root->node->node.header.nritems); + printf("all searches good, deleting some items\n"); + i = 0; + srand(55); + for (i = 0 ; i < run_size/4; i++) { + num = next_key(i, max_key); + ins.objectid = num; + init_path(&path); + ret = search_slot(root, &ins, &path, -1); + if (!ret) { + if (i % 10000 == 0) + fprintf(stderr, "del %d:%d\n", num, i); + ret = del_item(root, &path); + if (ret != 0) + BUG(); + tree_size--; + } + release_path(root, &path); + } + write_ctree_super(root, &super); + close_ctree(root); + root = open_ctree("dbfile", &super); + srand(128); + for (i = 0; i < run_size; i++) { + buf = malloc(64); + num = next_key(i, max_key); + sprintf(buf, "string-%d", num); + ins.objectid = num; + if (i % 10000 == 0) + fprintf(stderr, "insert %d:%d\n", num, i); + ret = insert_item(root, &ins, buf, strlen(buf)); + if (!ret) + tree_size++; + free(buf); + } + write_ctree_super(root, &super); + close_ctree(root); + root = open_ctree("dbfile", &super); + srand(128); + printf("starting search2\n"); + for (i = 0; i < run_size; i++) { + num = next_key(i, max_key); + ins.objectid = num; + init_path(&path); + if (i % 10000 == 0) + fprintf(stderr, "search %d:%d\n", num, i); + ret = search_slot(root, &ins, &path, 0); + if (ret) { + print_tree(root, root->node); + printf("unable to find %d\n", num); + exit(1); + } + release_path(root, &path); + } + printf("starting big long delete run\n"); + while(root->node && root->node->node.header.nritems > 0) { + struct leaf *leaf; + int slot; + ins.objectid = (u64)-1; + init_path(&path); + ret = search_slot(root, &ins, &path, -1); + if (ret == 0) + BUG(); + + leaf = &path.nodes[0]->leaf; + slot = path.slots[0]; + if (slot != leaf->header.nritems) + BUG(); + while(path.slots[0] > 0) { + path.slots[0] -= 1; + slot = path.slots[0]; + leaf = &path.nodes[0]->leaf; + + memcpy(&last, &leaf->items[slot].key, sizeof(last)); + if (tree_size % 10000 == 0) + printf("big del %d:%d\n", tree_size, i); + ret = del_item(root, &path); + if (ret != 0) { + printf("del_item returned %d\n", ret); + BUG(); + } + tree_size--; + } + release_path(root, &path); + } + printf("tree size is now %d\n", tree_size); + printf("map tree\n"); + print_tree(root->extent_root, root->extent_root->node); + write_ctree_super(root, &super); + close_ctree(root); + return 0; +} diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index cebaf6483927..bbd554e88db2 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -142,8 +142,98 @@ error: return -1; } +static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix, + int nr) +{ + struct ctree_path path; + struct key key; + unsigned long found = 0; + int ret; + int slot; + int *ptr; + int count = 0; + + key.offset = 0; + key.flags = 0; + key.objectid = (unsigned long)-1; + while(nr-- >= 0) { + init_path(&path); + ret = search_slot(root, &key, &path, -1); + if (ret < 0) { + release_path(root, &path); + return ret; + } + if (ret != 0) { + if (path.slots[0] == 0) { + release_path(root, &path); + break; + } + path.slots[0] -= 1; + } + slot = path.slots[0]; + found = path.nodes[0]->leaf.items[slot].key.objectid; + ret = del_item(root, &path); + count++; + if (ret) { + fprintf(stderr, + "failed to remove %lu from tree\n", + found); + return -1; + } + release_path(root, &path); + ptr = radix_tree_delete(radix, found); + if (!ptr) + goto error; + if (!keep_running) + break; + } + return 0; +error: + fprintf(stderr, "failed to delete from the radix %lu\n", found); + return -1; +} + +static int fill_tree(struct ctree_root *root, struct radix_tree_root *radix, + int count) +{ + int i; + int err; + int ret = 0; + for (i = 0; i < count; i++) { + ret = ins_one(root, radix); + if (ret) { + printf("fill failed\n"); + err = ret; + goto out; + } + if (!keep_running) + break; + } +out: + return ret; +} + +static int bulk_op(struct ctree_root *root, struct radix_tree_root *radix) +{ + int ret; + int nr = rand() % 20000; + static int run_nr = 0; + + /* do the bulk op much less frequently */ + if (run_nr++ % 100) + return 0; + ret = empty_tree(root, radix, nr); + if (ret) + return ret; + ret = fill_tree(root, radix, nr); + if (ret) + return ret; + return 0; +} + + int (*ops[])(struct ctree_root *root, struct radix_tree_root *radix) = -{ ins_one, insert_dup, del_one, lookup_item, lookup_enoent }; +{ ins_one, insert_dup, del_one, lookup_item, lookup_enoent, bulk_op }; static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) { @@ -192,7 +282,6 @@ static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) } return 0; } - void sigstopper(int ignored) { keep_running = 0; @@ -241,22 +330,12 @@ int main(int ac, char **av) print_usage(); } } - for (i = 0; i < init_fill_count; i++) { - ret = ins_one(root, &radix); - if (ret) { - printf("initial fill failed\n"); - err = ret; - goto out; - } - if (i % 10000 == 0) { - printf("initial fill %d level %d count %d\n", i, - node_level(root->node->node.header.flags), - root->node->node.header.nritems); - } - if (keep_running == 0) { - err = 0; - goto out; - } + printf("initial fill\n"); + ret = fill_tree(root, &radix, init_fill_count); + printf("starting run\n"); + if (ret) { + err = ret; + goto out; } if (initial_only == 1) { goto out; @@ -287,6 +366,8 @@ int main(int ac, char **av) err = ret; goto out; } + if (ops[op] == bulk_op) + break; if (keep_running == 0) { err = 0; goto out; From ed2ff2cba766dfe7976a0113f667c9a0a50dff02 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 1 Mar 2007 18:59:40 -0500 Subject: [PATCH 0024/2985] Btrfs: pretend page cache & commit code Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 66 ++++--- fs/btrfs/ctree.h | 5 + fs/btrfs/disk-io.c | 91 +++++++++- fs/btrfs/disk-io.h | 6 + fs/btrfs/list.h | 418 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 558 insertions(+), 28 deletions(-) create mode 100644 fs/btrfs/list.h diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index afa5bc5c7c1a..57fa505fb9f6 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -250,6 +250,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, tree_block_release(root, mid_buf); /* once for the root ptr */ tree_block_release(root, mid_buf); + clean_tree_block(root, mid_buf); return free_extent(root, blocknr, 1); } parent = &parent_buf->node; @@ -280,6 +281,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, if (right->header.nritems == 0) { u64 blocknr = right_buf->blocknr; tree_block_release(root, right_buf); + clean_tree_block(root, right_buf); right_buf = NULL; right = NULL; wret = del_ptr(root, path, level + 1, pslot + 1); @@ -291,7 +293,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, } else { memcpy(parent->keys + pslot + 1, right->keys, sizeof(struct key)); - wret = write_tree_block(root, parent_buf); + wret = dirty_tree_block(root, parent_buf); if (wret) ret = wret; } @@ -316,6 +318,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, /* we've managed to empty the middle node, drop it */ u64 blocknr = mid_buf->blocknr; tree_block_release(root, mid_buf); + clean_tree_block(root, mid_buf); mid_buf = NULL; mid = NULL; wret = del_ptr(root, path, level + 1, pslot); @@ -327,7 +330,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, } else { /* update the parent key to reflect our changes */ memcpy(parent->keys + pslot, mid->keys, sizeof(struct key)); - wret = write_tree_block(root, parent_buf); + wret = dirty_tree_block(root, parent_buf); if (wret) ret = wret; } @@ -458,7 +461,7 @@ static int fixup_low_keys(struct ctree_root *root, break; t = &path->nodes[i]->node; memcpy(t->keys + tslot, key, sizeof(*key)); - wret = write_tree_block(root, path->nodes[i]); + wret = dirty_tree_block(root, path->nodes[i]); if (wret) ret = wret; if (tslot != 0) @@ -508,11 +511,11 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, src->header.nritems -= push_items; dst->header.nritems += push_items; - wret = write_tree_block(root, src_buf); + wret = dirty_tree_block(root, src_buf); if (wret < 0) ret = wret; - wret = write_tree_block(root, dst_buf); + wret = dirty_tree_block(root, dst_buf); if (wret < 0) ret = wret; return ret; @@ -566,11 +569,11 @@ static int balance_node_right(struct ctree_root *root, src->header.nritems -= push_items; dst->header.nritems += push_items; - wret = write_tree_block(root, src_buf); + wret = dirty_tree_block(root, src_buf); if (wret < 0) ret = wret; - wret = write_tree_block(root, dst_buf); + wret = dirty_tree_block(root, dst_buf); if (wret < 0) ret = wret; return ret; @@ -612,7 +615,7 @@ static int insert_new_root(struct ctree_root *root, tree_block_release(root, root->node); root->node = t; t->count++; - write_tree_block(root, t); + dirty_tree_block(root, t); path->nodes[level] = t; path->slots[level] = 0; return 0; @@ -652,7 +655,7 @@ static int insert_ptr(struct ctree_root *root, lower->header.nritems++; if (lower->keys[1].objectid == 0) BUG(); - write_tree_block(root, path->nodes[level]); + dirty_tree_block(root, path->nodes[level]); return 0; } @@ -698,10 +701,10 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, c->header.nritems = mid; ret = 0; - wret = write_tree_block(root, t); + wret = dirty_tree_block(root, t); if (wret) ret = wret; - wret = write_tree_block(root, split_buffer); + wret = dirty_tree_block(root, split_buffer); if (wret) ret = wret; wret = insert_ptr(root, path, split->keys, split_buffer->blocknr, @@ -815,11 +818,11 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, } left->header.nritems -= push_items; - write_tree_block(root, left_buf); - write_tree_block(root, right_buf); + dirty_tree_block(root, left_buf); + dirty_tree_block(root, right_buf); memcpy(upper->node.keys + slot + 1, &right->items[0].key, sizeof(struct key)); - write_tree_block(root, upper); + dirty_tree_block(root, upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left->header.nritems) { path->slots[0] -= left->header.nritems; @@ -909,10 +912,10 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, push_space = right->items[i].offset; } - wret = write_tree_block(root, t); + wret = dirty_tree_block(root, t); if (wret) ret = wret; - wret = write_tree_block(root, right_buf); + wret = dirty_tree_block(root, right_buf); if (wret) ret = wret; @@ -1019,10 +1022,10 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, right_buffer->blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; - wret = write_tree_block(root, right_buffer); + wret = dirty_tree_block(root, right_buffer); if (wret) ret = wret; - wret = write_tree_block(root, l_buf); + wret = dirty_tree_block(root, l_buf); if (wret) ret = wret; @@ -1062,12 +1065,14 @@ int insert_item(struct ctree_root *root, struct key *key, ret = search_slot(root, key, &path, data_size); if (ret == 0) { release_path(root, &path); - return -EEXIST; - } - if (ret < 0) { - release_path(root, &path); + ret = -EEXIST; + wret = commit_transaction(root); + if (wret) + ret = wret; return ret; } + if (ret < 0) + goto out; slot_orig = path.slots[0]; leaf_buf = path.nodes[0]; @@ -1113,14 +1118,18 @@ int insert_item(struct ctree_root *root, struct key *key, if (slot == 0) ret = fixup_low_keys(root, &path, key, 1); - wret = write_tree_block(root, leaf_buf); + wret = dirty_tree_block(root, leaf_buf); if (wret) ret = wret; if (leaf_free_space(leaf) < 0) BUG(); check_leaf(&path, 0); +out: release_path(root, &path); + wret = commit_transaction(root); + if (wret) + ret = wret; return ret; } @@ -1160,7 +1169,7 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, if (wret) ret = wret; } - wret = write_tree_block(root, parent); + wret = dirty_tree_block(root, parent); if (wret) ret = wret; return ret; @@ -1203,8 +1212,9 @@ int del_item(struct ctree_root *root, struct ctree_path *path) if (leaf->header.nritems == 0) { if (leaf_buf == root->node) { leaf->header.flags = node_level(0); - write_tree_block(root, leaf_buf); + dirty_tree_block(root, leaf_buf); } else { + clean_tree_block(root, leaf_buf); wret = del_ptr(root, path, 1, path->slots[1]); if (wret) ret = wret; @@ -1220,7 +1230,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) if (wret) ret = wret; } - wret = write_tree_block(root, leaf_buf); + wret = dirty_tree_block(root, leaf_buf); if (wret) ret = wret; @@ -1242,6 +1252,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) } if (leaf->header.nritems == 0) { u64 blocknr = leaf_buf->blocknr; + clean_tree_block(root, leaf_buf); wret = del_ptr(root, path, 1, slot); if (wret) ret = wret; @@ -1254,6 +1265,9 @@ int del_item(struct ctree_root *root, struct ctree_path *path) } } } + wret = commit_transaction(root); + if (wret) + ret = wret; return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 18daccd84535..6b4dabd47698 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,6 +1,8 @@ #ifndef __CTREE__ #define __CTREE__ +#include "list.h" + #define CTREE_BLOCKSIZE 1024 /* @@ -53,6 +55,9 @@ struct ctree_root { struct key current_insert; int fp; struct radix_tree_root cache_radix; + struct list_head trans; + struct list_head cache; + int cache_size; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c42dc72706bf..656ace6147a8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -11,6 +11,8 @@ #include "disk-io.h" static int allocated_blocks = 0; +int cache_size = 0; +int cache_max = 10000; static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) { @@ -21,6 +23,25 @@ static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) return 0; } +static int free_some_buffers(struct ctree_root *root) +{ + struct list_head *node, *next; + struct tree_buffer *b; + if (root->cache_size < cache_max) + return 0; + list_for_each_safe(node, next, &root->cache) { + b = list_entry(node, struct tree_buffer, cache); + if (b->count == 1) { + BUG_ON(!list_empty(&b->dirty)); + list_del_init(&b->cache); + tree_block_release(root, b); + if (root->cache_size < cache_max) + return 0; + } + } + return 0; +} + struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) { struct tree_buffer *buf; @@ -30,10 +51,14 @@ struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) return buf; allocated_blocks++; buf->blocknr = blocknr; - buf->count = 1; + buf->count = 2; + INIT_LIST_HEAD(&buf->dirty); + free_some_buffers(root); radix_tree_preload(GFP_KERNEL); ret = radix_tree_insert(&root->cache_radix, blocknr, buf); radix_tree_preload_end(); + list_add_tail(&buf->cache, &root->cache); + root->cache_size++; if (ret) { free(buf); return NULL; @@ -57,7 +82,6 @@ struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr) return buf; } - struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) { loff_t offset = blocknr * CTREE_BLOCKSIZE; @@ -82,6 +106,24 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) return buf; } +int dirty_tree_block(struct ctree_root *root, struct tree_buffer *buf) +{ + if (!list_empty(&buf->dirty)) + return 0; + list_add_tail(&buf->dirty, &root->trans); + buf->count++; + return 0; +} + +int clean_tree_block(struct ctree_root *root, struct tree_buffer *buf) +{ + if (!list_empty(&buf->dirty)) { + list_del_init(&buf->dirty); + tree_block_release(root, buf); + } + return 0; +} + int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) { u64 blocknr = buf->blocknr; @@ -96,9 +138,37 @@ int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) return 0; } +static int __commit_transaction(struct ctree_root *root) +{ + struct tree_buffer *b; + int ret = 0; + int wret; + while(!list_empty(&root->trans)) { + b = list_entry(root->trans.next, struct tree_buffer, dirty); + list_del_init(&b->dirty); + wret = write_tree_block(root, b); + if (wret) + ret = wret; + tree_block_release(root, b); + } + return ret; +} + +int commit_transaction(struct ctree_root *root) +{ + int ret; + ret = __commit_transaction(root); + if (!ret && root != root->extent_root) + ret = __commit_transaction(root->extent_root); + BUG_ON(ret); + return ret; +} + static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, struct ctree_root_info *info, int fp) { + INIT_LIST_HEAD(&root->trans); + INIT_LIST_HEAD(&root->cache); root->fp = fp; root->node = NULL; root->node = read_tree_block(root, info->tree_root); @@ -157,8 +227,23 @@ int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s) return 0; } +static int drop_cache(struct ctree_root *root) +{ + while(!list_empty(&root->cache)) { + struct tree_buffer *b = list_entry(root->cache.next, + struct tree_buffer, cache); + list_del_init(&b->cache); + tree_block_release(root, b); + } + return 0; +} int close_ctree(struct ctree_root *root) { + drop_cache(root->extent_root); + drop_cache(root); + BUG_ON(!list_empty(&root->trans)); + BUG_ON(!list_empty(&root->extent_root->trans)); + close(root->fp); if (root->node) tree_block_release(root, root->node); @@ -182,6 +267,8 @@ void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) free(buf); BUG_ON(allocated_blocks == 0); allocated_blocks--; + BUG_ON(root->cache_size == 0); + root->cache_size--; } } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 2729b757ddc1..b5dee2fae4da 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -1,5 +1,6 @@ #ifndef __DISKIO__ #define __DISKIO__ +#include "list.h" struct tree_buffer { u64 blocknr; @@ -8,11 +9,16 @@ struct tree_buffer { struct node node; struct leaf leaf; }; + struct list_head dirty; + struct list_head cache; }; struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr); struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr); int write_tree_block(struct ctree_root *root, struct tree_buffer *buf); +int dirty_tree_block(struct ctree_root *root, struct tree_buffer *buf); +int clean_tree_block(struct ctree_root *root, struct tree_buffer *buf); +int commit_transaction(struct ctree_root *root); struct ctree_root *open_ctree(char *filename, struct ctree_super_block *s); int close_ctree(struct ctree_root *root); void tree_block_release(struct ctree_root *root, struct tree_buffer *buf); diff --git a/fs/btrfs/list.h b/fs/btrfs/list.h new file mode 100644 index 000000000000..1aafafb13370 --- /dev/null +++ b/fs/btrfs/list.h @@ -0,0 +1,418 @@ +#ifndef _LINUX_LIST_H +#define _LINUX_LIST_H + +#define LIST_POISON1 ((void *) 0x00100100) +#define LIST_POISON2 ((void *) 0x00200200) + +/* + * Simple doubly linked list implementation. + * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +static inline void INIT_LIST_HEAD(struct list_head *list) +{ + list->next = list; + list->prev = list; +} + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +#ifndef CONFIG_DEBUG_LIST +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} +#else +extern void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next); +#endif + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +#ifndef CONFIG_DEBUG_LIST +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} +#else +extern void list_add(struct list_head *new, struct list_head *head); +#endif + + +/** + * list_add_tail - add a new entry + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is + * in an undefined state. + */ +#ifndef CONFIG_DEBUG_LIST +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; +} +#else +extern void list_del(struct list_head *entry); +#endif + +/** + * list_replace - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * Note: if 'old' was empty, it will be overwritten. + */ +static inline void list_replace(struct list_head *old, + struct list_head *new) +{ + new->next = old->next; + new->next->prev = new; + new->prev = old->prev; + new->prev->next = new; +} + +static inline void list_replace_init(struct list_head *old, + struct list_head *new) +{ + list_replace(old, new); + INIT_LIST_HEAD(old); +} +/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + */ +static inline void list_del_init(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + INIT_LIST_HEAD(entry); +} + +/** + * list_move - delete from one list and add as another's head + * @list: the entry to move + * @head: the head that will precede our entry + */ +static inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add(list, head); +} + +/** + * list_move_tail - delete from one list and add as another's tail + * @list: the entry to move + * @head: the head that will follow our entry + */ +static inline void list_move_tail(struct list_head *list, + struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add_tail(list, head); +} + +/** + * list_is_last - tests whether @list is the last entry in list @head + * @list: the entry to test + * @head: the head of the list + */ +static inline int list_is_last(const struct list_head *list, + const struct list_head *head) +{ + return list->next == head; +} + +/** + * list_empty - tests whether a list is empty + * @head: the list to test. + */ +static inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + +/** + * list_empty_careful - tests whether a list is empty and not being modified + * @head: the list to test + * + * Description: + * tests whether a list is empty _and_ checks that no other CPU might be + * in the process of modifying either member (next or prev) + * + * NOTE: using list_empty_careful() without synchronization + * can only be safe if the only activity that can happen + * to the list entry is list_del_init(). Eg. it cannot be used + * if another CPU could re-list_add() it. + */ +static inline int list_empty_careful(const struct list_head *head) +{ + struct list_head *next = head->next; + return (next == head) && (next == head->prev); +} + +static inline void __list_splice(struct list_head *list, + struct list_head *head) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + first->prev = head; + head->next = first; + + last->next = at; + at->prev = last; +} + +/** + * list_splice - join two lists + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static inline void list_splice(struct list_head *list, struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head); +} + +/** + * list_splice_init - join two lists and reinitialise the emptied list. + * @list: the new list to add. + * @head: the place to add it in the first list. + * + * The list at @list is reinitialised + */ +static inline void list_splice_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head); + INIT_LIST_HEAD(list); + } +} + +/** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +/** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each(pos, head) \ + for (pos = (head)->next; prefetch(pos->next), pos != (head); \ + pos = pos->next) + +/** + * __list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * This variant differs from list_for_each() in that it's the + * simplest possible list iteration code, no prefetching is done. + * Use this for code that knows the list to be very short (empty + * or 1 entry) most of the time. + */ +#define __list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + +/** + * list_for_each_prev - iterate over a list backwards + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_prev(pos, head) \ + for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \ + pos = pos->prev) + +/** + * list_for_each_safe - iterate over a list safe against removal of list entry + * @pos: the &struct list_head to use as a loop cursor. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +/** + * list_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + prefetch(pos->member.next), &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * list_for_each_entry_reverse - iterate backwards over list of given type. + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_reverse(pos, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member); \ + prefetch(pos->member.prev), &pos->member != (head); \ + pos = list_entry(pos->member.prev, typeof(*pos), member)) + +/** + * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue + * @pos: the type * to use as a start point + * @head: the head of the list + * @member: the name of the list_struct within the struct. + * + * Prepares a pos entry for use as a start point in list_for_each_entry_continue. + */ +#define list_prepare_entry(pos, head, member) \ + ((pos) ? : list_entry(head, typeof(*pos), member)) + +/** + * list_for_each_entry_continue - continue iteration over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * Continue to iterate over list of given type, continuing after + * the current position. + */ +#define list_for_each_entry_continue(pos, head, member) \ + for (pos = list_entry(pos->member.next, typeof(*pos), member); \ + prefetch(pos->member.next), &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * list_for_each_entry_from - iterate over list of given type from the current point + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * Iterate over list of given type, continuing from current position. + */ +#define list_for_each_entry_from(pos, head, member) \ + for (; prefetch(pos->member.next), &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + +/** + * list_for_each_entry_safe_continue + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * Iterate over list of given type, continuing after current point, + * safe against removal of list entry. + */ +#define list_for_each_entry_safe_continue(pos, n, head, member) \ + for (pos = list_entry(pos->member.next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + +/** + * list_for_each_entry_safe_from + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * Iterate over list of given type from current point, safe against + * removal of list entry. + */ +#define list_for_each_entry_safe_from(pos, n, head, member) \ + for (n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + +/** + * list_for_each_entry_safe_reverse + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * Iterate backwards over list of given type, safe against removal + * of list entry. + */ +#define list_for_each_entry_safe_reverse(pos, n, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member), \ + n = list_entry(pos->member.prev, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.prev, typeof(*n), member)) + +#endif From f0930a37f1c096c3a8f6a17b1e251c7fdf4d4457 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Mar 2007 09:47:58 -0500 Subject: [PATCH 0025/2985] Btrfs: Fix extent code to use merge during delete Remove implicit commit in del_item and insert_item Add implicit commit to close() Add commit op in random-test Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 15 +++------------ fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/random-test.c | 10 ++++++++-- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 57fa505fb9f6..3c5f4c2dd525 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1065,11 +1065,7 @@ int insert_item(struct ctree_root *root, struct key *key, ret = search_slot(root, key, &path, data_size); if (ret == 0) { release_path(root, &path); - ret = -EEXIST; - wret = commit_transaction(root); - if (wret) - ret = wret; - return ret; + return -EEXIST; } if (ret < 0) goto out; @@ -1127,9 +1123,6 @@ int insert_item(struct ctree_root *root, struct key *key, check_leaf(&path, 0); out: release_path(root, &path); - wret = commit_transaction(root); - if (wret) - ret = wret; return ret; } @@ -1245,7 +1238,8 @@ int del_item(struct ctree_root *root, struct ctree_path *path) wret = push_leaf_left(root, path, 1); if (wret < 0) ret = wret; - if (leaf->header.nritems) { + if (path->nodes[0] == leaf_buf && + leaf->header.nritems) { wret = push_leaf_right(root, path, 1); if (wret < 0) ret = wret; @@ -1265,9 +1259,6 @@ int del_item(struct ctree_root *root, struct ctree_path *path) } } } - wret = commit_transaction(root); - if (wret) - ret = wret; return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 656ace6147a8..f7ca5362291e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -239,6 +239,7 @@ static int drop_cache(struct ctree_root *root) } int close_ctree(struct ctree_root *root) { + commit_transaction(root); drop_cache(root->extent_root); drop_cache(root); BUG_ON(!list_empty(&root->trans)); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 074f4b182f16..fb2144496215 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -39,7 +39,7 @@ static int del_pending_extents(struct ctree_root *extent_root) key.flags = 0; key.offset = 1; init_path(&path); - ret = search_slot(extent_root, &key, &path, 0); + ret = search_slot(extent_root, &key, &path, -1); if (ret) { print_tree(extent_root, extent_root->node); printf("unable to find %Lu\n", key.objectid); @@ -83,7 +83,7 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) return 0; } init_path(&path); - ret = search_slot(extent_root, &key, &path, 0); + ret = search_slot(extent_root, &key, &path, -1); if (ret) { print_tree(extent_root, extent_root->node); printf("failed to find %Lu\n", key.objectid); diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index bbd554e88db2..22955753c3a7 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -59,6 +59,11 @@ error: return -1; } +static int run_commit(struct ctree_root *root, struct radix_tree_root *radix) +{ + return commit_transaction(root); +} + static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; @@ -233,7 +238,8 @@ static int bulk_op(struct ctree_root *root, struct radix_tree_root *radix) int (*ops[])(struct ctree_root *root, struct radix_tree_root *radix) = -{ ins_one, insert_dup, del_one, lookup_item, lookup_enoent, bulk_op }; + { ins_one, insert_dup, del_one, lookup_item, + lookup_enoent, bulk_op, run_commit }; static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) { @@ -366,7 +372,7 @@ int main(int ac, char **av) err = ret; goto out; } - if (ops[op] == bulk_op) + if (ops[op] == bulk_op || ops[op] == run_commit) break; if (keep_running == 0) { err = 0; From 77ce6846c40e83193df01295e5af437f8b6c7a2d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Mar 2007 10:06:43 -0500 Subject: [PATCH 0026/2985] Btrfs: period commit during initial fill in the random tester Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +-- fs/btrfs/random-test.c | 14 +++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f7ca5362291e..b1a8149bbc84 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -11,7 +11,6 @@ #include "disk-io.h" static int allocated_blocks = 0; -int cache_size = 0; int cache_max = 10000; static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) @@ -36,7 +35,7 @@ static int free_some_buffers(struct ctree_root *root) list_del_init(&b->cache); tree_block_release(root, b); if (root->cache_size < cache_max) - return 0; + break; } } return 0; diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 22955753c3a7..53245c5039dc 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -202,15 +202,23 @@ static int fill_tree(struct ctree_root *root, struct radix_tree_root *radix, int count) { int i; - int err; int ret = 0; for (i = 0; i < count; i++) { ret = ins_one(root, radix); if (ret) { - printf("fill failed\n"); - err = ret; + fprintf(stderr, "fill failed\n"); goto out; } + if (i % 1000 == 0) { + ret = commit_transaction(root); + if (ret) { + fprintf(stderr, "fill commit failed\n"); + return ret; + } + } + if (i % 10000 == 0) { + printf("bigfill %d\n", i); + } if (!keep_running) break; } From 02217ed299c6340a35696e0610047eb96826de2d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Mar 2007 16:08:05 -0500 Subject: [PATCH 0027/2985] Btrfs: early reference counting Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 153 ++++++++++++++++++++++++----------------- fs/btrfs/ctree.h | 3 +- fs/btrfs/disk-io.c | 2 + fs/btrfs/extent-tree.c | 45 +++++++++++- fs/btrfs/quick-test.c | 10 +-- fs/btrfs/random-test.c | 12 ++-- 6 files changed, 145 insertions(+), 80 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3c5f4c2dd525..1118986d7569 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -34,6 +34,37 @@ void release_path(struct ctree_root *root, struct ctree_path *p) memset(p, 0, sizeof(*p)); } +int btrfs_cow_block(struct ctree_root *root, + struct tree_buffer *buf, + struct tree_buffer *parent, + int parent_slot, + struct tree_buffer **cow_ret) +{ + struct tree_buffer *cow; + + if (!list_empty(&buf->dirty)) { + *cow_ret = buf; + return 0; + } + cow = alloc_free_block(root); + memcpy(&cow->node, &buf->node, sizeof(buf->node)); + cow->node.header.blocknr = cow->blocknr; + *cow_ret = cow; + if (buf == root->node) { + root->node = cow; + cow->count++; + tree_block_release(root, buf); + } else { + parent->node.blockptrs[parent_slot] = cow->blocknr; + BUG_ON(list_empty(&parent->dirty)); + } + if (0 && root != root->extent_root && !is_leaf(cow->node.header.flags)) { + btrfs_inc_ref(root, cow); + } + tree_block_release(root, buf); + return 0; +} + /* * The leaf data grows from end-to-front in the node. * this returns the address of the start of the last item, @@ -263,6 +294,8 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, /* first, try to make some room in the middle buffer */ if (left_buf) { + btrfs_cow_block(root, left_buf, parent_buf, + pslot - 1, &left_buf); left = &left_buf->node; orig_slot += left->header.nritems; wret = push_node_left(root, left_buf, mid_buf); @@ -274,6 +307,8 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, * then try to empty the right most buffer into the middle */ if (right_buf) { + btrfs_cow_block(root, right_buf, parent_buf, + pslot + 1, &right_buf); right = &right_buf->node; wret = push_node_left(root, mid_buf, right_buf); if (wret < 0) @@ -293,9 +328,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, } else { memcpy(parent->keys + pslot + 1, right->keys, sizeof(struct key)); - wret = dirty_tree_block(root, parent_buf); - if (wret) - ret = wret; + BUG_ON(list_empty(&parent_buf->dirty)); } } if (mid->header.nritems == 1) { @@ -330,9 +363,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, } else { /* update the parent key to reflect our changes */ memcpy(parent->keys + pslot, mid->keys, sizeof(struct key)); - wret = dirty_tree_block(root, parent_buf); - if (wret) - ret = wret; + BUG_ON(list_empty(&parent_buf->dirty)); } /* update the path */ @@ -375,9 +406,10 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, * possible) */ int search_slot(struct ctree_root *root, struct key *key, - struct ctree_path *p, int ins_len) + struct ctree_path *p, int ins_len, int cow) { struct tree_buffer *b; + struct tree_buffer *cow_buf; struct node *c; int slot; int ret; @@ -387,8 +419,15 @@ again: b = root->node; b->count++; while (b) { + level = node_level(b->node.header.flags); + if (cow) { + int wret; + wret = btrfs_cow_block(root, b, p->nodes[level + 1], + p->slots[level + 1], &cow_buf); + b = cow_buf; + } + BUG_ON(!cow && ins_len); c = &b->node; - level = node_level(c->header.flags); p->nodes[level] = b; ret = check_block(p, level); if (ret) @@ -453,7 +492,6 @@ static int fixup_low_keys(struct ctree_root *root, { int i; int ret = 0; - int wret; for (i = level; i < MAX_LEVEL; i++) { struct node *t; int tslot = path->slots[i]; @@ -461,9 +499,7 @@ static int fixup_low_keys(struct ctree_root *root, break; t = &path->nodes[i]->node; memcpy(t->keys + tslot, key, sizeof(*key)); - wret = dirty_tree_block(root, path->nodes[i]); - if (wret) - ret = wret; + BUG_ON(list_empty(&path->nodes[i]->dirty)); if (tslot != 0) break; } @@ -486,7 +522,6 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, int src_nritems; int dst_nritems; int ret = 0; - int wret; src_nritems = src->header.nritems; dst_nritems = dst->header.nritems; @@ -511,13 +546,8 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, src->header.nritems -= push_items; dst->header.nritems += push_items; - wret = dirty_tree_block(root, src_buf); - if (wret < 0) - ret = wret; - - wret = dirty_tree_block(root, dst_buf); - if (wret < 0) - ret = wret; + BUG_ON(list_empty(&src_buf->dirty)); + BUG_ON(list_empty(&dst_buf->dirty)); return ret; } @@ -541,7 +571,6 @@ static int balance_node_right(struct ctree_root *root, int src_nritems; int dst_nritems; int ret = 0; - int wret; src_nritems = src->header.nritems; dst_nritems = dst->header.nritems; @@ -569,13 +598,8 @@ static int balance_node_right(struct ctree_root *root, src->header.nritems -= push_items; dst->header.nritems += push_items; - wret = dirty_tree_block(root, src_buf); - if (wret < 0) - ret = wret; - - wret = dirty_tree_block(root, dst_buf); - if (wret < 0) - ret = wret; + BUG_ON(list_empty(&src_buf->dirty)); + BUG_ON(list_empty(&dst_buf->dirty)); return ret; } @@ -615,7 +639,6 @@ static int insert_new_root(struct ctree_root *root, tree_block_release(root, root->node); root->node = t; t->count++; - dirty_tree_block(root, t); path->nodes[level] = t; path->slots[level] = 0; return 0; @@ -655,7 +678,7 @@ static int insert_ptr(struct ctree_root *root, lower->header.nritems++; if (lower->keys[1].objectid == 0) BUG(); - dirty_tree_block(root, path->nodes[level]); + BUG_ON(list_empty(&path->nodes[level]->dirty)); return 0; } @@ -701,12 +724,7 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, c->header.nritems = mid; ret = 0; - wret = dirty_tree_block(root, t); - if (wret) - ret = wret; - wret = dirty_tree_block(root, split_buffer); - if (wret) - ret = wret; + BUG_ON(list_empty(&t->dirty)); wret = insert_ptr(root, path, split->keys, split_buffer->blocknr, path->slots[level + 1] + 1, level + 1); if (wret) @@ -778,6 +796,15 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, tree_block_release(root, right_buf); return 1; } + /* cow and double check */ + btrfs_cow_block(root, right_buf, upper, slot + 1, &right_buf); + right = &right_buf->leaf; + free_space = leaf_free_space(right); + if (free_space < data_size + sizeof(struct item)) { + tree_block_release(root, right_buf); + return 1; + } + for (i = left->header.nritems - 1; i >= 0; i--) { item = left->items + i; if (path->slots[0] == i) @@ -818,11 +845,12 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, } left->header.nritems -= push_items; - dirty_tree_block(root, left_buf); - dirty_tree_block(root, right_buf); + BUG_ON(list_empty(&left_buf->dirty)); + BUG_ON(list_empty(&right_buf->dirty)); memcpy(upper->node.keys + slot + 1, &right->items[0].key, sizeof(struct key)); - dirty_tree_block(root, upper); + BUG_ON(list_empty(&upper->dirty)); + /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left->header.nritems) { path->slots[0] -= left->header.nritems; @@ -869,6 +897,16 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, tree_block_release(root, t); return 1; } + + /* cow and double check */ + btrfs_cow_block(root, t, path->nodes[1], slot - 1, &t); + left = &t->leaf; + free_space = leaf_free_space(left); + if (free_space < data_size + sizeof(struct item)) { + tree_block_release(root, t); + return 1; + } + for (i = 0; i < right->header.nritems; i++) { item = right->items + i; if (path->slots[0] == i) @@ -912,12 +950,8 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, push_space = right->items[i].offset; } - wret = dirty_tree_block(root, t); - if (wret) - ret = wret; - wret = dirty_tree_block(root, right_buf); - if (wret) - ret = wret; + BUG_ON(list_empty(&t->dirty)); + BUG_ON(list_empty(&right_buf->dirty)); wret = fixup_low_keys(root, path, &right->items[0].key, 1); if (wret) @@ -968,6 +1002,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, if (wret < 0) return wret; } + l_buf = path->nodes[0]; l = &l_buf->leaf; @@ -1022,13 +1057,8 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, right_buffer->blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; - wret = dirty_tree_block(root, right_buffer); - if (wret) - ret = wret; - wret = dirty_tree_block(root, l_buf); - if (wret) - ret = wret; - + BUG_ON(list_empty(&right_buffer->dirty)); + BUG_ON(list_empty(&l_buf->dirty)); BUG_ON(path->slots[0] != slot); if (mid <= slot) { tree_block_release(root, path->nodes[0]); @@ -1049,7 +1079,6 @@ int insert_item(struct ctree_root *root, struct key *key, void *data, int data_size) { int ret = 0; - int wret; int slot; int slot_orig; struct leaf *leaf; @@ -1062,7 +1091,7 @@ int insert_item(struct ctree_root *root, struct key *key, if (!root->node) BUG(); init_path(&path); - ret = search_slot(root, key, &path, data_size); + ret = search_slot(root, key, &path, data_size, 1); if (ret == 0) { release_path(root, &path); return -EEXIST; @@ -1114,10 +1143,7 @@ int insert_item(struct ctree_root *root, struct key *key, if (slot == 0) ret = fixup_low_keys(root, &path, key, 1); - wret = dirty_tree_block(root, leaf_buf); - if (wret) - ret = wret; - + BUG_ON(list_empty(&leaf_buf->dirty)); if (leaf_free_space(leaf) < 0) BUG(); check_leaf(&path, 0); @@ -1162,9 +1188,7 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, if (wret) ret = wret; } - wret = dirty_tree_block(root, parent); - if (wret) - ret = wret; + BUG_ON(list_empty(&parent->dirty)); return ret; } @@ -1205,7 +1229,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) if (leaf->header.nritems == 0) { if (leaf_buf == root->node) { leaf->header.flags = node_level(0); - dirty_tree_block(root, leaf_buf); + BUG_ON(list_empty(&leaf_buf->dirty)); } else { clean_tree_block(root, leaf_buf); wret = del_ptr(root, path, 1, path->slots[1]); @@ -1223,9 +1247,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) if (wret) ret = wret; } - wret = dirty_tree_block(root, leaf_buf); - if (wret) - ret = wret; + BUG_ON(list_empty(&leaf_buf->dirty)); /* delete the leaf if it is mostly empty */ if (used < LEAF_DATA_SIZE / 3) { @@ -1304,3 +1326,4 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) return 0; } + diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6b4dabd47698..9fe8ba6e25c1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -142,8 +142,9 @@ struct ctree_path { }; struct tree_buffer *alloc_free_block(struct ctree_root *root); +int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf); int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks); -int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len); +int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len, int cow); void release_path(struct ctree_root *root, struct ctree_path *p); void init_path(struct ctree_path *p); int del_item(struct ctree_root *root, struct ctree_path *path); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b1a8149bbc84..0e1c31e682fb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -260,6 +260,8 @@ void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) if (buf->count < 0) BUG(); if (buf->count == 0) { + BUG_ON(!list_empty(&buf->cache)); + BUG_ON(!list_empty(&buf->dirty)); if (!radix_tree_lookup(&root->cache_radix, buf->blocknr)) BUG(); radix_tree_delete(&root->cache_radix, buf->blocknr); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fb2144496215..25d9cd169209 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -15,6 +15,39 @@ */ #define CTREE_EXTENT_PENDING 0 +static int inc_block_ref(struct ctree_root *root, u64 blocknr) +{ + struct ctree_path path; + int ret; + struct key key; + struct leaf *l; + struct extent_item *item; + init_path(&path); + key.objectid = blocknr; + key.flags = 0; + key.offset = 1; + ret = search_slot(root->extent_root, &key, &path, 0, 1); + BUG_ON(ret != 0); + l = &path.nodes[0]->leaf; + item = (struct extent_item *)(l->data + + l->items[path.slots[0]].offset); + item->refs++; + BUG_ON(list_empty(&path.nodes[0]->dirty)); + release_path(root->extent_root, &path); + return 0; +} + +int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) +{ + u64 blocknr; + int i; + for (i = 0; i < buf->node.header.nritems; i++) { + blocknr = buf->node.blockptrs[i]; + inc_block_ref(root, blocknr); + } + return 0; +} + /* * find all the blocks marked as pending in the radix tree and remove * them from the extent map @@ -39,7 +72,7 @@ static int del_pending_extents(struct ctree_root *extent_root) key.flags = 0; key.offset = 1; init_path(&path); - ret = search_slot(extent_root, &key, &path, -1); + ret = search_slot(extent_root, &key, &path, -1, 1); if (ret) { print_tree(extent_root, extent_root->node); printf("unable to find %Lu\n", key.objectid); @@ -83,7 +116,7 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) return 0; } init_path(&path); - ret = search_slot(extent_root, &key, &path, -1); + ret = search_slot(extent_root, &key, &path, -1, 1); if (ret) { print_tree(extent_root, extent_root->node); printf("failed to find %Lu\n", key.objectid); @@ -124,7 +157,7 @@ check_failed: ins->offset = 0; ins->flags = 0; start_found = 0; - ret = search_slot(root, ins, &path, 0); + ret = search_slot(root, ins, &path, 0, 0); if (ret < 0) goto error; @@ -221,6 +254,8 @@ static int insert_pending_extents(struct ctree_root *extent_root) ret = insert_item(extent_root, &key, &item, sizeof(item)); if (ret) { + printf("%Lu already in tree\n", key.objectid); + print_tree(extent_root, extent_root->node); BUG(); // FIXME undo it and return sane return ret; @@ -228,6 +263,7 @@ static int insert_pending_extents(struct ctree_root *extent_root) radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, CTREE_EXTENT_PENDING); + printf("%Lu is not pending\n", gang[i]->blocknr); tree_block_release(extent_root, gang[i]); } } @@ -266,15 +302,18 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, if (pending_ret) return pending_ret; *buf = find_tree_block(root, ins->objectid); + dirty_tree_block(root, *buf); return 0; } /* we're allocating an extent for the extent tree, don't recurse */ BUG_ON(ins->offset != 1); *buf = find_tree_block(root, ins->objectid); BUG_ON(!*buf); + printf("%Lu is pending\n", ins->objectid); radix_tree_tag_set(&root->cache_radix, ins->objectid, CTREE_EXTENT_PENDING); (*buf)->count++; + dirty_tree_block(root, *buf); return 0; } diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index dbd00c3b7ab4..8255f79ceca5 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -19,7 +19,7 @@ int main(int ac, char **av) { int i; int num; int ret; - int run_size = 100000; + int run_size = 1024; int max_key = 100000000; int tree_size = 0; struct ctree_path path; @@ -57,7 +57,7 @@ int main(int ac, char **av) { init_path(&path); if (i % 10000 == 0) fprintf(stderr, "search %d:%d\n", num, i); - ret = search_slot(root, &ins, &path, 0); + ret = search_slot(root, &ins, &path, 0, 0); if (ret) { print_tree(root, root->node); printf("unable to find %d\n", num); @@ -79,7 +79,7 @@ int main(int ac, char **av) { num = next_key(i, max_key); ins.objectid = num; init_path(&path); - ret = search_slot(root, &ins, &path, -1); + ret = search_slot(root, &ins, &path, -1, 1); if (!ret) { if (i % 10000 == 0) fprintf(stderr, "del %d:%d\n", num, i); @@ -117,7 +117,7 @@ int main(int ac, char **av) { init_path(&path); if (i % 10000 == 0) fprintf(stderr, "search %d:%d\n", num, i); - ret = search_slot(root, &ins, &path, 0); + ret = search_slot(root, &ins, &path, 0, 0); if (ret) { print_tree(root, root->node); printf("unable to find %d\n", num); @@ -131,7 +131,7 @@ int main(int ac, char **av) { int slot; ins.objectid = (u64)-1; init_path(&path); - ret = search_slot(root, &ins, &path, -1); + ret = search_slot(root, &ins, &path, -1, 1); if (ret == 0) BUG(); diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 53245c5039dc..dcc852ad6737 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -93,7 +93,7 @@ static int del_one(struct ctree_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - ret = search_slot(root, &key, &path, -1); + ret = search_slot(root, &key, &path, -1, 1); if (ret) goto error; ret = del_item(root, &path); @@ -118,7 +118,7 @@ static int lookup_item(struct ctree_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - ret = search_slot(root, &key, &path, 0); + ret = search_slot(root, &key, &path, 0, 1); release_path(root, &path); if (ret) goto error; @@ -137,7 +137,7 @@ static int lookup_enoent(struct ctree_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 0); if (ret < 0) return ret; - ret = search_slot(root, &key, &path, 0); + ret = search_slot(root, &key, &path, 0, 0); release_path(root, &path); if (ret <= 0) goto error; @@ -163,7 +163,7 @@ static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix, key.objectid = (unsigned long)-1; while(nr-- >= 0) { init_path(&path); - ret = search_slot(root, &key, &path, -1); + ret = search_slot(root, &key, &path, -1, 1); if (ret < 0) { release_path(root, &path); return ret; @@ -216,7 +216,7 @@ static int fill_tree(struct ctree_root *root, struct radix_tree_root *radix, return ret; } } - if (i % 10000 == 0) { + if (i && i % 10000 == 0) { printf("bigfill %d\n", i); } if (!keep_running) @@ -263,7 +263,7 @@ static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) key.objectid = (unsigned long)-1; while(1) { init_path(&path); - ret = search_slot(root, &key, &path, 0); + ret = search_slot(root, &key, &path, 0, 0); if (ret < 0) { release_path(root, &path); return ret; From a28ec19775d62d673b034082128aca95780d3737 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 6 Mar 2007 20:08:01 -0500 Subject: [PATCH 0028/2985] Btrfs: Fixup reference counting on cows Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 9 +- fs/btrfs/ctree.h | 4 + fs/btrfs/disk-io.c | 31 ++++- fs/btrfs/disk-io.h | 4 +- fs/btrfs/extent-tree.c | 305 ++++++++++++++++++++++++++++------------- fs/btrfs/print-tree.c | 6 +- fs/btrfs/quick-test.c | 26 ++-- fs/btrfs/random-test.c | 21 +-- 9 files changed, 274 insertions(+), 134 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 756b1cd4cf4d..ae7f4c00c39c 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,7 +1,7 @@ CC=gcc CFLAGS = -g -Wall -headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h +headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o # if you don't have sparse installed, use ls instead diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1118986d7569..72816381d203 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -50,16 +50,17 @@ int btrfs_cow_block(struct ctree_root *root, memcpy(&cow->node, &buf->node, sizeof(buf->node)); cow->node.header.blocknr = cow->blocknr; *cow_ret = cow; + btrfs_inc_ref(root, buf); if (buf == root->node) { root->node = cow; cow->count++; + if (buf != root->commit_root) + free_extent(root, buf->blocknr, 1); tree_block_release(root, buf); } else { parent->node.blockptrs[parent_slot] = cow->blocknr; BUG_ON(list_empty(&parent->dirty)); - } - if (0 && root != root->extent_root && !is_leaf(cow->node.header.flags)) { - btrfs_inc_ref(root, cow); + free_extent(root, buf->blocknr, 1); } tree_block_release(root, buf); return 0; @@ -1018,7 +1019,6 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, slot = path->slots[0]; nritems = l->header.nritems; mid = (nritems + 1)/ 2; - right_buffer = alloc_free_block(root); BUG_ON(!right_buffer); BUG_ON(mid == nritems); @@ -1170,7 +1170,6 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, node = &parent->node; nritems = node->header.nritems; - if (slot != nritems -1) { memmove(node->keys + slot, node->keys + slot + 1, sizeof(struct key) * (nritems - slot - 1)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9fe8ba6e25c1..4a7bc4e6e747 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -51,10 +51,12 @@ struct tree_buffer; */ struct ctree_root { struct tree_buffer *node; + struct tree_buffer *commit_root; struct ctree_root *extent_root; struct key current_insert; int fp; struct radix_tree_root cache_radix; + struct radix_tree_root pinned_radix; struct list_head trans; struct list_head cache; int cache_size; @@ -151,4 +153,6 @@ int del_item(struct ctree_root *root, struct ctree_path *path); int insert_item(struct ctree_root *root, struct key *key, void *data, int data_size); int next_leaf(struct ctree_root *root, struct ctree_path *path); int leaf_free_space(struct leaf *leaf); +int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap); +int btrfs_finish_extent_commit(struct ctree_root *root); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0e1c31e682fb..2fe31c3508c1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -153,13 +153,24 @@ static int __commit_transaction(struct ctree_root *root) return ret; } -int commit_transaction(struct ctree_root *root) +int commit_transaction(struct ctree_root *root, struct ctree_super_block *s) { - int ret; + int ret = 0; + ret = __commit_transaction(root); if (!ret && root != root->extent_root) ret = __commit_transaction(root->extent_root); BUG_ON(ret); + if (root->commit_root != root->node) { + struct tree_buffer *snap = root->commit_root; + root->commit_root = root->node; + root->node->count++; + ret = btrfs_drop_snapshot(root, snap); + BUG_ON(ret); + tree_block_release(root, snap); + } + write_ctree_super(root, s); + btrfs_finish_extent_commit(root); return ret; } @@ -168,10 +179,13 @@ static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, { INIT_LIST_HEAD(&root->trans); INIT_LIST_HEAD(&root->cache); + root->cache_size = 0; root->fp = fp; root->node = NULL; - root->node = read_tree_block(root, info->tree_root); root->extent_root = extent_root; + root->commit_root = NULL; + root->node = read_tree_block(root, info->tree_root); + memset(&root->current_insert, 0, sizeof(root->current_insert)); return 0; } @@ -188,6 +202,8 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) return NULL; } INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&root->pinned_radix, GFP_KERNEL); + INIT_RADIX_TREE(&extent_root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); ret = pread(fp, super, sizeof(struct ctree_super_block), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); @@ -204,6 +220,8 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) BUG_ON(ret < 0); __setup_root(root, extent_root, &super->root_info, fp); __setup_root(extent_root, extent_root, &super->extent_info, fp); + root->commit_root = root->node; + root->node->count++; return root; } @@ -236,9 +254,11 @@ static int drop_cache(struct ctree_root *root) } return 0; } -int close_ctree(struct ctree_root *root) +int close_ctree(struct ctree_root *root, struct ctree_super_block *s) { - commit_transaction(root); + commit_transaction(root, s); + __commit_transaction(root->extent_root); + write_ctree_super(root, s); drop_cache(root->extent_root); drop_cache(root); BUG_ON(!list_empty(&root->trans)); @@ -249,6 +269,7 @@ int close_ctree(struct ctree_root *root) tree_block_release(root, root->node); if (root->extent_root->node) tree_block_release(root->extent_root, root->extent_root->node); + tree_block_release(root, root->commit_root); free(root); printf("on close %d blocks are allocated\n", allocated_blocks); return 0; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b5dee2fae4da..1c0af7c56c21 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -18,9 +18,9 @@ struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr); int write_tree_block(struct ctree_root *root, struct tree_buffer *buf); int dirty_tree_block(struct ctree_root *root, struct tree_buffer *buf); int clean_tree_block(struct ctree_root *root, struct tree_buffer *buf); -int commit_transaction(struct ctree_root *root); +int commit_transaction(struct ctree_root *root, struct ctree_super_block *s); struct ctree_root *open_ctree(char *filename, struct ctree_super_block *s); -int close_ctree(struct ctree_root *root); +int close_ctree(struct ctree_root *root, struct ctree_super_block *s); void tree_block_release(struct ctree_root *root, struct tree_buffer *buf); int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s); int mkfs(int fd); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 25d9cd169209..0723b7f3f0c3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -13,7 +13,8 @@ * other allocations are done. The pending tag is also used in the same * manner for deletes. */ -#define CTREE_EXTENT_PENDING 0 +#define CTREE_EXTENT_PENDING_ADD 0 +#define CTREE_EXTENT_PENDING_DEL 1 static int inc_block_ref(struct ctree_root *root, u64 blocknr) { @@ -27,20 +28,51 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) key.flags = 0; key.offset = 1; ret = search_slot(root->extent_root, &key, &path, 0, 1); + if (ret != 0) + BUG(); BUG_ON(ret != 0); l = &path.nodes[0]->leaf; item = (struct extent_item *)(l->data + l->items[path.slots[0]].offset); item->refs++; + BUG_ON(list_empty(&path.nodes[0]->dirty)); release_path(root->extent_root, &path); return 0; } +static int lookup_block_ref(struct ctree_root *root, u64 blocknr, int *refs) +{ + struct ctree_path path; + int ret; + struct key key; + struct leaf *l; + struct extent_item *item; + init_path(&path); + key.objectid = blocknr; + key.flags = 0; + key.offset = 1; + ret = search_slot(root->extent_root, &key, &path, 0, 0); + if (ret != 0) + BUG(); + l = &path.nodes[0]->leaf; + item = (struct extent_item *)(l->data + + l->items[path.slots[0]].offset); + *refs = item->refs; + release_path(root->extent_root, &path); + return 0; +} + int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) { u64 blocknr; int i; + + if (root == root->extent_root) + return 0; + if (is_leaf(buf->node.header.flags)) + return 0; + for (i = 0; i < buf->node.header.nritems; i++) { blocknr = buf->node.blockptrs[i]; inc_block_ref(root, blocknr); @@ -48,6 +80,113 @@ int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) return 0; } +int btrfs_finish_extent_commit(struct ctree_root *root) +{ + struct ctree_root *extent_root = root->extent_root; + unsigned long gang[8]; + int ret; + int i; + + while(1) { + ret = radix_tree_gang_lookup(&extent_root->pinned_radix, + (void **)gang, 0, + ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) + radix_tree_delete(&extent_root->pinned_radix, gang[i]); + } + return 0; +} + +/* + * remove an extent from the root, returns 0 on success + */ +int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) +{ + struct ctree_path path; + struct key key; + struct ctree_root *extent_root = root->extent_root; + int ret; + struct item *item; + struct extent_item *ei; + key.objectid = blocknr; + key.flags = 0; + key.offset = num_blocks; + + init_path(&path); + ret = search_slot(extent_root, &key, &path, -1, 1); + if (ret) { + printf("failed to find %Lu\n", key.objectid); + print_tree(extent_root, extent_root->node); + printf("failed to find %Lu\n", key.objectid); + BUG(); + } + item = path.nodes[0]->leaf.items + path.slots[0]; + ei = (struct extent_item *)(path.nodes[0]->leaf.data + item->offset); + BUG_ON(ei->refs == 0); + ei->refs--; + if (ei->refs == 0) { + if (root == extent_root) { + int err; + radix_tree_preload(GFP_KERNEL); + err = radix_tree_insert(&extent_root->pinned_radix, + blocknr, (void *)blocknr); + BUG_ON(err); + radix_tree_preload_end(); + } + ret = del_item(extent_root, &path); + if (ret) + BUG(); + } + release_path(extent_root, &path); + return ret; +} + +/* + * insert all of the pending extents reserved during the original + * allocation. (CTREE_EXTENT_PENDING). Returns zero if it all worked out + */ +static int insert_pending_extents(struct ctree_root *extent_root) +{ + int ret; + struct key key; + struct extent_item item; + struct tree_buffer *gang[4]; + int i; + + // FIXME -ENOSPC + item.owner = extent_root->node->node.header.parentid; + item.refs = 1; + while(1) { + ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, + (void **)gang, 0, + ARRAY_SIZE(gang), + CTREE_EXTENT_PENDING_ADD); + if (!ret) + break; + for (i = 0; i < ret; i++) { + key.objectid = gang[i]->blocknr; + key.flags = 0; + key.offset = 1; + ret = insert_item(extent_root, &key, &item, + sizeof(item)); + if (ret) { + printf("%Lu already in tree\n", key.objectid); + print_tree(extent_root, extent_root->node); + BUG(); + // FIXME undo it and return sane + return ret; + } + radix_tree_tag_clear(&extent_root->cache_radix, + gang[i]->blocknr, + CTREE_EXTENT_PENDING_ADD); + tree_block_release(extent_root, gang[i]); + } + } + return 0; +} + /* * find all the blocks marked as pending in the radix tree and remove * them from the extent map @@ -55,78 +194,73 @@ int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) static int del_pending_extents(struct ctree_root *extent_root) { int ret; - struct key key; struct tree_buffer *gang[4]; int i; - struct ctree_path path; while(1) { ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, (void **)gang, 0, ARRAY_SIZE(gang), - CTREE_EXTENT_PENDING); + CTREE_EXTENT_PENDING_DEL); if (!ret) break; for (i = 0; i < ret; i++) { - key.objectid = gang[i]->blocknr; - key.flags = 0; - key.offset = 1; - init_path(&path); - ret = search_slot(extent_root, &key, &path, -1, 1); - if (ret) { - print_tree(extent_root, extent_root->node); - printf("unable to find %Lu\n", key.objectid); - BUG(); - // FIXME undo it and return sane - return ret; - } - ret = del_item(extent_root, &path); - if (ret) { - BUG(); - return ret; - } - release_path(extent_root, &path); + ret = __free_extent(extent_root, gang[i]->blocknr, 1); radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, - CTREE_EXTENT_PENDING); + CTREE_EXTENT_PENDING_DEL); tree_block_release(extent_root, gang[i]); } } return 0; } +static int run_pending(struct ctree_root *extent_root) +{ + while(radix_tree_tagged(&extent_root->cache_radix, + CTREE_EXTENT_PENDING_DEL) || + radix_tree_tagged(&extent_root->cache_radix, + CTREE_EXTENT_PENDING_ADD)) { + insert_pending_extents(extent_root); + del_pending_extents(extent_root); + } + return 0; +} + + /* * remove an extent from the root, returns 0 on success */ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) { - struct ctree_path path; struct key key; struct ctree_root *extent_root = root->extent_root; struct tree_buffer *t; int pending_ret; int ret; + + if (root == extent_root) { + t = find_tree_block(root, blocknr); + if (radix_tree_tag_get(&root->cache_radix, blocknr, + CTREE_EXTENT_PENDING_ADD)) { + radix_tree_tag_clear(&root->cache_radix, + blocknr, + CTREE_EXTENT_PENDING_ADD); + /* once for us */ + tree_block_release(root, t); + /* once for the pending add */ + tree_block_release(root, t); + } else { + radix_tree_tag_set(&root->cache_radix, blocknr, + CTREE_EXTENT_PENDING_DEL); + } + return 0; + } key.objectid = blocknr; key.flags = 0; key.offset = num_blocks; - if (root == extent_root) { - t = read_tree_block(root, key.objectid); - radix_tree_tag_set(&root->cache_radix, key.objectid, - CTREE_EXTENT_PENDING); - return 0; - } - init_path(&path); - ret = search_slot(extent_root, &key, &path, -1, 1); - if (ret) { - print_tree(extent_root, extent_root->node); - printf("failed to find %Lu\n", key.objectid); - BUG(); - } - ret = del_item(extent_root, &path); - if (ret) - BUG(); - release_path(extent_root, &path); - pending_ret = del_pending_extents(root->extent_root); + ret = __free_extent(root, blocknr, num_blocks); + pending_ret = run_pending(root->extent_root); return ret ? ret : pending_ret; } @@ -203,7 +337,7 @@ check_pending: */ release_path(root, &path); BUG_ON(ins->objectid < search_start); - if (orig_root->extent_root == orig_root) { + if (1 || orig_root->extent_root == orig_root) { BUG_ON(num_blocks != 1); if ((root->current_insert.objectid <= ins->objectid && root->current_insert.objectid + @@ -211,8 +345,9 @@ check_pending: (root->current_insert.objectid > ins->objectid && root->current_insert.objectid <= ins->objectid + ins->offset) || + radix_tree_lookup(&root->pinned_radix, ins->objectid) || radix_tree_tag_get(&root->cache_radix, ins->objectid, - CTREE_EXTENT_PENDING)) { + CTREE_EXTENT_PENDING_ADD)) { search_start = ins->objectid + 1; goto check_failed; } @@ -225,51 +360,6 @@ error: return ret; } -/* - * insert all of the pending extents reserved during the original - * allocation. (CTREE_EXTENT_PENDING). Returns zero if it all worked out - */ -static int insert_pending_extents(struct ctree_root *extent_root) -{ - int ret; - struct key key; - struct extent_item item; - struct tree_buffer *gang[4]; - int i; - - // FIXME -ENOSPC - item.refs = 1; - item.owner = extent_root->node->node.header.parentid; - while(1) { - ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, - (void **)gang, 0, - ARRAY_SIZE(gang), - CTREE_EXTENT_PENDING); - if (!ret) - break; - for (i = 0; i < ret; i++) { - key.objectid = gang[i]->blocknr; - key.flags = 0; - key.offset = 1; - ret = insert_item(extent_root, &key, &item, - sizeof(item)); - if (ret) { - printf("%Lu already in tree\n", key.objectid); - print_tree(extent_root, extent_root->node); - BUG(); - // FIXME undo it and return sane - return ret; - } - radix_tree_tag_clear(&extent_root->cache_radix, - gang[i]->blocknr, - CTREE_EXTENT_PENDING); - printf("%Lu is not pending\n", gang[i]->blocknr); - tree_block_release(extent_root, gang[i]); - } - } - return 0; -} - /* * finds a free extent and does all the dirty work required for allocation * returns the key for the extent through ins, and a tree buffer for @@ -296,7 +386,7 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, sizeof(extent_item)); memset(&root->extent_root->current_insert, 0, sizeof(struct key)); - pending_ret = insert_pending_extents(root->extent_root); + pending_ret = run_pending(root->extent_root); if (ret) return ret; if (pending_ret) @@ -309,9 +399,8 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, BUG_ON(ins->offset != 1); *buf = find_tree_block(root, ins->objectid); BUG_ON(!*buf); - printf("%Lu is pending\n", ins->objectid); radix_tree_tag_set(&root->cache_radix, ins->objectid, - CTREE_EXTENT_PENDING); + CTREE_EXTENT_PENDING_ADD); (*buf)->count++; dirty_tree_block(root, *buf); return 0; @@ -331,13 +420,41 @@ struct tree_buffer *alloc_free_block(struct ctree_root *root) ret = alloc_extent(root, 1, 0, (unsigned long)-1, root->node->node.header.parentid, &ins, &buf); - if (ret) { BUG(); return NULL; } if (root != root->extent_root) BUG_ON(radix_tree_tag_get(&root->extent_root->cache_radix, - buf->blocknr, CTREE_EXTENT_PENDING)); + buf->blocknr, + CTREE_EXTENT_PENDING_ADD)); return buf; } + +int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) +{ + int ret; + int level; + int refs; + u64 blocknr = snap->blocknr; + + level = node_level(snap->node.header.flags); + ret = lookup_block_ref(root, snap->blocknr, &refs); + BUG_ON(ret); + if (refs == 1 && level != 0) { + struct node *n = &snap->node; + struct tree_buffer *b; + int i; + for (i = 0; i < n->header.nritems; i++) { + b = read_tree_block(root, n->blockptrs[i]); + /* FIXME, don't recurse here */ + ret = btrfs_drop_snapshot(root, b); + BUG_ON(ret); + tree_block_release(root, b); + } + } + ret = free_extent(root, blocknr, 1); + BUG_ON(ret); + return 0; +} + diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index dda08f32c154..e32a959dd3e5 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -21,9 +21,11 @@ void print_leaf(struct leaf *l) item->key.objectid, item->key.flags, item->key.offset, item->offset, item->size); fflush(stdout); - printf("\t\titem data %.*s\n", item->size, l->data+item->offset); + printf("\t\titem data %.*s\n", item->size, + l->data+item->offset); ei = (struct extent_item *)(l->data + item->offset); - printf("\t\textent data %u %Lu\n", ei->refs, ei->owner); + printf("\t\textent data refs %u owner %Lu\n", ei->refs, + ei->owner); fflush(stdout); } } diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index 8255f79ceca5..6400c7100a6a 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -19,7 +19,7 @@ int main(int ac, char **av) { int i; int num; int ret; - int run_size = 1024; + int run_size = 100000; int max_key = 100000000; int tree_size = 0; struct ctree_path path; @@ -44,9 +44,9 @@ int main(int ac, char **av) { if (!ret) tree_size++; free(buf); + } - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); root = open_ctree("dbfile", &super); printf("starting search\n"); @@ -65,8 +65,7 @@ int main(int ac, char **av) { } release_path(root, &path); } - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); root = open_ctree("dbfile", &super); printf("node %p level %d total ptrs %d free spc %lu\n", root->node, node_level(root->node->node.header.flags), @@ -90,8 +89,7 @@ int main(int ac, char **av) { } release_path(root, &path); } - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); root = open_ctree("dbfile", &super); srand(128); for (i = 0; i < run_size; i++) { @@ -106,8 +104,7 @@ int main(int ac, char **av) { tree_size++; free(buf); } - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); root = open_ctree("dbfile", &super); srand(128); printf("starting search2\n"); @@ -156,10 +153,17 @@ int main(int ac, char **av) { } release_path(root, &path); } + /* + printf("previous tree:\n"); + print_tree(root, root->commit_root); + printf("map before commit\n"); + print_tree(root->extent_root, root->extent_root->node); + */ + commit_transaction(root, &super); printf("tree size is now %d\n", tree_size); + printf("root %p commit root %p\n", root->node, root->commit_root); printf("map tree\n"); print_tree(root->extent_root, root->extent_root->node); - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); return 0; } diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index dcc852ad6737..7b37b6bae105 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -8,6 +8,7 @@ #include "print-tree.h" int keep_running = 1; +struct ctree_super_block super; static int setup_key(struct radix_tree_root *root, struct key *key, int exists) { @@ -59,11 +60,6 @@ error: return -1; } -static int run_commit(struct ctree_root *root, struct radix_tree_root *radix) -{ - return commit_transaction(root); -} - static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; @@ -210,7 +206,7 @@ static int fill_tree(struct ctree_root *root, struct radix_tree_root *radix, goto out; } if (i % 1000 == 0) { - ret = commit_transaction(root); + ret = commit_transaction(root, &super); if (ret) { fprintf(stderr, "fill commit failed\n"); return ret; @@ -229,7 +225,7 @@ out: static int bulk_op(struct ctree_root *root, struct radix_tree_root *radix) { int ret; - int nr = rand() % 20000; + int nr = rand() % 5000; static int run_nr = 0; /* do the bulk op much less frequently */ @@ -247,7 +243,7 @@ static int bulk_op(struct ctree_root *root, struct radix_tree_root *radix) int (*ops[])(struct ctree_root *root, struct radix_tree_root *radix) = { ins_one, insert_dup, del_one, lookup_item, - lookup_enoent, bulk_op, run_commit }; + lookup_enoent, bulk_op }; static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) { @@ -314,7 +310,6 @@ int print_usage(void) int main(int ac, char **av) { RADIX_TREE(radix, GFP_KERNEL); - struct ctree_super_block super; struct ctree_root *root; int i; int ret; @@ -365,8 +360,7 @@ int main(int ac, char **av) printf("open & close, root level %d nritems %d\n", node_level(root->node->node.header.flags), root->node->node.header.nritems); - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); root = open_ctree("dbfile", &super); } while(count--) { @@ -380,7 +374,7 @@ int main(int ac, char **av) err = ret; goto out; } - if (ops[op] == bulk_op || ops[op] == run_commit) + if (ops[op] == bulk_op) break; if (keep_running == 0) { err = 0; @@ -389,8 +383,7 @@ int main(int ac, char **av) } } out: - write_ctree_super(root, &super); - close_ctree(root); + close_ctree(root, &super); return err; } From 037e6390488af8ab96137e1e5cccc15ad14ef887 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 Mar 2007 11:50:24 -0500 Subject: [PATCH 0029/2985] Btrfs: get rid of add recursion Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 9 -- fs/btrfs/extent-tree.c | 209 +++++++++++++++++++---------------------- 3 files changed, 96 insertions(+), 124 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index ae7f4c00c39c..d92d08dde0ff 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,6 +1,6 @@ CC=gcc -CFLAGS = -g -Wall +CFLAGS = -Wall headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 72816381d203..729d4ddb3746 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -995,15 +995,6 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, int ret; int wret; - wret = push_leaf_left(root, path, data_size); - if (wret < 0) - return wret; - if (wret) { - wret = push_leaf_right(root, path, data_size); - if (wret < 0) - return wret; - } - l_buf = path->nodes[0]; l = &l_buf->leaf; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0723b7f3f0c3..8a2b8aaf9b86 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6,6 +6,11 @@ #include "disk-io.h" #include "print-tree.h" +static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, + u64 search_start, u64 search_end, struct key *ins); +static int finish_current_insert(struct ctree_root *extent_root); +static int run_pending(struct ctree_root *extent_root); + /* * pending extents are blocks that we're trying to allocate in the extent * map while trying to grow the map because of other allocations. To avoid @@ -13,8 +18,7 @@ * other allocations are done. The pending tag is also used in the same * manner for deletes. */ -#define CTREE_EXTENT_PENDING_ADD 0 -#define CTREE_EXTENT_PENDING_DEL 1 +#define CTREE_EXTENT_PENDING_DEL 0 static int inc_block_ref(struct ctree_root *root, u64 blocknr) { @@ -23,6 +27,9 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) struct key key; struct leaf *l; struct extent_item *item; + struct key ins; + + find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins); init_path(&path); key.objectid = blocknr; key.flags = 0; @@ -38,6 +45,8 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) BUG_ON(list_empty(&path.nodes[0]->dirty)); release_path(root->extent_root, &path); + finish_current_insert(root->extent_root); + run_pending(root->extent_root); return 0; } @@ -99,6 +108,28 @@ int btrfs_finish_extent_commit(struct ctree_root *root) return 0; } +static int finish_current_insert(struct ctree_root *extent_root) +{ + struct key ins; + struct extent_item extent_item; + int i; + int ret; + + extent_item.refs = 1; + extent_item.owner = extent_root->node->node.header.parentid; + ins.offset = 1; + ins.flags = 0; + + for (i = 0; i < extent_root->current_insert.flags; i++) { + ins.objectid = extent_root->current_insert.objectid + i; + ret = insert_item(extent_root, &ins, &extent_item, + sizeof(extent_item)); + BUG_ON(ret); + } + extent_root->current_insert.offset = 0; + return 0; +} + /* * remove an extent from the root, returns 0 on success */ @@ -110,10 +141,13 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) int ret; struct item *item; struct extent_item *ei; + struct key ins; + key.objectid = blocknr; key.flags = 0; key.offset = num_blocks; + find_free_extent(root, 0, 0, (u64)-1, &ins); init_path(&path); ret = search_slot(extent_root, &key, &path, -1, 1); if (ret) { @@ -140,53 +174,10 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) BUG(); } release_path(extent_root, &path); + finish_current_insert(extent_root); return ret; } -/* - * insert all of the pending extents reserved during the original - * allocation. (CTREE_EXTENT_PENDING). Returns zero if it all worked out - */ -static int insert_pending_extents(struct ctree_root *extent_root) -{ - int ret; - struct key key; - struct extent_item item; - struct tree_buffer *gang[4]; - int i; - - // FIXME -ENOSPC - item.owner = extent_root->node->node.header.parentid; - item.refs = 1; - while(1) { - ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, - (void **)gang, 0, - ARRAY_SIZE(gang), - CTREE_EXTENT_PENDING_ADD); - if (!ret) - break; - for (i = 0; i < ret; i++) { - key.objectid = gang[i]->blocknr; - key.flags = 0; - key.offset = 1; - ret = insert_item(extent_root, &key, &item, - sizeof(item)); - if (ret) { - printf("%Lu already in tree\n", key.objectid); - print_tree(extent_root, extent_root->node); - BUG(); - // FIXME undo it and return sane - return ret; - } - radix_tree_tag_clear(&extent_root->cache_radix, - gang[i]->blocknr, - CTREE_EXTENT_PENDING_ADD); - tree_block_release(extent_root, gang[i]); - } - } - return 0; -} - /* * find all the blocks marked as pending in the radix tree and remove * them from the extent map @@ -218,12 +209,8 @@ static int del_pending_extents(struct ctree_root *extent_root) static int run_pending(struct ctree_root *extent_root) { while(radix_tree_tagged(&extent_root->cache_radix, - CTREE_EXTENT_PENDING_DEL) || - radix_tree_tagged(&extent_root->cache_radix, - CTREE_EXTENT_PENDING_ADD)) { - insert_pending_extents(extent_root); + CTREE_EXTENT_PENDING_DEL)) del_pending_extents(extent_root); - } return 0; } @@ -241,19 +228,8 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) if (root == extent_root) { t = find_tree_block(root, blocknr); - if (radix_tree_tag_get(&root->cache_radix, blocknr, - CTREE_EXTENT_PENDING_ADD)) { - radix_tree_tag_clear(&root->cache_radix, - blocknr, - CTREE_EXTENT_PENDING_ADD); - /* once for us */ - tree_block_release(root, t); - /* once for the pending add */ - tree_block_release(root, t); - } else { - radix_tree_tag_set(&root->cache_radix, blocknr, + radix_tree_tag_set(&root->cache_radix, blocknr, CTREE_EXTENT_PENDING_DEL); - } return 0; } key.objectid = blocknr; @@ -281,9 +257,11 @@ static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, u64 hole_size = 0; int slot = 0; u64 last_block; + u64 test_block; int start_found; struct leaf *l; struct ctree_root * root = orig_root->extent_root; + int total_needed = num_blocks + MAX_LEVEL * 3; check_failed: init_path(&path); @@ -306,22 +284,34 @@ check_failed: goto error; if (!start_found) { ins->objectid = search_start; - ins->offset = num_blocks; + ins->offset = (u64)-1; start_found = 1; goto check_pending; } ins->objectid = last_block > search_start ? last_block : search_start; - ins->offset = num_blocks; + ins->offset = (u64)-1; goto check_pending; } + if (slot == 0) { + int last_slot = l->header.nritems - 1; + u64 span = l->items[last_slot].key.objectid; + span -= l->items[slot].key.objectid; + if (span + total_needed > last_slot - slot) { + path.slots[0] = last_slot + 1; + key = &l->items[last_slot].key; + last_block = key->objectid + key->offset; + start_found = 1; + continue; + } + } key = &l->items[slot].key; if (key->objectid >= search_start) { if (start_found) { hole_size = key->objectid - last_block; - if (hole_size > num_blocks) { + if (hole_size > total_needed) { ins->objectid = last_block; - ins->offset = num_blocks; + ins->offset = hole_size; goto check_pending; } } else @@ -337,23 +327,18 @@ check_pending: */ release_path(root, &path); BUG_ON(ins->objectid < search_start); - if (1 || orig_root->extent_root == orig_root) { - BUG_ON(num_blocks != 1); - if ((root->current_insert.objectid <= ins->objectid && - root->current_insert.objectid + - root->current_insert.offset > ins->objectid) || - (root->current_insert.objectid > ins->objectid && - root->current_insert.objectid <= ins->objectid + - ins->offset) || - radix_tree_lookup(&root->pinned_radix, ins->objectid) || - radix_tree_tag_get(&root->cache_radix, ins->objectid, - CTREE_EXTENT_PENDING_ADD)) { - search_start = ins->objectid + 1; + for (test_block = ins->objectid; + test_block < ins->objectid + total_needed; test_block++) { + if (radix_tree_lookup(&root->pinned_radix, test_block)) { + search_start = test_block + 1; goto check_failed; } } - if (ins->offset != 1) - BUG(); + BUG_ON(root->current_insert.offset); + root->current_insert.offset = total_needed; + root->current_insert.objectid = ins->objectid + num_blocks; + root->current_insert.flags = 0; + ins->offset = num_blocks; return 0; error: release_path(root, &path); @@ -368,43 +353,41 @@ error: * returns 0 if everything worked, non-zero otherwise. */ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, - u64 search_end, u64 owner, struct key *ins, - struct tree_buffer **buf) + u64 search_end, u64 owner, struct key *ins) { int ret; int pending_ret; + struct ctree_root *extent_root = root->extent_root; struct extent_item extent_item; + extent_item.refs = 1; extent_item.owner = owner; - ret = find_free_extent(root, num_blocks, search_start, search_end, ins); - if (ret) - return ret; - if (root != root->extent_root) { - memcpy(&root->extent_root->current_insert, ins, sizeof(*ins)); - ret = insert_item(root->extent_root, ins, &extent_item, - sizeof(extent_item)); - memset(&root->extent_root->current_insert, 0, - sizeof(struct key)); - pending_ret = run_pending(root->extent_root); - if (ret) - return ret; - if (pending_ret) - return pending_ret; - *buf = find_tree_block(root, ins->objectid); - dirty_tree_block(root, *buf); + if (root == extent_root) { + BUG_ON(extent_root->current_insert.offset == 0); + BUG_ON(num_blocks != 1); + BUG_ON(extent_root->current_insert.flags == + extent_root->current_insert.offset); + ins->offset = 1; + ins->objectid = extent_root->current_insert.objectid + + extent_root->current_insert.flags++; return 0; } - /* we're allocating an extent for the extent tree, don't recurse */ - BUG_ON(ins->offset != 1); - *buf = find_tree_block(root, ins->objectid); - BUG_ON(!*buf); - radix_tree_tag_set(&root->cache_radix, ins->objectid, - CTREE_EXTENT_PENDING_ADD); - (*buf)->count++; - dirty_tree_block(root, *buf); - return 0; + ret = find_free_extent(root, num_blocks, search_start, + search_end, ins); + if (ret) + return ret; + ret = insert_item(extent_root, ins, &extent_item, + sizeof(extent_item)); + + finish_current_insert(extent_root); + pending_ret = run_pending(extent_root); + if (ret) + return ret; + if (pending_ret) + return pending_ret; + return 0; } /* @@ -415,19 +398,17 @@ struct tree_buffer *alloc_free_block(struct ctree_root *root) { struct key ins; int ret; - struct tree_buffer *buf = NULL; + struct tree_buffer *buf; ret = alloc_extent(root, 1, 0, (unsigned long)-1, root->node->node.header.parentid, - &ins, &buf); + &ins); if (ret) { BUG(); return NULL; } - if (root != root->extent_root) - BUG_ON(radix_tree_tag_get(&root->extent_root->cache_radix, - buf->blocknr, - CTREE_EXTENT_PENDING_ADD)); + buf = find_tree_block(root, ins.objectid); + dirty_tree_block(root, buf); return buf; } From 0579da4280812f34f382fb0f8004d7b0219e7a33 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 Mar 2007 16:15:30 -0500 Subject: [PATCH 0030/2985] Btrfs: Fixup last found extent caching Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 39 +++++++++++++++++++++------------------ 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d92d08dde0ff..ae7f4c00c39c 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,6 +1,6 @@ CC=gcc -CFLAGS = -Wall +CFLAGS = -g -Wall headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4a7bc4e6e747..518326fa3694 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -54,6 +54,7 @@ struct ctree_root { struct tree_buffer *commit_root; struct ctree_root *extent_root; struct key current_insert; + struct key last_insert; int fp; struct radix_tree_root cache_radix; struct radix_tree_root pinned_radix; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2fe31c3508c1..997cc578a185 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -186,6 +186,7 @@ static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, root->commit_root = NULL; root->node = read_tree_block(root, info->tree_root); memset(&root->current_insert, 0, sizeof(root->current_insert)); + memset(&root->last_insert, 0, sizeof(root->last_insert)); return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8a2b8aaf9b86..dd11532cb2f6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -102,9 +102,12 @@ int btrfs_finish_extent_commit(struct ctree_root *root) ARRAY_SIZE(gang)); if (!ret) break; - for (i = 0; i < ret; i++) + for (i = 0; i < ret; i++) { radix_tree_delete(&extent_root->pinned_radix, gang[i]); + } } + extent_root->last_insert.objectid = 0; + extent_root->last_insert.offset = 0; return 0; } @@ -170,6 +173,9 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) radix_tree_preload_end(); } ret = del_item(extent_root, &path); + if (root != extent_root && + extent_root->last_insert.objectid < blocknr) + extent_root->last_insert.objectid = blocknr; if (ret) BUG(); } @@ -261,8 +267,11 @@ static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, int start_found; struct leaf *l; struct ctree_root * root = orig_root->extent_root; - int total_needed = num_blocks + MAX_LEVEL * 3; + int total_needed = num_blocks; + total_needed += (node_level(root->node->node.header.flags) + 1) * 3; + if (root->last_insert.objectid > search_start) + search_start = root->last_insert.objectid; check_failed: init_path(&path); ins->objectid = search_start; @@ -273,6 +282,9 @@ check_failed: if (ret < 0) goto error; + if (path.slots[0] > 0) + path.slots[0]--; + while (1) { l = &path.nodes[0]->leaf; slot = path.slots[0]; @@ -293,31 +305,21 @@ check_failed: ins->offset = (u64)-1; goto check_pending; } - if (slot == 0) { - int last_slot = l->header.nritems - 1; - u64 span = l->items[last_slot].key.objectid; - span -= l->items[slot].key.objectid; - if (span + total_needed > last_slot - slot) { - path.slots[0] = last_slot + 1; - key = &l->items[last_slot].key; - last_block = key->objectid + key->offset; - start_found = 1; - continue; - } - } key = &l->items[slot].key; if (key->objectid >= search_start) { if (start_found) { + if (last_block < search_start) + last_block = search_start; hole_size = key->objectid - last_block; if (hole_size > total_needed) { ins->objectid = last_block; ins->offset = hole_size; goto check_pending; } - } else - start_found = 1; - last_block = key->objectid + key->offset; + } } + start_found = 1; + last_block = key->objectid + key->offset; path.slots[0]++; } // FIXME -ENOSPC @@ -335,9 +337,10 @@ check_pending: } } BUG_ON(root->current_insert.offset); - root->current_insert.offset = total_needed; + root->current_insert.offset = total_needed - num_blocks; root->current_insert.objectid = ins->objectid + num_blocks; root->current_insert.flags = 0; + root->last_insert.objectid = ins->objectid; ins->offset = num_blocks; return 0; error: From 20524f02260910db1e67bd5335d3854e5e555efc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 10 Mar 2007 06:35:47 -0500 Subject: [PATCH 0031/2985] Btrfs: recursion free-first pass Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 98 +++++++++++++++++++++++++++++++++++++++++- fs/btrfs/quick-test.c | 5 ++- 2 files changed, 100 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index dd11532cb2f6..6fbaece43ff6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -50,7 +50,7 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) return 0; } -static int lookup_block_ref(struct ctree_root *root, u64 blocknr, int *refs) +static int lookup_block_ref(struct ctree_root *root, u64 blocknr, u32 *refs) { struct ctree_path path; int ret; @@ -415,6 +415,100 @@ struct tree_buffer *alloc_free_block(struct ctree_root *root) return buf; } +int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) +{ + struct tree_buffer *next; + struct tree_buffer *cur; + u64 blocknr; + int ret; + u32 refs; + + ret = lookup_block_ref(root, path->nodes[*level]->blocknr, &refs); + BUG_ON(ret); + if (refs > 1) + goto out; + while(*level > 0) { + cur = path->nodes[*level]; + if (path->slots[*level] >= cur->node.header.nritems) + break; + blocknr = cur->node.blockptrs[path->slots[*level]]; + ret = lookup_block_ref(root, blocknr, &refs); + if (refs != 1 || *level == 1) { + path->slots[*level]++; + ret = free_extent(root, blocknr, 1); + BUG_ON(ret); + continue; + } + BUG_ON(ret); + next = read_tree_block(root, blocknr); + if (path->nodes[*level-1]) { + tree_block_release(root, path->nodes[*level-1]); + } + path->nodes[*level-1] = next; + *level = node_level(next->node.header.flags); + path->slots[*level] = 0; + } +out: + ret = free_extent(root, path->nodes[*level]->blocknr, 1); + path->nodes[*level] = NULL; + *level += 1; + BUG_ON(ret); + return 0; +} + +int walk_up_tree(struct ctree_root *root, struct ctree_path *path, int *level) +{ + int i; + int slot; + int ret; + for(i = *level; i < MAX_LEVEL - 1 && path->nodes[i]; i++) { + slot = path->slots[i]; + if (slot < path->nodes[i]->node.header.nritems - 1) { + path->slots[i]++; + *level = i; + return 0; + } else { + ret = free_extent(root, + path->nodes[*level]->blocknr, 1); + *level = i + 1; + BUG_ON(ret); + } + } + return 1; +} + +int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) +{ + int ret; + int level; + struct ctree_path path; + int i; + int orig_level; + + init_path(&path); + + level = node_level(snap->node.header.flags); + orig_level = level; + path.nodes[level] = snap; + path.slots[level] = 0; + while(1) { + ret = walk_down_tree(root, &path, &level); + if (ret > 0) + break; + ret = walk_up_tree(root, &path, &level); + if (ret > 0) + break; + } + for (i = 0; i < orig_level; i++) { + if (path.nodes[i]) + tree_block_release(root, path.nodes[i]); + } + + return 0; +} + + +#if 0 int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) { int ret; @@ -441,4 +535,4 @@ int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) BUG_ON(ret); return 0; } - +#endif diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index 6400c7100a6a..ccca9b2c14a3 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -9,7 +9,7 @@ /* for testing only */ int next_key(int i, int max_key) { return rand() % max_key; - //return i; + // return i; } int main(int ac, char **av) { @@ -44,6 +44,9 @@ int main(int ac, char **av) { if (!ret) tree_size++; free(buf); + if (i == run_size - 5) { + commit_transaction(root, &super); + } } close_ctree(root, &super); From 83e15a28e046dbb4534dd263d1d3dc867a8994a2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 Mar 2007 09:03:27 -0400 Subject: [PATCH 0032/2985] fix leak in btrfs_drop_snapshot Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 997cc578a185..c9201ec81fef 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -167,7 +167,7 @@ int commit_transaction(struct ctree_root *root, struct ctree_super_block *s) root->node->count++; ret = btrfs_drop_snapshot(root, snap); BUG_ON(ret); - tree_block_release(root, snap); + // tree_block_release(root, snap); } write_ctree_super(root, s); btrfs_finish_extent_commit(root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6fbaece43ff6..e7b8a70b5e52 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -441,15 +441,15 @@ int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) } BUG_ON(ret); next = read_tree_block(root, blocknr); - if (path->nodes[*level-1]) { + if (path->nodes[*level-1]) tree_block_release(root, path->nodes[*level-1]); - } path->nodes[*level-1] = next; *level = node_level(next->node.header.flags); path->slots[*level] = 0; } out: ret = free_extent(root, path->nodes[*level]->blocknr, 1); + tree_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; BUG_ON(ret); @@ -470,6 +470,8 @@ int walk_up_tree(struct ctree_root *root, struct ctree_path *path, int *level) } else { ret = free_extent(root, path->nodes[*level]->blocknr, 1); + tree_block_release(root, path->nodes[*level]); + path->nodes[*level] = NULL; *level = i + 1; BUG_ON(ret); } @@ -499,9 +501,10 @@ int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) if (ret > 0) break; } - for (i = 0; i < orig_level; i++) { - if (path.nodes[i]) + for (i = 0; i <= orig_level; i++) { + if (path.nodes[i]) { tree_block_release(root, path.nodes[i]); + } } return 0; From 7518a238ea0152dc849d1ed76d3cae8b44e12f46 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 Mar 2007 12:01:18 -0400 Subject: [PATCH 0033/2985] Btrfs: get/set for struct header fields Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/TODO | 1 + fs/btrfs/ctree.c | 224 +++++++++++++++++++++++------------------ fs/btrfs/ctree.h | 63 +++++++++++- fs/btrfs/disk-io.c | 7 +- fs/btrfs/extent-tree.c | 53 +++------- fs/btrfs/mkfs.c | 10 +- fs/btrfs/print-tree.c | 26 +++-- fs/btrfs/quick-test.c | 12 ++- fs/btrfs/random-test.c | 4 +- 10 files changed, 230 insertions(+), 172 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index ae7f4c00c39c..f8532200e9ad 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -25,7 +25,7 @@ tester: $(objects) random-test.o quick-test: $(objects) quick-test.o gcc $(CFLAGS) -o quick-test $(objects) quick-test.o -$(objects) : $(headers) +$(objects): $(headers) clean : rm debug-tree tester *.o diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 5c4395c3784a..52d2a7a071f4 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,4 +1,5 @@ * cleanup, add more error checking, get rid of BUG_ONs +* endian fixes * Make IO functions look more like the page cache * Fix ENOSPC handling * make blocksize a mkfs parameter instead of #define diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 729d4ddb3746..e43c827e0dfd 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -48,7 +48,7 @@ int btrfs_cow_block(struct ctree_root *root, } cow = alloc_free_block(root); memcpy(&cow->node, &buf->node, sizeof(buf->node)); - cow->node.header.blocknr = cow->blocknr; + btrfs_set_header_blocknr(&cow->node.header, cow->blocknr); *cow_ret = cow; btrfs_inc_ref(root, buf); if (buf == root->node) { @@ -73,7 +73,7 @@ int btrfs_cow_block(struct ctree_root *root, */ static inline unsigned int leaf_data_end(struct leaf *leaf) { - unsigned int nr = leaf->header.nritems; + u32 nr = btrfs_header_nritems(&leaf->header); if (nr == 0) return sizeof(leaf->data); return leaf->items[nr-1].offset; @@ -87,7 +87,7 @@ static inline unsigned int leaf_data_end(struct leaf *leaf) int leaf_free_space(struct leaf *leaf) { int data_end = leaf_data_end(leaf); - int nritems = leaf->header.nritems; + int nritems = btrfs_header_nritems(&leaf->header); char *items_end = (char *)(leaf->items + nritems + 1); return (char *)(leaf->data + data_end) - (char *)items_end; } @@ -118,18 +118,21 @@ int check_node(struct ctree_path *path, int level) struct node *parent = NULL; struct node *node = &path->nodes[level]->node; int parent_slot; + u32 nritems = btrfs_header_nritems(&node->header); if (path->nodes[level + 1]) parent = &path->nodes[level + 1]->node; parent_slot = path->slots[level + 1]; - if (parent && node->header.nritems > 0) { + BUG_ON(nritems == 0); + if (parent) { struct key *parent_key; parent_key = &parent->keys[parent_slot]; BUG_ON(memcmp(parent_key, node->keys, sizeof(struct key))); - BUG_ON(parent->blockptrs[parent_slot] != node->header.blocknr); + BUG_ON(parent->blockptrs[parent_slot] != + btrfs_header_blocknr(&node->header)); } - BUG_ON(node->header.nritems > NODEPTRS_PER_BLOCK); - for (i = 0; i < node->header.nritems - 2; i++) { + BUG_ON(nritems > NODEPTRS_PER_BLOCK); + for (i = 0; nritems > 1 && i < nritems - 2; i++) { BUG_ON(comp_keys(&node->keys[i], &node->keys[i+1]) >= 0); } return 0; @@ -141,18 +144,25 @@ int check_leaf(struct ctree_path *path, int level) struct leaf *leaf = &path->nodes[level]->leaf; struct node *parent = NULL; int parent_slot; + u32 nritems = btrfs_header_nritems(&leaf->header); if (path->nodes[level + 1]) parent = &path->nodes[level + 1]->node; parent_slot = path->slots[level + 1]; - if (parent && leaf->header.nritems > 0) { + BUG_ON(leaf_free_space(leaf) < 0); + + if (nritems == 0) + return 0; + + if (parent) { struct key *parent_key; parent_key = &parent->keys[parent_slot]; BUG_ON(memcmp(parent_key, &leaf->items[0].key, sizeof(struct key))); - BUG_ON(parent->blockptrs[parent_slot] != leaf->header.blocknr); + BUG_ON(parent->blockptrs[parent_slot] != + btrfs_header_blocknr(&leaf->header)); } - for (i = 0; i < leaf->header.nritems - 2; i++) { + for (i = 0; nritems > 1 && i < nritems - 2; i++) { BUG_ON(comp_keys(&leaf->items[i].key, &leaf->items[i+1].key) >= 0); BUG_ON(leaf->items[i].offset != leaf->items[i + 1].offset + @@ -162,7 +172,6 @@ int check_leaf(struct ctree_path *path, int level) LEAF_DATA_SIZE); } } - BUG_ON(leaf_free_space(leaf) < 0); return 0; } @@ -215,13 +224,15 @@ int generic_bin_search(char *p, int item_size, struct key *key, */ int bin_search(struct node *c, struct key *key, int *slot) { - if (is_leaf(c->header.flags)) { + if (btrfs_is_leaf(c)) { struct leaf *l = (struct leaf *)c; return generic_bin_search((void *)l->items, sizeof(struct item), - key, c->header.nritems, slot); + key, btrfs_header_nritems(&c->header), + slot); } else { return generic_bin_search((void *)c->keys, sizeof(struct key), - key, c->header.nritems, slot); + key, btrfs_header_nritems(&c->header), + slot); } return -1; } @@ -233,7 +244,7 @@ struct tree_buffer *read_node_slot(struct ctree_root *root, struct node *node = &parent_buf->node; if (slot < 0) return NULL; - if (slot >= node->header.nritems) + if (slot >= btrfs_header_nritems(&node->header)) return NULL; return read_tree_block(root, node->blockptrs[slot]); } @@ -270,7 +281,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, struct tree_buffer *child; u64 blocknr = mid_buf->blocknr; - if (mid->header.nritems != 1) + if (btrfs_header_nritems(&mid->header) != 1) return 0; /* promote the child to a root */ @@ -287,7 +298,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, } parent = &parent_buf->node; - if (mid->header.nritems > NODEPTRS_PER_BLOCK / 4) + if (btrfs_header_nritems(&mid->header) > NODEPTRS_PER_BLOCK / 4) return 0; left_buf = read_node_slot(root, parent_buf, pslot - 1); @@ -298,7 +309,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, btrfs_cow_block(root, left_buf, parent_buf, pslot - 1, &left_buf); left = &left_buf->node; - orig_slot += left->header.nritems; + orig_slot += btrfs_header_nritems(&left->header); wret = push_node_left(root, left_buf, mid_buf); if (wret < 0) ret = wret; @@ -314,7 +325,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, wret = push_node_left(root, mid_buf, right_buf); if (wret < 0) ret = wret; - if (right->header.nritems == 0) { + if (btrfs_header_nritems(&right->header) == 0) { u64 blocknr = right_buf->blocknr; tree_block_release(root, right_buf); clean_tree_block(root, right_buf); @@ -332,7 +343,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, BUG_ON(list_empty(&parent_buf->dirty)); } } - if (mid->header.nritems == 1) { + if (btrfs_header_nritems(&mid->header) == 1) { /* * we're not allowed to leave a node with one item in the * tree during a delete. A deletion from lower in the tree @@ -348,7 +359,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, ret = wret; BUG_ON(wret == 1); } - if (mid->header.nritems == 0) { + if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ u64 blocknr = mid_buf->blocknr; tree_block_release(root, mid_buf); @@ -369,7 +380,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, /* update the path */ if (left_buf) { - if (left->header.nritems > orig_slot) { + if (btrfs_header_nritems(&left->header) > orig_slot) { left_buf->count++; // released below path->nodes[level] = left_buf; path->slots[level + 1] -= 1; @@ -377,7 +388,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, if (mid_buf) tree_block_release(root, mid_buf); } else { - orig_slot -= left->header.nritems; + orig_slot -= btrfs_header_nritems(&left->header); path->slots[level] = orig_slot; } } @@ -420,7 +431,7 @@ again: b = root->node; b->count++; while (b) { - level = node_level(b->node.header.flags); + level = btrfs_header_level(&b->node.header); if (cow) { int wret; wret = btrfs_cow_block(root, b, p->nodes[level + 1], @@ -434,12 +445,12 @@ again: if (ret) return -1; ret = bin_search(c, key, &slot); - if (!is_leaf(c->header.flags)) { + if (!btrfs_is_leaf(c)) { if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len > 0 && - c->header.nritems == NODEPTRS_PER_BLOCK) { + if (ins_len > 0 && btrfs_header_nritems(&c->header) == + NODEPTRS_PER_BLOCK) { int sret = split_node(root, p, level); BUG_ON(sret > 0); if (sret) @@ -456,7 +467,7 @@ again: goto again; c = &b->node; slot = p->slots[level]; - BUG_ON(c->header.nritems == 1); + BUG_ON(btrfs_header_nritems(&c->header) == 1); } b = read_tree_block(root, c->blockptrs[slot]); } else { @@ -524,8 +535,8 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, int dst_nritems; int ret = 0; - src_nritems = src->header.nritems; - dst_nritems = dst->header.nritems; + src_nritems = btrfs_header_nritems(&src->header); + dst_nritems = btrfs_header_nritems(&dst->header); push_items = NODEPTRS_PER_BLOCK - dst_nritems; if (push_items <= 0) { return 1; @@ -544,9 +555,8 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, memmove(src->blockptrs, src->blockptrs + push_items, (src_nritems - push_items) * sizeof(u64)); } - src->header.nritems -= push_items; - dst->header.nritems += push_items; - + btrfs_set_header_nritems(&src->header, src_nritems - push_items); + btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); BUG_ON(list_empty(&src_buf->dirty)); BUG_ON(list_empty(&dst_buf->dirty)); return ret; @@ -573,8 +583,8 @@ static int balance_node_right(struct ctree_root *root, int dst_nritems; int ret = 0; - src_nritems = src->header.nritems; - dst_nritems = dst->header.nritems; + src_nritems = btrfs_header_nritems(&src->header); + dst_nritems = btrfs_header_nritems(&dst->header); push_items = NODEPTRS_PER_BLOCK - dst_nritems; if (push_items <= 0) { return 1; @@ -596,8 +606,8 @@ static int balance_node_right(struct ctree_root *root, memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items, push_items * sizeof(u64)); - src->header.nritems -= push_items; - dst->header.nritems += push_items; + btrfs_set_header_nritems(&src->header, src_nritems - push_items); + btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); BUG_ON(list_empty(&src_buf->dirty)); BUG_ON(list_empty(&dst_buf->dirty)); @@ -625,12 +635,13 @@ static int insert_new_root(struct ctree_root *root, t = alloc_free_block(root); c = &t->node; memset(c, 0, sizeof(c)); - c->header.nritems = 1; - c->header.flags = node_level(level); - c->header.blocknr = t->blocknr; - c->header.parentid = root->node->node.header.parentid; + btrfs_set_header_nritems(&c->header, 1); + btrfs_set_header_level(&c->header, level); + btrfs_set_header_blocknr(&c->header, t->blocknr); + btrfs_set_header_parentid(&c->header, + btrfs_header_parentid(&root->node->node.header)); lower = &path->nodes[level-1]->node; - if (is_leaf(lower->header.flags)) + if (btrfs_is_leaf(lower)) lower_key = &((struct leaf *)lower)->items[0].key; else lower_key = lower->keys; @@ -663,7 +674,7 @@ static int insert_ptr(struct ctree_root *root, BUG_ON(!path->nodes[level]); lower = &path->nodes[level]->node; - nritems = lower->header.nritems; + nritems = btrfs_header_nritems(&lower->header); if (slot > nritems) BUG(); if (nritems == NODEPTRS_PER_BLOCK) @@ -676,7 +687,7 @@ static int insert_ptr(struct ctree_root *root, } memcpy(lower->keys + slot, key, sizeof(struct key)); lower->blockptrs[slot] = blocknr; - lower->header.nritems++; + btrfs_set_header_nritems(&lower->header, nritems + 1); if (lower->keys[1].objectid == 0) BUG(); BUG_ON(list_empty(&path->nodes[level]->dirty)); @@ -702,6 +713,7 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, int mid; int ret; int wret; + u32 c_nritems; t = path->nodes[level]; c = &t->node; @@ -711,18 +723,20 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, if (ret) return ret; } + c_nritems = btrfs_header_nritems(&c->header); split_buffer = alloc_free_block(root); split = &split_buffer->node; - split->header.flags = c->header.flags; - split->header.blocknr = split_buffer->blocknr; - split->header.parentid = root->node->node.header.parentid; - mid = (c->header.nritems + 1) / 2; + btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); + btrfs_set_header_blocknr(&split->header, split_buffer->blocknr); + btrfs_set_header_parentid(&split->header, + btrfs_header_parentid(&root->node->node.header)); + mid = (c_nritems + 1) / 2; memcpy(split->keys, c->keys + mid, - (c->header.nritems - mid) * sizeof(struct key)); + (c_nritems - mid) * sizeof(struct key)); memcpy(split->blockptrs, c->blockptrs + mid, - (c->header.nritems - mid) * sizeof(u64)); - split->header.nritems = c->header.nritems - mid; - c->header.nritems = mid; + (c_nritems - mid) * sizeof(u64)); + btrfs_set_header_nritems(&split->header, c_nritems - mid); + btrfs_set_header_nritems(&c->header, mid); ret = 0; BUG_ON(list_empty(&t->dirty)); @@ -781,13 +795,15 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, int push_space = 0; int push_items = 0; struct item *item; + u32 left_nritems; + u32 right_nritems; slot = path->slots[1]; if (!path->nodes[1]) { return 1; } upper = path->nodes[1]; - if (slot >= upper->node.header.nritems - 1) { + if (slot >= btrfs_header_nritems(&upper->node.header) - 1) { return 1; } right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]); @@ -806,7 +822,8 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, return 1; } - for (i = left->header.nritems - 1; i >= 0; i--) { + left_nritems = btrfs_header_nritems(&left->header); + for (i = left_nritems - 1; i >= 0; i--) { item = left->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -819,9 +836,10 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, tree_block_release(root, right_buf); return 1; } + right_nritems = btrfs_header_nritems(&right->header); /* push left to right */ - push_space = left->items[left->header.nritems - push_items].offset + - left->items[left->header.nritems - push_items].size; + push_space = left->items[left_nritems - push_items].offset + + left->items[left_nritems - push_items].size; push_space -= leaf_data_end(left); /* make room in the right data area */ memmove(right->data + leaf_data_end(right) - push_space, @@ -832,19 +850,21 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, left->data + leaf_data_end(left), push_space); memmove(right->items + push_items, right->items, - right->header.nritems * sizeof(struct item)); + right_nritems * sizeof(struct item)); /* copy the items from left to right */ - memcpy(right->items, left->items + left->header.nritems - push_items, + memcpy(right->items, left->items + left_nritems - push_items, push_items * sizeof(struct item)); /* update the item pointers */ - right->header.nritems += push_items; + right_nritems += push_items; + btrfs_set_header_nritems(&right->header, right_nritems); push_space = LEAF_DATA_SIZE; - for (i = 0; i < right->header.nritems; i++) { + for (i = 0; i < right_nritems; i++) { right->items[i].offset = push_space - right->items[i].size; push_space = right->items[i].offset; } - left->header.nritems -= push_items; + left_nritems -= push_items; + btrfs_set_header_nritems(&left->header, left_nritems); BUG_ON(list_empty(&left_buf->dirty)); BUG_ON(list_empty(&right_buf->dirty)); @@ -853,8 +873,8 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, BUG_ON(list_empty(&upper->dirty)); /* then fixup the leaf pointer in the path */ - if (path->slots[0] >= left->header.nritems) { - path->slots[0] -= left->header.nritems; + if (path->slots[0] >= left_nritems) { + path->slots[0] -= left_nritems; tree_block_release(root, path->nodes[0]); path->nodes[0] = right_buf; path->slots[1] += 1; @@ -880,7 +900,7 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, int push_space = 0; int push_items = 0; struct item *item; - int old_left_nritems; + u32 old_left_nritems; int ret = 0; int wret; @@ -908,7 +928,7 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, return 1; } - for (i = 0; i < right->header.nritems; i++) { + for (i = 0; i < btrfs_header_nritems(&right->header); i++) { item = right->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -922,31 +942,34 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, return 1; } /* push data from right to left */ - memcpy(left->items + left->header.nritems, + memcpy(left->items + btrfs_header_nritems(&left->header), right->items, push_items * sizeof(struct item)); push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset; memcpy(left->data + leaf_data_end(left) - push_space, right->data + right->items[push_items - 1].offset, push_space); - old_left_nritems = left->header.nritems; + old_left_nritems = btrfs_header_nritems(&left->header); BUG_ON(old_left_nritems < 0); for(i = old_left_nritems; i < old_left_nritems + push_items; i++) { left->items[i].offset -= LEAF_DATA_SIZE - left->items[old_left_nritems -1].offset; } - left->header.nritems += push_items; + btrfs_set_header_nritems(&left->header, old_left_nritems + push_items); /* fixup right node */ push_space = right->items[push_items-1].offset - leaf_data_end(right); memmove(right->data + LEAF_DATA_SIZE - push_space, right->data + leaf_data_end(right), push_space); memmove(right->items, right->items + push_items, - (right->header.nritems - push_items) * sizeof(struct item)); - right->header.nritems -= push_items; + (btrfs_header_nritems(&right->header) - push_items) * + sizeof(struct item)); + btrfs_set_header_nritems(&right->header, + btrfs_header_nritems(&right->header) - + push_items); push_space = LEAF_DATA_SIZE; - for (i = 0; i < right->header.nritems; i++) { + for (i = 0; i < btrfs_header_nritems(&right->header); i++) { right->items[i].offset = push_space - right->items[i].size; push_space = right->items[i].offset; } @@ -983,7 +1006,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, { struct tree_buffer *l_buf; struct leaf *l; - int nritems; + u32 nritems; int mid; int slot; struct leaf *right; @@ -1008,7 +1031,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, return ret; } slot = path->slots[0]; - nritems = l->header.nritems; + nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; right_buffer = alloc_free_block(root); BUG_ON(!right_buffer); @@ -1026,10 +1049,11 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, LEAF_DATA_SIZE) BUG(); } - right->header.nritems = nritems - mid; - right->header.blocknr = right_buffer->blocknr; - right->header.flags = node_level(0); - right->header.parentid = root->node->node.header.parentid; + btrfs_set_header_nritems(&right->header, nritems - mid); + btrfs_set_header_blocknr(&right->header, right_buffer->blocknr); + btrfs_set_header_level(&right->header, 0); + btrfs_set_header_parentid(&right->header, + btrfs_header_parentid(&root->node->node.header)); data_copy_size = l->items[mid].offset + l->items[mid].size - leaf_data_end(l); memcpy(right->items, l->items + mid, @@ -1039,10 +1063,10 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, rt_data_off = LEAF_DATA_SIZE - (l->items[mid].offset + l->items[mid].size); - for (i = 0; i < right->header.nritems; i++) + for (i = 0; i < btrfs_header_nritems(&right->header); i++) right->items[i].offset += rt_data_off; - l->header.nritems = mid; + btrfs_set_header_nritems(&l->header, mid); ret = 0; wret = insert_ptr(root, path, &right->items[0].key, right_buffer->blocknr, path->slots[1] + 1, 1); @@ -1074,7 +1098,7 @@ int insert_item(struct ctree_root *root, struct key *key, int slot_orig; struct leaf *leaf; struct tree_buffer *leaf_buf; - unsigned int nritems; + u32 nritems; unsigned int data_end; struct ctree_path path; @@ -1094,7 +1118,7 @@ int insert_item(struct ctree_root *root, struct key *key, leaf_buf = path.nodes[0]; leaf = &leaf_buf->leaf; - nritems = leaf->header.nritems; + nritems = btrfs_header_nritems(&leaf->header); data_end = leaf_data_end(leaf); if (leaf_free_space(leaf) < sizeof(struct item) + data_size) @@ -1128,7 +1152,7 @@ int insert_item(struct ctree_root *root, struct key *key, leaf->items[slot].offset = data_end - data_size; leaf->items[slot].size = data_size; memcpy(leaf->data + data_end - data_size, data, data_size); - leaf->header.nritems += 1; + btrfs_set_header_nritems(&leaf->header, nritems + 1); ret = 0; if (slot == 0) @@ -1155,12 +1179,12 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, { struct node *node; struct tree_buffer *parent = path->nodes[level]; - int nritems; + u32 nritems; int ret = 0; int wret; node = &parent->node; - nritems = node->header.nritems; + nritems = btrfs_header_nritems(&node->header); if (slot != nritems -1) { memmove(node->keys + slot, node->keys + slot + 1, sizeof(struct key) * (nritems - slot - 1)); @@ -1168,11 +1192,12 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, node->blockptrs + slot + 1, sizeof(u64) * (nritems - slot - 1)); } - node->header.nritems--; - if (node->header.nritems == 0 && parent == root->node) { - BUG_ON(node_level(root->node->node.header.flags) != 1); + nritems--; + btrfs_set_header_nritems(&node->header, nritems); + if (nritems == 0 && parent == root->node) { + BUG_ON(btrfs_header_level(&root->node->node.header) != 1); /* just turn the root into a leaf and break */ - root->node->node.header.flags = node_level(0); + btrfs_set_header_level(&root->node->node.header, 0); } else if (slot == 0) { wret = fixup_low_keys(root, path, node->keys, level + 1); if (wret) @@ -1195,30 +1220,33 @@ int del_item(struct ctree_root *root, struct ctree_path *path) int dsize; int ret = 0; int wret; + u32 nritems; leaf_buf = path->nodes[0]; leaf = &leaf_buf->leaf; slot = path->slots[0]; doff = leaf->items[slot].offset; dsize = leaf->items[slot].size; + nritems = btrfs_header_nritems(&leaf->header); - if (slot != leaf->header.nritems - 1) { + if (slot != nritems - 1) { int i; int data_end = leaf_data_end(leaf); memmove(leaf->data + data_end + dsize, leaf->data + data_end, doff - data_end); - for (i = slot + 1; i < leaf->header.nritems; i++) + for (i = slot + 1; i < nritems; i++) leaf->items[i].offset += dsize; memmove(leaf->items + slot, leaf->items + slot + 1, sizeof(struct item) * - (leaf->header.nritems - slot - 1)); + (nritems - slot - 1)); } - leaf->header.nritems -= 1; + btrfs_set_header_nritems(&leaf->header, nritems - 1); + nritems--; /* delete the leaf if we've emptied it */ - if (leaf->header.nritems == 0) { + if (nritems == 0) { if (leaf_buf == root->node) { - leaf->header.flags = node_level(0); + btrfs_set_header_level(&leaf->header, 0); BUG_ON(list_empty(&leaf_buf->dirty)); } else { clean_tree_block(root, leaf_buf); @@ -1230,7 +1258,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) ret = wret; } } else { - int used = leaf_space_used(leaf, 0, leaf->header.nritems); + int used = leaf_space_used(leaf, 0, nritems); if (slot == 0) { wret = fixup_low_keys(root, path, &leaf->items[0].key, 1); @@ -1251,12 +1279,12 @@ int del_item(struct ctree_root *root, struct ctree_path *path) if (wret < 0) ret = wret; if (path->nodes[0] == leaf_buf && - leaf->header.nritems) { + btrfs_header_nritems(&leaf->header)) { wret = push_leaf_right(root, path, 1); if (wret < 0) ret = wret; } - if (leaf->header.nritems == 0) { + if (btrfs_header_nritems(&leaf->header) == 0) { u64 blocknr = leaf_buf->blocknr; clean_tree_block(root, leaf_buf); wret = del_ptr(root, path, 1, slot); @@ -1292,7 +1320,7 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) return 1; slot = path->slots[level] + 1; c = path->nodes[level]; - if (slot >= c->node.header.nritems) { + if (slot >= btrfs_header_nritems(&c->node.header)) { level++; continue; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 518326fa3694..60c21fe1566a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -35,13 +35,10 @@ struct header { /* generation flags to be added */ } __attribute__ ((__packed__)); +#define MAX_LEVEL 8 #define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct header)) / \ (sizeof(struct key) + sizeof(u64))) -#define MAX_LEVEL 8 -#define node_level(f) ((f) & (MAX_LEVEL-1)) -#define is_leaf(f) (node_level(f) == 0) - struct tree_buffer; /* @@ -144,6 +141,64 @@ struct ctree_path { int slots[MAX_LEVEL]; }; +static inline u64 btrfs_header_blocknr(struct header *h) +{ + return h->blocknr; +} + +static inline void btrfs_set_header_blocknr(struct header *h, u64 blocknr) +{ + h->blocknr = blocknr; +} + +static inline u64 btrfs_header_parentid(struct header *h) +{ + return h->parentid; +} + +static inline void btrfs_set_header_parentid(struct header *h, u64 parentid) +{ + h->parentid = parentid; +} + +static inline u32 btrfs_header_nritems(struct header *h) +{ + return h->nritems; +} + +static inline void btrfs_set_header_nritems(struct header *h, u32 val) +{ + h->nritems = val; +} + +static inline u32 btrfs_header_flags(struct header *h) +{ + return h->flags; +} + +static inline void btrfs_set_header_flags(struct header *h, u32 val) +{ + h->flags = val; +} + +static inline int btrfs_header_level(struct header *h) +{ + return btrfs_header_flags(h) & (MAX_LEVEL - 1); +} + +static inline void btrfs_set_header_level(struct header *h, int level) +{ + u32 flags; + BUG_ON(level > MAX_LEVEL); + flags = btrfs_header_flags(h) & ~(MAX_LEVEL - 1); + btrfs_set_header_flags(h, flags | level); +} + +static inline int btrfs_is_leaf(struct node *n) +{ + return (btrfs_header_level(&n->header) == 0); +} + struct tree_buffer *alloc_free_block(struct ctree_root *root); int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf); int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c9201ec81fef..065e888d2c0b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -15,9 +15,10 @@ int cache_max = 10000; static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) { - if (buf->blocknr != buf->node.header.blocknr) + if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); - if (root->node && buf->node.header.parentid != root->node->node.header.parentid) + if (root->node && btrfs_header_parentid(&buf->node.header) != + btrfs_header_parentid(&root->node->node.header)) BUG(); return 0; } @@ -129,7 +130,7 @@ int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) loff_t offset = blocknr * CTREE_BLOCKSIZE; int ret; - if (buf->blocknr != buf->node.header.blocknr) + if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); ret = pwrite(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); if (ret != CTREE_BLOCKSIZE) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e7b8a70b5e52..524c11fea24a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -79,10 +79,10 @@ int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) if (root == root->extent_root) return 0; - if (is_leaf(buf->node.header.flags)) + if (btrfs_is_leaf(&buf->node)) return 0; - for (i = 0; i < buf->node.header.nritems; i++) { + for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) { blocknr = buf->node.blockptrs[i]; inc_block_ref(root, blocknr); } @@ -119,7 +119,8 @@ static int finish_current_insert(struct ctree_root *extent_root) int ret; extent_item.refs = 1; - extent_item.owner = extent_root->node->node.header.parentid; + extent_item.owner = + btrfs_header_parentid(&extent_root->node->node.header); ins.offset = 1; ins.flags = 0; @@ -269,7 +270,7 @@ static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, struct ctree_root * root = orig_root->extent_root; int total_needed = num_blocks; - total_needed += (node_level(root->node->node.header.flags) + 1) * 3; + total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3; if (root->last_insert.objectid > search_start) search_start = root->last_insert.objectid; check_failed: @@ -288,7 +289,7 @@ check_failed: while (1) { l = &path.nodes[0]->leaf; slot = path.slots[0]; - if (slot >= l->header.nritems) { + if (slot >= btrfs_header_nritems(&l->header)) { ret = next_leaf(root, &path); if (ret == 0) continue; @@ -404,7 +405,7 @@ struct tree_buffer *alloc_free_block(struct ctree_root *root) struct tree_buffer *buf; ret = alloc_extent(root, 1, 0, (unsigned long)-1, - root->node->node.header.parentid, + btrfs_header_parentid(&root->node->node.header), &ins); if (ret) { BUG(); @@ -429,7 +430,8 @@ int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) goto out; while(*level > 0) { cur = path->nodes[*level]; - if (path->slots[*level] >= cur->node.header.nritems) + if (path->slots[*level] >= + btrfs_header_nritems(&cur->node.header)) break; blocknr = cur->node.blockptrs[path->slots[*level]]; ret = lookup_block_ref(root, blocknr, &refs); @@ -444,7 +446,7 @@ int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) if (path->nodes[*level-1]) tree_block_release(root, path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = node_level(next->node.header.flags); + *level = btrfs_header_level(&next->node.header); path->slots[*level] = 0; } out: @@ -463,7 +465,8 @@ int walk_up_tree(struct ctree_root *root, struct ctree_path *path, int *level) int ret; for(i = *level; i < MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < path->nodes[i]->node.header.nritems - 1) { + if (slot < + btrfs_header_nritems(&path->nodes[i]->node.header)- 1) { path->slots[i]++; *level = i; return 0; @@ -489,7 +492,7 @@ int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) init_path(&path); - level = node_level(snap->node.header.flags); + level = btrfs_header_level(&snap->node.header); orig_level = level; path.nodes[level] = snap; path.slots[level] = 0; @@ -509,33 +512,3 @@ int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) return 0; } - - -#if 0 -int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) -{ - int ret; - int level; - int refs; - u64 blocknr = snap->blocknr; - - level = node_level(snap->node.header.flags); - ret = lookup_block_ref(root, snap->blocknr, &refs); - BUG_ON(ret); - if (refs == 1 && level != 0) { - struct node *n = &snap->node; - struct tree_buffer *b; - int i; - for (i = 0; i < n->header.nritems; i++) { - b = read_tree_block(root, n->blockptrs[i]); - /* FIXME, don't recurse here */ - ret = btrfs_drop_snapshot(root, b); - BUG_ON(ret); - tree_block_release(root, b); - } - } - ret = free_extent(root, blocknr, 1); - BUG_ON(ret); - return 0; -} -#endif diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index fd4e5dea7e1b..b2ba94696297 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -34,15 +34,15 @@ int mkfs(int fd) /* create leaves for the tree root and extent root */ memset(&empty_leaf, 0, sizeof(empty_leaf)); - empty_leaf.header.parentid = 1; - empty_leaf.header.blocknr = 17; + btrfs_set_header_parentid(&empty_leaf.header, 1); + btrfs_set_header_blocknr(&empty_leaf.header, 17); ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 17 * CTREE_BLOCKSIZE); if (ret != sizeof(empty_leaf)) return -1; - empty_leaf.header.parentid = 2; - empty_leaf.header.blocknr = 18; - empty_leaf.header.nritems = 3; + btrfs_set_header_parentid(&empty_leaf.header, 2); + btrfs_set_header_blocknr(&empty_leaf.header, 18); + btrfs_set_header_nritems(&empty_leaf.header, 3); /* item1, reserve blocks 0-16 */ item.key.objectid = 0; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index e32a959dd3e5..7df16b1e4733 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -8,11 +8,11 @@ void print_leaf(struct leaf *l) { int i; - int nr = l->header.nritems; + u32 nr = btrfs_header_nritems(&l->header); struct item *item; struct extent_item *ei; - printf("leaf %Lu total ptrs %d free space %d\n", l->header.blocknr, nr, - leaf_free_space(l)); + printf("leaf %Lu total ptrs %d free space %d\n", + btrfs_header_blocknr(&l->header), nr, leaf_free_space(l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; @@ -32,22 +32,20 @@ void print_leaf(struct leaf *l) void print_tree(struct ctree_root *root, struct tree_buffer *t) { int i; - int nr; + u32 nr; struct node *c; if (!t) return; c = &t->node; - nr = c->header.nritems; - if (c->header.blocknr != t->blocknr) - BUG(); - if (is_leaf(c->header.flags)) { + nr = btrfs_header_nritems(&c->header); + if (btrfs_is_leaf(c)) { print_leaf((struct leaf *)c); return; } printf("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, - node_level(c->header.flags), c->header.nritems, - (u32)NODEPTRS_PER_BLOCK - c->header.nritems); + btrfs_header_level(&c->header), nr, + (u32)NODEPTRS_PER_BLOCK - nr); fflush(stdout); for (i = 0; i < nr; i++) { printf("\tkey %d (%Lu %u %Lu) block %Lu\n", @@ -60,11 +58,11 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t) struct tree_buffer *next_buf = read_tree_block(root, c->blockptrs[i]); struct node *next = &next_buf->node; - if (is_leaf(next->header.flags) && - node_level(c->header.flags) != 1) + if (btrfs_is_leaf(next) && + btrfs_header_level(&c->header) != 1) BUG(); - if (node_level(next->header.flags) != - node_level(c->header.flags) - 1) + if (btrfs_header_level(&next->header) != + btrfs_header_level(&c->header) - 1) BUG(); print_tree(root, next_buf); tree_block_release(root, next_buf); diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index ccca9b2c14a3..07fd71b77627 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -71,9 +71,10 @@ int main(int ac, char **av) { close_ctree(root, &super); root = open_ctree("dbfile", &super); printf("node %p level %d total ptrs %d free spc %lu\n", root->node, - node_level(root->node->node.header.flags), - root->node->node.header.nritems, - NODEPTRS_PER_BLOCK - root->node->node.header.nritems); + btrfs_header_level(&root->node->node.header), + btrfs_header_nritems(&root->node->node.header), + NODEPTRS_PER_BLOCK - + btrfs_header_nritems(&root->node->node.header)); printf("all searches good, deleting some items\n"); i = 0; srand(55); @@ -126,7 +127,8 @@ int main(int ac, char **av) { release_path(root, &path); } printf("starting big long delete run\n"); - while(root->node && root->node->node.header.nritems > 0) { + while(root->node && + btrfs_header_nritems(&root->node->node.header) > 0) { struct leaf *leaf; int slot; ins.objectid = (u64)-1; @@ -137,7 +139,7 @@ int main(int ac, char **av) { leaf = &path.nodes[0]->leaf; slot = path.slots[0]; - if (slot != leaf->header.nritems) + if (slot != btrfs_header_nritems(&leaf->header)) BUG(); while(path.slots[0] > 0) { path.slots[0] -= 1; diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 7b37b6bae105..5d99b730a69c 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -358,8 +358,8 @@ int main(int ac, char **av) } if (i && i % 5000 == 0) { printf("open & close, root level %d nritems %d\n", - node_level(root->node->node.header.flags), - root->node->node.header.nritems); + btrfs_header_level(&root->node->node.header), + btrfs_header_nritems(&root->node->node.header)); close_ctree(root, &super); root = open_ctree("dbfile", &super); } From bb492bb0a5453222109a72859353728959c2539d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 Mar 2007 12:29:44 -0400 Subject: [PATCH 0034/2985] Btrfs: Add sparse endian annotations to struct header rename struct header to btrfs_header Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 65 ++++++++++++++++++++++--------------------- fs/btrfs/kerncompat.h | 29 ++++++++++++++++++- 2 files changed, 61 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 60c21fe1566a..e1aaca66d590 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -24,19 +24,19 @@ struct key { /* * every tree block (leaf or node) starts with this header. */ -struct header { - u64 fsid[2]; /* FS specific uuid */ - u64 blocknr; /* which block this node is supposed to live in */ - u64 parentid; /* objectid of the tree root */ - u32 csum; - u32 ham; - u16 nritems; - u16 flags; +struct btrfs_header { + __le64 fsid[2]; /* FS specific uuid */ + __le64 blocknr; /* which block this node is supposed to live in */ + __le64 parentid; /* objectid of the tree root */ + __le32 csum; + __le32 ham; + __le16 nritems; + __le16 flags; /* generation flags to be added */ } __attribute__ ((__packed__)); #define MAX_LEVEL 8 -#define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct header)) / \ +#define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct btrfs_header)) / \ (sizeof(struct key) + sizeof(u64))) struct tree_buffer; @@ -100,12 +100,12 @@ struct item { * The data is separate from the items to get the keys closer together * during searches. */ -#define LEAF_DATA_SIZE (CTREE_BLOCKSIZE - sizeof(struct header)) +#define LEAF_DATA_SIZE (CTREE_BLOCKSIZE - sizeof(struct btrfs_header)) struct leaf { - struct header header; + struct btrfs_header header; union { struct item items[LEAF_DATA_SIZE/sizeof(struct item)]; - u8 data[CTREE_BLOCKSIZE-sizeof(struct header)]; + u8 data[CTREE_BLOCKSIZE-sizeof(struct btrfs_header)]; }; } __attribute__ ((__packed__)); @@ -114,7 +114,7 @@ struct leaf { * other blocks */ struct node { - struct header header; + struct btrfs_header header; struct key keys[NODEPTRS_PER_BLOCK]; u64 blockptrs[NODEPTRS_PER_BLOCK]; } __attribute__ ((__packed__)); @@ -141,54 +141,55 @@ struct ctree_path { int slots[MAX_LEVEL]; }; -static inline u64 btrfs_header_blocknr(struct header *h) +static inline u64 btrfs_header_blocknr(struct btrfs_header *h) { - return h->blocknr; + return le64_to_cpu(h->blocknr); } -static inline void btrfs_set_header_blocknr(struct header *h, u64 blocknr) +static inline void btrfs_set_header_blocknr(struct btrfs_header *h, u64 blocknr) { - h->blocknr = blocknr; + h->blocknr = cpu_to_le64(blocknr); } -static inline u64 btrfs_header_parentid(struct header *h) +static inline u64 btrfs_header_parentid(struct btrfs_header *h) { - return h->parentid; + return le64_to_cpu(h->parentid); } -static inline void btrfs_set_header_parentid(struct header *h, u64 parentid) +static inline void btrfs_set_header_parentid(struct btrfs_header *h, + u64 parentid) { - h->parentid = parentid; + h->parentid = cpu_to_le64(parentid); } -static inline u32 btrfs_header_nritems(struct header *h) +static inline u16 btrfs_header_nritems(struct btrfs_header *h) { - return h->nritems; + return le16_to_cpu(h->nritems); } -static inline void btrfs_set_header_nritems(struct header *h, u32 val) +static inline void btrfs_set_header_nritems(struct btrfs_header *h, u16 val) { - h->nritems = val; + h->nritems = cpu_to_le16(val); } -static inline u32 btrfs_header_flags(struct header *h) +static inline u16 btrfs_header_flags(struct btrfs_header *h) { - return h->flags; + return le16_to_cpu(h->flags); } -static inline void btrfs_set_header_flags(struct header *h, u32 val) +static inline void btrfs_set_header_flags(struct btrfs_header *h, u16 val) { - h->flags = val; + h->flags = cpu_to_le16(val); } -static inline int btrfs_header_level(struct header *h) +static inline int btrfs_header_level(struct btrfs_header *h) { return btrfs_header_flags(h) & (MAX_LEVEL - 1); } -static inline void btrfs_set_header_level(struct header *h, int level) +static inline void btrfs_set_header_level(struct btrfs_header *h, int level) { - u32 flags; + u16 flags; BUG_ON(level > MAX_LEVEL); flags = btrfs_header_flags(h) & ~(MAX_LEVEL - 1); btrfs_set_header_flags(h, flags | level); diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h index f5efc5f0fff0..54cf70eaa01f 100644 --- a/fs/btrfs/kerncompat.h +++ b/fs/btrfs/kerncompat.h @@ -9,10 +9,16 @@ #define GFP_KERNEL 0 #define __read_mostly #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#define __force #define PAGE_SHIFT 12 #define ULONG_MAX (~0UL) #define BUG() abort() +#ifdef __CHECKER__ +#define __force __attribute__((force)) +#define __bitwise__ __attribute__((bitwise)) +#else +#define __force +#define __bitwise__ +#endif typedef unsigned int u32; typedef unsigned long long u64; @@ -67,3 +73,24 @@ static inline void __clear_bit(int bit, unsigned long *map) { #define ENOMEM 5 #define EEXIST 6 + +#define __CHECK_ENDIAN__ +#ifdef __CHECK_ENDIAN__ +#define __bitwise __bitwise__ +#else +#define __bitwise +#endif + +typedef u16 __bitwise __le16; +typedef u16 __bitwise __be16; +typedef u32 __bitwise __le32; +typedef u32 __bitwise __be32; +typedef u64 __bitwise __le64; +typedef u64 __bitwise __be64; + +#define cpu_to_le64(x) ((__force __le64)(u64)(x)) +#define le64_to_cpu(x) ((__force u64)(__le64)(x)) +#define cpu_to_le32(x) ((__force __le32)(u32)(x)) +#define le32_to_cpu(x) ((__force u32)(__le32)(x)) +#define cpu_to_le16(x) ((__force __le16)(u16)(x)) +#define le16_to_cpu(x) ((__force u16)(__le16)(x)) From e2fa7227cdf132d72e7410dd0679dc573a1c2618 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 Mar 2007 16:22:34 -0400 Subject: [PATCH 0035/2985] Btrfs: struct key endian fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 92 +++++++++++++++++++++++++----------------- fs/btrfs/ctree.h | 78 +++++++++++++++++++++++++++++++---- fs/btrfs/extent-tree.c | 34 ++++++++-------- fs/btrfs/kerncompat.h | 3 +- fs/btrfs/mkfs.c | 14 +++---- fs/btrfs/quick-test.c | 6 +-- fs/btrfs/random-test.c | 17 ++++---- 7 files changed, 162 insertions(+), 82 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e43c827e0dfd..489019ac04b8 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -95,19 +95,23 @@ int leaf_free_space(struct leaf *leaf) /* * compare two keys in a memcmp fashion */ -int comp_keys(struct key *k1, struct key *k2) +int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) { - if (k1->objectid > k2->objectid) + struct btrfs_key k1; + + btrfs_disk_key_to_cpu(&k1, disk); + + if (k1.objectid > k2->objectid) return 1; - if (k1->objectid < k2->objectid) + if (k1.objectid < k2->objectid) return -1; - if (k1->flags > k2->flags) + if (k1.flags > k2->flags) return 1; - if (k1->flags < k2->flags) + if (k1.flags < k2->flags) return -1; - if (k1->offset > k2->offset) + if (k1.offset > k2->offset) return 1; - if (k1->offset < k2->offset) + if (k1.offset < k2->offset) return -1; return 0; } @@ -125,15 +129,18 @@ int check_node(struct ctree_path *path, int level) parent_slot = path->slots[level + 1]; BUG_ON(nritems == 0); if (parent) { - struct key *parent_key; + struct btrfs_disk_key *parent_key; parent_key = &parent->keys[parent_slot]; - BUG_ON(memcmp(parent_key, node->keys, sizeof(struct key))); + BUG_ON(memcmp(parent_key, node->keys, + sizeof(struct btrfs_disk_key))); BUG_ON(parent->blockptrs[parent_slot] != btrfs_header_blocknr(&node->header)); } BUG_ON(nritems > NODEPTRS_PER_BLOCK); for (i = 0; nritems > 1 && i < nritems - 2; i++) { - BUG_ON(comp_keys(&node->keys[i], &node->keys[i+1]) >= 0); + struct btrfs_key cpukey; + btrfs_disk_key_to_cpu(&cpukey, &node->keys[i + 1]); + BUG_ON(comp_keys(&node->keys[i], &cpukey) >= 0); } return 0; } @@ -155,16 +162,18 @@ int check_leaf(struct ctree_path *path, int level) return 0; if (parent) { - struct key *parent_key; + struct btrfs_disk_key *parent_key; parent_key = &parent->keys[parent_slot]; BUG_ON(memcmp(parent_key, &leaf->items[0].key, - sizeof(struct key))); + sizeof(struct btrfs_disk_key))); BUG_ON(parent->blockptrs[parent_slot] != btrfs_header_blocknr(&leaf->header)); } for (i = 0; nritems > 1 && i < nritems - 2; i++) { + struct btrfs_key cpukey; + btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key); BUG_ON(comp_keys(&leaf->items[i].key, - &leaf->items[i+1].key) >= 0); + &cpukey) >= 0); BUG_ON(leaf->items[i].offset != leaf->items[i + 1].offset + leaf->items[i + 1].size); if (i == 0) { @@ -191,18 +200,18 @@ int check_block(struct ctree_path *path, int level) * * slot may point to max if the key is bigger than all of the keys */ -int generic_bin_search(char *p, int item_size, struct key *key, +int generic_bin_search(char *p, int item_size, struct btrfs_key *key, int max, int *slot) { int low = 0; int high = max; int mid; int ret; - struct key *tmp; + struct btrfs_disk_key *tmp; while(low < high) { mid = (low + high) / 2; - tmp = (struct key *)(p + mid * item_size); + tmp = (struct btrfs_disk_key *)(p + mid * item_size); ret = comp_keys(tmp, key); if (ret < 0) @@ -222,7 +231,7 @@ int generic_bin_search(char *p, int item_size, struct key *key, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -int bin_search(struct node *c, struct key *key, int *slot) +int bin_search(struct node *c, struct btrfs_key *key, int *slot) { if (btrfs_is_leaf(c)) { struct leaf *l = (struct leaf *)c; @@ -230,7 +239,8 @@ int bin_search(struct node *c, struct key *key, int *slot) key, btrfs_header_nritems(&c->header), slot); } else { - return generic_bin_search((void *)c->keys, sizeof(struct key), + return generic_bin_search((void *)c->keys, + sizeof(struct btrfs_disk_key), key, btrfs_header_nritems(&c->header), slot); } @@ -339,7 +349,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, ret = wret; } else { memcpy(parent->keys + pslot + 1, right->keys, - sizeof(struct key)); + sizeof(struct btrfs_disk_key)); BUG_ON(list_empty(&parent_buf->dirty)); } } @@ -374,7 +384,8 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, ret = wret; } else { /* update the parent key to reflect our changes */ - memcpy(parent->keys + pslot, mid->keys, sizeof(struct key)); + memcpy(parent->keys + pslot, mid->keys, + sizeof(struct btrfs_disk_key)); BUG_ON(list_empty(&parent_buf->dirty)); } @@ -417,7 +428,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if * possible) */ -int search_slot(struct ctree_root *root, struct key *key, +int search_slot(struct ctree_root *root, struct btrfs_key *key, struct ctree_path *p, int ins_len, int cow) { struct tree_buffer *b; @@ -499,7 +510,7 @@ again: * fixing up the blocks in ram so the tree is consistent. */ static int fixup_low_keys(struct ctree_root *root, - struct ctree_path *path, struct key *key, + struct ctree_path *path, struct btrfs_disk_key *key, int level) { int i; @@ -546,12 +557,13 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, push_items = src_nritems; memcpy(dst->keys + dst_nritems, src->keys, - push_items * sizeof(struct key)); + push_items * sizeof(struct btrfs_disk_key)); memcpy(dst->blockptrs + dst_nritems, src->blockptrs, push_items * sizeof(u64)); if (push_items < src_nritems) { memmove(src->keys, src->keys + push_items, - (src_nritems - push_items) * sizeof(struct key)); + (src_nritems - push_items) * + sizeof(struct btrfs_disk_key)); memmove(src->blockptrs, src->blockptrs + push_items, (src_nritems - push_items) * sizeof(u64)); } @@ -598,11 +610,11 @@ static int balance_node_right(struct ctree_root *root, push_items = max_push; memmove(dst->keys + push_items, dst->keys, - dst_nritems * sizeof(struct key)); + dst_nritems * sizeof(struct btrfs_disk_key)); memmove(dst->blockptrs + push_items, dst->blockptrs, dst_nritems * sizeof(u64)); memcpy(dst->keys, src->keys + src_nritems - push_items, - push_items * sizeof(struct key)); + push_items * sizeof(struct btrfs_disk_key)); memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items, push_items * sizeof(u64)); @@ -627,7 +639,7 @@ static int insert_new_root(struct ctree_root *root, struct tree_buffer *t; struct node *lower; struct node *c; - struct key *lower_key; + struct btrfs_disk_key *lower_key; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); @@ -645,7 +657,7 @@ static int insert_new_root(struct ctree_root *root, lower_key = &((struct leaf *)lower)->items[0].key; else lower_key = lower->keys; - memcpy(c->keys, lower_key, sizeof(struct key)); + memcpy(c->keys, lower_key, sizeof(struct btrfs_disk_key)); c->blockptrs[0] = path->nodes[level-1]->blocknr; /* the super has an extra ref to root->node */ tree_block_release(root, root->node); @@ -666,7 +678,7 @@ static int insert_new_root(struct ctree_root *root, * returns zero on success and < 0 on any error */ static int insert_ptr(struct ctree_root *root, - struct ctree_path *path, struct key *key, + struct ctree_path *path, struct btrfs_disk_key *key, u64 blocknr, int slot, int level) { struct node *lower; @@ -681,11 +693,11 @@ static int insert_ptr(struct ctree_root *root, BUG(); if (slot != nritems) { memmove(lower->keys + slot + 1, lower->keys + slot, - (nritems - slot) * sizeof(struct key)); + (nritems - slot) * sizeof(struct btrfs_disk_key)); memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot, (nritems - slot) * sizeof(u64)); } - memcpy(lower->keys + slot, key, sizeof(struct key)); + memcpy(lower->keys + slot, key, sizeof(struct btrfs_disk_key)); lower->blockptrs[slot] = blocknr; btrfs_set_header_nritems(&lower->header, nritems + 1); if (lower->keys[1].objectid == 0) @@ -732,7 +744,7 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, btrfs_header_parentid(&root->node->node.header)); mid = (c_nritems + 1) / 2; memcpy(split->keys, c->keys + mid, - (c_nritems - mid) * sizeof(struct key)); + (c_nritems - mid) * sizeof(struct btrfs_disk_key)); memcpy(split->blockptrs, c->blockptrs + mid, (c_nritems - mid) * sizeof(u64)); btrfs_set_header_nritems(&split->header, c_nritems - mid); @@ -869,7 +881,7 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, BUG_ON(list_empty(&left_buf->dirty)); BUG_ON(list_empty(&right_buf->dirty)); memcpy(upper->node.keys + slot + 1, - &right->items[0].key, sizeof(struct key)); + &right->items[0].key, sizeof(struct btrfs_disk_key)); BUG_ON(list_empty(&upper->dirty)); /* then fixup the leaf pointer in the path */ @@ -1090,7 +1102,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int insert_item(struct ctree_root *root, struct key *key, +int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, void *data, int data_size) { int ret = 0; @@ -1101,12 +1113,15 @@ int insert_item(struct ctree_root *root, struct key *key, u32 nritems; unsigned int data_end; struct ctree_path path; + struct btrfs_disk_key disk_key; + + btrfs_cpu_key_to_disk(&disk_key, cpu_key); /* create a root if there isn't one */ if (!root->node) BUG(); init_path(&path); - ret = search_slot(root, key, &path, data_size, 1); + ret = search_slot(root, cpu_key, &path, data_size, 1); if (ret == 0) { release_path(root, &path); return -EEXIST; @@ -1148,7 +1163,8 @@ int insert_item(struct ctree_root *root, struct key *key, data_end = old_data; } /* copy the new data in */ - memcpy(&leaf->items[slot].key, key, sizeof(struct key)); + memcpy(&leaf->items[slot].key, &disk_key, + sizeof(struct btrfs_disk_key)); leaf->items[slot].offset = data_end - data_size; leaf->items[slot].size = data_size; memcpy(leaf->data + data_end - data_size, data, data_size); @@ -1156,7 +1172,7 @@ int insert_item(struct ctree_root *root, struct key *key, ret = 0; if (slot == 0) - ret = fixup_low_keys(root, &path, key, 1); + ret = fixup_low_keys(root, &path, &disk_key, 1); BUG_ON(list_empty(&leaf_buf->dirty)); if (leaf_free_space(leaf) < 0) @@ -1187,7 +1203,7 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, nritems = btrfs_header_nritems(&node->header); if (slot != nritems -1) { memmove(node->keys + slot, node->keys + slot + 1, - sizeof(struct key) * (nritems - slot - 1)); + sizeof(struct btrfs_disk_key) * (nritems - slot - 1)); memmove(node->blockptrs + slot, node->blockptrs + slot + 1, sizeof(u64) * (nritems - slot - 1)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e1aaca66d590..58e03e90f23a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2,6 +2,7 @@ #define __CTREE__ #include "list.h" +#include "kerncompat.h" #define CTREE_BLOCKSIZE 1024 @@ -14,8 +15,18 @@ * may point to extents. * * offset is the starting byte offset for this key in the stream. + * + * btrfs_disk_key is in disk byte order. struct btrfs_key is always + * in cpu native order. Otherwise they are identical and their sizes + * should be the same (ie both packed) */ -struct key { +struct btrfs_disk_key { + __le64 objectid; + __le32 flags; + __le64 offset; +} __attribute__ ((__packed__)); + +struct btrfs_key { u64 objectid; u32 flags; u64 offset; @@ -37,7 +48,7 @@ struct btrfs_header { #define MAX_LEVEL 8 #define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct btrfs_header)) / \ - (sizeof(struct key) + sizeof(u64))) + (sizeof(struct btrfs_disk_key) + sizeof(u64))) struct tree_buffer; @@ -50,8 +61,8 @@ struct ctree_root { struct tree_buffer *node; struct tree_buffer *commit_root; struct ctree_root *extent_root; - struct key current_insert; - struct key last_insert; + struct btrfs_key current_insert; + struct btrfs_key last_insert; int fp; struct radix_tree_root cache_radix; struct radix_tree_root pinned_radix; @@ -88,7 +99,7 @@ struct ctree_super_block { * the item in the leaf (relative to the start of the data area) */ struct item { - struct key key; + struct btrfs_disk_key key; u16 offset; u16 size; } __attribute__ ((__packed__)); @@ -115,7 +126,7 @@ struct leaf { */ struct node { struct btrfs_header header; - struct key keys[NODEPTRS_PER_BLOCK]; + struct btrfs_disk_key keys[NODEPTRS_PER_BLOCK]; u64 blockptrs[NODEPTRS_PER_BLOCK]; } __attribute__ ((__packed__)); @@ -141,6 +152,55 @@ struct ctree_path { int slots[MAX_LEVEL]; }; +static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, + struct btrfs_disk_key *disk) +{ + cpu->offset = le64_to_cpu(disk->offset); + cpu->flags = le32_to_cpu(disk->flags); + cpu->objectid = le64_to_cpu(disk->objectid); +} + +static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, + struct btrfs_key *cpu) +{ + disk->offset = cpu_to_le64(cpu->offset); + disk->flags = cpu_to_le32(cpu->flags); + disk->objectid = cpu_to_le64(cpu->objectid); +} + +static inline u64 btrfs_key_objectid(struct btrfs_disk_key *disk) +{ + return le64_to_cpu(disk->objectid); +} + +static inline void btrfs_set_key_objectid(struct btrfs_disk_key *disk, + u64 val) +{ + disk->objectid = cpu_to_le64(val); +} + +static inline u64 btrfs_key_offset(struct btrfs_disk_key *disk) +{ + return le64_to_cpu(disk->offset); +} + +static inline void btrfs_set_key_offset(struct btrfs_disk_key *disk, + u64 val) +{ + disk->offset = cpu_to_le64(val); +} + +static inline u32 btrfs_key_flags(struct btrfs_disk_key *disk) +{ + return le32_to_cpu(disk->flags); +} + +static inline void btrfs_set_key_flags(struct btrfs_disk_key *disk, + u32 val) +{ + disk->flags = cpu_to_le32(val); +} + static inline u64 btrfs_header_blocknr(struct btrfs_header *h) { return le64_to_cpu(h->blocknr); @@ -203,11 +263,13 @@ static inline int btrfs_is_leaf(struct node *n) struct tree_buffer *alloc_free_block(struct ctree_root *root); int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf); int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks); -int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len, int cow); +int search_slot(struct ctree_root *root, struct btrfs_key *key, + struct ctree_path *p, int ins_len, int cow); void release_path(struct ctree_root *root, struct ctree_path *p); void init_path(struct ctree_path *p); int del_item(struct ctree_root *root, struct ctree_path *path); -int insert_item(struct ctree_root *root, struct key *key, void *data, int data_size); +int insert_item(struct ctree_root *root, struct btrfs_key *key, + void *data, int data_size); int next_leaf(struct ctree_root *root, struct ctree_path *path); int leaf_free_space(struct leaf *leaf); int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 524c11fea24a..475e72215964 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7,7 +7,8 @@ #include "print-tree.h" static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, - u64 search_start, u64 search_end, struct key *ins); + u64 search_start, u64 search_end, + struct btrfs_key *ins); static int finish_current_insert(struct ctree_root *extent_root); static int run_pending(struct ctree_root *extent_root); @@ -24,10 +25,10 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) { struct ctree_path path; int ret; - struct key key; + struct btrfs_key key; struct leaf *l; struct extent_item *item; - struct key ins; + struct btrfs_key ins; find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins); init_path(&path); @@ -54,7 +55,7 @@ static int lookup_block_ref(struct ctree_root *root, u64 blocknr, u32 *refs) { struct ctree_path path; int ret; - struct key key; + struct btrfs_key key; struct leaf *l; struct extent_item *item; init_path(&path); @@ -113,7 +114,7 @@ int btrfs_finish_extent_commit(struct ctree_root *root) static int finish_current_insert(struct ctree_root *extent_root) { - struct key ins; + struct btrfs_key ins; struct extent_item extent_item; int i; int ret; @@ -140,12 +141,12 @@ static int finish_current_insert(struct ctree_root *extent_root) int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) { struct ctree_path path; - struct key key; + struct btrfs_key key; struct ctree_root *extent_root = root->extent_root; int ret; struct item *item; struct extent_item *ei; - struct key ins; + struct btrfs_key ins; key.objectid = blocknr; key.flags = 0; @@ -227,7 +228,7 @@ static int run_pending(struct ctree_root *extent_root) */ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) { - struct key key; + struct btrfs_key key; struct ctree_root *extent_root = root->extent_root; struct tree_buffer *t; int pending_ret; @@ -256,10 +257,11 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) * Any available blocks before search_start are skipped. */ static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, - u64 search_start, u64 search_end, struct key *ins) + u64 search_start, u64 search_end, + struct btrfs_key *ins) { struct ctree_path path; - struct key *key; + struct btrfs_key key; int ret; u64 hole_size = 0; int slot = 0; @@ -306,12 +308,12 @@ check_failed: ins->offset = (u64)-1; goto check_pending; } - key = &l->items[slot].key; - if (key->objectid >= search_start) { + btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + if (key.objectid >= search_start) { if (start_found) { if (last_block < search_start) last_block = search_start; - hole_size = key->objectid - last_block; + hole_size = key.objectid - last_block; if (hole_size > total_needed) { ins->objectid = last_block; ins->offset = hole_size; @@ -320,7 +322,7 @@ check_failed: } } start_found = 1; - last_block = key->objectid + key->offset; + last_block = key.objectid + key.offset; path.slots[0]++; } // FIXME -ENOSPC @@ -357,7 +359,7 @@ error: * returns 0 if everything worked, non-zero otherwise. */ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, - u64 search_end, u64 owner, struct key *ins) + u64 search_end, u64 owner, struct btrfs_key *ins) { int ret; int pending_ret; @@ -400,7 +402,7 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, */ struct tree_buffer *alloc_free_block(struct ctree_root *root) { - struct key ins; + struct btrfs_key ins; int ret; struct tree_buffer *buf; diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h index 54cf70eaa01f..56239b922ba6 100644 --- a/fs/btrfs/kerncompat.h +++ b/fs/btrfs/kerncompat.h @@ -69,8 +69,6 @@ static inline void __clear_bit(int bit, unsigned long *map) { const typeof( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - __builtin_offsetof(type,member) );}) -#endif - #define ENOMEM 5 #define EEXIST 6 @@ -94,3 +92,4 @@ typedef u64 __bitwise __be64; #define le32_to_cpu(x) ((__force u32)(__le32)(x)) #define cpu_to_le16(x) ((__force __le16)(u16)(x)) #define le16_to_cpu(x) ((__force u16)(__le16)(x)) +#endif diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index b2ba94696297..5290423343f8 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -45,9 +45,9 @@ int mkfs(int fd) btrfs_set_header_nritems(&empty_leaf.header, 3); /* item1, reserve blocks 0-16 */ - item.key.objectid = 0; - item.key.offset = 17; - item.key.flags = 0; + item.key.objectid = cpu_to_le64(0); + item.key.offset = cpu_to_le64(17); + item.key.flags = cpu_to_le32(0); item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item); item.size = sizeof(struct extent_item); extent_item.refs = 1; @@ -56,16 +56,16 @@ int mkfs(int fd) memcpy(empty_leaf.data + item.offset, &extent_item, item.size); /* item2, give block 17 to the root */ - item.key.objectid = 17; - item.key.offset = 1; + item.key.objectid = cpu_to_le64(17); + item.key.offset = cpu_to_le64(1); item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item) * 2; extent_item.owner = 1; memcpy(empty_leaf.items + 1, &item, sizeof(item)); memcpy(empty_leaf.data + item.offset, &extent_item, item.size); /* item3, give block 18 for the extent root */ - item.key.objectid = 18; - item.key.offset = 1; + item.key.objectid = cpu_to_le64(18); + item.key.offset = cpu_to_le64(1); item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item) * 3; extent_item.owner = 2; memcpy(empty_leaf.items + 2, &item, sizeof(item)); diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index 07fd71b77627..ab3bda53a2f6 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -13,8 +13,8 @@ int next_key(int i, int max_key) { } int main(int ac, char **av) { - struct key ins; - struct key last = { (u64)-1, 0, 0}; + struct btrfs_key ins; + struct btrfs_key last = { (u64)-1, 0, 0}; char *buf; int i; int num; @@ -146,7 +146,7 @@ int main(int ac, char **av) { slot = path.slots[0]; leaf = &path.nodes[0]->leaf; - memcpy(&last, &leaf->items[slot].key, sizeof(last)); + btrfs_disk_key_to_cpu(&last, &leaf->items[slot].key); if (tree_size % 10000 == 0) printf("big del %d:%d\n", tree_size, i); ret = del_item(root, &path); diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 5d99b730a69c..34a15841ebd3 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -10,7 +10,8 @@ int keep_running = 1; struct ctree_super_block super; -static int setup_key(struct radix_tree_root *root, struct key *key, int exists) +static int setup_key(struct radix_tree_root *root, struct btrfs_key *key, + int exists) { int num = rand(); unsigned long res[2]; @@ -38,7 +39,7 @@ again: static int ins_one(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; - struct key key; + struct btrfs_key key; int ret; char buf[128]; unsigned long oid; @@ -63,7 +64,7 @@ error: static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; - struct key key; + struct btrfs_key key; int ret; char buf[128]; init_path(&path); @@ -82,7 +83,7 @@ static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) static int del_one(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; - struct key key; + struct btrfs_key key; int ret; unsigned long *ptr; init_path(&path); @@ -108,7 +109,7 @@ error: static int lookup_item(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; - struct key key; + struct btrfs_key key; int ret; init_path(&path); ret = setup_key(radix, &key, 1); @@ -127,7 +128,7 @@ error: static int lookup_enoent(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; - struct key key; + struct btrfs_key key; int ret; init_path(&path); ret = setup_key(radix, &key, 0); @@ -147,7 +148,7 @@ static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix, int nr) { struct ctree_path path; - struct key key; + struct btrfs_key key; unsigned long found = 0; int ret; int slot; @@ -248,7 +249,7 @@ int (*ops[])(struct ctree_root *root, struct radix_tree_root *radix) = static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) { struct ctree_path path; - struct key key; + struct btrfs_key key; unsigned long found; int ret; int slot; From 0783fcfc4dc19b8bb99bd51b7afa669ba4cbd377 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 Mar 2007 20:12:07 -0400 Subject: [PATCH 0036/2985] Btrfs: struct item endian fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 129 +++++++++++++++++++++++------------------ fs/btrfs/ctree.h | 34 +++++++++-- fs/btrfs/extent-tree.c | 12 ++-- fs/btrfs/mkfs.c | 36 +++++++----- fs/btrfs/print-tree.c | 15 +++-- fs/btrfs/random-test.c | 5 +- 6 files changed, 141 insertions(+), 90 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 489019ac04b8..105556470055 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -76,7 +76,7 @@ static inline unsigned int leaf_data_end(struct leaf *leaf) u32 nr = btrfs_header_nritems(&leaf->header); if (nr == 0) return sizeof(leaf->data); - return leaf->items[nr-1].offset; + return btrfs_item_offset(leaf->items + nr - 1); } /* @@ -174,11 +174,12 @@ int check_leaf(struct ctree_path *path, int level) btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key); BUG_ON(comp_keys(&leaf->items[i].key, &cpukey) >= 0); - BUG_ON(leaf->items[i].offset != leaf->items[i + 1].offset + - leaf->items[i + 1].size); + BUG_ON(btrfs_item_offset(leaf->items + i) != + btrfs_item_end(leaf->items + i + 1)); if (i == 0) { - BUG_ON(leaf->items[i].offset + leaf->items[i].size != - LEAF_DATA_SIZE); + BUG_ON(btrfs_item_offset(leaf->items + i) + + btrfs_item_size(leaf->items + i) != + LEAF_DATA_SIZE); } } return 0; @@ -235,7 +236,8 @@ int bin_search(struct node *c, struct btrfs_key *key, int *slot) { if (btrfs_is_leaf(c)) { struct leaf *l = (struct leaf *)c; - return generic_bin_search((void *)l->items, sizeof(struct item), + return generic_bin_search((void *)l->items, + sizeof(struct btrfs_item), key, btrfs_header_nritems(&c->header), slot); } else { @@ -485,7 +487,7 @@ again: struct leaf *l = (struct leaf *)c; p->slots[level] = slot; if (ins_len > 0 && leaf_free_space(l) < - sizeof(struct item) + ins_len) { + sizeof(struct btrfs_item) + ins_len) { int sret = split_leaf(root, p, ins_len); BUG_ON(sret > 0); if (sret) @@ -780,9 +782,9 @@ static int leaf_space_used(struct leaf *l, int start, int nr) if (!nr) return 0; - data_len = l->items[start].offset + l->items[start].size; - data_len = data_len - l->items[end].offset; - data_len += sizeof(struct item) * nr; + data_len = btrfs_item_end(l->items + start); + data_len = data_len - btrfs_item_offset(l->items + end); + data_len += sizeof(struct btrfs_item) * nr; return data_len; } @@ -806,7 +808,7 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, int free_space; int push_space = 0; int push_items = 0; - struct item *item; + struct btrfs_item *item; u32 left_nritems; u32 right_nritems; @@ -821,7 +823,7 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]); right = &right_buf->leaf; free_space = leaf_free_space(right); - if (free_space < data_size + sizeof(struct item)) { + if (free_space < data_size + sizeof(struct btrfs_item)) { tree_block_release(root, right_buf); return 1; } @@ -829,7 +831,7 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, btrfs_cow_block(root, right_buf, upper, slot + 1, &right_buf); right = &right_buf->leaf; free_space = leaf_free_space(right); - if (free_space < data_size + sizeof(struct item)) { + if (free_space < data_size + sizeof(struct btrfs_item)) { tree_block_release(root, right_buf); return 1; } @@ -839,10 +841,11 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, item = left->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (item->size + sizeof(*item) + push_space > free_space) + if (btrfs_item_size(item) + sizeof(*item) + push_space > + free_space) break; push_items++; - push_space += item->size + sizeof(*item); + push_space += btrfs_item_size(item) + sizeof(*item); } if (push_items == 0) { tree_block_release(root, right_buf); @@ -850,8 +853,7 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, } right_nritems = btrfs_header_nritems(&right->header); /* push left to right */ - push_space = left->items[left_nritems - push_items].offset + - left->items[left_nritems - push_items].size; + push_space = btrfs_item_end(left->items + left_nritems - push_items); push_space -= leaf_data_end(left); /* make room in the right data area */ memmove(right->data + leaf_data_end(right) - push_space, @@ -862,18 +864,19 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, left->data + leaf_data_end(left), push_space); memmove(right->items + push_items, right->items, - right_nritems * sizeof(struct item)); + right_nritems * sizeof(struct btrfs_item)); /* copy the items from left to right */ memcpy(right->items, left->items + left_nritems - push_items, - push_items * sizeof(struct item)); + push_items * sizeof(struct btrfs_item)); /* update the item pointers */ right_nritems += push_items; btrfs_set_header_nritems(&right->header, right_nritems); push_space = LEAF_DATA_SIZE; for (i = 0; i < right_nritems; i++) { - right->items[i].offset = push_space - right->items[i].size; - push_space = right->items[i].offset; + btrfs_set_item_offset(right->items + i, push_space - + btrfs_item_size(right->items + i)); + push_space = btrfs_item_offset(right->items + i); } left_nritems -= push_items; btrfs_set_header_nritems(&left->header, left_nritems); @@ -911,7 +914,7 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, int free_space; int push_space = 0; int push_items = 0; - struct item *item; + struct btrfs_item *item; u32 old_left_nritems; int ret = 0; int wret; @@ -926,7 +929,7 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]); left = &t->leaf; free_space = leaf_free_space(left); - if (free_space < data_size + sizeof(struct item)) { + if (free_space < data_size + sizeof(struct btrfs_item)) { tree_block_release(root, t); return 1; } @@ -935,7 +938,7 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, btrfs_cow_block(root, t, path->nodes[1], slot - 1, &t); left = &t->leaf; free_space = leaf_free_space(left); - if (free_space < data_size + sizeof(struct item)) { + if (free_space < data_size + sizeof(struct btrfs_item)) { tree_block_release(root, t); return 1; } @@ -944,10 +947,11 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, item = right->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (item->size + sizeof(*item) + push_space > free_space) + if (btrfs_item_size(item) + sizeof(*item) + push_space > + free_space) break; push_items++; - push_space += item->size + sizeof(*item); + push_space += btrfs_item_size(item) + sizeof(*item); } if (push_items == 0) { tree_block_release(root, t); @@ -955,35 +959,40 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, } /* push data from right to left */ memcpy(left->items + btrfs_header_nritems(&left->header), - right->items, push_items * sizeof(struct item)); - push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset; + right->items, push_items * sizeof(struct btrfs_item)); + push_space = LEAF_DATA_SIZE - + btrfs_item_offset(right->items + push_items -1); memcpy(left->data + leaf_data_end(left) - push_space, - right->data + right->items[push_items - 1].offset, + right->data + btrfs_item_offset(right->items + push_items - 1), push_space); old_left_nritems = btrfs_header_nritems(&left->header); BUG_ON(old_left_nritems < 0); - for(i = old_left_nritems; i < old_left_nritems + push_items; i++) { - left->items[i].offset -= LEAF_DATA_SIZE - - left->items[old_left_nritems -1].offset; + for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { + u16 ioff = btrfs_item_offset(left->items + i); + btrfs_set_item_offset(left->items + i, ioff - (LEAF_DATA_SIZE - + btrfs_item_offset(left->items + + old_left_nritems - 1))); } btrfs_set_header_nritems(&left->header, old_left_nritems + push_items); /* fixup right node */ - push_space = right->items[push_items-1].offset - leaf_data_end(right); + push_space = btrfs_item_offset(right->items + push_items - 1) - + leaf_data_end(right); memmove(right->data + LEAF_DATA_SIZE - push_space, right->data + leaf_data_end(right), push_space); memmove(right->items, right->items + push_items, (btrfs_header_nritems(&right->header) - push_items) * - sizeof(struct item)); + sizeof(struct btrfs_item)); btrfs_set_header_nritems(&right->header, btrfs_header_nritems(&right->header) - push_items); push_space = LEAF_DATA_SIZE; for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - right->items[i].offset = push_space - right->items[i].size; - push_space = right->items[i].offset; + btrfs_set_item_offset(right->items + i, push_space - + btrfs_item_size(right->items + i)); + push_space = btrfs_item_offset(right->items + i); } BUG_ON(list_empty(&t->dirty)); @@ -1023,7 +1032,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, int slot; struct leaf *right; struct tree_buffer *right_buffer; - int space_needed = data_size + sizeof(struct item); + int space_needed = data_size + sizeof(struct btrfs_item); int data_copy_size; int rt_data_off; int i; @@ -1034,7 +1043,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, l = &l_buf->leaf; /* did the pushes work? */ - if (leaf_free_space(l) >= sizeof(struct item) + data_size) + if (leaf_free_space(l) >= sizeof(struct btrfs_item) + data_size) return 0; if (!path->nodes[1]) { @@ -1066,17 +1075,17 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, btrfs_header_parentid(&root->node->node.header)); - data_copy_size = l->items[mid].offset + l->items[mid].size - - leaf_data_end(l); + data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(l); memcpy(right->items, l->items + mid, - (nritems - mid) * sizeof(struct item)); + (nritems - mid) * sizeof(struct btrfs_item)); memcpy(right->data + LEAF_DATA_SIZE - data_copy_size, l->data + leaf_data_end(l), data_copy_size); - rt_data_off = LEAF_DATA_SIZE - - (l->items[mid].offset + l->items[mid].size); + rt_data_off = LEAF_DATA_SIZE - btrfs_item_end(l->items + mid); - for (i = 0; i < btrfs_header_nritems(&right->header); i++) - right->items[i].offset += rt_data_off; + for (i = 0; i < btrfs_header_nritems(&right->header); i++) { + u16 ioff = btrfs_item_offset(right->items + i); + btrfs_set_item_offset(right->items + i, ioff + rt_data_off); + } btrfs_set_header_nritems(&l->header, mid); ret = 0; @@ -1136,26 +1145,28 @@ int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, nritems = btrfs_header_nritems(&leaf->header); data_end = leaf_data_end(leaf); - if (leaf_free_space(leaf) < sizeof(struct item) + data_size) + if (leaf_free_space(leaf) < sizeof(struct btrfs_item) + data_size) BUG(); slot = path.slots[0]; BUG_ON(slot < 0); if (slot != nritems) { int i; - unsigned int old_data = leaf->items[slot].offset + - leaf->items[slot].size; + unsigned int old_data = btrfs_item_end(leaf->items + slot); /* * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ - for (i = slot; i < nritems; i++) - leaf->items[i].offset -= data_size; + for (i = slot; i < nritems; i++) { + u16 ioff = btrfs_item_offset(leaf->items + i); + btrfs_set_item_offset(leaf->items + i, + ioff - data_size); + } /* shift the items */ memmove(leaf->items + slot + 1, leaf->items + slot, - (nritems - slot) * sizeof(struct item)); + (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ memmove(leaf->data + data_end - data_size, leaf->data + @@ -1165,8 +1176,8 @@ int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, /* copy the new data in */ memcpy(&leaf->items[slot].key, &disk_key, sizeof(struct btrfs_disk_key)); - leaf->items[slot].offset = data_end - data_size; - leaf->items[slot].size = data_size; + btrfs_set_item_offset(leaf->items + slot, data_end - data_size); + btrfs_set_item_size(leaf->items + slot, data_size); memcpy(leaf->data + data_end - data_size, data, data_size); btrfs_set_header_nritems(&leaf->header, nritems + 1); @@ -1241,8 +1252,8 @@ int del_item(struct ctree_root *root, struct ctree_path *path) leaf_buf = path->nodes[0]; leaf = &leaf_buf->leaf; slot = path->slots[0]; - doff = leaf->items[slot].offset; - dsize = leaf->items[slot].size; + doff = btrfs_item_offset(leaf->items + slot); + dsize = btrfs_item_size(leaf->items + slot); nritems = btrfs_header_nritems(&leaf->header); if (slot != nritems - 1) { @@ -1251,10 +1262,12 @@ int del_item(struct ctree_root *root, struct ctree_path *path) memmove(leaf->data + data_end + dsize, leaf->data + data_end, doff - data_end); - for (i = slot + 1; i < nritems; i++) - leaf->items[i].offset += dsize; + for (i = slot + 1; i < nritems; i++) { + u16 ioff = btrfs_item_offset(leaf->items + i); + btrfs_set_item_offset(leaf->items + i, ioff + dsize); + } memmove(leaf->items + slot, leaf->items + slot + 1, - sizeof(struct item) * + sizeof(struct btrfs_item) * (nritems - slot - 1)); } btrfs_set_header_nritems(&leaf->header, nritems - 1); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 58e03e90f23a..b03df154dcdb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -98,10 +98,10 @@ struct ctree_super_block { * the key flags parameter. offset and size tell us where to find * the item in the leaf (relative to the start of the data area) */ -struct item { +struct btrfs_item { struct btrfs_disk_key key; - u16 offset; - u16 size; + __le16 offset; + __le16 size; } __attribute__ ((__packed__)); /* @@ -115,7 +115,8 @@ struct item { struct leaf { struct btrfs_header header; union { - struct item items[LEAF_DATA_SIZE/sizeof(struct item)]; + struct btrfs_item items[LEAF_DATA_SIZE/ + sizeof(struct btrfs_item)]; u8 data[CTREE_BLOCKSIZE-sizeof(struct btrfs_header)]; }; } __attribute__ ((__packed__)); @@ -152,6 +153,31 @@ struct ctree_path { int slots[MAX_LEVEL]; }; +static inline u16 btrfs_item_offset(struct btrfs_item *item) +{ + return le16_to_cpu(item->offset); +} + +static inline void btrfs_set_item_offset(struct btrfs_item *item, u16 val) +{ + item->offset = cpu_to_le16(val); +} + +static inline u16 btrfs_item_end(struct btrfs_item *item) +{ + return le16_to_cpu(item->offset) + le16_to_cpu(item->size); +} + +static inline u16 btrfs_item_size(struct btrfs_item *item) +{ + return le16_to_cpu(item->size); +} + +static inline void btrfs_set_item_size(struct btrfs_item *item, u16 val) +{ + item->size = cpu_to_le16(val); +} + static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, struct btrfs_disk_key *disk) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 475e72215964..fdf95bd07f90 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -40,8 +40,8 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) BUG(); BUG_ON(ret != 0); l = &path.nodes[0]->leaf; - item = (struct extent_item *)(l->data + - l->items[path.slots[0]].offset); + item = (struct extent_item *)(l->data + btrfs_item_offset(l->items + + path.slots[0])); item->refs++; BUG_ON(list_empty(&path.nodes[0]->dirty)); @@ -67,7 +67,8 @@ static int lookup_block_ref(struct ctree_root *root, u64 blocknr, u32 *refs) BUG(); l = &path.nodes[0]->leaf; item = (struct extent_item *)(l->data + - l->items[path.slots[0]].offset); + btrfs_item_offset(l->items + + path.slots[0])); *refs = item->refs; release_path(root->extent_root, &path); return 0; @@ -144,7 +145,7 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) struct btrfs_key key; struct ctree_root *extent_root = root->extent_root; int ret; - struct item *item; + struct btrfs_item *item; struct extent_item *ei; struct btrfs_key ins; @@ -162,7 +163,8 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) BUG(); } item = path.nodes[0]->leaf.items + path.slots[0]; - ei = (struct extent_item *)(path.nodes[0]->leaf.data + item->offset); + ei = (struct extent_item *)(path.nodes[0]->leaf.data + + btrfs_item_offset(item)); BUG_ON(ei->refs == 0); ei->refs--; if (ei->refs == 0) { diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index 5290423343f8..0f77babcd306 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -14,7 +14,7 @@ int mkfs(int fd) { struct ctree_root_info info[2]; struct leaf empty_leaf; - struct item item; + struct btrfs_item item; struct extent_item extent_item; int ret; @@ -45,31 +45,37 @@ int mkfs(int fd) btrfs_set_header_nritems(&empty_leaf.header, 3); /* item1, reserve blocks 0-16 */ - item.key.objectid = cpu_to_le64(0); - item.key.offset = cpu_to_le64(17); - item.key.flags = cpu_to_le32(0); - item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item); - item.size = sizeof(struct extent_item); + btrfs_set_key_objectid(&item.key, 0); + btrfs_set_key_offset(&item.key, 17); + btrfs_set_key_flags(&item.key, 0); + btrfs_set_item_offset(&item, + LEAF_DATA_SIZE - sizeof(struct extent_item)); + btrfs_set_item_size(&item, sizeof(struct extent_item)); extent_item.refs = 1; extent_item.owner = 0; memcpy(empty_leaf.items, &item, sizeof(item)); - memcpy(empty_leaf.data + item.offset, &extent_item, item.size); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); /* item2, give block 17 to the root */ - item.key.objectid = cpu_to_le64(17); - item.key.offset = cpu_to_le64(1); - item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item) * 2; + btrfs_set_key_objectid(&item.key, 17); + btrfs_set_key_offset(&item.key, 1); + btrfs_set_item_offset(&item, + LEAF_DATA_SIZE - sizeof(struct extent_item) * 2); extent_item.owner = 1; memcpy(empty_leaf.items + 1, &item, sizeof(item)); - memcpy(empty_leaf.data + item.offset, &extent_item, item.size); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); /* item3, give block 18 for the extent root */ - item.key.objectid = cpu_to_le64(18); - item.key.offset = cpu_to_le64(1); - item.offset = LEAF_DATA_SIZE - sizeof(struct extent_item) * 3; + btrfs_set_key_objectid(&item.key, 18); + btrfs_set_key_offset(&item.key, 1); + btrfs_set_item_offset(&item, + LEAF_DATA_SIZE - sizeof(struct extent_item) * 3); extent_item.owner = 2; memcpy(empty_leaf.items + 2, &item, sizeof(item)); - memcpy(empty_leaf.data + item.offset, &extent_item, item.size); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 18 * CTREE_BLOCKSIZE); if (ret != sizeof(empty_leaf)) return -1; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 7df16b1e4733..33f5ee4052c1 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -9,7 +9,7 @@ void print_leaf(struct leaf *l) { int i; u32 nr = btrfs_header_nritems(&l->header); - struct item *item; + struct btrfs_item *item; struct extent_item *ei; printf("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, leaf_free_space(l)); @@ -18,12 +18,15 @@ void print_leaf(struct leaf *l) item = l->items + i; printf("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n", i, - item->key.objectid, item->key.flags, item->key.offset, - item->offset, item->size); + btrfs_key_objectid(&item->key), + btrfs_key_flags(&item->key), + btrfs_key_offset(&item->key), + btrfs_item_offset(item), + btrfs_item_size(item)); fflush(stdout); - printf("\t\titem data %.*s\n", item->size, - l->data+item->offset); - ei = (struct extent_item *)(l->data + item->offset); + printf("\t\titem data %.*s\n", btrfs_item_size(item), + l->data + btrfs_item_offset(item)); + ei = (struct extent_item *)(l->data + btrfs_item_offset(item)); printf("\t\textent data refs %u owner %Lu\n", ei->refs, ei->owner); fflush(stdout); diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index 34a15841ebd3..e767528bc521 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -173,7 +173,7 @@ static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix, path.slots[0] -= 1; } slot = path.slots[0]; - found = path.nodes[0]->leaf.items[slot].key.objectid; + found=btrfs_key_objectid(&path.nodes[0]->leaf.items[slot].key); ret = del_item(root, &path); count++; if (ret) { @@ -274,7 +274,8 @@ static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) slot -= 1; } for (i = slot; i >= 0; i--) { - found = path.nodes[0]->leaf.items[i].key.objectid; + found = btrfs_key_objectid(&path.nodes[0]-> + leaf.items[i].key); radix_tree_preload(GFP_KERNEL); ret = radix_tree_insert(radix, found, (void *)found); if (ret) { From 1d4f8a0c1eca5586134b56a4114a7cd5e85e3560 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Mar 2007 09:28:32 -0400 Subject: [PATCH 0037/2985] Btrfs: node->blockptrs endian fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 31 ++++++++++++++++++------------- fs/btrfs/ctree.h | 12 +++++++++++- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/print-tree.c | 4 ++-- 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 105556470055..762810731001 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -58,7 +58,8 @@ int btrfs_cow_block(struct ctree_root *root, free_extent(root, buf->blocknr, 1); tree_block_release(root, buf); } else { - parent->node.blockptrs[parent_slot] = cow->blocknr; + btrfs_set_node_blockptr(&parent->node, parent_slot, + cow->blocknr); BUG_ON(list_empty(&parent->dirty)); free_extent(root, buf->blocknr, 1); } @@ -133,7 +134,7 @@ int check_node(struct ctree_path *path, int level) parent_key = &parent->keys[parent_slot]; BUG_ON(memcmp(parent_key, node->keys, sizeof(struct btrfs_disk_key))); - BUG_ON(parent->blockptrs[parent_slot] != + BUG_ON(btrfs_node_blockptr(parent, parent_slot) != btrfs_header_blocknr(&node->header)); } BUG_ON(nritems > NODEPTRS_PER_BLOCK); @@ -166,7 +167,7 @@ int check_leaf(struct ctree_path *path, int level) parent_key = &parent->keys[parent_slot]; BUG_ON(memcmp(parent_key, &leaf->items[0].key, sizeof(struct btrfs_disk_key))); - BUG_ON(parent->blockptrs[parent_slot] != + BUG_ON(btrfs_node_blockptr(parent, parent_slot) != btrfs_header_blocknr(&leaf->header)); } for (i = 0; nritems > 1 && i < nritems - 2; i++) { @@ -258,7 +259,7 @@ struct tree_buffer *read_node_slot(struct ctree_root *root, return NULL; if (slot >= btrfs_header_nritems(&node->header)) return NULL; - return read_tree_block(root, node->blockptrs[slot]); + return read_tree_block(root, btrfs_node_blockptr(node, slot)); } static int balance_level(struct ctree_root *root, struct ctree_path *path, @@ -283,7 +284,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, mid_buf = path->nodes[level]; mid = &mid_buf->node; - orig_ptr = mid->blockptrs[orig_slot]; + orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < MAX_LEVEL - 1) parent_buf = path->nodes[level + 1]; @@ -407,7 +408,8 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, } /* double check we haven't messed things up */ check_block(path, level); - if (orig_ptr != path->nodes[level]->node.blockptrs[path->slots[level]]) + if (orig_ptr != btrfs_node_blockptr(&path->nodes[level]->node, + path->slots[level])) BUG(); if (right_buf) @@ -482,7 +484,7 @@ again: slot = p->slots[level]; BUG_ON(btrfs_header_nritems(&c->header) == 1); } - b = read_tree_block(root, c->blockptrs[slot]); + b = read_tree_block(root, btrfs_node_blockptr(c, slot)); } else { struct leaf *l = (struct leaf *)c; p->slots[level] = slot; @@ -660,7 +662,7 @@ static int insert_new_root(struct ctree_root *root, else lower_key = lower->keys; memcpy(c->keys, lower_key, sizeof(struct btrfs_disk_key)); - c->blockptrs[0] = path->nodes[level-1]->blocknr; + btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->blocknr); /* the super has an extra ref to root->node */ tree_block_release(root, root->node); root->node = t; @@ -700,7 +702,7 @@ static int insert_ptr(struct ctree_root *root, (nritems - slot) * sizeof(u64)); } memcpy(lower->keys + slot, key, sizeof(struct btrfs_disk_key)); - lower->blockptrs[slot] = blocknr; + btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); if (lower->keys[1].objectid == 0) BUG(); @@ -820,7 +822,8 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, if (slot >= btrfs_header_nritems(&upper->node.header) - 1) { return 1; } - right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]); + right_buf = read_tree_block(root, btrfs_node_blockptr(&upper->node, + slot + 1)); right = &right_buf->leaf; free_space = leaf_free_space(right); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -926,7 +929,8 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, if (!path->nodes[1]) { return 1; } - t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]); + t = read_tree_block(root, btrfs_node_blockptr(&path->nodes[1]->node, + slot - 1)); left = &t->leaf; free_space = leaf_free_space(left); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -1353,7 +1357,7 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) level++; continue; } - blocknr = c->node.blockptrs[slot]; + blocknr = btrfs_node_blockptr(&c->node, slot); if (next) tree_block_release(root, next); next = read_tree_block(root, blocknr); @@ -1368,7 +1372,8 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) path->slots[level] = 0; if (!level) break; - next = read_tree_block(root, next->node.blockptrs[0]); + next = read_tree_block(root, + btrfs_node_blockptr(&next->node, 0)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b03df154dcdb..a8454c401cee 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -128,7 +128,7 @@ struct leaf { struct node { struct btrfs_header header; struct btrfs_disk_key keys[NODEPTRS_PER_BLOCK]; - u64 blockptrs[NODEPTRS_PER_BLOCK]; + __le64 blockptrs[NODEPTRS_PER_BLOCK]; } __attribute__ ((__packed__)); /* @@ -153,6 +153,16 @@ struct ctree_path { int slots[MAX_LEVEL]; }; +static inline u64 btrfs_node_blockptr(struct node *n, int nr) +{ + return le64_to_cpu(n->blockptrs[nr]); +} + +static inline void btrfs_set_node_blockptr(struct node *n, int nr, u64 val) +{ + n->blockptrs[nr] = cpu_to_le64(val); +} + static inline u16 btrfs_item_offset(struct btrfs_item *item) { return le16_to_cpu(item->offset); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fdf95bd07f90..e511f48eb48e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -85,7 +85,7 @@ int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) return 0; for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) { - blocknr = buf->node.blockptrs[i]; + blocknr = btrfs_node_blockptr(&buf->node, i); inc_block_ref(root, blocknr); } return 0; @@ -437,7 +437,7 @@ int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) if (path->slots[*level] >= btrfs_header_nritems(&cur->node.header)) break; - blocknr = cur->node.blockptrs[path->slots[*level]]; + blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]); ret = lookup_block_ref(root, blocknr, &refs); if (refs != 1 || *level == 1) { path->slots[*level]++; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 33f5ee4052c1..101278e1139a 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -54,12 +54,12 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t) printf("\tkey %d (%Lu %u %Lu) block %Lu\n", i, c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, - c->blockptrs[i]); + btrfs_node_blockptr(c, i)); fflush(stdout); } for (i = 0; i < nr; i++) { struct tree_buffer *next_buf = read_tree_block(root, - c->blockptrs[i]); + btrfs_node_blockptr(c, i)); struct node *next = &next_buf->node; if (btrfs_is_leaf(next) && btrfs_header_level(&c->header) != 1) From cf27e1eec063fa68a89c57ae0a83f93aa38851d6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Mar 2007 09:49:06 -0400 Subject: [PATCH 0038/2985] Btrfs: struct extent_item endian Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 24 ++++++++++++++++++++++-- fs/btrfs/extent-tree.c | 22 +++++++++++++--------- fs/btrfs/mkfs.c | 8 ++++---- 3 files changed, 39 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a8454c401cee..90cc2c9bd79e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -136,8 +136,8 @@ struct node { * owner of the block and the number of references */ struct extent_item { - u32 refs; - u64 owner; + __le32 refs; + __le64 owner; } __attribute__ ((__packed__)); /* @@ -153,6 +153,26 @@ struct ctree_path { int slots[MAX_LEVEL]; }; +static inline u64 btrfs_extent_owner(struct extent_item *ei) +{ + return le64_to_cpu(ei->owner); +} + +static inline void btrfs_set_extent_owner(struct extent_item *ei, u64 val) +{ + ei->owner = cpu_to_le64(val); +} + +static inline u32 btrfs_extent_refs(struct extent_item *ei) +{ + return le32_to_cpu(ei->refs); +} + +static inline void btrfs_set_extent_refs(struct extent_item *ei, u32 val) +{ + ei->refs = cpu_to_le32(val); +} + static inline u64 btrfs_node_blockptr(struct node *n, int nr) { return le64_to_cpu(n->blockptrs[nr]); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e511f48eb48e..1676a6595cce 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -29,6 +29,7 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) struct leaf *l; struct extent_item *item; struct btrfs_key ins; + u32 refs; find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins); init_path(&path); @@ -42,7 +43,8 @@ static int inc_block_ref(struct ctree_root *root, u64 blocknr) l = &path.nodes[0]->leaf; item = (struct extent_item *)(l->data + btrfs_item_offset(l->items + path.slots[0])); - item->refs++; + refs = btrfs_extent_refs(item); + btrfs_set_extent_refs(item, refs + 1); BUG_ON(list_empty(&path.nodes[0]->dirty)); release_path(root->extent_root, &path); @@ -69,7 +71,7 @@ static int lookup_block_ref(struct ctree_root *root, u64 blocknr, u32 *refs) item = (struct extent_item *)(l->data + btrfs_item_offset(l->items + path.slots[0])); - *refs = item->refs; + *refs = btrfs_extent_refs(item); release_path(root->extent_root, &path); return 0; } @@ -120,9 +122,9 @@ static int finish_current_insert(struct ctree_root *extent_root) int i; int ret; - extent_item.refs = 1; - extent_item.owner = - btrfs_header_parentid(&extent_root->node->node.header); + btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_extent_owner(&extent_item, + btrfs_header_parentid(&extent_root->node->node.header)); ins.offset = 1; ins.flags = 0; @@ -148,6 +150,7 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) struct btrfs_item *item; struct extent_item *ei; struct btrfs_key ins; + u32 refs; key.objectid = blocknr; key.flags = 0; @@ -166,8 +169,9 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) ei = (struct extent_item *)(path.nodes[0]->leaf.data + btrfs_item_offset(item)); BUG_ON(ei->refs == 0); - ei->refs--; - if (ei->refs == 0) { + refs = btrfs_extent_refs(ei) - 1; + btrfs_set_extent_refs(ei, refs); + if (refs == 0) { if (root == extent_root) { int err; radix_tree_preload(GFP_KERNEL); @@ -368,8 +372,8 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, struct ctree_root *extent_root = root->extent_root; struct extent_item extent_item; - extent_item.refs = 1; - extent_item.owner = owner; + btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_extent_owner(&extent_item, owner); if (root == extent_root) { BUG_ON(extent_root->current_insert.offset == 0); diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index 0f77babcd306..e0d3bc1d0259 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -51,8 +51,8 @@ int mkfs(int fd) btrfs_set_item_offset(&item, LEAF_DATA_SIZE - sizeof(struct extent_item)); btrfs_set_item_size(&item, sizeof(struct extent_item)); - extent_item.refs = 1; - extent_item.owner = 0; + btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_extent_owner(&extent_item, 0); memcpy(empty_leaf.items, &item, sizeof(item)); memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, btrfs_item_size(&item)); @@ -62,7 +62,7 @@ int mkfs(int fd) btrfs_set_key_offset(&item.key, 1); btrfs_set_item_offset(&item, LEAF_DATA_SIZE - sizeof(struct extent_item) * 2); - extent_item.owner = 1; + btrfs_set_extent_owner(&extent_item, 1); memcpy(empty_leaf.items + 1, &item, sizeof(item)); memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, btrfs_item_size(&item)); @@ -72,7 +72,7 @@ int mkfs(int fd) btrfs_set_key_offset(&item.key, 1); btrfs_set_item_offset(&item, LEAF_DATA_SIZE - sizeof(struct extent_item) * 3); - extent_item.owner = 2; + btrfs_set_extent_owner(&extent_item, 2); memcpy(empty_leaf.items + 2, &item, sizeof(item)); memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, btrfs_item_size(&item)); From 234b63a091e1df6bd4261dd7b3a7490074830628 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Mar 2007 10:46:10 -0400 Subject: [PATCH 0039/2985] rename funcs and structs to btrfs Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 317 +++++++++++++++++++++-------------------- fs/btrfs/ctree.h | 95 ++++++------ fs/btrfs/debug-tree.c | 8 +- fs/btrfs/disk-io.c | 106 +++++++------- fs/btrfs/disk-io.h | 29 ++-- fs/btrfs/extent-tree.c | 147 +++++++++---------- fs/btrfs/mkfs.c | 26 ++-- fs/btrfs/print-tree.c | 23 +-- fs/btrfs/print-tree.h | 4 +- fs/btrfs/quick-test.c | 54 +++---- fs/btrfs/random-test.c | 92 ++++++------ 11 files changed, 454 insertions(+), 447 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 762810731001..c8529b394d5f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -6,47 +6,47 @@ #include "disk-io.h" #include "print-tree.h" -static int split_node(struct ctree_root *root, struct ctree_path *path, +static int split_node(struct btrfs_root *root, struct btrfs_path *path, int level); -static int split_leaf(struct ctree_root *root, struct ctree_path *path, +static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, int data_size); -static int push_node_left(struct ctree_root *root, struct tree_buffer *dst, - struct tree_buffer *src); -static int balance_node_right(struct ctree_root *root, - struct tree_buffer *dst_buf, - struct tree_buffer *src_buf); -static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, +static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst, + struct btrfs_buffer *src); +static int balance_node_right(struct btrfs_root *root, + struct btrfs_buffer *dst_buf, + struct btrfs_buffer *src_buf); +static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, int slot); -inline void init_path(struct ctree_path *p) +inline void btrfs_init_path(struct btrfs_path *p) { memset(p, 0, sizeof(*p)); } -void release_path(struct ctree_root *root, struct ctree_path *p) +void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; - for (i = 0; i < MAX_LEVEL; i++) { + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) break; - tree_block_release(root, p->nodes[i]); + btrfs_block_release(root, p->nodes[i]); } memset(p, 0, sizeof(*p)); } -int btrfs_cow_block(struct ctree_root *root, - struct tree_buffer *buf, - struct tree_buffer *parent, +int btrfs_cow_block(struct btrfs_root *root, + struct btrfs_buffer *buf, + struct btrfs_buffer *parent, int parent_slot, - struct tree_buffer **cow_ret) + struct btrfs_buffer **cow_ret) { - struct tree_buffer *cow; + struct btrfs_buffer *cow; if (!list_empty(&buf->dirty)) { *cow_ret = buf; return 0; } - cow = alloc_free_block(root); + cow = btrfs_alloc_free_block(root); memcpy(&cow->node, &buf->node, sizeof(buf->node)); btrfs_set_header_blocknr(&cow->node.header, cow->blocknr); *cow_ret = cow; @@ -55,15 +55,15 @@ int btrfs_cow_block(struct ctree_root *root, root->node = cow; cow->count++; if (buf != root->commit_root) - free_extent(root, buf->blocknr, 1); - tree_block_release(root, buf); + btrfs_free_extent(root, buf->blocknr, 1); + btrfs_block_release(root, buf); } else { btrfs_set_node_blockptr(&parent->node, parent_slot, cow->blocknr); BUG_ON(list_empty(&parent->dirty)); - free_extent(root, buf->blocknr, 1); + btrfs_free_extent(root, buf->blocknr, 1); } - tree_block_release(root, buf); + btrfs_block_release(root, buf); return 0; } @@ -72,7 +72,7 @@ int btrfs_cow_block(struct ctree_root *root, * this returns the address of the start of the last item, * which is the stop of the leaf data stack */ -static inline unsigned int leaf_data_end(struct leaf *leaf) +static inline unsigned int leaf_data_end(struct btrfs_leaf *leaf) { u32 nr = btrfs_header_nritems(&leaf->header); if (nr == 0) @@ -85,7 +85,7 @@ static inline unsigned int leaf_data_end(struct leaf *leaf) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -int leaf_free_space(struct leaf *leaf) +int btrfs_leaf_free_space(struct btrfs_leaf *leaf) { int data_end = leaf_data_end(leaf); int nritems = btrfs_header_nritems(&leaf->header); @@ -117,11 +117,11 @@ int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 0; } -int check_node(struct ctree_path *path, int level) +int check_node(struct btrfs_path *path, int level) { int i; - struct node *parent = NULL; - struct node *node = &path->nodes[level]->node; + struct btrfs_node *parent = NULL; + struct btrfs_node *node = &path->nodes[level]->node; int parent_slot; u32 nritems = btrfs_header_nritems(&node->header); @@ -146,18 +146,18 @@ int check_node(struct ctree_path *path, int level) return 0; } -int check_leaf(struct ctree_path *path, int level) +int check_leaf(struct btrfs_path *path, int level) { int i; - struct leaf *leaf = &path->nodes[level]->leaf; - struct node *parent = NULL; + struct btrfs_leaf *leaf = &path->nodes[level]->leaf; + struct btrfs_node *parent = NULL; int parent_slot; u32 nritems = btrfs_header_nritems(&leaf->header); if (path->nodes[level + 1]) parent = &path->nodes[level + 1]->node; parent_slot = path->slots[level + 1]; - BUG_ON(leaf_free_space(leaf) < 0); + BUG_ON(btrfs_leaf_free_space(leaf) < 0); if (nritems == 0) return 0; @@ -186,7 +186,7 @@ int check_leaf(struct ctree_path *path, int level) return 0; } -int check_block(struct ctree_path *path, int level) +int check_block(struct btrfs_path *path, int level) { if (level == 0) return check_leaf(path, level); @@ -233,10 +233,10 @@ int generic_bin_search(char *p, int item_size, struct btrfs_key *key, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -int bin_search(struct node *c, struct btrfs_key *key, int *slot) +int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) { if (btrfs_is_leaf(c)) { - struct leaf *l = (struct leaf *)c; + struct btrfs_leaf *l = (struct btrfs_leaf *)c; return generic_bin_search((void *)l->items, sizeof(struct btrfs_item), key, btrfs_header_nritems(&c->header), @@ -250,11 +250,11 @@ int bin_search(struct node *c, struct btrfs_key *key, int *slot) return -1; } -struct tree_buffer *read_node_slot(struct ctree_root *root, - struct tree_buffer *parent_buf, +struct btrfs_buffer *read_node_slot(struct btrfs_root *root, + struct btrfs_buffer *parent_buf, int slot) { - struct node *node = &parent_buf->node; + struct btrfs_node *node = &parent_buf->node; if (slot < 0) return NULL; if (slot >= btrfs_header_nritems(&node->header)) @@ -262,17 +262,17 @@ struct tree_buffer *read_node_slot(struct ctree_root *root, return read_tree_block(root, btrfs_node_blockptr(node, slot)); } -static int balance_level(struct ctree_root *root, struct ctree_path *path, +static int balance_level(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct tree_buffer *right_buf; - struct tree_buffer *mid_buf; - struct tree_buffer *left_buf; - struct tree_buffer *parent_buf = NULL; - struct node *right = NULL; - struct node *mid; - struct node *left = NULL; - struct node *parent = NULL; + struct btrfs_buffer *right_buf; + struct btrfs_buffer *mid_buf; + struct btrfs_buffer *left_buf; + struct btrfs_buffer *parent_buf = NULL; + struct btrfs_node *right = NULL; + struct btrfs_node *mid; + struct btrfs_node *left = NULL; + struct btrfs_node *parent = NULL; int ret = 0; int wret; int pslot; @@ -286,12 +286,12 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, mid = &mid_buf->node; orig_ptr = btrfs_node_blockptr(mid, orig_slot); - if (level < MAX_LEVEL - 1) + if (level < BTRFS_MAX_LEVEL - 1) parent_buf = path->nodes[level + 1]; pslot = path->slots[level + 1]; if (!parent_buf) { - struct tree_buffer *child; + struct btrfs_buffer *child; u64 blocknr = mid_buf->blocknr; if (btrfs_header_nritems(&mid->header) != 1) @@ -303,11 +303,11 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, root->node = child; path->nodes[level] = NULL; /* once for the path */ - tree_block_release(root, mid_buf); + btrfs_block_release(root, mid_buf); /* once for the root ptr */ - tree_block_release(root, mid_buf); + btrfs_block_release(root, mid_buf); clean_tree_block(root, mid_buf); - return free_extent(root, blocknr, 1); + return btrfs_free_extent(root, blocknr, 1); } parent = &parent_buf->node; @@ -340,14 +340,14 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, ret = wret; if (btrfs_header_nritems(&right->header) == 0) { u64 blocknr = right_buf->blocknr; - tree_block_release(root, right_buf); + btrfs_block_release(root, right_buf); clean_tree_block(root, right_buf); right_buf = NULL; right = NULL; wret = del_ptr(root, path, level + 1, pslot + 1); if (wret) ret = wret; - wret = free_extent(root, blocknr, 1); + wret = btrfs_free_extent(root, blocknr, 1); if (wret) ret = wret; } else { @@ -375,14 +375,14 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ u64 blocknr = mid_buf->blocknr; - tree_block_release(root, mid_buf); + btrfs_block_release(root, mid_buf); clean_tree_block(root, mid_buf); mid_buf = NULL; mid = NULL; wret = del_ptr(root, path, level + 1, pslot); if (wret) ret = wret; - wret = free_extent(root, blocknr, 1); + wret = btrfs_free_extent(root, blocknr, 1); if (wret) ret = wret; } else { @@ -400,7 +400,7 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, path->slots[level + 1] -= 1; path->slots[level] = orig_slot; if (mid_buf) - tree_block_release(root, mid_buf); + btrfs_block_release(root, mid_buf); } else { orig_slot -= btrfs_header_nritems(&left->header); path->slots[level] = orig_slot; @@ -413,9 +413,9 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, BUG(); if (right_buf) - tree_block_release(root, right_buf); + btrfs_block_release(root, right_buf); if (left_buf) - tree_block_release(root, left_buf); + btrfs_block_release(root, left_buf); return ret; } @@ -432,12 +432,12 @@ static int balance_level(struct ctree_root *root, struct ctree_path *path, * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if * possible) */ -int search_slot(struct ctree_root *root, struct btrfs_key *key, - struct ctree_path *p, int ins_len, int cow) +int btrfs_search_slot(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_path *p, int ins_len, int cow) { - struct tree_buffer *b; - struct tree_buffer *cow_buf; - struct node *c; + struct btrfs_buffer *b; + struct btrfs_buffer *cow_buf; + struct btrfs_node *c; int slot; int ret; int level; @@ -486,9 +486,9 @@ again: } b = read_tree_block(root, btrfs_node_blockptr(c, slot)); } else { - struct leaf *l = (struct leaf *)c; + struct btrfs_leaf *l = (struct btrfs_leaf *)c; p->slots[level] = slot; - if (ins_len > 0 && leaf_free_space(l) < + if (ins_len > 0 && btrfs_leaf_free_space(l) < sizeof(struct btrfs_item) + ins_len) { int sret = split_leaf(root, p, ins_len); BUG_ON(sret > 0); @@ -513,14 +513,14 @@ again: * If this fails to write a tree block, it returns -1, but continues * fixing up the blocks in ram so the tree is consistent. */ -static int fixup_low_keys(struct ctree_root *root, - struct ctree_path *path, struct btrfs_disk_key *key, +static int fixup_low_keys(struct btrfs_root *root, + struct btrfs_path *path, struct btrfs_disk_key *key, int level) { int i; int ret = 0; - for (i = level; i < MAX_LEVEL; i++) { - struct node *t; + for (i = level; i < BTRFS_MAX_LEVEL; i++) { + struct btrfs_node *t; int tslot = path->slots[i]; if (!path->nodes[i]) break; @@ -540,11 +540,11 @@ static int fixup_low_keys(struct ctree_root *root, * returns 0 if some ptrs were pushed left, < 0 if there was some horrible * error, and > 0 if there was no room in the left hand block. */ -static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, - struct tree_buffer *src_buf) +static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst_buf, + struct btrfs_buffer *src_buf) { - struct node *src = &src_buf->node; - struct node *dst = &dst_buf->node; + struct btrfs_node *src = &src_buf->node; + struct btrfs_node *dst = &dst_buf->node; int push_items = 0; int src_nritems; int dst_nritems; @@ -587,12 +587,12 @@ static int push_node_left(struct ctree_root *root, struct tree_buffer *dst_buf, * * this will only push up to 1/2 the contents of the left node over */ -static int balance_node_right(struct ctree_root *root, - struct tree_buffer *dst_buf, - struct tree_buffer *src_buf) +static int balance_node_right(struct btrfs_root *root, + struct btrfs_buffer *dst_buf, + struct btrfs_buffer *src_buf) { - struct node *src = &src_buf->node; - struct node *dst = &dst_buf->node; + struct btrfs_node *src = &src_buf->node; + struct btrfs_node *dst = &dst_buf->node; int push_items = 0; int max_push; int src_nritems; @@ -637,18 +637,18 @@ static int balance_node_right(struct ctree_root *root, * * returns zero on success or < 0 on failure. */ -static int insert_new_root(struct ctree_root *root, - struct ctree_path *path, int level) +static int insert_new_root(struct btrfs_root *root, + struct btrfs_path *path, int level) { - struct tree_buffer *t; - struct node *lower; - struct node *c; + struct btrfs_buffer *t; + struct btrfs_node *lower; + struct btrfs_node *c; struct btrfs_disk_key *lower_key; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = alloc_free_block(root); + t = btrfs_alloc_free_block(root); c = &t->node; memset(c, 0, sizeof(c)); btrfs_set_header_nritems(&c->header, 1); @@ -658,13 +658,13 @@ static int insert_new_root(struct ctree_root *root, btrfs_header_parentid(&root->node->node.header)); lower = &path->nodes[level-1]->node; if (btrfs_is_leaf(lower)) - lower_key = &((struct leaf *)lower)->items[0].key; + lower_key = &((struct btrfs_leaf *)lower)->items[0].key; else lower_key = lower->keys; memcpy(c->keys, lower_key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->blocknr); /* the super has an extra ref to root->node */ - tree_block_release(root, root->node); + btrfs_block_release(root, root->node); root->node = t; t->count++; path->nodes[level] = t; @@ -681,11 +681,11 @@ static int insert_new_root(struct ctree_root *root, * * returns zero on success and < 0 on any error */ -static int insert_ptr(struct ctree_root *root, - struct ctree_path *path, struct btrfs_disk_key *key, +static int insert_ptr(struct btrfs_root *root, + struct btrfs_path *path, struct btrfs_disk_key *key, u64 blocknr, int slot, int level) { - struct node *lower; + struct btrfs_node *lower; int nritems; BUG_ON(!path->nodes[level]); @@ -719,13 +719,13 @@ static int insert_ptr(struct ctree_root *root, * * returns 0 on success and < 0 on failure */ -static int split_node(struct ctree_root *root, struct ctree_path *path, +static int split_node(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct tree_buffer *t; - struct node *c; - struct tree_buffer *split_buffer; - struct node *split; + struct btrfs_buffer *t; + struct btrfs_node *c; + struct btrfs_buffer *split_buffer; + struct btrfs_node *split; int mid; int ret; int wret; @@ -740,7 +740,7 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, return ret; } c_nritems = btrfs_header_nritems(&c->header); - split_buffer = alloc_free_block(root); + split_buffer = btrfs_alloc_free_block(root); split = &split_buffer->node; btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_blocknr(&split->header, split_buffer->blocknr); @@ -763,11 +763,11 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, if (path->slots[level] >= mid) { path->slots[level] -= mid; - tree_block_release(root, t); + btrfs_block_release(root, t); path->nodes[level] = split_buffer; path->slots[level + 1] += 1; } else { - tree_block_release(root, split_buffer); + btrfs_block_release(root, split_buffer); } return ret; } @@ -777,7 +777,7 @@ static int split_node(struct ctree_root *root, struct ctree_path *path, * and nr indicate which items in the leaf to check. This totals up the * space used both by the item structs and the item data */ -static int leaf_space_used(struct leaf *l, int start, int nr) +static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) { int data_len; int end = start + nr - 1; @@ -797,14 +797,14 @@ static int leaf_space_used(struct leaf *l, int start, int nr) * returns 1 if the push failed because the other node didn't have enough * room, 0 if everything worked out and < 0 if there were major errors. */ -static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, +static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct tree_buffer *left_buf = path->nodes[0]; - struct leaf *left = &left_buf->leaf; - struct leaf *right; - struct tree_buffer *right_buf; - struct tree_buffer *upper; + struct btrfs_buffer *left_buf = path->nodes[0]; + struct btrfs_leaf *left = &left_buf->leaf; + struct btrfs_leaf *right; + struct btrfs_buffer *right_buf; + struct btrfs_buffer *upper; int slot; int i; int free_space; @@ -825,17 +825,17 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, right_buf = read_tree_block(root, btrfs_node_blockptr(&upper->node, slot + 1)); right = &right_buf->leaf; - free_space = leaf_free_space(right); + free_space = btrfs_leaf_free_space(right); if (free_space < data_size + sizeof(struct btrfs_item)) { - tree_block_release(root, right_buf); + btrfs_block_release(root, right_buf); return 1; } /* cow and double check */ btrfs_cow_block(root, right_buf, upper, slot + 1, &right_buf); right = &right_buf->leaf; - free_space = leaf_free_space(right); + free_space = btrfs_leaf_free_space(right); if (free_space < data_size + sizeof(struct btrfs_item)) { - tree_block_release(root, right_buf); + btrfs_block_release(root, right_buf); return 1; } @@ -851,7 +851,7 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, push_space += btrfs_item_size(item) + sizeof(*item); } if (push_items == 0) { - tree_block_release(root, right_buf); + btrfs_block_release(root, right_buf); return 1; } right_nritems = btrfs_header_nritems(&right->header); @@ -893,11 +893,11 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { path->slots[0] -= left_nritems; - tree_block_release(root, path->nodes[0]); + btrfs_block_release(root, path->nodes[0]); path->nodes[0] = right_buf; path->slots[1] += 1; } else { - tree_block_release(root, right_buf); + btrfs_block_release(root, right_buf); } return 0; } @@ -905,13 +905,13 @@ static int push_leaf_right(struct ctree_root *root, struct ctree_path *path, * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise */ -static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, +static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct tree_buffer *right_buf = path->nodes[0]; - struct leaf *right = &right_buf->leaf; - struct tree_buffer *t; - struct leaf *left; + struct btrfs_buffer *right_buf = path->nodes[0]; + struct btrfs_leaf *right = &right_buf->leaf; + struct btrfs_buffer *t; + struct btrfs_leaf *left; int slot; int i; int free_space; @@ -932,18 +932,18 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, t = read_tree_block(root, btrfs_node_blockptr(&path->nodes[1]->node, slot - 1)); left = &t->leaf; - free_space = leaf_free_space(left); + free_space = btrfs_leaf_free_space(left); if (free_space < data_size + sizeof(struct btrfs_item)) { - tree_block_release(root, t); + btrfs_block_release(root, t); return 1; } /* cow and double check */ btrfs_cow_block(root, t, path->nodes[1], slot - 1, &t); left = &t->leaf; - free_space = leaf_free_space(left); + free_space = btrfs_leaf_free_space(left); if (free_space < data_size + sizeof(struct btrfs_item)) { - tree_block_release(root, t); + btrfs_block_release(root, t); return 1; } @@ -958,7 +958,7 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, push_space += btrfs_item_size(item) + sizeof(*item); } if (push_items == 0) { - tree_block_release(root, t); + btrfs_block_release(root, t); return 1; } /* push data from right to left */ @@ -1009,11 +1009,11 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { path->slots[0] += old_left_nritems; - tree_block_release(root, path->nodes[0]); + btrfs_block_release(root, path->nodes[0]); path->nodes[0] = t; path->slots[1] -= 1; } else { - tree_block_release(root, t); + btrfs_block_release(root, t); path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); @@ -1026,16 +1026,16 @@ static int push_leaf_left(struct ctree_root *root, struct ctree_path *path, * * returns 0 if all went well and < 0 on failure. */ -static int split_leaf(struct ctree_root *root, struct ctree_path *path, +static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct tree_buffer *l_buf; - struct leaf *l; + struct btrfs_buffer *l_buf; + struct btrfs_leaf *l; u32 nritems; int mid; int slot; - struct leaf *right; - struct tree_buffer *right_buffer; + struct btrfs_leaf *right; + struct btrfs_buffer *right_buffer; int space_needed = data_size + sizeof(struct btrfs_item); int data_copy_size; int rt_data_off; @@ -1047,7 +1047,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, l = &l_buf->leaf; /* did the pushes work? */ - if (leaf_free_space(l) >= sizeof(struct btrfs_item) + data_size) + if (btrfs_leaf_free_space(l) >= sizeof(struct btrfs_item) + data_size) return 0; if (!path->nodes[1]) { @@ -1058,7 +1058,7 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, slot = path->slots[0]; nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; - right_buffer = alloc_free_block(root); + right_buffer = btrfs_alloc_free_block(root); BUG_ON(!right_buffer); BUG_ON(mid == nritems); right = &right_buffer->leaf; @@ -1101,12 +1101,12 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, BUG_ON(list_empty(&l_buf->dirty)); BUG_ON(path->slots[0] != slot); if (mid <= slot) { - tree_block_release(root, path->nodes[0]); + btrfs_block_release(root, path->nodes[0]); path->nodes[0] = right_buffer; path->slots[0] -= mid; path->slots[1] += 1; } else - tree_block_release(root, right_buffer); + btrfs_block_release(root, right_buffer); BUG_ON(path->slots[0] < 0); return ret; } @@ -1115,17 +1115,17 @@ static int split_leaf(struct ctree_root *root, struct ctree_path *path, * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, +int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, void *data, int data_size) { int ret = 0; int slot; int slot_orig; - struct leaf *leaf; - struct tree_buffer *leaf_buf; + struct btrfs_leaf *leaf; + struct btrfs_buffer *leaf_buf; u32 nritems; unsigned int data_end; - struct ctree_path path; + struct btrfs_path path; struct btrfs_disk_key disk_key; btrfs_cpu_key_to_disk(&disk_key, cpu_key); @@ -1133,10 +1133,10 @@ int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, /* create a root if there isn't one */ if (!root->node) BUG(); - init_path(&path); - ret = search_slot(root, cpu_key, &path, data_size, 1); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, cpu_key, &path, data_size, 1); if (ret == 0) { - release_path(root, &path); + btrfs_release_path(root, &path); return -EEXIST; } if (ret < 0) @@ -1149,7 +1149,8 @@ int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, nritems = btrfs_header_nritems(&leaf->header); data_end = leaf_data_end(leaf); - if (leaf_free_space(leaf) < sizeof(struct btrfs_item) + data_size) + if (btrfs_leaf_free_space(leaf) < + sizeof(struct btrfs_item) + data_size) BUG(); slot = path.slots[0]; @@ -1190,11 +1191,11 @@ int insert_item(struct ctree_root *root, struct btrfs_key *cpu_key, ret = fixup_low_keys(root, &path, &disk_key, 1); BUG_ON(list_empty(&leaf_buf->dirty)); - if (leaf_free_space(leaf) < 0) + if (btrfs_leaf_free_space(leaf) < 0) BUG(); check_leaf(&path, 0); out: - release_path(root, &path); + btrfs_release_path(root, &path); return ret; } @@ -1205,11 +1206,11 @@ out: * continuing all the way the root if required. The root is converted into * a leaf if all the nodes are emptied. */ -static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, +static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - struct node *node; - struct tree_buffer *parent = path->nodes[level]; + struct btrfs_node *node; + struct btrfs_buffer *parent = path->nodes[level]; u32 nritems; int ret = 0; int wret; @@ -1242,11 +1243,11 @@ static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level, * delete the item at the leaf level in path. If that empties * the leaf, remove it from the tree */ -int del_item(struct ctree_root *root, struct ctree_path *path) +int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) { int slot; - struct leaf *leaf; - struct tree_buffer *leaf_buf; + struct btrfs_leaf *leaf; + struct btrfs_buffer *leaf_buf; int doff; int dsize; int ret = 0; @@ -1286,7 +1287,7 @@ int del_item(struct ctree_root *root, struct ctree_path *path) wret = del_ptr(root, path, 1, path->slots[1]); if (wret) ret = wret; - wret = free_extent(root, leaf_buf->blocknr, 1); + wret = btrfs_free_extent(root, leaf_buf->blocknr, 1); if (wret) ret = wret; } @@ -1323,12 +1324,12 @@ int del_item(struct ctree_root *root, struct ctree_path *path) wret = del_ptr(root, path, 1, slot); if (wret) ret = wret; - tree_block_release(root, leaf_buf); - wret = free_extent(root, blocknr, 1); + btrfs_block_release(root, leaf_buf); + wret = btrfs_free_extent(root, blocknr, 1); if (wret) ret = wret; } else { - tree_block_release(root, leaf_buf); + btrfs_block_release(root, leaf_buf); } } } @@ -1340,15 +1341,15 @@ int del_item(struct ctree_root *root, struct ctree_path *path) * returns 0 if it found something or 1 if there are no greater leaves. * returns < 0 on io errors. */ -int next_leaf(struct ctree_root *root, struct ctree_path *path) +int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) { int slot; int level = 1; u64 blocknr; - struct tree_buffer *c; - struct tree_buffer *next = NULL; + struct btrfs_buffer *c; + struct btrfs_buffer *next = NULL; - while(level < MAX_LEVEL) { + while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; slot = path->slots[level] + 1; @@ -1359,7 +1360,7 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) } blocknr = btrfs_node_blockptr(&c->node, slot); if (next) - tree_block_release(root, next); + btrfs_block_release(root, next); next = read_tree_block(root, blocknr); break; } @@ -1367,7 +1368,7 @@ int next_leaf(struct ctree_root *root, struct ctree_path *path) while(1) { level--; c = path->nodes[level]; - tree_block_release(root, c); + btrfs_block_release(root, c); path->nodes[level] = next; path->slots[level] = 0; if (!level) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 90cc2c9bd79e..5dfcc90812c1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,10 +1,10 @@ -#ifndef __CTREE__ -#define __CTREE__ +#ifndef __BTRFS__ +#define __BTRFS__ #include "list.h" #include "kerncompat.h" -#define CTREE_BLOCKSIZE 1024 +#define BTRFS_BLOCKSIZE 1024 /* * the key defines the order in the tree, and so it also defines (optimal) @@ -46,21 +46,21 @@ struct btrfs_header { /* generation flags to be added */ } __attribute__ ((__packed__)); -#define MAX_LEVEL 8 -#define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct btrfs_header)) / \ +#define BTRFS_MAX_LEVEL 8 +#define NODEPTRS_PER_BLOCK ((BTRFS_BLOCKSIZE - sizeof(struct btrfs_header)) / \ (sizeof(struct btrfs_disk_key) + sizeof(u64))) -struct tree_buffer; +struct btrfs_buffer; /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. current_insert is used * only for the extent tree. */ -struct ctree_root { - struct tree_buffer *node; - struct tree_buffer *commit_root; - struct ctree_root *extent_root; +struct btrfs_root { + struct btrfs_buffer *node; + struct btrfs_buffer *commit_root; + struct btrfs_root *extent_root; struct btrfs_key current_insert; struct btrfs_key last_insert; int fp; @@ -74,7 +74,7 @@ struct ctree_root { /* * describes a tree on disk */ -struct ctree_root_info { +struct btrfs_root_info { u64 fsid[2]; /* FS specific uuid */ u64 blocknr; /* blocknr of this block */ u64 objectid; /* inode number of this root */ @@ -88,9 +88,9 @@ struct ctree_root_info { * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc */ -struct ctree_super_block { - struct ctree_root_info root_info; - struct ctree_root_info extent_info; +struct btrfs_super_block { + struct btrfs_root_info root_info; + struct btrfs_root_info extent_info; } __attribute__ ((__packed__)); /* @@ -111,13 +111,13 @@ struct btrfs_item { * The data is separate from the items to get the keys closer together * during searches. */ -#define LEAF_DATA_SIZE (CTREE_BLOCKSIZE - sizeof(struct btrfs_header)) -struct leaf { +#define LEAF_DATA_SIZE (BTRFS_BLOCKSIZE - sizeof(struct btrfs_header)) +struct btrfs_leaf { struct btrfs_header header; union { struct btrfs_item items[LEAF_DATA_SIZE/ sizeof(struct btrfs_item)]; - u8 data[CTREE_BLOCKSIZE-sizeof(struct btrfs_header)]; + u8 data[BTRFS_BLOCKSIZE - sizeof(struct btrfs_header)]; }; } __attribute__ ((__packed__)); @@ -125,7 +125,7 @@ struct leaf { * all non-leaf blocks are nodes, they hold only keys and pointers to * other blocks */ -struct node { +struct btrfs_node { struct btrfs_header header; struct btrfs_disk_key keys[NODEPTRS_PER_BLOCK]; __le64 blockptrs[NODEPTRS_PER_BLOCK]; @@ -135,50 +135,51 @@ struct node { * items in the extent btree are used to record the objectid of the * owner of the block and the number of references */ -struct extent_item { +struct btrfs_extent_item { __le32 refs; __le64 owner; } __attribute__ ((__packed__)); /* - * ctree_paths remember the path taken from the root down to the leaf. - * level 0 is always the leaf, and nodes[1...MAX_LEVEL] will point + * btrfs_paths remember the path taken from the root down to the leaf. + * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point * to any other levels that are present. * * The slots array records the index of the item or block pointer * used while walking the tree. */ -struct ctree_path { - struct tree_buffer *nodes[MAX_LEVEL]; - int slots[MAX_LEVEL]; +struct btrfs_path { + struct btrfs_buffer *nodes[BTRFS_MAX_LEVEL]; + int slots[BTRFS_MAX_LEVEL]; }; -static inline u64 btrfs_extent_owner(struct extent_item *ei) +static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) { return le64_to_cpu(ei->owner); } -static inline void btrfs_set_extent_owner(struct extent_item *ei, u64 val) +static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) { ei->owner = cpu_to_le64(val); } -static inline u32 btrfs_extent_refs(struct extent_item *ei) +static inline u32 btrfs_extent_refs(struct btrfs_extent_item *ei) { return le32_to_cpu(ei->refs); } -static inline void btrfs_set_extent_refs(struct extent_item *ei, u32 val) +static inline void btrfs_set_extent_refs(struct btrfs_extent_item *ei, u32 val) { ei->refs = cpu_to_le32(val); } -static inline u64 btrfs_node_blockptr(struct node *n, int nr) +static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) { return le64_to_cpu(n->blockptrs[nr]); } -static inline void btrfs_set_node_blockptr(struct node *n, int nr, u64 val) +static inline void btrfs_set_node_blockptr(struct btrfs_node *n, int nr, + u64 val) { n->blockptrs[nr] = cpu_to_le64(val); } @@ -300,34 +301,34 @@ static inline void btrfs_set_header_flags(struct btrfs_header *h, u16 val) static inline int btrfs_header_level(struct btrfs_header *h) { - return btrfs_header_flags(h) & (MAX_LEVEL - 1); + return btrfs_header_flags(h) & (BTRFS_MAX_LEVEL - 1); } static inline void btrfs_set_header_level(struct btrfs_header *h, int level) { u16 flags; - BUG_ON(level > MAX_LEVEL); - flags = btrfs_header_flags(h) & ~(MAX_LEVEL - 1); + BUG_ON(level > BTRFS_MAX_LEVEL); + flags = btrfs_header_flags(h) & ~(BTRFS_MAX_LEVEL - 1); btrfs_set_header_flags(h, flags | level); } -static inline int btrfs_is_leaf(struct node *n) +static inline int btrfs_is_leaf(struct btrfs_node *n) { return (btrfs_header_level(&n->header) == 0); } -struct tree_buffer *alloc_free_block(struct ctree_root *root); -int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf); -int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks); -int search_slot(struct ctree_root *root, struct btrfs_key *key, - struct ctree_path *p, int ins_len, int cow); -void release_path(struct ctree_root *root, struct ctree_path *p); -void init_path(struct ctree_path *p); -int del_item(struct ctree_root *root, struct ctree_path *path); -int insert_item(struct ctree_root *root, struct btrfs_key *key, +struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root); +int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf); +int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks); +int btrfs_search_slot(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_path *p, int ins_len, int cow); +void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); +void btrfs_init_path(struct btrfs_path *p); +int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path); +int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *key, void *data, int data_size); -int next_leaf(struct ctree_root *root, struct ctree_path *path); -int leaf_free_space(struct leaf *leaf); -int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap); -int btrfs_finish_extent_commit(struct ctree_root *root); +int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); +int btrfs_leaf_free_space(struct btrfs_leaf *leaf); +int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap); +int btrfs_finish_extent_commit(struct btrfs_root *root); #endif diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c index 21f607d8b050..6da0a7aa0f47 100644 --- a/fs/btrfs/debug-tree.c +++ b/fs/btrfs/debug-tree.c @@ -7,13 +7,13 @@ #include "print-tree.h" int main(int ac, char **av) { - struct ctree_super_block super; - struct ctree_root *root; + struct btrfs_super_block super; + struct btrfs_root *root; radix_tree_init(); root = open_ctree("dbfile", &super); printf("root tree\n"); - print_tree(root, root->node); + btrfs_print_tree(root, root->node); printf("map tree\n"); - print_tree(root->extent_root, root->extent_root->node); + btrfs_print_tree(root->extent_root, root->extent_root->node); return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 065e888d2c0b..c34c0c60935f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -13,7 +13,7 @@ static int allocated_blocks = 0; int cache_max = 10000; -static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) +static int check_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); @@ -23,18 +23,18 @@ static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) return 0; } -static int free_some_buffers(struct ctree_root *root) +static int free_some_buffers(struct btrfs_root *root) { struct list_head *node, *next; - struct tree_buffer *b; + struct btrfs_buffer *b; if (root->cache_size < cache_max) return 0; list_for_each_safe(node, next, &root->cache) { - b = list_entry(node, struct tree_buffer, cache); + b = list_entry(node, struct btrfs_buffer, cache); if (b->count == 1) { BUG_ON(!list_empty(&b->dirty)); list_del_init(&b->cache); - tree_block_release(root, b); + btrfs_block_release(root, b); if (root->cache_size < cache_max) break; } @@ -42,11 +42,11 @@ static int free_some_buffers(struct ctree_root *root) return 0; } -struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) +struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) { - struct tree_buffer *buf; + struct btrfs_buffer *buf; int ret; - buf = malloc(sizeof(struct tree_buffer)); + buf = malloc(sizeof(struct btrfs_buffer)); if (!buf) return buf; allocated_blocks++; @@ -66,9 +66,9 @@ struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) return buf; } -struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr) +struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) { - struct tree_buffer *buf; + struct btrfs_buffer *buf; buf = radix_tree_lookup(&root->cache_radix, blocknr); if (buf) { buf->count++; @@ -82,10 +82,10 @@ struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr) return buf; } -struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) +struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) { - loff_t offset = blocknr * CTREE_BLOCKSIZE; - struct tree_buffer *buf; + loff_t offset = blocknr * BTRFS_BLOCKSIZE; + struct btrfs_buffer *buf; int ret; buf = radix_tree_lookup(&root->cache_radix, blocknr); @@ -95,8 +95,8 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) buf = alloc_tree_block(root, blocknr); if (!buf) return NULL; - ret = pread(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); - if (ret != CTREE_BLOCKSIZE) { + ret = pread(root->fp, &buf->node, BTRFS_BLOCKSIZE, offset); + if (ret != BTRFS_BLOCKSIZE) { free(buf); return NULL; } @@ -106,7 +106,7 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) return buf; } -int dirty_tree_block(struct ctree_root *root, struct tree_buffer *buf) +int dirty_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { if (!list_empty(&buf->dirty)) return 0; @@ -115,46 +115,47 @@ int dirty_tree_block(struct ctree_root *root, struct tree_buffer *buf) return 0; } -int clean_tree_block(struct ctree_root *root, struct tree_buffer *buf) +int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { if (!list_empty(&buf->dirty)) { list_del_init(&buf->dirty); - tree_block_release(root, buf); + btrfs_block_release(root, buf); } return 0; } -int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) +int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { u64 blocknr = buf->blocknr; - loff_t offset = blocknr * CTREE_BLOCKSIZE; + loff_t offset = blocknr * BTRFS_BLOCKSIZE; int ret; if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); - ret = pwrite(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); - if (ret != CTREE_BLOCKSIZE) + ret = pwrite(root->fp, &buf->node, BTRFS_BLOCKSIZE, offset); + if (ret != BTRFS_BLOCKSIZE) return ret; return 0; } -static int __commit_transaction(struct ctree_root *root) +static int __commit_transaction(struct btrfs_root *root) { - struct tree_buffer *b; + struct btrfs_buffer *b; int ret = 0; int wret; while(!list_empty(&root->trans)) { - b = list_entry(root->trans.next, struct tree_buffer, dirty); + b = list_entry(root->trans.next, struct btrfs_buffer, dirty); list_del_init(&b->dirty); wret = write_tree_block(root, b); if (wret) ret = wret; - tree_block_release(root, b); + btrfs_block_release(root, b); } return ret; } -int commit_transaction(struct ctree_root *root, struct ctree_super_block *s) +int btrfs_commit_transaction(struct btrfs_root *root, + struct btrfs_super_block *s) { int ret = 0; @@ -163,20 +164,20 @@ int commit_transaction(struct ctree_root *root, struct ctree_super_block *s) ret = __commit_transaction(root->extent_root); BUG_ON(ret); if (root->commit_root != root->node) { - struct tree_buffer *snap = root->commit_root; + struct btrfs_buffer *snap = root->commit_root; root->commit_root = root->node; root->node->count++; ret = btrfs_drop_snapshot(root, snap); BUG_ON(ret); - // tree_block_release(root, snap); + // btrfs_block_release(root, snap); } write_ctree_super(root, s); btrfs_finish_extent_commit(root); return ret; } -static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, - struct ctree_root_info *info, int fp) +static int __setup_root(struct btrfs_root *root, struct btrfs_root *extent_root, + struct btrfs_root_info *info, int fp) { INIT_LIST_HEAD(&root->trans); INIT_LIST_HEAD(&root->cache); @@ -191,10 +192,10 @@ static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, return 0; } -struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) +struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) { - struct ctree_root *root = malloc(sizeof(struct ctree_root)); - struct ctree_root *extent_root = malloc(sizeof(struct ctree_root)); + struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); + struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); int fp; int ret; @@ -207,16 +208,16 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) INIT_RADIX_TREE(&root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); - ret = pread(fp, super, sizeof(struct ctree_super_block), - CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + ret = pread(fp, super, sizeof(struct btrfs_super_block), + BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); if (ret == 0 || super->root_info.tree_root == 0) { printf("making new FS!\n"); ret = mkfs(fp); if (ret) return NULL; - ret = pread(fp, super, sizeof(struct ctree_super_block), - CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); - if (ret != sizeof(struct ctree_super_block)) + ret = pread(fp, super, sizeof(struct btrfs_super_block), + BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); + if (ret != sizeof(struct btrfs_super_block)) return NULL; } BUG_ON(ret < 0); @@ -227,18 +228,19 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) return root; } -static int __update_root(struct ctree_root *root, struct ctree_root_info *info) +static int __update_root(struct btrfs_root *root, struct btrfs_root_info *info) { info->tree_root = root->node->blocknr; return 0; } -int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s) +int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s) { int ret; __update_root(root, &s->root_info); __update_root(root->extent_root, &s->extent_info); - ret = pwrite(root->fp, s, sizeof(*s), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + ret = pwrite(root->fp, s, sizeof(*s), + BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); if (ret != sizeof(*s)) { fprintf(stderr, "failed to write new super block err %d\n", ret); return ret; @@ -246,19 +248,19 @@ int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s) return 0; } -static int drop_cache(struct ctree_root *root) +static int drop_cache(struct btrfs_root *root) { while(!list_empty(&root->cache)) { - struct tree_buffer *b = list_entry(root->cache.next, - struct tree_buffer, cache); + struct btrfs_buffer *b = list_entry(root->cache.next, + struct btrfs_buffer, cache); list_del_init(&b->cache); - tree_block_release(root, b); + btrfs_block_release(root, b); } return 0; } -int close_ctree(struct ctree_root *root, struct ctree_super_block *s) +int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) { - commit_transaction(root, s); + btrfs_commit_transaction(root, s); __commit_transaction(root->extent_root); write_ctree_super(root, s); drop_cache(root->extent_root); @@ -268,16 +270,16 @@ int close_ctree(struct ctree_root *root, struct ctree_super_block *s) close(root->fp); if (root->node) - tree_block_release(root, root->node); + btrfs_block_release(root, root->node); if (root->extent_root->node) - tree_block_release(root->extent_root, root->extent_root->node); - tree_block_release(root, root->commit_root); + btrfs_block_release(root->extent_root, root->extent_root->node); + btrfs_block_release(root, root->commit_root); free(root); printf("on close %d blocks are allocated\n", allocated_blocks); return 0; } -void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) +void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf) { buf->count--; if (buf->count < 0) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 1c0af7c56c21..b391335864b1 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -2,29 +2,30 @@ #define __DISKIO__ #include "list.h" -struct tree_buffer { +struct btrfs_buffer { u64 blocknr; int count; union { - struct node node; - struct leaf leaf; + struct btrfs_node node; + struct btrfs_leaf leaf; }; struct list_head dirty; struct list_head cache; }; -struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr); -struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr); -int write_tree_block(struct ctree_root *root, struct tree_buffer *buf); -int dirty_tree_block(struct ctree_root *root, struct tree_buffer *buf); -int clean_tree_block(struct ctree_root *root, struct tree_buffer *buf); -int commit_transaction(struct ctree_root *root, struct ctree_super_block *s); -struct ctree_root *open_ctree(char *filename, struct ctree_super_block *s); -int close_ctree(struct ctree_root *root, struct ctree_super_block *s); -void tree_block_release(struct ctree_root *root, struct tree_buffer *buf); -int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s); +struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); +struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr); +int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf); +int dirty_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf); +int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf); +int btrfs_commit_transaction(struct btrfs_root *root, + struct btrfs_super_block *s); +struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); +int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); +void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); +int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s); int mkfs(int fd); -#define CTREE_SUPER_INFO_OFFSET(bs) (16 * (bs)) +#define BTRFS_SUPER_INFO_OFFSET(bs) (16 * (bs)) #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1676a6595cce..4a4f2d810d9f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6,11 +6,11 @@ #include "disk-io.h" #include "print-tree.h" -static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, +static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 search_end, struct btrfs_key *ins); -static int finish_current_insert(struct ctree_root *extent_root); -static int run_pending(struct ctree_root *extent_root); +static int finish_current_insert(struct btrfs_root *extent_root); +static int run_pending(struct btrfs_root *extent_root); /* * pending extents are blocks that we're trying to allocate in the extent @@ -21,62 +21,63 @@ static int run_pending(struct ctree_root *extent_root); */ #define CTREE_EXTENT_PENDING_DEL 0 -static int inc_block_ref(struct ctree_root *root, u64 blocknr) +static int inc_block_ref(struct btrfs_root *root, u64 blocknr) { - struct ctree_path path; + struct btrfs_path path; int ret; struct btrfs_key key; - struct leaf *l; - struct extent_item *item; + struct btrfs_leaf *l; + struct btrfs_extent_item *item; struct btrfs_key ins; u32 refs; find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins); - init_path(&path); + btrfs_init_path(&path); key.objectid = blocknr; key.flags = 0; key.offset = 1; - ret = search_slot(root->extent_root, &key, &path, 0, 1); + ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 1); if (ret != 0) BUG(); BUG_ON(ret != 0); l = &path.nodes[0]->leaf; - item = (struct extent_item *)(l->data + btrfs_item_offset(l->items + - path.slots[0])); + item = (struct btrfs_extent_item *)(l->data + + btrfs_item_offset(l->items + + path.slots[0])); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); BUG_ON(list_empty(&path.nodes[0]->dirty)); - release_path(root->extent_root, &path); + btrfs_release_path(root->extent_root, &path); finish_current_insert(root->extent_root); run_pending(root->extent_root); return 0; } -static int lookup_block_ref(struct ctree_root *root, u64 blocknr, u32 *refs) +static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs) { - struct ctree_path path; + struct btrfs_path path; int ret; struct btrfs_key key; - struct leaf *l; - struct extent_item *item; - init_path(&path); + struct btrfs_leaf *l; + struct btrfs_extent_item *item; + btrfs_init_path(&path); key.objectid = blocknr; key.flags = 0; key.offset = 1; - ret = search_slot(root->extent_root, &key, &path, 0, 0); + ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 0); if (ret != 0) BUG(); l = &path.nodes[0]->leaf; - item = (struct extent_item *)(l->data + + item = (struct btrfs_extent_item *)(l->data + btrfs_item_offset(l->items + path.slots[0])); *refs = btrfs_extent_refs(item); - release_path(root->extent_root, &path); + btrfs_release_path(root->extent_root, &path); return 0; } -int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) +int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf) { u64 blocknr; int i; @@ -93,9 +94,9 @@ int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) return 0; } -int btrfs_finish_extent_commit(struct ctree_root *root) +int btrfs_finish_extent_commit(struct btrfs_root *root) { - struct ctree_root *extent_root = root->extent_root; + struct btrfs_root *extent_root = root->extent_root; unsigned long gang[8]; int ret; int i; @@ -115,10 +116,10 @@ int btrfs_finish_extent_commit(struct ctree_root *root) return 0; } -static int finish_current_insert(struct ctree_root *extent_root) +static int finish_current_insert(struct btrfs_root *extent_root) { struct btrfs_key ins; - struct extent_item extent_item; + struct btrfs_extent_item extent_item; int i; int ret; @@ -130,7 +131,7 @@ static int finish_current_insert(struct ctree_root *extent_root) for (i = 0; i < extent_root->current_insert.flags; i++) { ins.objectid = extent_root->current_insert.objectid + i; - ret = insert_item(extent_root, &ins, &extent_item, + ret = btrfs_insert_item(extent_root, &ins, &extent_item, sizeof(extent_item)); BUG_ON(ret); } @@ -141,14 +142,14 @@ static int finish_current_insert(struct ctree_root *extent_root) /* * remove an extent from the root, returns 0 on success */ -int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) +static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; - struct ctree_root *extent_root = root->extent_root; + struct btrfs_root *extent_root = root->extent_root; int ret; struct btrfs_item *item; - struct extent_item *ei; + struct btrfs_extent_item *ei; struct btrfs_key ins; u32 refs; @@ -157,16 +158,16 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) key.offset = num_blocks; find_free_extent(root, 0, 0, (u64)-1, &ins); - init_path(&path); - ret = search_slot(extent_root, &key, &path, -1, 1); + btrfs_init_path(&path); + ret = btrfs_search_slot(extent_root, &key, &path, -1, 1); if (ret) { printf("failed to find %Lu\n", key.objectid); - print_tree(extent_root, extent_root->node); + btrfs_print_tree(extent_root, extent_root->node); printf("failed to find %Lu\n", key.objectid); BUG(); } item = path.nodes[0]->leaf.items + path.slots[0]; - ei = (struct extent_item *)(path.nodes[0]->leaf.data + + ei = (struct btrfs_extent_item *)(path.nodes[0]->leaf.data + btrfs_item_offset(item)); BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; @@ -180,14 +181,14 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) BUG_ON(err); radix_tree_preload_end(); } - ret = del_item(extent_root, &path); + ret = btrfs_del_item(extent_root, &path); if (root != extent_root && extent_root->last_insert.objectid < blocknr) extent_root->last_insert.objectid = blocknr; if (ret) BUG(); } - release_path(extent_root, &path); + btrfs_release_path(extent_root, &path); finish_current_insert(extent_root); return ret; } @@ -196,10 +197,10 @@ int __free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) * find all the blocks marked as pending in the radix tree and remove * them from the extent map */ -static int del_pending_extents(struct ctree_root *extent_root) +static int del_pending_extents(struct btrfs_root *extent_root) { int ret; - struct tree_buffer *gang[4]; + struct btrfs_buffer *gang[4]; int i; while(1) { @@ -214,13 +215,13 @@ static int del_pending_extents(struct ctree_root *extent_root) radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, CTREE_EXTENT_PENDING_DEL); - tree_block_release(extent_root, gang[i]); + btrfs_block_release(extent_root, gang[i]); } } return 0; } -static int run_pending(struct ctree_root *extent_root) +static int run_pending(struct btrfs_root *extent_root) { while(radix_tree_tagged(&extent_root->cache_radix, CTREE_EXTENT_PENDING_DEL)) @@ -232,11 +233,11 @@ static int run_pending(struct ctree_root *extent_root) /* * remove an extent from the root, returns 0 on success */ -int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) +int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) { struct btrfs_key key; - struct ctree_root *extent_root = root->extent_root; - struct tree_buffer *t; + struct btrfs_root *extent_root = root->extent_root; + struct btrfs_buffer *t; int pending_ret; int ret; @@ -262,11 +263,11 @@ int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks) * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ -static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, +static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 search_end, struct btrfs_key *ins) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; int ret; u64 hole_size = 0; @@ -274,20 +275,20 @@ static int find_free_extent(struct ctree_root *orig_root, u64 num_blocks, u64 last_block; u64 test_block; int start_found; - struct leaf *l; - struct ctree_root * root = orig_root->extent_root; + struct btrfs_leaf *l; + struct btrfs_root * root = orig_root->extent_root; int total_needed = num_blocks; total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3; if (root->last_insert.objectid > search_start) search_start = root->last_insert.objectid; check_failed: - init_path(&path); + btrfs_init_path(&path); ins->objectid = search_start; ins->offset = 0; ins->flags = 0; start_found = 0; - ret = search_slot(root, ins, &path, 0, 0); + ret = btrfs_search_slot(root, ins, &path, 0, 0); if (ret < 0) goto error; @@ -298,7 +299,7 @@ check_failed: l = &path.nodes[0]->leaf; slot = path.slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - ret = next_leaf(root, &path); + ret = btrfs_next_leaf(root, &path); if (ret == 0) continue; if (ret < 0) @@ -336,7 +337,7 @@ check_pending: /* we have to make sure we didn't find an extent that has already * been allocated by the map tree or the original allocation */ - release_path(root, &path); + btrfs_release_path(root, &path); BUG_ON(ins->objectid < search_start); for (test_block = ins->objectid; test_block < ins->objectid + total_needed; test_block++) { @@ -353,7 +354,7 @@ check_pending: ins->offset = num_blocks; return 0; error: - release_path(root, &path); + btrfs_release_path(root, &path); return ret; } @@ -364,13 +365,13 @@ error: * * returns 0 if everything worked, non-zero otherwise. */ -int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, +int alloc_extent(struct btrfs_root *root, u64 num_blocks, u64 search_start, u64 search_end, u64 owner, struct btrfs_key *ins) { int ret; int pending_ret; - struct ctree_root *extent_root = root->extent_root; - struct extent_item extent_item; + struct btrfs_root *extent_root = root->extent_root; + struct btrfs_extent_item extent_item; btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, owner); @@ -390,7 +391,7 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, if (ret) return ret; - ret = insert_item(extent_root, ins, &extent_item, + ret = btrfs_insert_item(extent_root, ins, &extent_item, sizeof(extent_item)); finish_current_insert(extent_root); @@ -406,11 +407,11 @@ int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, * helper function to allocate a block for a given tree * returns the tree buffer or NULL. */ -struct tree_buffer *alloc_free_block(struct ctree_root *root) +struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root) { struct btrfs_key ins; int ret; - struct tree_buffer *buf; + struct btrfs_buffer *buf; ret = alloc_extent(root, 1, 0, (unsigned long)-1, btrfs_header_parentid(&root->node->node.header), @@ -424,10 +425,10 @@ struct tree_buffer *alloc_free_block(struct ctree_root *root) return buf; } -int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) +int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, int *level) { - struct tree_buffer *next; - struct tree_buffer *cur; + struct btrfs_buffer *next; + struct btrfs_buffer *cur; u64 blocknr; int ret; u32 refs; @@ -445,33 +446,33 @@ int walk_down_tree(struct ctree_root *root, struct ctree_path *path, int *level) ret = lookup_block_ref(root, blocknr, &refs); if (refs != 1 || *level == 1) { path->slots[*level]++; - ret = free_extent(root, blocknr, 1); + ret = btrfs_free_extent(root, blocknr, 1); BUG_ON(ret); continue; } BUG_ON(ret); next = read_tree_block(root, blocknr); if (path->nodes[*level-1]) - tree_block_release(root, path->nodes[*level-1]); + btrfs_block_release(root, path->nodes[*level-1]); path->nodes[*level-1] = next; *level = btrfs_header_level(&next->node.header); path->slots[*level] = 0; } out: - ret = free_extent(root, path->nodes[*level]->blocknr, 1); - tree_block_release(root, path->nodes[*level]); + ret = btrfs_free_extent(root, path->nodes[*level]->blocknr, 1); + btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; BUG_ON(ret); return 0; } -int walk_up_tree(struct ctree_root *root, struct ctree_path *path, int *level) +int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, int *level) { int i; int slot; int ret; - for(i = *level; i < MAX_LEVEL - 1 && path->nodes[i]; i++) { + for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; if (slot < btrfs_header_nritems(&path->nodes[i]->node.header)- 1) { @@ -479,9 +480,9 @@ int walk_up_tree(struct ctree_root *root, struct ctree_path *path, int *level) *level = i; return 0; } else { - ret = free_extent(root, + ret = btrfs_free_extent(root, path->nodes[*level]->blocknr, 1); - tree_block_release(root, path->nodes[*level]); + btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; BUG_ON(ret); @@ -490,15 +491,15 @@ int walk_up_tree(struct ctree_root *root, struct ctree_path *path, int *level) return 1; } -int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) +int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap) { int ret; int level; - struct ctree_path path; + struct btrfs_path path; int i; int orig_level; - init_path(&path); + btrfs_init_path(&path); level = btrfs_header_level(&snap->node.header); orig_level = level; @@ -514,7 +515,7 @@ int btrfs_drop_snapshot(struct ctree_root *root, struct tree_buffer *snap) } for (i = 0; i <= orig_level; i++) { if (path.nodes[i]) { - tree_block_release(root, path.nodes[i]); + btrfs_block_release(root, path.nodes[i]); } } diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index e0d3bc1d0259..fc1923320de3 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -12,10 +12,10 @@ int mkfs(int fd) { - struct ctree_root_info info[2]; - struct leaf empty_leaf; + struct btrfs_root_info info[2]; + struct btrfs_leaf empty_leaf; struct btrfs_item item; - struct extent_item extent_item; + struct btrfs_extent_item extent_item; int ret; /* setup the super block area */ @@ -28,7 +28,7 @@ int mkfs(int fd) info[1].objectid = 2; info[1].tree_root = 18; ret = pwrite(fd, info, sizeof(info), - CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); if (ret != sizeof(info)) return -1; @@ -36,7 +36,7 @@ int mkfs(int fd) memset(&empty_leaf, 0, sizeof(empty_leaf)); btrfs_set_header_parentid(&empty_leaf.header, 1); btrfs_set_header_blocknr(&empty_leaf.header, 17); - ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 17 * CTREE_BLOCKSIZE); + ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 17 * BTRFS_BLOCKSIZE); if (ret != sizeof(empty_leaf)) return -1; @@ -48,9 +48,9 @@ int mkfs(int fd) btrfs_set_key_objectid(&item.key, 0); btrfs_set_key_offset(&item.key, 17); btrfs_set_key_flags(&item.key, 0); - btrfs_set_item_offset(&item, - LEAF_DATA_SIZE - sizeof(struct extent_item)); - btrfs_set_item_size(&item, sizeof(struct extent_item)); + btrfs_set_item_offset(&item, LEAF_DATA_SIZE - + sizeof(struct btrfs_extent_item)); + btrfs_set_item_size(&item, sizeof(struct btrfs_extent_item)); btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, 0); memcpy(empty_leaf.items, &item, sizeof(item)); @@ -60,8 +60,8 @@ int mkfs(int fd) /* item2, give block 17 to the root */ btrfs_set_key_objectid(&item.key, 17); btrfs_set_key_offset(&item.key, 1); - btrfs_set_item_offset(&item, - LEAF_DATA_SIZE - sizeof(struct extent_item) * 2); + btrfs_set_item_offset(&item, LEAF_DATA_SIZE - + sizeof(struct btrfs_extent_item) * 2); btrfs_set_extent_owner(&extent_item, 1); memcpy(empty_leaf.items + 1, &item, sizeof(item)); memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, @@ -70,13 +70,13 @@ int mkfs(int fd) /* item3, give block 18 for the extent root */ btrfs_set_key_objectid(&item.key, 18); btrfs_set_key_offset(&item.key, 1); - btrfs_set_item_offset(&item, - LEAF_DATA_SIZE - sizeof(struct extent_item) * 3); + btrfs_set_item_offset(&item, LEAF_DATA_SIZE - + sizeof(struct btrfs_extent_item) * 3); btrfs_set_extent_owner(&extent_item, 2); memcpy(empty_leaf.items + 2, &item, sizeof(item)); memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, btrfs_item_size(&item)); - ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 18 * CTREE_BLOCKSIZE); + ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 18 * BTRFS_BLOCKSIZE); if (ret != sizeof(empty_leaf)) return -1; return 0; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 101278e1139a..c95c85640aa9 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -5,14 +5,14 @@ #include "ctree.h" #include "disk-io.h" -void print_leaf(struct leaf *l) +void btrfs_print_leaf(struct btrfs_leaf *l) { int i; u32 nr = btrfs_header_nritems(&l->header); struct btrfs_item *item; - struct extent_item *ei; + struct btrfs_extent_item *ei; printf("leaf %Lu total ptrs %d free space %d\n", - btrfs_header_blocknr(&l->header), nr, leaf_free_space(l)); + btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; @@ -26,24 +26,25 @@ void print_leaf(struct leaf *l) fflush(stdout); printf("\t\titem data %.*s\n", btrfs_item_size(item), l->data + btrfs_item_offset(item)); - ei = (struct extent_item *)(l->data + btrfs_item_offset(item)); + ei = (struct btrfs_extent_item *)(l->data + + btrfs_item_offset(item)); printf("\t\textent data refs %u owner %Lu\n", ei->refs, ei->owner); fflush(stdout); } } -void print_tree(struct ctree_root *root, struct tree_buffer *t) +void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) { int i; u32 nr; - struct node *c; + struct btrfs_node *c; if (!t) return; c = &t->node; nr = btrfs_header_nritems(&c->header); if (btrfs_is_leaf(c)) { - print_leaf((struct leaf *)c); + btrfs_print_leaf((struct btrfs_leaf *)c); return; } printf("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, @@ -58,17 +59,17 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t) fflush(stdout); } for (i = 0; i < nr; i++) { - struct tree_buffer *next_buf = read_tree_block(root, + struct btrfs_buffer *next_buf = read_tree_block(root, btrfs_node_blockptr(c, i)); - struct node *next = &next_buf->node; + struct btrfs_node *next = &next_buf->node; if (btrfs_is_leaf(next) && btrfs_header_level(&c->header) != 1) BUG(); if (btrfs_header_level(&next->header) != btrfs_header_level(&c->header) - 1) BUG(); - print_tree(root, next_buf); - tree_block_release(root, next_buf); + btrfs_print_tree(root, next_buf); + btrfs_block_release(root, next_buf); } } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 3c1e9a3e0260..e8d0b847c023 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,3 +1,3 @@ -void print_leaf(struct leaf *l); -void print_tree(struct ctree_root *root, struct tree_buffer *t); +void btrfs_print_leaf(struct btrfs_leaf *l); +void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t); diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index ab3bda53a2f6..66bdc57905d7 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -22,9 +22,9 @@ int main(int ac, char **av) { int run_size = 100000; int max_key = 100000000; int tree_size = 0; - struct ctree_path path; - struct ctree_super_block super; - struct ctree_root *root; + struct btrfs_path path; + struct btrfs_super_block super; + struct btrfs_root *root; radix_tree_init(); @@ -40,12 +40,12 @@ int main(int ac, char **av) { ins.objectid = num; ins.offset = 0; ins.flags = 0; - ret = insert_item(root, &ins, buf, strlen(buf)); + ret = btrfs_insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; free(buf); if (i == run_size - 5) { - commit_transaction(root, &super); + btrfs_commit_transaction(root, &super); } } @@ -57,16 +57,16 @@ int main(int ac, char **av) { for (i = 0; i < run_size; i++) { num = next_key(i, max_key); ins.objectid = num; - init_path(&path); + btrfs_init_path(&path); if (i % 10000 == 0) fprintf(stderr, "search %d:%d\n", num, i); - ret = search_slot(root, &ins, &path, 0, 0); + ret = btrfs_search_slot(root, &ins, &path, 0, 0); if (ret) { - print_tree(root, root->node); + btrfs_print_tree(root, root->node); printf("unable to find %d\n", num); exit(1); } - release_path(root, &path); + btrfs_release_path(root, &path); } close_ctree(root, &super); root = open_ctree("dbfile", &super); @@ -81,17 +81,17 @@ int main(int ac, char **av) { for (i = 0 ; i < run_size/4; i++) { num = next_key(i, max_key); ins.objectid = num; - init_path(&path); - ret = search_slot(root, &ins, &path, -1, 1); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &ins, &path, -1, 1); if (!ret) { if (i % 10000 == 0) fprintf(stderr, "del %d:%d\n", num, i); - ret = del_item(root, &path); + ret = btrfs_del_item(root, &path); if (ret != 0) BUG(); tree_size--; } - release_path(root, &path); + btrfs_release_path(root, &path); } close_ctree(root, &super); root = open_ctree("dbfile", &super); @@ -103,7 +103,7 @@ int main(int ac, char **av) { ins.objectid = num; if (i % 10000 == 0) fprintf(stderr, "insert %d:%d\n", num, i); - ret = insert_item(root, &ins, buf, strlen(buf)); + ret = btrfs_insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; free(buf); @@ -115,25 +115,25 @@ int main(int ac, char **av) { for (i = 0; i < run_size; i++) { num = next_key(i, max_key); ins.objectid = num; - init_path(&path); + btrfs_init_path(&path); if (i % 10000 == 0) fprintf(stderr, "search %d:%d\n", num, i); - ret = search_slot(root, &ins, &path, 0, 0); + ret = btrfs_search_slot(root, &ins, &path, 0, 0); if (ret) { - print_tree(root, root->node); + btrfs_print_tree(root, root->node); printf("unable to find %d\n", num); exit(1); } - release_path(root, &path); + btrfs_release_path(root, &path); } printf("starting big long delete run\n"); while(root->node && btrfs_header_nritems(&root->node->node.header) > 0) { - struct leaf *leaf; + struct btrfs_leaf *leaf; int slot; ins.objectid = (u64)-1; - init_path(&path); - ret = search_slot(root, &ins, &path, -1, 1); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &ins, &path, -1, 1); if (ret == 0) BUG(); @@ -149,26 +149,26 @@ int main(int ac, char **av) { btrfs_disk_key_to_cpu(&last, &leaf->items[slot].key); if (tree_size % 10000 == 0) printf("big del %d:%d\n", tree_size, i); - ret = del_item(root, &path); + ret = btrfs_del_item(root, &path); if (ret != 0) { printf("del_item returned %d\n", ret); BUG(); } tree_size--; } - release_path(root, &path); + btrfs_release_path(root, &path); } /* printf("previous tree:\n"); - print_tree(root, root->commit_root); + btrfs_print_tree(root, root->commit_root); printf("map before commit\n"); - print_tree(root->extent_root, root->extent_root->node); + btrfs_print_tree(root->extent_root, root->extent_root->node); */ - commit_transaction(root, &super); + btrfs_commit_transaction(root, &super); printf("tree size is now %d\n", tree_size); printf("root %p commit root %p\n", root->node, root->commit_root); printf("map tree\n"); - print_tree(root->extent_root, root->extent_root->node); + btrfs_print_tree(root->extent_root, root->extent_root->node); close_ctree(root, &super); return 0; } diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index e767528bc521..a8ef0478991b 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -8,7 +8,7 @@ #include "print-tree.h" int keep_running = 1; -struct ctree_super_block super; +struct btrfs_super_block super; static int setup_key(struct radix_tree_root *root, struct btrfs_key *key, int exists) @@ -36,17 +36,17 @@ again: return 0; } -static int ins_one(struct ctree_root *root, struct radix_tree_root *radix) +static int ins_one(struct btrfs_root *root, struct radix_tree_root *radix) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; int ret; char buf[128]; unsigned long oid; - init_path(&path); + btrfs_init_path(&path); ret = setup_key(radix, &key, 0); sprintf(buf, "str-%Lu\n", key.objectid); - ret = insert_item(root, &key, buf, strlen(buf)); + ret = btrfs_insert_item(root, &key, buf, strlen(buf)); if (ret) goto error; oid = (unsigned long)key.objectid; @@ -61,18 +61,18 @@ error: return -1; } -static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) +static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; int ret; char buf[128]; - init_path(&path); + btrfs_init_path(&path); ret = setup_key(radix, &key, 1); if (ret < 0) return 0; sprintf(buf, "str-%Lu\n", key.objectid); - ret = insert_item(root, &key, buf, strlen(buf)); + ret = btrfs_insert_item(root, &key, buf, strlen(buf)); if (ret != -EEXIST) { printf("insert on %Lu gave us %d\n", key.objectid, ret); return 1; @@ -80,21 +80,21 @@ static int insert_dup(struct ctree_root *root, struct radix_tree_root *radix) return 0; } -static int del_one(struct ctree_root *root, struct radix_tree_root *radix) +static int del_one(struct btrfs_root *root, struct radix_tree_root *radix) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; int ret; unsigned long *ptr; - init_path(&path); + btrfs_init_path(&path); ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - ret = search_slot(root, &key, &path, -1, 1); + ret = btrfs_search_slot(root, &key, &path, -1, 1); if (ret) goto error; - ret = del_item(root, &path); - release_path(root, &path); + ret = btrfs_del_item(root, &path); + btrfs_release_path(root, &path); if (ret != 0) goto error; ptr = radix_tree_delete(radix, key.objectid); @@ -106,17 +106,17 @@ error: return -1; } -static int lookup_item(struct ctree_root *root, struct radix_tree_root *radix) +static int lookup_item(struct btrfs_root *root, struct radix_tree_root *radix) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; int ret; - init_path(&path); + btrfs_init_path(&path); ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - ret = search_slot(root, &key, &path, 0, 1); - release_path(root, &path); + ret = btrfs_search_slot(root, &key, &path, 0, 1); + btrfs_release_path(root, &path); if (ret) goto error; return 0; @@ -125,17 +125,17 @@ error: return -1; } -static int lookup_enoent(struct ctree_root *root, struct radix_tree_root *radix) +static int lookup_enoent(struct btrfs_root *root, struct radix_tree_root *radix) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; int ret; - init_path(&path); + btrfs_init_path(&path); ret = setup_key(radix, &key, 0); if (ret < 0) return ret; - ret = search_slot(root, &key, &path, 0, 0); - release_path(root, &path); + ret = btrfs_search_slot(root, &key, &path, 0, 0); + btrfs_release_path(root, &path); if (ret <= 0) goto error; return 0; @@ -144,10 +144,10 @@ error: return -1; } -static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix, +static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, int nr) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; unsigned long found = 0; int ret; @@ -159,22 +159,22 @@ static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix, key.flags = 0; key.objectid = (unsigned long)-1; while(nr-- >= 0) { - init_path(&path); - ret = search_slot(root, &key, &path, -1, 1); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &key, &path, -1, 1); if (ret < 0) { - release_path(root, &path); + btrfs_release_path(root, &path); return ret; } if (ret != 0) { if (path.slots[0] == 0) { - release_path(root, &path); + btrfs_release_path(root, &path); break; } path.slots[0] -= 1; } slot = path.slots[0]; found=btrfs_key_objectid(&path.nodes[0]->leaf.items[slot].key); - ret = del_item(root, &path); + ret = btrfs_del_item(root, &path); count++; if (ret) { fprintf(stderr, @@ -182,7 +182,7 @@ static int empty_tree(struct ctree_root *root, struct radix_tree_root *radix, found); return -1; } - release_path(root, &path); + btrfs_release_path(root, &path); ptr = radix_tree_delete(radix, found); if (!ptr) goto error; @@ -195,7 +195,7 @@ error: return -1; } -static int fill_tree(struct ctree_root *root, struct radix_tree_root *radix, +static int fill_tree(struct btrfs_root *root, struct radix_tree_root *radix, int count) { int i; @@ -207,7 +207,7 @@ static int fill_tree(struct ctree_root *root, struct radix_tree_root *radix, goto out; } if (i % 1000 == 0) { - ret = commit_transaction(root, &super); + ret = btrfs_commit_transaction(root, &super); if (ret) { fprintf(stderr, "fill commit failed\n"); return ret; @@ -223,7 +223,7 @@ out: return ret; } -static int bulk_op(struct ctree_root *root, struct radix_tree_root *radix) +static int bulk_op(struct btrfs_root *root, struct radix_tree_root *radix) { int ret; int nr = rand() % 5000; @@ -242,13 +242,13 @@ static int bulk_op(struct ctree_root *root, struct radix_tree_root *radix) } -int (*ops[])(struct ctree_root *root, struct radix_tree_root *radix) = +int (*ops[])(struct btrfs_root *root, struct radix_tree_root *radix) = { ins_one, insert_dup, del_one, lookup_item, lookup_enoent, bulk_op }; -static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) +static int fill_radix(struct btrfs_root *root, struct radix_tree_root *radix) { - struct ctree_path path; + struct btrfs_path path; struct btrfs_key key; unsigned long found; int ret; @@ -259,16 +259,16 @@ static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) key.flags = 0; key.objectid = (unsigned long)-1; while(1) { - init_path(&path); - ret = search_slot(root, &key, &path, 0, 0); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &key, &path, 0, 0); if (ret < 0) { - release_path(root, &path); + btrfs_release_path(root, &path); return ret; } slot = path.slots[0]; if (ret != 0) { if (slot == 0) { - release_path(root, &path); + btrfs_release_path(root, &path); break; } slot -= 1; @@ -287,7 +287,7 @@ static int fill_radix(struct ctree_root *root, struct radix_tree_root *radix) radix_tree_preload_end(); } - release_path(root, &path); + btrfs_release_path(root, &path); key.objectid = found - 1; if (key.objectid > found) break; @@ -312,7 +312,7 @@ int print_usage(void) int main(int ac, char **av) { RADIX_TREE(radix, GFP_KERNEL); - struct ctree_root *root; + struct btrfs_root *root; int i; int ret; int count; @@ -370,7 +370,7 @@ int main(int ac, char **av) if (ret) { fprintf(stderr, "op %d failed %d:%d\n", op, i, iterations); - print_tree(root, root->node); + btrfs_print_tree(root, root->node); fprintf(stderr, "op %d failed %d:%d\n", op, i, iterations); err = ret; From 9aca1d51323c0291a7358e0728b64ec4f17d1a77 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Mar 2007 11:09:37 -0400 Subject: [PATCH 0040/2985] Btrfs: make some funcs static Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 16 +++++++------- fs/btrfs/extent-tree.c | 47 ++++++++++++++++++++++++++++++++---------- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c8529b394d5f..bbb632291709 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -34,7 +34,7 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } -int btrfs_cow_block(struct btrfs_root *root, +static int btrfs_cow_block(struct btrfs_root *root, struct btrfs_buffer *buf, struct btrfs_buffer *parent, int parent_slot, @@ -96,7 +96,7 @@ int btrfs_leaf_free_space(struct btrfs_leaf *leaf) /* * compare two keys in a memcmp fashion */ -int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) +static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) { struct btrfs_key k1; @@ -117,7 +117,7 @@ int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 0; } -int check_node(struct btrfs_path *path, int level) +static int check_node(struct btrfs_path *path, int level) { int i; struct btrfs_node *parent = NULL; @@ -146,7 +146,7 @@ int check_node(struct btrfs_path *path, int level) return 0; } -int check_leaf(struct btrfs_path *path, int level) +static int check_leaf(struct btrfs_path *path, int level) { int i; struct btrfs_leaf *leaf = &path->nodes[level]->leaf; @@ -186,7 +186,7 @@ int check_leaf(struct btrfs_path *path, int level) return 0; } -int check_block(struct btrfs_path *path, int level) +static int check_block(struct btrfs_path *path, int level) { if (level == 0) return check_leaf(path, level); @@ -202,7 +202,7 @@ int check_block(struct btrfs_path *path, int level) * * slot may point to max if the key is bigger than all of the keys */ -int generic_bin_search(char *p, int item_size, struct btrfs_key *key, +static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, int max, int *slot) { int low = 0; @@ -233,7 +233,7 @@ int generic_bin_search(char *p, int item_size, struct btrfs_key *key, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) +static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) { if (btrfs_is_leaf(c)) { struct btrfs_leaf *l = (struct btrfs_leaf *)c; @@ -250,7 +250,7 @@ int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) return -1; } -struct btrfs_buffer *read_node_slot(struct btrfs_root *root, +static struct btrfs_buffer *read_node_slot(struct btrfs_root *root, struct btrfs_buffer *parent_buf, int slot) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4a4f2d810d9f..a6969538bca2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -365,8 +365,9 @@ error: * * returns 0 if everything worked, non-zero otherwise. */ -int alloc_extent(struct btrfs_root *root, u64 num_blocks, u64 search_start, - u64 search_end, u64 owner, struct btrfs_key *ins) +static int alloc_extent(struct btrfs_root *root, u64 num_blocks, + u64 search_start, u64 search_end, u64 owner, + struct btrfs_key *ins) { int ret; int pending_ret; @@ -425,7 +426,12 @@ struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root) return buf; } -int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, int *level) +/* + * helper function for drop_snapshot, this walks down the tree dropping ref + * counts as it goes. + */ +static int walk_down_tree(struct btrfs_root *root, + struct btrfs_path *path, int *level) { struct btrfs_buffer *next; struct btrfs_buffer *cur; @@ -437,6 +443,9 @@ int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, int *level) BUG_ON(ret); if (refs > 1) goto out; + /* + * walk down to the last node level and free all the leaves + */ while(*level > 0) { cur = path->nodes[*level]; if (path->slots[*level] >= @@ -467,7 +476,13 @@ out: return 0; } -int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, int *level) +/* + * helper for dropping snapshots. This walks back up the tree in the path + * to find the first node higher up where we haven't yet gone through + * all the slots + */ +static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, + int *level) { int i; int slot; @@ -491,9 +506,15 @@ int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, int *level) return 1; } +/* + * drop the reference count on the tree rooted at 'snap'. This traverses + * the tree freeing any blocks that have a ref count of zero after being + * decremented. + */ int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap) { - int ret; + int ret = 0;; + int wret; int level; struct btrfs_path path; int i; @@ -506,18 +527,22 @@ int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap) path.nodes[level] = snap; path.slots[level] = 0; while(1) { - ret = walk_down_tree(root, &path, &level); - if (ret > 0) + wret = walk_down_tree(root, &path, &level); + if (wret > 0) break; - ret = walk_up_tree(root, &path, &level); - if (ret > 0) + if (wret < 0) + ret = wret; + + wret = walk_up_tree(root, &path, &level); + if (wret > 0) break; + if (wret < 0) + ret = wret; } for (i = 0; i <= orig_level; i++) { if (path.nodes[i]) { btrfs_block_release(root, path.nodes[i]); } } - - return 0; + return ret; } From eaee50e881b88766d9a781c54c4142456cf5e527 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Mar 2007 11:17:52 -0400 Subject: [PATCH 0041/2985] Btrfs: merge leaves before split Signed-off-by: Chris Mason --- fs/btrfs/TODO | 2 -- fs/btrfs/ctree.c | 9 +++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 52d2a7a071f4..ad29267b8738 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,10 +1,8 @@ * cleanup, add more error checking, get rid of BUG_ONs -* endian fixes * Make IO functions look more like the page cache * Fix ENOSPC handling * make blocksize a mkfs parameter instead of #define * make a real mkfs and superblock -* Add shadowing and transactions * Do checksumming * Define FS objects in terms of different item types * Add block mapping tree (simple dm layer) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index bbb632291709..dc620186e501 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1043,6 +1043,15 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, int ret; int wret; + wret = push_leaf_left(root, path, data_size); + if (wret < 0) + return wret; + if (wret) { + wret = push_leaf_right(root, path, data_size); + if (wret < 0) + return wret; + } + l_buf = path->nodes[0]; l = &l_buf->leaf; From 3768f3689fc76ecea17414936dff7a02746a4355 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Mar 2007 16:47:54 -0400 Subject: [PATCH 0042/2985] Btrfs: Change the super to point to a tree of trees to enable persistent snapshots Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/TODO | 1 + fs/btrfs/ctree.h | 123 ++++++++++++++++++++++++++++++++----- fs/btrfs/debug-tree.c | 4 +- fs/btrfs/disk-io.c | 135 ++++++++++++++++++++++++++++++++--------- fs/btrfs/disk-io.h | 3 +- fs/btrfs/extent-tree.c | 15 +++-- fs/btrfs/mkfs.c | 128 ++++++++++++++++++++++++++++++++++++-- fs/btrfs/print-tree.c | 10 +-- fs/btrfs/root-tree.c | 88 +++++++++++++++++++++++++++ 10 files changed, 445 insertions(+), 65 deletions(-) create mode 100644 fs/btrfs/root-tree.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index f8532200e9ad..13ed6313ac85 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -2,7 +2,8 @@ CC=gcc CFLAGS = -g -Wall headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h -objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o +objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o \ + root-tree.o # if you don't have sparse installed, use ls instead CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index ad29267b8738..ff90e5eaf431 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -5,6 +5,7 @@ * make a real mkfs and superblock * Do checksumming * Define FS objects in terms of different item types +* add inode tree * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5dfcc90812c1..0968899fb7f9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4,8 +4,13 @@ #include "list.h" #include "kerncompat.h" +#define BTRFS_MAGIC "_BtRfS_M" #define BTRFS_BLOCKSIZE 1024 +#define BTRFS_ROOT_TREE_OBJECTID 1 +#define BTRFS_EXTENT_TREE_OBJECTID 2 +#define BTRFS_FS_TREE_OBJECTID 3 + /* * the key defines the order in the tree, and so it also defines (optimal) * block layout. objectid corresonds to the inode number. The flags @@ -36,7 +41,7 @@ struct btrfs_key { * every tree block (leaf or node) starts with this header. */ struct btrfs_header { - __le64 fsid[2]; /* FS specific uuid */ + u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 parentid; /* objectid of the tree root */ __le32 csum; @@ -52,6 +57,14 @@ struct btrfs_header { struct btrfs_buffer; +struct btrfs_root_item { + __le64 blocknr; + __le32 flags; + __le64 block_limit; + __le64 blocks_used; + __le32 refs; +}; + /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. current_insert is used @@ -61,6 +74,7 @@ struct btrfs_root { struct btrfs_buffer *node; struct btrfs_buffer *commit_root; struct btrfs_root *extent_root; + struct btrfs_root *tree_root; struct btrfs_key current_insert; struct btrfs_key last_insert; int fp; @@ -69,28 +83,25 @@ struct btrfs_root { struct list_head trans; struct list_head cache; int cache_size; + int ref_cows; + struct btrfs_root_item root_item; + struct btrfs_key root_key; }; -/* - * describes a tree on disk - */ -struct btrfs_root_info { - u64 fsid[2]; /* FS specific uuid */ - u64 blocknr; /* blocknr of this block */ - u64 objectid; /* inode number of this root */ - u64 tree_root; /* the tree root block */ - u32 csum; - u32 ham; - u64 snapuuid[2]; /* root specific uuid */ -} __attribute__ ((__packed__)); - /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc */ struct btrfs_super_block { - struct btrfs_root_info root_info; - struct btrfs_root_info extent_info; + u8 fsid[16]; /* FS specific uuid */ + __le64 blocknr; /* this block number */ + __le32 csum; + __le64 magic; + __le16 blocksize; + __le64 generation; + __le64 root; + __le64 total_blocks; + __le64 blocks_used; } __attribute__ ((__packed__)); /* @@ -317,6 +328,79 @@ static inline int btrfs_is_leaf(struct btrfs_node *n) return (btrfs_header_level(&n->header) == 0); } +static inline u64 btrfs_root_blocknr(struct btrfs_root_item *item) +{ + return le64_to_cpu(item->blocknr); +} + +static inline void btrfs_set_root_blocknr(struct btrfs_root_item *item, u64 val) +{ + item->blocknr = cpu_to_le64(val); +} + +static inline u32 btrfs_root_refs(struct btrfs_root_item *item) +{ + return le32_to_cpu(item->refs); +} + +static inline void btrfs_set_root_refs(struct btrfs_root_item *item, u32 val) +{ + item->refs = cpu_to_le32(val); +} + +static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->blocknr); +} + +static inline void btrfs_set_super_blocknr(struct btrfs_super_block *s, u64 val) +{ + s->blocknr = cpu_to_le64(val); +} + +static inline u64 btrfs_super_root(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->root); +} + +static inline void btrfs_set_super_root(struct btrfs_super_block *s, u64 val) +{ + s->root = cpu_to_le64(val); +} + +static inline u64 btrfs_super_total_blocks(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->total_blocks); +} + +static inline void btrfs_set_super_total_blocks(struct btrfs_super_block *s, + u64 val) +{ + s->total_blocks = cpu_to_le64(val); +} + +static inline u64 btrfs_super_blocks_used(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->blocks_used); +} + +static inline void btrfs_set_super_blocks_used(struct btrfs_super_block *s, + u64 val) +{ + s->blocks_used = cpu_to_le64(val); +} + +static inline u16 btrfs_super_blocksize(struct btrfs_super_block *s) +{ + return le16_to_cpu(s->blocksize); +} + +static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s, + u16 val) +{ + s->blocksize = cpu_to_le16(val); +} + struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root); int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf); int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks); @@ -331,4 +415,11 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap); int btrfs_finish_extent_commit(struct btrfs_root *root); +int btrfs_del_root(struct btrfs_root *root, struct btrfs_key *key); +int btrfs_insert_root(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_root_item *item); +int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_root_item *item); +int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, + struct btrfs_root_item *item, struct btrfs_key *key); #endif diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c index 6da0a7aa0f47..de45fb4dfddd 100644 --- a/fs/btrfs/debug-tree.c +++ b/fs/btrfs/debug-tree.c @@ -11,9 +11,11 @@ int main(int ac, char **av) { struct btrfs_root *root; radix_tree_init(); root = open_ctree("dbfile", &super); - printf("root tree\n"); + printf("fs tree\n"); btrfs_print_tree(root, root->node); printf("map tree\n"); btrfs_print_tree(root->extent_root, root->extent_root->node); + printf("root tree\n"); + btrfs_print_tree(root->tree_root, root->tree_root->node); return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c34c0c60935f..3d4bf6833f2a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -154,41 +154,96 @@ static int __commit_transaction(struct btrfs_root *root) return ret; } +static int commit_extent_and_tree_roots(struct btrfs_root *tree_root, + struct btrfs_root *extent_root) +{ + int ret; + u64 old_extent_block; + + while(1) { + old_extent_block = btrfs_root_blocknr(&extent_root->root_item); + if (old_extent_block == extent_root->node->blocknr) + break; + btrfs_set_root_blocknr(&extent_root->root_item, + extent_root->node->blocknr); + ret = btrfs_update_root(tree_root, + &extent_root->root_key, + &extent_root->root_item); + BUG_ON(ret); + } + __commit_transaction(extent_root); + __commit_transaction(tree_root); + return 0; +} + int btrfs_commit_transaction(struct btrfs_root *root, struct btrfs_super_block *s) { int ret = 0; + struct btrfs_buffer *snap = root->commit_root; + struct btrfs_key snap_key; ret = __commit_transaction(root); - if (!ret && root != root->extent_root) - ret = __commit_transaction(root->extent_root); BUG_ON(ret); - if (root->commit_root != root->node) { - struct btrfs_buffer *snap = root->commit_root; - root->commit_root = root->node; - root->node->count++; - ret = btrfs_drop_snapshot(root, snap); - BUG_ON(ret); - // btrfs_block_release(root, snap); - } + + if (root->commit_root == root->node) + return 0; + + memcpy(&snap_key, &root->root_key, sizeof(snap_key)); + root->root_key.offset++; + + btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); + ret = btrfs_insert_root(root->tree_root, &root->root_key, + &root->root_item); + BUG_ON(ret); + + ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root); + BUG_ON(ret); + write_ctree_super(root, s); - btrfs_finish_extent_commit(root); + btrfs_finish_extent_commit(root->extent_root); + btrfs_finish_extent_commit(root->tree_root); + + root->commit_root = root->node; + root->node->count++; + ret = btrfs_drop_snapshot(root, snap); + BUG_ON(ret); + + ret = btrfs_del_root(root->tree_root, &snap_key); + BUG_ON(ret); + return ret; } -static int __setup_root(struct btrfs_root *root, struct btrfs_root *extent_root, - struct btrfs_root_info *info, int fp) +static int __setup_root(struct btrfs_root *root, u64 objectid, int fp) { INIT_LIST_HEAD(&root->trans); INIT_LIST_HEAD(&root->cache); root->cache_size = 0; root->fp = fp; root->node = NULL; - root->extent_root = extent_root; root->commit_root = NULL; - root->node = read_tree_block(root, info->tree_root); memset(&root->current_insert, 0, sizeof(root->current_insert)); memset(&root->last_insert, 0, sizeof(root->last_insert)); + memset(&root->root_key, 0, sizeof(root->root_key)); + memset(&root->root_item, 0, sizeof(root->root_item)); + return 0; +} + +static int find_and_setup_root(struct btrfs_root *tree_root, u64 objectid, + struct btrfs_root *root, int fp) +{ + int ret; + + __setup_root(root, objectid, fp); + ret = btrfs_find_last_root(tree_root, objectid, + &root->root_item, &root->root_key); + BUG_ON(ret); + + root->node = read_tree_block(root, + btrfs_root_blocknr(&root->root_item)); + root->ref_cows = 0; + BUG_ON(!root->node); return 0; } @@ -196,9 +251,19 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) { struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); + struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); int fp; int ret; + root->extent_root = extent_root; + root->tree_root = tree_root; + + extent_root->extent_root = extent_root; + extent_root->tree_root = tree_root; + + tree_root->extent_root = extent_root; + tree_root->tree_root = tree_root; + fp = open(filename, O_CREAT | O_RDWR, 0600); if (fp < 0) { free(root); @@ -208,11 +273,14 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) INIT_RADIX_TREE(&root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&tree_root->pinned_radix, GFP_KERNEL); + INIT_RADIX_TREE(&tree_root->cache_radix, GFP_KERNEL); + ret = pread(fp, super, sizeof(struct btrfs_super_block), BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); - if (ret == 0 || super->root_info.tree_root == 0) { + if (ret == 0 || btrfs_super_root(super) == 0) { printf("making new FS!\n"); - ret = mkfs(fp); + ret = mkfs(fp, 0, BTRFS_BLOCKSIZE); if (ret) return NULL; ret = pread(fp, super, sizeof(struct btrfs_super_block), @@ -221,24 +289,29 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) return NULL; } BUG_ON(ret < 0); - __setup_root(root, extent_root, &super->root_info, fp); - __setup_root(extent_root, extent_root, &super->extent_info, fp); + + __setup_root(tree_root, BTRFS_ROOT_TREE_OBJECTID, fp); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); + BUG_ON(!tree_root->node); + + ret = find_and_setup_root(tree_root, BTRFS_EXTENT_TREE_OBJECTID, + extent_root, fp); + BUG_ON(ret); + + ret = find_and_setup_root(tree_root, BTRFS_FS_TREE_OBJECTID, + root, fp); + BUG_ON(ret); + root->commit_root = root->node; root->node->count++; + root->ref_cows = 1; return root; } -static int __update_root(struct btrfs_root *root, struct btrfs_root_info *info) -{ - info->tree_root = root->node->blocknr; - return 0; -} - int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s) { int ret; - __update_root(root, &s->root_info); - __update_root(root->extent_root, &s->extent_info); + btrfs_set_super_root(s, root->tree_root->node->blocknr); ret = pwrite(root->fp, s, sizeof(*s), BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); if (ret != sizeof(*s)) { @@ -260,19 +333,25 @@ static int drop_cache(struct btrfs_root *root) } int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) { + int ret; btrfs_commit_transaction(root, s); - __commit_transaction(root->extent_root); + ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root); + BUG_ON(ret); write_ctree_super(root, s); drop_cache(root->extent_root); + drop_cache(root->tree_root); drop_cache(root); BUG_ON(!list_empty(&root->trans)); BUG_ON(!list_empty(&root->extent_root->trans)); + BUG_ON(!list_empty(&root->tree_root->trans)); close(root->fp); if (root->node) btrfs_block_release(root, root->node); if (root->extent_root->node) btrfs_block_release(root->extent_root, root->extent_root->node); + if (root->tree_root->node) + btrfs_block_release(root->tree_root, root->tree_root->node); btrfs_block_release(root, root->commit_root); free(root); printf("on close %d blocks are allocated\n", allocated_blocks); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b391335864b1..c22a61f9233f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -24,7 +24,8 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s); -int mkfs(int fd); +int mkfs(int fd, u64 num_blocks, u16 blocksize); + #define BTRFS_SUPER_INFO_OFFSET(bs) (16 * (bs)) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a6969538bca2..21f39b404697 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -82,7 +82,7 @@ int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf) u64 blocknr; int i; - if (root == root->extent_root) + if (!root->ref_cows) return 0; if (btrfs_is_leaf(&buf->node)) return 0; @@ -96,23 +96,22 @@ int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf) int btrfs_finish_extent_commit(struct btrfs_root *root) { - struct btrfs_root *extent_root = root->extent_root; unsigned long gang[8]; int ret; int i; while(1) { - ret = radix_tree_gang_lookup(&extent_root->pinned_radix, + ret = radix_tree_gang_lookup(&root->pinned_radix, (void **)gang, 0, ARRAY_SIZE(gang)); if (!ret) break; for (i = 0; i < ret; i++) { - radix_tree_delete(&extent_root->pinned_radix, gang[i]); + radix_tree_delete(&root->pinned_radix, gang[i]); } } - extent_root->last_insert.objectid = 0; - extent_root->last_insert.offset = 0; + root->last_insert.objectid = 0; + root->last_insert.offset = 0; return 0; } @@ -173,7 +172,7 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); if (refs == 0) { - if (root == extent_root) { + if (!root->ref_cows) { int err; radix_tree_preload(GFP_KERNEL); err = radix_tree_insert(&extent_root->pinned_radix, @@ -513,7 +512,7 @@ static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, */ int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap) { - int ret = 0;; + int ret = 0; int wret; int level; struct btrfs_path path; diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index fc1923320de3..dd14ed4fea6c 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -10,6 +10,120 @@ #include "ctree.h" #include "disk-io.h" +int mkfs(int fd, u64 num_blocks, u16 blocksize) +{ + struct btrfs_super_block super; + struct btrfs_leaf empty_leaf; + struct btrfs_root_item root_item; + struct btrfs_item item; + struct btrfs_extent_item extent_item; + char *block; + int ret; + u16 itemoff; + + btrfs_set_super_blocknr(&super, 16); + btrfs_set_super_root(&super, 17); + strcpy((char *)(&super.magic), BTRFS_MAGIC); + btrfs_set_super_blocksize(&super, blocksize); + btrfs_set_super_total_blocks(&super, num_blocks); + btrfs_set_super_blocks_used(&super, 0); + + block = malloc(blocksize); + memset(block, 0, blocksize); + BUG_ON(sizeof(super) > blocksize); + memcpy(block, &super, sizeof(super)); + ret = pwrite(fd, block, blocksize, BTRFS_SUPER_INFO_OFFSET(blocksize)); + BUG_ON(ret != blocksize); + + /* create the tree of root objects */ + memset(&empty_leaf, 0, sizeof(empty_leaf)); + btrfs_set_header_parentid(&empty_leaf.header, BTRFS_ROOT_TREE_OBJECTID); + btrfs_set_header_blocknr(&empty_leaf.header, 17); + btrfs_set_header_nritems(&empty_leaf.header, 2); + + /* create the items for the root tree */ + btrfs_set_root_blocknr(&root_item, 18); + btrfs_set_root_refs(&root_item, 1); + itemoff = LEAF_DATA_SIZE - sizeof(root_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_item_size(&item, sizeof(root_item)); + btrfs_set_key_objectid(&item.key, BTRFS_EXTENT_TREE_OBJECTID); + btrfs_set_key_offset(&item.key, 0); + btrfs_set_key_flags(&item.key, 0); + memcpy(empty_leaf.items, &item, sizeof(item)); + memcpy(empty_leaf.data + itemoff, &root_item, sizeof(root_item)); + + btrfs_set_root_blocknr(&root_item, 19); + itemoff = itemoff - sizeof(root_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); + memcpy(empty_leaf.items + 1, &item, sizeof(item)); + memcpy(empty_leaf.data + itemoff, &root_item, sizeof(root_item)); + ret = pwrite(fd, &empty_leaf, blocksize, 17 * blocksize); + + /* create the items for the extent tree */ + btrfs_set_header_parentid(&empty_leaf.header, + BTRFS_EXTENT_TREE_OBJECTID); + btrfs_set_header_blocknr(&empty_leaf.header, 18); + btrfs_set_header_nritems(&empty_leaf.header, 4); + + /* item1, reserve blocks 0-16 */ + btrfs_set_key_objectid(&item.key, 0); + btrfs_set_key_offset(&item.key, 17); + btrfs_set_key_flags(&item.key, 0); + itemoff = LEAF_DATA_SIZE - sizeof(struct btrfs_extent_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_item_size(&item, sizeof(struct btrfs_extent_item)); + btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_extent_owner(&extent_item, 0); + memcpy(empty_leaf.items, &item, sizeof(item)); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); + + /* item2, give block 17 to the root */ + btrfs_set_key_objectid(&item.key, 17); + btrfs_set_key_offset(&item.key, 1); + itemoff = itemoff - sizeof(struct btrfs_extent_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_extent_owner(&extent_item, BTRFS_ROOT_TREE_OBJECTID); + memcpy(empty_leaf.items + 1, &item, sizeof(item)); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); + + /* item3, give block 18 to the extent root */ + btrfs_set_key_objectid(&item.key, 18); + btrfs_set_key_offset(&item.key, 1); + itemoff = itemoff - sizeof(struct btrfs_extent_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_extent_owner(&extent_item, BTRFS_EXTENT_TREE_OBJECTID); + memcpy(empty_leaf.items + 2, &item, sizeof(item)); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); + + /* item4, give block 19 to the FS root */ + btrfs_set_key_objectid(&item.key, 19); + btrfs_set_key_offset(&item.key, 1); + itemoff = itemoff - sizeof(struct btrfs_extent_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_extent_owner(&extent_item, BTRFS_FS_TREE_OBJECTID); + memcpy(empty_leaf.items + 3, &item, sizeof(item)); + memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, + btrfs_item_size(&item)); + ret = pwrite(fd, &empty_leaf, blocksize, 18 * blocksize); + if (ret != sizeof(empty_leaf)) + return -1; + + /* finally create the FS root */ + btrfs_set_header_parentid(&empty_leaf.header, BTRFS_FS_TREE_OBJECTID); + btrfs_set_header_blocknr(&empty_leaf.header, 19); + btrfs_set_header_nritems(&empty_leaf.header, 0); + ret = pwrite(fd, &empty_leaf, blocksize, 19 * blocksize); + if (ret != sizeof(empty_leaf)) + return -1; + return 0; +} + +#if 0 int mkfs(int fd) { struct btrfs_root_info info[2]; @@ -20,13 +134,14 @@ int mkfs(int fd) /* setup the super block area */ memset(info, 0, sizeof(info)); - info[0].blocknr = 16; - info[0].objectid = 1; - info[0].tree_root = 17; + btrfs_set_root_blocknr(info, 16); + btrfs_set_root_objectid(info, 1); + btrfs_set_root_tree_root(info, 17); + + btrfs_set_root_blocknr(info + 1, 16); + btrfs_set_root_objectid(info + 1, 2); + btrfs_set_root_tree_root(info + 1, 18); - info[1].blocknr = 16; - info[1].objectid = 2; - info[1].tree_root = 18; ret = pwrite(fd, info, sizeof(info), BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); if (ret != sizeof(info)) @@ -81,3 +196,4 @@ int mkfs(int fd) return -1; return 0; } +#endif diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index c95c85640aa9..64829b6b90f9 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -11,6 +11,7 @@ void btrfs_print_leaf(struct btrfs_leaf *l) u32 nr = btrfs_header_nritems(&l->header); struct btrfs_item *item; struct btrfs_extent_item *ei; + struct btrfs_root_item *ri; printf("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(l)); fflush(stdout); @@ -23,13 +24,15 @@ void btrfs_print_leaf(struct btrfs_leaf *l) btrfs_key_offset(&item->key), btrfs_item_offset(item), btrfs_item_size(item)); - fflush(stdout); printf("\t\titem data %.*s\n", btrfs_item_size(item), l->data + btrfs_item_offset(item)); ei = (struct btrfs_extent_item *)(l->data + btrfs_item_offset(item)); - printf("\t\textent data refs %u owner %Lu\n", ei->refs, - ei->owner); + printf("\t\textent data refs %u owner %Lu\n", + btrfs_extent_refs(ei), btrfs_extent_owner(ei)); + ri = (struct btrfs_root_item *)ei; + printf("\t\troot data blocknr %Lu refs %u\n", + btrfs_root_blocknr(ri), btrfs_root_refs(ri)); fflush(stdout); } } @@ -71,6 +74,5 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) btrfs_print_tree(root, next_buf); btrfs_block_release(root, next_buf); } - } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c new file mode 100644 index 000000000000..0ab90cfea98f --- /dev/null +++ b/fs/btrfs/root-tree.c @@ -0,0 +1,88 @@ +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" + +int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, + struct btrfs_root_item *item, struct btrfs_key *key) +{ + struct btrfs_path path; + struct btrfs_key search_key; + struct btrfs_leaf *l; + int ret; + int slot; + + search_key.objectid = objectid; + search_key.flags = (u32)-1; + search_key.offset = (u32)-1; + + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &search_key, &path, 0, 0); + if (ret < 0) + goto out; + BUG_ON(ret == 0); + l = &path.nodes[0]->leaf; + BUG_ON(path.slots[0] == 0); + slot = path.slots[0] - 1; + if (btrfs_key_objectid(&l->items[slot].key) != objectid) { + ret = 1; + goto out; + } + memcpy(item, l->data + btrfs_item_offset(l->items + slot), + sizeof(*item)); + btrfs_disk_key_to_cpu(key, &l->items[slot].key); + btrfs_release_path(root, &path); + ret = 0; +out: + return ret; +} + +int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_root_item *item) +{ + struct btrfs_path path; + struct btrfs_leaf *l; + int ret; + int slot; + + btrfs_init_path(&path); + ret = btrfs_search_slot(root, key, &path, 0, 1); + if (ret < 0) + goto out; + BUG_ON(ret != 0); + l = &path.nodes[0]->leaf; + slot = path.slots[0]; + memcpy(l->data + btrfs_item_offset(l->items + slot), item, + sizeof(*item)); +out: + btrfs_release_path(root, &path); + return ret; +} + +int btrfs_insert_root(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_root_item *item) +{ + int ret; + ret = btrfs_insert_item(root, key, item, sizeof(*item)); + BUG_ON(ret); + return ret; +} + +int btrfs_del_root(struct btrfs_root *root, struct btrfs_key *key) +{ + struct btrfs_path path; + int ret; + + btrfs_init_path(&path); + ret = btrfs_search_slot(root, key, &path, -1, 1); + if (ret < 0) + goto out; + BUG_ON(ret != 0); + ret = btrfs_del_item(root, &path); +out: + btrfs_release_path(root, &path); + return ret; +} From 710874947aa0f81c3b348b052e81cb9075113c49 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 14 Mar 2007 09:20:39 -0400 Subject: [PATCH 0043/2985] Btrfs: properly reset block cache on free Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 21f39b404697..7f156d9b7089 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -182,7 +182,7 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) } ret = btrfs_del_item(extent_root, &path); if (root != extent_root && - extent_root->last_insert.objectid < blocknr) + extent_root->last_insert.objectid > blocknr) extent_root->last_insert.objectid = blocknr; if (ret) BUG(); From 4beb1b8b75a86373f6020103ab840448d14c8880 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 14 Mar 2007 10:31:29 -0400 Subject: [PATCH 0044/2985] Btrfs: add leaf data casting helper Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++++ fs/btrfs/extent-tree.c | 8 ++------ fs/btrfs/print-tree.c | 5 ++--- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0968899fb7f9..05c7707263f5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -401,6 +401,10 @@ static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s, s->blocksize = cpu_to_le16(val); } +/* helper function to cast into the data area of the leaf. */ +#define btrfs_item_ptr(leaf, slot, type) \ + ((type *)((leaf)->data + btrfs_item_offset((leaf)->items + (slot)))) + struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root); int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf); int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7f156d9b7089..3767744f659e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -41,9 +41,7 @@ static int inc_block_ref(struct btrfs_root *root, u64 blocknr) BUG(); BUG_ON(ret != 0); l = &path.nodes[0]->leaf; - item = (struct btrfs_extent_item *)(l->data + - btrfs_item_offset(l->items + - path.slots[0])); + item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); @@ -69,9 +67,7 @@ static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs) if (ret != 0) BUG(); l = &path.nodes[0]->leaf; - item = (struct btrfs_extent_item *)(l->data + - btrfs_item_offset(l->items + - path.slots[0])); + item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); btrfs_release_path(root->extent_root, &path); return 0; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 64829b6b90f9..e769f36cf051 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -26,11 +26,10 @@ void btrfs_print_leaf(struct btrfs_leaf *l) btrfs_item_size(item)); printf("\t\titem data %.*s\n", btrfs_item_size(item), l->data + btrfs_item_offset(item)); - ei = (struct btrfs_extent_item *)(l->data + - btrfs_item_offset(item)); + ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); printf("\t\textent data refs %u owner %Lu\n", btrfs_extent_refs(ei), btrfs_extent_owner(ei)); - ri = (struct btrfs_root_item *)ei; + ri = btrfs_item_ptr(l, i, struct btrfs_root_item); printf("\t\troot data blocknr %Lu refs %u\n", btrfs_root_blocknr(ri), btrfs_root_refs(ri)); fflush(stdout); From 123abc88c9087b9c5605566ee3491aaef17fd837 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 14 Mar 2007 14:14:43 -0400 Subject: [PATCH 0045/2985] Btrfs: variable block size support Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 225 ++++++++++++++++++++--------------------- fs/btrfs/ctree.h | 62 +++++++----- fs/btrfs/disk-io.c | 42 ++++---- fs/btrfs/disk-io.h | 9 +- fs/btrfs/extent-tree.c | 6 +- fs/btrfs/mkfs.c | 171 ++++++++++--------------------- fs/btrfs/print-tree.c | 15 +-- fs/btrfs/print-tree.h | 2 +- fs/btrfs/quick-test.c | 2 +- fs/btrfs/root-tree.c | 4 +- 10 files changed, 238 insertions(+), 300 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index dc620186e501..ece8de7f38ef 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -47,7 +47,7 @@ static int btrfs_cow_block(struct btrfs_root *root, return 0; } cow = btrfs_alloc_free_block(root); - memcpy(&cow->node, &buf->node, sizeof(buf->node)); + memcpy(&cow->node, &buf->node, root->blocksize); btrfs_set_header_blocknr(&cow->node.header, cow->blocknr); *cow_ret = cow; btrfs_inc_ref(root, buf); @@ -72,11 +72,12 @@ static int btrfs_cow_block(struct btrfs_root *root, * this returns the address of the start of the last item, * which is the stop of the leaf data stack */ -static inline unsigned int leaf_data_end(struct btrfs_leaf *leaf) +static inline unsigned int leaf_data_end(struct btrfs_root *root, + struct btrfs_leaf *leaf) { u32 nr = btrfs_header_nritems(&leaf->header); if (nr == 0) - return sizeof(leaf->data); + return BTRFS_LEAF_DATA_SIZE(root); return btrfs_item_offset(leaf->items + nr - 1); } @@ -85,12 +86,12 @@ static inline unsigned int leaf_data_end(struct btrfs_leaf *leaf) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -int btrfs_leaf_free_space(struct btrfs_leaf *leaf) +int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) { - int data_end = leaf_data_end(leaf); + int data_end = leaf_data_end(root, leaf); int nritems = btrfs_header_nritems(&leaf->header); char *items_end = (char *)(leaf->items + nritems + 1); - return (char *)(leaf->data + data_end) - (char *)items_end; + return (char *)(btrfs_leaf_data(leaf) + data_end) - (char *)items_end; } /* @@ -117,7 +118,8 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 0; } -static int check_node(struct btrfs_path *path, int level) +static int check_node(struct btrfs_root *root, struct btrfs_path *path, + int level) { int i; struct btrfs_node *parent = NULL; @@ -131,22 +133,23 @@ static int check_node(struct btrfs_path *path, int level) BUG_ON(nritems == 0); if (parent) { struct btrfs_disk_key *parent_key; - parent_key = &parent->keys[parent_slot]; - BUG_ON(memcmp(parent_key, node->keys, + parent_key = &parent->ptrs[parent_slot].key; + BUG_ON(memcmp(parent_key, &node->ptrs[0].key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != btrfs_header_blocknr(&node->header)); } - BUG_ON(nritems > NODEPTRS_PER_BLOCK); + BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); for (i = 0; nritems > 1 && i < nritems - 2; i++) { struct btrfs_key cpukey; - btrfs_disk_key_to_cpu(&cpukey, &node->keys[i + 1]); - BUG_ON(comp_keys(&node->keys[i], &cpukey) >= 0); + btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key); + BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0); } return 0; } -static int check_leaf(struct btrfs_path *path, int level) +static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, + int level) { int i; struct btrfs_leaf *leaf = &path->nodes[level]->leaf; @@ -157,14 +160,14 @@ static int check_leaf(struct btrfs_path *path, int level) if (path->nodes[level + 1]) parent = &path->nodes[level + 1]->node; parent_slot = path->slots[level + 1]; - BUG_ON(btrfs_leaf_free_space(leaf) < 0); + BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); if (nritems == 0) return 0; if (parent) { struct btrfs_disk_key *parent_key; - parent_key = &parent->keys[parent_slot]; + parent_key = &parent->ptrs[parent_slot].key; BUG_ON(memcmp(parent_key, &leaf->items[0].key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != @@ -180,17 +183,18 @@ static int check_leaf(struct btrfs_path *path, int level) if (i == 0) { BUG_ON(btrfs_item_offset(leaf->items + i) + btrfs_item_size(leaf->items + i) != - LEAF_DATA_SIZE); + BTRFS_LEAF_DATA_SIZE(root)); } } return 0; } -static int check_block(struct btrfs_path *path, int level) +static int check_block(struct btrfs_root *root, struct btrfs_path *path, + int level) { if (level == 0) - return check_leaf(path, level); - return check_node(path, level); + return check_leaf(root, path, level); + return check_node(root, path, level); } /* @@ -242,8 +246,8 @@ static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) key, btrfs_header_nritems(&c->header), slot); } else { - return generic_bin_search((void *)c->keys, - sizeof(struct btrfs_disk_key), + return generic_bin_search((void *)c->ptrs, + sizeof(struct btrfs_key_ptr), key, btrfs_header_nritems(&c->header), slot); } @@ -311,7 +315,8 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, } parent = &parent_buf->node; - if (btrfs_header_nritems(&mid->header) > NODEPTRS_PER_BLOCK / 4) + if (btrfs_header_nritems(&mid->header) > + BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; left_buf = read_node_slot(root, parent_buf, pslot - 1); @@ -351,7 +356,8 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, if (wret) ret = wret; } else { - memcpy(parent->keys + pslot + 1, right->keys, + memcpy(&parent->ptrs[pslot + 1].key, + &right->ptrs[0].key, sizeof(struct btrfs_disk_key)); BUG_ON(list_empty(&parent_buf->dirty)); } @@ -387,7 +393,7 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, ret = wret; } else { /* update the parent key to reflect our changes */ - memcpy(parent->keys + pslot, mid->keys, + memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key, sizeof(struct btrfs_disk_key)); BUG_ON(list_empty(&parent_buf->dirty)); } @@ -407,7 +413,7 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, } } /* double check we haven't messed things up */ - check_block(path, level); + check_block(root, path, level); if (orig_ptr != btrfs_node_blockptr(&path->nodes[level]->node, path->slots[level])) BUG(); @@ -456,7 +462,7 @@ again: BUG_ON(!cow && ins_len); c = &b->node; p->nodes[level] = b; - ret = check_block(p, level); + ret = check_block(root, p, level); if (ret) return -1; ret = bin_search(c, key, &slot); @@ -465,7 +471,7 @@ again: slot -= 1; p->slots[level] = slot; if (ins_len > 0 && btrfs_header_nritems(&c->header) == - NODEPTRS_PER_BLOCK) { + BTRFS_NODEPTRS_PER_BLOCK(root)) { int sret = split_node(root, p, level); BUG_ON(sret > 0); if (sret) @@ -488,7 +494,7 @@ again: } else { struct btrfs_leaf *l = (struct btrfs_leaf *)c; p->slots[level] = slot; - if (ins_len > 0 && btrfs_leaf_free_space(l) < + if (ins_len > 0 && btrfs_leaf_free_space(root, l) < sizeof(struct btrfs_item) + ins_len) { int sret = split_leaf(root, p, ins_len); BUG_ON(sret > 0); @@ -525,7 +531,7 @@ static int fixup_low_keys(struct btrfs_root *root, if (!path->nodes[i]) break; t = &path->nodes[i]->node; - memcpy(t->keys + tslot, key, sizeof(*key)); + memcpy(&t->ptrs[tslot].key, key, sizeof(*key)); BUG_ON(list_empty(&path->nodes[i]->dirty)); if (tslot != 0) break; @@ -552,7 +558,7 @@ static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst_buf, src_nritems = btrfs_header_nritems(&src->header); dst_nritems = btrfs_header_nritems(&dst->header); - push_items = NODEPTRS_PER_BLOCK - dst_nritems; + push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; if (push_items <= 0) { return 1; } @@ -560,16 +566,12 @@ static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst_buf, if (src_nritems < push_items) push_items = src_nritems; - memcpy(dst->keys + dst_nritems, src->keys, - push_items * sizeof(struct btrfs_disk_key)); - memcpy(dst->blockptrs + dst_nritems, src->blockptrs, - push_items * sizeof(u64)); + memcpy(dst->ptrs + dst_nritems, src->ptrs, + push_items * sizeof(struct btrfs_key_ptr)); if (push_items < src_nritems) { - memmove(src->keys, src->keys + push_items, + memmove(src->ptrs, src->ptrs + push_items, (src_nritems - push_items) * - sizeof(struct btrfs_disk_key)); - memmove(src->blockptrs, src->blockptrs + push_items, - (src_nritems - push_items) * sizeof(u64)); + sizeof(struct btrfs_key_ptr)); } btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); @@ -601,7 +603,7 @@ static int balance_node_right(struct btrfs_root *root, src_nritems = btrfs_header_nritems(&src->header); dst_nritems = btrfs_header_nritems(&dst->header); - push_items = NODEPTRS_PER_BLOCK - dst_nritems; + push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; if (push_items <= 0) { return 1; } @@ -613,14 +615,10 @@ static int balance_node_right(struct btrfs_root *root, if (max_push < push_items) push_items = max_push; - memmove(dst->keys + push_items, dst->keys, - dst_nritems * sizeof(struct btrfs_disk_key)); - memmove(dst->blockptrs + push_items, dst->blockptrs, - dst_nritems * sizeof(u64)); - memcpy(dst->keys, src->keys + src_nritems - push_items, - push_items * sizeof(struct btrfs_disk_key)); - memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items, - push_items * sizeof(u64)); + memmove(dst->ptrs + push_items, dst->ptrs, + dst_nritems * sizeof(struct btrfs_key_ptr)); + memcpy(dst->ptrs, src->ptrs + src_nritems - push_items, + push_items * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); @@ -650,7 +648,7 @@ static int insert_new_root(struct btrfs_root *root, t = btrfs_alloc_free_block(root); c = &t->node; - memset(c, 0, sizeof(c)); + memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); btrfs_set_header_level(&c->header, level); btrfs_set_header_blocknr(&c->header, t->blocknr); @@ -660,8 +658,8 @@ static int insert_new_root(struct btrfs_root *root, if (btrfs_is_leaf(lower)) lower_key = &((struct btrfs_leaf *)lower)->items[0].key; else - lower_key = lower->keys; - memcpy(c->keys, lower_key, sizeof(struct btrfs_disk_key)); + lower_key = &lower->ptrs[0].key; + memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->blocknr); /* the super has an extra ref to root->node */ btrfs_block_release(root, root->node); @@ -693,19 +691,15 @@ static int insert_ptr(struct btrfs_root *root, nritems = btrfs_header_nritems(&lower->header); if (slot > nritems) BUG(); - if (nritems == NODEPTRS_PER_BLOCK) + if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) BUG(); if (slot != nritems) { - memmove(lower->keys + slot + 1, lower->keys + slot, - (nritems - slot) * sizeof(struct btrfs_disk_key)); - memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot, - (nritems - slot) * sizeof(u64)); + memmove(lower->ptrs + slot + 1, lower->ptrs + slot, + (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - memcpy(lower->keys + slot, key, sizeof(struct btrfs_disk_key)); + memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); - if (lower->keys[1].objectid == 0) - BUG(); BUG_ON(list_empty(&path->nodes[level]->dirty)); return 0; } @@ -747,17 +741,16 @@ static int split_node(struct btrfs_root *root, struct btrfs_path *path, btrfs_set_header_parentid(&split->header, btrfs_header_parentid(&root->node->node.header)); mid = (c_nritems + 1) / 2; - memcpy(split->keys, c->keys + mid, - (c_nritems - mid) * sizeof(struct btrfs_disk_key)); - memcpy(split->blockptrs, c->blockptrs + mid, - (c_nritems - mid) * sizeof(u64)); + memcpy(split->ptrs, c->ptrs + mid, + (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(&split->header, c_nritems - mid); btrfs_set_header_nritems(&c->header, mid); ret = 0; BUG_ON(list_empty(&t->dirty)); - wret = insert_ptr(root, path, split->keys, split_buffer->blocknr, - path->slots[level + 1] + 1, level + 1); + wret = insert_ptr(root, path, &split->ptrs[0].key, + split_buffer->blocknr, path->slots[level + 1] + 1, + level + 1); if (wret) ret = wret; @@ -825,7 +818,7 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, right_buf = read_tree_block(root, btrfs_node_blockptr(&upper->node, slot + 1)); right = &right_buf->leaf; - free_space = btrfs_leaf_free_space(right); + free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, right_buf); return 1; @@ -833,7 +826,7 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, /* cow and double check */ btrfs_cow_block(root, right_buf, upper, slot + 1, &right_buf); right = &right_buf->leaf; - free_space = btrfs_leaf_free_space(right); + free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, right_buf); return 1; @@ -857,15 +850,14 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, right_nritems = btrfs_header_nritems(&right->header); /* push left to right */ push_space = btrfs_item_end(left->items + left_nritems - push_items); - push_space -= leaf_data_end(left); + push_space -= leaf_data_end(root, left); /* make room in the right data area */ - memmove(right->data + leaf_data_end(right) - push_space, - right->data + leaf_data_end(right), - LEAF_DATA_SIZE - leaf_data_end(right)); + memmove(btrfs_leaf_data(right) + leaf_data_end(root, right) - + push_space, btrfs_leaf_data(right) + leaf_data_end(root, right), + BTRFS_LEAF_DATA_SIZE(root) - leaf_data_end(root, right)); /* copy from the left data area */ - memcpy(right->data + LEAF_DATA_SIZE - push_space, - left->data + leaf_data_end(left), - push_space); + memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(left) + leaf_data_end(root, left), push_space); memmove(right->items + push_items, right->items, right_nritems * sizeof(struct btrfs_item)); /* copy the items from left to right */ @@ -875,7 +867,7 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, /* update the item pointers */ right_nritems += push_items; btrfs_set_header_nritems(&right->header, right_nritems); - push_space = LEAF_DATA_SIZE; + push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { btrfs_set_item_offset(right->items + i, push_space - btrfs_item_size(right->items + i)); @@ -886,7 +878,7 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(list_empty(&left_buf->dirty)); BUG_ON(list_empty(&right_buf->dirty)); - memcpy(upper->node.keys + slot + 1, + memcpy(&upper->node.ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); BUG_ON(list_empty(&upper->dirty)); @@ -932,7 +924,7 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, t = read_tree_block(root, btrfs_node_blockptr(&path->nodes[1]->node, slot - 1)); left = &t->leaf; - free_space = btrfs_leaf_free_space(left); + free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, t); return 1; @@ -941,7 +933,7 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, /* cow and double check */ btrfs_cow_block(root, t, path->nodes[1], slot - 1, &t); left = &t->leaf; - free_space = btrfs_leaf_free_space(left); + free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, t); return 1; @@ -964,17 +956,19 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, /* push data from right to left */ memcpy(left->items + btrfs_header_nritems(&left->header), right->items, push_items * sizeof(struct btrfs_item)); - push_space = LEAF_DATA_SIZE - + push_space = BTRFS_LEAF_DATA_SIZE(root) - btrfs_item_offset(right->items + push_items -1); - memcpy(left->data + leaf_data_end(left) - push_space, - right->data + btrfs_item_offset(right->items + push_items - 1), + memcpy(btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space, + btrfs_leaf_data(right) + + btrfs_item_offset(right->items + push_items - 1), push_space); old_left_nritems = btrfs_header_nritems(&left->header); BUG_ON(old_left_nritems < 0); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { - u16 ioff = btrfs_item_offset(left->items + i); - btrfs_set_item_offset(left->items + i, ioff - (LEAF_DATA_SIZE - + u32 ioff = btrfs_item_offset(left->items + i); + btrfs_set_item_offset(left->items + i, ioff - + (BTRFS_LEAF_DATA_SIZE(root) - btrfs_item_offset(left->items + old_left_nritems - 1))); } @@ -982,16 +976,17 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, /* fixup right node */ push_space = btrfs_item_offset(right->items + push_items - 1) - - leaf_data_end(right); - memmove(right->data + LEAF_DATA_SIZE - push_space, right->data + - leaf_data_end(right), push_space); + leaf_data_end(root, right); + memmove(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - + push_space, btrfs_leaf_data(right) + + leaf_data_end(root, right), push_space); memmove(right->items, right->items + push_items, (btrfs_header_nritems(&right->header) - push_items) * sizeof(struct btrfs_item)); btrfs_set_header_nritems(&right->header, btrfs_header_nritems(&right->header) - push_items); - push_space = LEAF_DATA_SIZE; + push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < btrfs_header_nritems(&right->header); i++) { btrfs_set_item_offset(right->items + i, push_space - @@ -1051,12 +1046,12 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, if (wret < 0) return wret; } - l_buf = path->nodes[0]; l = &l_buf->leaf; /* did the pushes work? */ - if (btrfs_leaf_free_space(l) >= sizeof(struct btrfs_item) + data_size) + if (btrfs_leaf_free_space(root, l) >= + sizeof(struct btrfs_item) + data_size) return 0; if (!path->nodes[1]) { @@ -1071,16 +1066,16 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(!right_buffer); BUG_ON(mid == nritems); right = &right_buffer->leaf; - memset(right, 0, sizeof(*right)); + memset(&right->header, 0, sizeof(right->header)); if (mid <= slot) { /* FIXME, just alloc a new leaf here */ if (leaf_space_used(l, mid, nritems - mid) + space_needed > - LEAF_DATA_SIZE) + BTRFS_LEAF_DATA_SIZE(root)) BUG(); } else { /* FIXME, just alloc a new leaf here */ if (leaf_space_used(l, 0, mid + 1) + space_needed > - LEAF_DATA_SIZE) + BTRFS_LEAF_DATA_SIZE(root)) BUG(); } btrfs_set_header_nritems(&right->header, nritems - mid); @@ -1088,15 +1083,18 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, btrfs_header_parentid(&root->node->node.header)); - data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(l); + data_copy_size = btrfs_item_end(l->items + mid) - + leaf_data_end(root, l); memcpy(right->items, l->items + mid, (nritems - mid) * sizeof(struct btrfs_item)); - memcpy(right->data + LEAF_DATA_SIZE - data_copy_size, - l->data + leaf_data_end(l), data_copy_size); - rt_data_off = LEAF_DATA_SIZE - btrfs_item_end(l->items + mid); + memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - + data_copy_size, btrfs_leaf_data(l) + + leaf_data_end(root, l), data_copy_size); + rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_end(l->items + mid); for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - u16 ioff = btrfs_item_offset(right->items + i); + u32 ioff = btrfs_item_offset(right->items + i); btrfs_set_item_offset(right->items + i, ioff + rt_data_off); } @@ -1156,9 +1154,9 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, leaf = &leaf_buf->leaf; nritems = btrfs_header_nritems(&leaf->header); - data_end = leaf_data_end(leaf); + data_end = leaf_data_end(root, leaf); - if (btrfs_leaf_free_space(leaf) < + if (btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item) + data_size) BUG(); @@ -1173,7 +1171,7 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u16 ioff = btrfs_item_offset(leaf->items + i); + u32 ioff = btrfs_item_offset(leaf->items + i); btrfs_set_item_offset(leaf->items + i, ioff - data_size); } @@ -1183,7 +1181,8 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ - memmove(leaf->data + data_end - data_size, leaf->data + + memmove(btrfs_leaf_data(leaf) + data_end - data_size, + btrfs_leaf_data(leaf) + data_end, old_data - data_end); data_end = old_data; } @@ -1192,7 +1191,7 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, sizeof(struct btrfs_disk_key)); btrfs_set_item_offset(leaf->items + slot, data_end - data_size); btrfs_set_item_size(leaf->items + slot, data_size); - memcpy(leaf->data + data_end - data_size, data, data_size); + memcpy(btrfs_leaf_data(leaf) + data_end - data_size, data, data_size); btrfs_set_header_nritems(&leaf->header, nritems + 1); ret = 0; @@ -1200,9 +1199,9 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, ret = fixup_low_keys(root, &path, &disk_key, 1); BUG_ON(list_empty(&leaf_buf->dirty)); - if (btrfs_leaf_free_space(leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) BUG(); - check_leaf(&path, 0); + check_leaf(root, &path, 0); out: btrfs_release_path(root, &path); return ret; @@ -1227,11 +1226,8 @@ static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, node = &parent->node; nritems = btrfs_header_nritems(&node->header); if (slot != nritems -1) { - memmove(node->keys + slot, node->keys + slot + 1, - sizeof(struct btrfs_disk_key) * (nritems - slot - 1)); - memmove(node->blockptrs + slot, - node->blockptrs + slot + 1, - sizeof(u64) * (nritems - slot - 1)); + memmove(node->ptrs + slot, node->ptrs + slot + 1, + sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); } nritems--; btrfs_set_header_nritems(&node->header, nritems); @@ -1240,7 +1236,8 @@ static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, /* just turn the root into a leaf and break */ btrfs_set_header_level(&root->node->node.header, 0); } else if (slot == 0) { - wret = fixup_low_keys(root, path, node->keys, level + 1); + wret = fixup_low_keys(root, path, &node->ptrs[0].key, + level + 1); if (wret) ret = wret; } @@ -1272,12 +1269,12 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) if (slot != nritems - 1) { int i; - int data_end = leaf_data_end(leaf); - memmove(leaf->data + data_end + dsize, - leaf->data + data_end, + int data_end = leaf_data_end(root, leaf); + memmove(btrfs_leaf_data(leaf) + data_end + dsize, + btrfs_leaf_data(leaf) + data_end, doff - data_end); for (i = slot + 1; i < nritems; i++) { - u16 ioff = btrfs_item_offset(leaf->items + i); + u32 ioff = btrfs_item_offset(leaf->items + i); btrfs_set_item_offset(leaf->items + i, ioff + dsize); } memmove(leaf->items + slot, leaf->items + slot + 1, @@ -1311,7 +1308,7 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) BUG_ON(list_empty(&leaf_buf->dirty)); /* delete the leaf if it is mostly empty */ - if (used < LEAF_DATA_SIZE / 3) { + if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { /* push_leaf_left fixes the path. * make sure the path still points to our leaf * for possible call to del_ptr below diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 05c7707263f5..c61ad0f69be9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -5,7 +5,6 @@ #include "kerncompat.h" #define BTRFS_MAGIC "_BtRfS_M" -#define BTRFS_BLOCKSIZE 1024 #define BTRFS_ROOT_TREE_OBJECTID 1 #define BTRFS_EXTENT_TREE_OBJECTID 2 @@ -52,8 +51,11 @@ struct btrfs_header { } __attribute__ ((__packed__)); #define BTRFS_MAX_LEVEL 8 -#define NODEPTRS_PER_BLOCK ((BTRFS_BLOCKSIZE - sizeof(struct btrfs_header)) / \ - (sizeof(struct btrfs_disk_key) + sizeof(u64))) +#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->blocksize - \ + sizeof(struct btrfs_header)) / \ + (sizeof(struct btrfs_disk_key) + sizeof(u64))) +#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) +#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) struct btrfs_buffer; @@ -86,6 +88,7 @@ struct btrfs_root { int ref_cows; struct btrfs_root_item root_item; struct btrfs_key root_key; + u32 blocksize; }; /* @@ -97,7 +100,7 @@ struct btrfs_super_block { __le64 blocknr; /* this block number */ __le32 csum; __le64 magic; - __le16 blocksize; + __le32 blocksize; __le64 generation; __le64 root; __le64 total_blocks; @@ -111,7 +114,7 @@ struct btrfs_super_block { */ struct btrfs_item { struct btrfs_disk_key key; - __le16 offset; + __le32 offset; __le16 size; } __attribute__ ((__packed__)); @@ -122,24 +125,23 @@ struct btrfs_item { * The data is separate from the items to get the keys closer together * during searches. */ -#define LEAF_DATA_SIZE (BTRFS_BLOCKSIZE - sizeof(struct btrfs_header)) struct btrfs_leaf { struct btrfs_header header; - union { - struct btrfs_item items[LEAF_DATA_SIZE/ - sizeof(struct btrfs_item)]; - u8 data[BTRFS_BLOCKSIZE - sizeof(struct btrfs_header)]; - }; + struct btrfs_item items[]; } __attribute__ ((__packed__)); /* * all non-leaf blocks are nodes, they hold only keys and pointers to * other blocks */ +struct btrfs_key_ptr { + struct btrfs_disk_key key; + __le64 blockptr; +} __attribute__ ((__packed__)); + struct btrfs_node { struct btrfs_header header; - struct btrfs_disk_key keys[NODEPTRS_PER_BLOCK]; - __le64 blockptrs[NODEPTRS_PER_BLOCK]; + struct btrfs_key_ptr ptrs[]; } __attribute__ ((__packed__)); /* @@ -186,28 +188,28 @@ static inline void btrfs_set_extent_refs(struct btrfs_extent_item *ei, u32 val) static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) { - return le64_to_cpu(n->blockptrs[nr]); + return le64_to_cpu(n->ptrs[nr].blockptr); } static inline void btrfs_set_node_blockptr(struct btrfs_node *n, int nr, u64 val) { - n->blockptrs[nr] = cpu_to_le64(val); + n->ptrs[nr].blockptr = cpu_to_le64(val); } -static inline u16 btrfs_item_offset(struct btrfs_item *item) +static inline u32 btrfs_item_offset(struct btrfs_item *item) { - return le16_to_cpu(item->offset); + return le32_to_cpu(item->offset); } -static inline void btrfs_set_item_offset(struct btrfs_item *item, u16 val) +static inline void btrfs_set_item_offset(struct btrfs_item *item, u32 val) { - item->offset = cpu_to_le16(val); + item->offset = cpu_to_le32(val); } -static inline u16 btrfs_item_end(struct btrfs_item *item) +static inline u32 btrfs_item_end(struct btrfs_item *item) { - return le16_to_cpu(item->offset) + le16_to_cpu(item->size); + return le32_to_cpu(item->offset) + le16_to_cpu(item->size); } static inline u16 btrfs_item_size(struct btrfs_item *item) @@ -390,20 +392,26 @@ static inline void btrfs_set_super_blocks_used(struct btrfs_super_block *s, s->blocks_used = cpu_to_le64(val); } -static inline u16 btrfs_super_blocksize(struct btrfs_super_block *s) +static inline u32 btrfs_super_blocksize(struct btrfs_super_block *s) { - return le16_to_cpu(s->blocksize); + return le32_to_cpu(s->blocksize); } static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s, - u16 val) + u32 val) { - s->blocksize = cpu_to_le16(val); + s->blocksize = cpu_to_le32(val); +} + +static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) +{ + return (u8 *)l->items; } /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ - ((type *)((leaf)->data + btrfs_item_offset((leaf)->items + (slot)))) + ((type *)(btrfs_leaf_data(leaf) + \ + btrfs_item_offset((leaf)->items + (slot)))) struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root); int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf); @@ -416,7 +424,7 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path); int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *key, void *data, int data_size); int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); -int btrfs_leaf_free_space(struct btrfs_leaf *leaf); +int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap); int btrfs_finish_extent_commit(struct btrfs_root *root); int btrfs_del_root(struct btrfs_root *root, struct btrfs_key *key); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3d4bf6833f2a..8d9457b5aef5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -46,7 +46,8 @@ struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) { struct btrfs_buffer *buf; int ret; - buf = malloc(sizeof(struct btrfs_buffer)); + + buf = malloc(sizeof(struct btrfs_buffer) + root->blocksize); if (!buf) return buf; allocated_blocks++; @@ -84,7 +85,7 @@ struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) { - loff_t offset = blocknr * BTRFS_BLOCKSIZE; + loff_t offset = blocknr * root->blocksize; struct btrfs_buffer *buf; int ret; @@ -95,8 +96,8 @@ struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) buf = alloc_tree_block(root, blocknr); if (!buf) return NULL; - ret = pread(root->fp, &buf->node, BTRFS_BLOCKSIZE, offset); - if (ret != BTRFS_BLOCKSIZE) { + ret = pread(root->fp, &buf->node, root->blocksize, offset); + if (ret != root->blocksize) { free(buf); return NULL; } @@ -127,13 +128,13 @@ int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { u64 blocknr = buf->blocknr; - loff_t offset = blocknr * BTRFS_BLOCKSIZE; + loff_t offset = blocknr * root->blocksize; int ret; if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); - ret = pwrite(root->fp, &buf->node, BTRFS_BLOCKSIZE, offset); - if (ret != BTRFS_BLOCKSIZE) + ret = pwrite(root->fp, &buf->node, root->blocksize, offset); + if (ret != root->blocksize) return ret; return 0; } @@ -215,7 +216,8 @@ int btrfs_commit_transaction(struct btrfs_root *root, return ret; } -static int __setup_root(struct btrfs_root *root, u64 objectid, int fp) +static int __setup_root(struct btrfs_super_block *super, + struct btrfs_root *root, u64 objectid, int fp) { INIT_LIST_HEAD(&root->trans); INIT_LIST_HEAD(&root->cache); @@ -223,6 +225,8 @@ static int __setup_root(struct btrfs_root *root, u64 objectid, int fp) root->fp = fp; root->node = NULL; root->commit_root = NULL; + root->blocksize = btrfs_super_blocksize(super); + root->ref_cows = 0; memset(&root->current_insert, 0, sizeof(root->current_insert)); memset(&root->last_insert, 0, sizeof(root->last_insert)); memset(&root->root_key, 0, sizeof(root->root_key)); @@ -230,19 +234,19 @@ static int __setup_root(struct btrfs_root *root, u64 objectid, int fp) return 0; } -static int find_and_setup_root(struct btrfs_root *tree_root, u64 objectid, - struct btrfs_root *root, int fp) +static int find_and_setup_root(struct btrfs_super_block *super, + struct btrfs_root *tree_root, u64 objectid, + struct btrfs_root *root, int fp) { int ret; - __setup_root(root, objectid, fp); + __setup_root(super, root, objectid, fp); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); root->node = read_tree_block(root, btrfs_root_blocknr(&root->root_item)); - root->ref_cows = 0; BUG_ON(!root->node); return 0; } @@ -277,28 +281,28 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) INIT_RADIX_TREE(&tree_root->cache_radix, GFP_KERNEL); ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); + BTRFS_SUPER_INFO_OFFSET); if (ret == 0 || btrfs_super_root(super) == 0) { printf("making new FS!\n"); - ret = mkfs(fp, 0, BTRFS_BLOCKSIZE); + ret = mkfs(fp, 0, 1024); if (ret) return NULL; ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); + BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(struct btrfs_super_block)) return NULL; } BUG_ON(ret < 0); - __setup_root(tree_root, BTRFS_ROOT_TREE_OBJECTID, fp); + __setup_root(super, tree_root, BTRFS_ROOT_TREE_OBJECTID, fp); tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(tree_root, BTRFS_EXTENT_TREE_OBJECTID, + ret = find_and_setup_root(super, tree_root, BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp); BUG_ON(ret); - ret = find_and_setup_root(tree_root, BTRFS_FS_TREE_OBJECTID, + ret = find_and_setup_root(super, tree_root, BTRFS_FS_TREE_OBJECTID, root, fp); BUG_ON(ret); @@ -313,7 +317,7 @@ int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s) int ret; btrfs_set_super_root(s, root->tree_root->node->blocknr); ret = pwrite(root->fp, s, sizeof(*s), - BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); + BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(*s)) { fprintf(stderr, "failed to write new super block err %d\n", ret); return ret; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c22a61f9233f..5771bb90acb2 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -5,12 +5,12 @@ struct btrfs_buffer { u64 blocknr; int count; + struct list_head dirty; + struct list_head cache; union { struct btrfs_node node; struct btrfs_leaf leaf; }; - struct list_head dirty; - struct list_head cache; }; struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); @@ -24,9 +24,8 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s); -int mkfs(int fd, u64 num_blocks, u16 blocksize); +int mkfs(int fd, u64 num_blocks, u32 blocksize); - -#define BTRFS_SUPER_INFO_OFFSET(bs) (16 * (bs)) +#define BTRFS_SUPER_INFO_OFFSET (16 * 1024) #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3767744f659e..d4f1ec328399 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -143,7 +143,6 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) struct btrfs_key key; struct btrfs_root *extent_root = root->extent_root; int ret; - struct btrfs_item *item; struct btrfs_extent_item *ei; struct btrfs_key ins; u32 refs; @@ -161,9 +160,8 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) printf("failed to find %Lu\n", key.objectid); BUG(); } - item = path.nodes[0]->leaf.items + path.slots[0]; - ei = (struct btrfs_extent_item *)(path.nodes[0]->leaf.data + - btrfs_item_offset(item)); + ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + struct btrfs_extent_item); BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index dd14ed4fea6c..317d20ce759d 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -10,19 +10,20 @@ #include "ctree.h" #include "disk-io.h" -int mkfs(int fd, u64 num_blocks, u16 blocksize) +int mkfs(int fd, u64 num_blocks, u32 blocksize) { struct btrfs_super_block super; - struct btrfs_leaf empty_leaf; + struct btrfs_leaf *empty_leaf; struct btrfs_root_item root_item; struct btrfs_item item; struct btrfs_extent_item extent_item; char *block; int ret; - u16 itemoff; + u32 itemoff; + u32 start_block = BTRFS_SUPER_INFO_OFFSET / blocksize; - btrfs_set_super_blocknr(&super, 16); - btrfs_set_super_root(&super, 17); + btrfs_set_super_blocknr(&super, start_block); + btrfs_set_super_root(&super, start_block + 1); strcpy((char *)(&super.magic), BTRFS_MAGIC); btrfs_set_super_blocksize(&super, blocksize); btrfs_set_super_total_blocks(&super, num_blocks); @@ -32,168 +33,98 @@ int mkfs(int fd, u64 num_blocks, u16 blocksize) memset(block, 0, blocksize); BUG_ON(sizeof(super) > blocksize); memcpy(block, &super, sizeof(super)); - ret = pwrite(fd, block, blocksize, BTRFS_SUPER_INFO_OFFSET(blocksize)); + ret = pwrite(fd, block, blocksize, BTRFS_SUPER_INFO_OFFSET); BUG_ON(ret != blocksize); /* create the tree of root objects */ - memset(&empty_leaf, 0, sizeof(empty_leaf)); - btrfs_set_header_parentid(&empty_leaf.header, BTRFS_ROOT_TREE_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf.header, 17); - btrfs_set_header_nritems(&empty_leaf.header, 2); + empty_leaf = malloc(blocksize); + memset(empty_leaf, 0, blocksize); + btrfs_set_header_parentid(&empty_leaf->header, + BTRFS_ROOT_TREE_OBJECTID); + btrfs_set_header_blocknr(&empty_leaf->header, start_block + 1); + btrfs_set_header_nritems(&empty_leaf->header, 2); /* create the items for the root tree */ - btrfs_set_root_blocknr(&root_item, 18); + btrfs_set_root_blocknr(&root_item, start_block + 2); btrfs_set_root_refs(&root_item, 1); - itemoff = LEAF_DATA_SIZE - sizeof(root_item); + itemoff = __BTRFS_LEAF_DATA_SIZE(blocksize) - sizeof(root_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_item_size(&item, sizeof(root_item)); btrfs_set_key_objectid(&item.key, BTRFS_EXTENT_TREE_OBJECTID); btrfs_set_key_offset(&item.key, 0); btrfs_set_key_flags(&item.key, 0); - memcpy(empty_leaf.items, &item, sizeof(item)); - memcpy(empty_leaf.data + itemoff, &root_item, sizeof(root_item)); + memcpy(empty_leaf->items, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + itemoff, + &root_item, sizeof(root_item)); - btrfs_set_root_blocknr(&root_item, 19); + btrfs_set_root_blocknr(&root_item, start_block + 3); itemoff = itemoff - sizeof(root_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); - memcpy(empty_leaf.items + 1, &item, sizeof(item)); - memcpy(empty_leaf.data + itemoff, &root_item, sizeof(root_item)); - ret = pwrite(fd, &empty_leaf, blocksize, 17 * blocksize); + memcpy(empty_leaf->items + 1, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + itemoff, + &root_item, sizeof(root_item)); + ret = pwrite(fd, empty_leaf, blocksize, (start_block + 1) * blocksize); /* create the items for the extent tree */ - btrfs_set_header_parentid(&empty_leaf.header, + btrfs_set_header_parentid(&empty_leaf->header, BTRFS_EXTENT_TREE_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf.header, 18); - btrfs_set_header_nritems(&empty_leaf.header, 4); + btrfs_set_header_blocknr(&empty_leaf->header, start_block + 2); + btrfs_set_header_nritems(&empty_leaf->header, 4); /* item1, reserve blocks 0-16 */ btrfs_set_key_objectid(&item.key, 0); - btrfs_set_key_offset(&item.key, 17); + btrfs_set_key_offset(&item.key, start_block + 1); btrfs_set_key_flags(&item.key, 0); - itemoff = LEAF_DATA_SIZE - sizeof(struct btrfs_extent_item); + itemoff = __BTRFS_LEAF_DATA_SIZE(blocksize) - + sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_item_size(&item, sizeof(struct btrfs_extent_item)); btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, 0); - memcpy(empty_leaf.items, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); + memcpy(empty_leaf->items, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), + &extent_item, btrfs_item_size(&item)); /* item2, give block 17 to the root */ - btrfs_set_key_objectid(&item.key, 17); + btrfs_set_key_objectid(&item.key, start_block + 1); btrfs_set_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_extent_owner(&extent_item, BTRFS_ROOT_TREE_OBJECTID); - memcpy(empty_leaf.items + 1, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); + memcpy(empty_leaf->items + 1, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), + &extent_item, btrfs_item_size(&item)); /* item3, give block 18 to the extent root */ - btrfs_set_key_objectid(&item.key, 18); + btrfs_set_key_objectid(&item.key, start_block + 2); btrfs_set_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_extent_owner(&extent_item, BTRFS_EXTENT_TREE_OBJECTID); - memcpy(empty_leaf.items + 2, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); + memcpy(empty_leaf->items + 2, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), + &extent_item, btrfs_item_size(&item)); /* item4, give block 19 to the FS root */ - btrfs_set_key_objectid(&item.key, 19); + btrfs_set_key_objectid(&item.key, start_block + 3); btrfs_set_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_extent_owner(&extent_item, BTRFS_FS_TREE_OBJECTID); - memcpy(empty_leaf.items + 3, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); - ret = pwrite(fd, &empty_leaf, blocksize, 18 * blocksize); - if (ret != sizeof(empty_leaf)) + memcpy(empty_leaf->items + 3, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), + &extent_item, btrfs_item_size(&item)); + ret = pwrite(fd, empty_leaf, blocksize, (start_block + 2) * blocksize); + if (ret != blocksize) return -1; /* finally create the FS root */ - btrfs_set_header_parentid(&empty_leaf.header, BTRFS_FS_TREE_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf.header, 19); - btrfs_set_header_nritems(&empty_leaf.header, 0); - ret = pwrite(fd, &empty_leaf, blocksize, 19 * blocksize); - if (ret != sizeof(empty_leaf)) + btrfs_set_header_parentid(&empty_leaf->header, BTRFS_FS_TREE_OBJECTID); + btrfs_set_header_blocknr(&empty_leaf->header, start_block + 3); + btrfs_set_header_nritems(&empty_leaf->header, 0); + ret = pwrite(fd, empty_leaf, blocksize, (start_block + 3) * blocksize); + if (ret != blocksize) return -1; return 0; } - -#if 0 -int mkfs(int fd) -{ - struct btrfs_root_info info[2]; - struct btrfs_leaf empty_leaf; - struct btrfs_item item; - struct btrfs_extent_item extent_item; - int ret; - - /* setup the super block area */ - memset(info, 0, sizeof(info)); - btrfs_set_root_blocknr(info, 16); - btrfs_set_root_objectid(info, 1); - btrfs_set_root_tree_root(info, 17); - - btrfs_set_root_blocknr(info + 1, 16); - btrfs_set_root_objectid(info + 1, 2); - btrfs_set_root_tree_root(info + 1, 18); - - ret = pwrite(fd, info, sizeof(info), - BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); - if (ret != sizeof(info)) - return -1; - - /* create leaves for the tree root and extent root */ - memset(&empty_leaf, 0, sizeof(empty_leaf)); - btrfs_set_header_parentid(&empty_leaf.header, 1); - btrfs_set_header_blocknr(&empty_leaf.header, 17); - ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 17 * BTRFS_BLOCKSIZE); - if (ret != sizeof(empty_leaf)) - return -1; - - btrfs_set_header_parentid(&empty_leaf.header, 2); - btrfs_set_header_blocknr(&empty_leaf.header, 18); - btrfs_set_header_nritems(&empty_leaf.header, 3); - - /* item1, reserve blocks 0-16 */ - btrfs_set_key_objectid(&item.key, 0); - btrfs_set_key_offset(&item.key, 17); - btrfs_set_key_flags(&item.key, 0); - btrfs_set_item_offset(&item, LEAF_DATA_SIZE - - sizeof(struct btrfs_extent_item)); - btrfs_set_item_size(&item, sizeof(struct btrfs_extent_item)); - btrfs_set_extent_refs(&extent_item, 1); - btrfs_set_extent_owner(&extent_item, 0); - memcpy(empty_leaf.items, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); - - /* item2, give block 17 to the root */ - btrfs_set_key_objectid(&item.key, 17); - btrfs_set_key_offset(&item.key, 1); - btrfs_set_item_offset(&item, LEAF_DATA_SIZE - - sizeof(struct btrfs_extent_item) * 2); - btrfs_set_extent_owner(&extent_item, 1); - memcpy(empty_leaf.items + 1, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); - - /* item3, give block 18 for the extent root */ - btrfs_set_key_objectid(&item.key, 18); - btrfs_set_key_offset(&item.key, 1); - btrfs_set_item_offset(&item, LEAF_DATA_SIZE - - sizeof(struct btrfs_extent_item) * 3); - btrfs_set_extent_owner(&extent_item, 2); - memcpy(empty_leaf.items + 2, &item, sizeof(item)); - memcpy(empty_leaf.data + btrfs_item_offset(&item), &extent_item, - btrfs_item_size(&item)); - ret = pwrite(fd, &empty_leaf, sizeof(empty_leaf), 18 * BTRFS_BLOCKSIZE); - if (ret != sizeof(empty_leaf)) - return -1; - return 0; -} -#endif diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index e769f36cf051..7a189eaa589f 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -5,7 +5,7 @@ #include "ctree.h" #include "disk-io.h" -void btrfs_print_leaf(struct btrfs_leaf *l) +void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) { int i; u32 nr = btrfs_header_nritems(&l->header); @@ -13,7 +13,8 @@ void btrfs_print_leaf(struct btrfs_leaf *l) struct btrfs_extent_item *ei; struct btrfs_root_item *ri; printf("leaf %Lu total ptrs %d free space %d\n", - btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(l)); + btrfs_header_blocknr(&l->header), nr, + btrfs_leaf_free_space(root, l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; @@ -25,7 +26,7 @@ void btrfs_print_leaf(struct btrfs_leaf *l) btrfs_item_offset(item), btrfs_item_size(item)); printf("\t\titem data %.*s\n", btrfs_item_size(item), - l->data + btrfs_item_offset(item)); + btrfs_leaf_data(l) + btrfs_item_offset(item)); ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); printf("\t\textent data refs %u owner %Lu\n", btrfs_extent_refs(ei), btrfs_extent_owner(ei)); @@ -46,18 +47,18 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) c = &t->node; nr = btrfs_header_nritems(&c->header); if (btrfs_is_leaf(c)) { - btrfs_print_leaf((struct btrfs_leaf *)c); + btrfs_print_leaf(root, (struct btrfs_leaf *)c); return; } printf("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, btrfs_header_level(&c->header), nr, - (u32)NODEPTRS_PER_BLOCK - nr); + (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); fflush(stdout); for (i = 0; i < nr; i++) { printf("\tkey %d (%Lu %u %Lu) block %Lu\n", i, - c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, - btrfs_node_blockptr(c, i)); + c->ptrs[i].key.objectid, c->ptrs[i].key.flags, + c->ptrs[i].key.offset, btrfs_node_blockptr(c, i)); fflush(stdout); } for (i = 0; i < nr; i++) { diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index e8d0b847c023..9236abda2a74 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,3 +1,3 @@ -void btrfs_print_leaf(struct btrfs_leaf *l); +void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t); diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index 66bdc57905d7..d9287cad35c9 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -73,7 +73,7 @@ int main(int ac, char **av) { printf("node %p level %d total ptrs %d free spc %lu\n", root->node, btrfs_header_level(&root->node->node.header), btrfs_header_nritems(&root->node->node.header), - NODEPTRS_PER_BLOCK - + BTRFS_NODEPTRS_PER_BLOCK(root) - btrfs_header_nritems(&root->node->node.header)); printf("all searches good, deleting some items\n"); i = 0; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0ab90cfea98f..03aa7c2c6336 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -31,7 +31,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, ret = 1; goto out; } - memcpy(item, l->data + btrfs_item_offset(l->items + slot), + memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), sizeof(*item)); btrfs_disk_key_to_cpu(key, &l->items[slot].key); btrfs_release_path(root, &path); @@ -55,7 +55,7 @@ int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key, BUG_ON(ret != 0); l = &path.nodes[0]->leaf; slot = path.slots[0]; - memcpy(l->data + btrfs_item_offset(l->items + slot), item, + memcpy(btrfs_item_ptr(l, slot, struct btrfs_root_item), item, sizeof(*item)); out: btrfs_release_path(root, &path); From a1516c8921ce2c148b69493e858d95bc79c92f01 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 14 Mar 2007 14:26:53 -0400 Subject: [PATCH 0046/2985] Btrfs: reorder key offset and flags Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 - fs/btrfs/ctree.h | 4 ++-- fs/btrfs/print-tree.c | 12 +++++++----- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index ff90e5eaf431..8142be88d71c 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,7 +1,6 @@ * cleanup, add more error checking, get rid of BUG_ONs * Make IO functions look more like the page cache * Fix ENOSPC handling -* make blocksize a mkfs parameter instead of #define * make a real mkfs and superblock * Do checksumming * Define FS objects in terms of different item types diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c61ad0f69be9..73ebc824924a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -26,14 +26,14 @@ */ struct btrfs_disk_key { __le64 objectid; - __le32 flags; __le64 offset; + __le32 flags; } __attribute__ ((__packed__)); struct btrfs_key { u64 objectid; - u32 flags; u64 offset; + u32 flags; } __attribute__ ((__packed__)); /* diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 7a189eaa589f..4c710190343d 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -18,11 +18,11 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; - printf("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n", + printf("\titem %d key (%Lu %Lu %u) itemoff %d itemsize %d\n", i, btrfs_key_objectid(&item->key), - btrfs_key_flags(&item->key), btrfs_key_offset(&item->key), + btrfs_key_flags(&item->key), btrfs_item_offset(item), btrfs_item_size(item)); printf("\t\titem data %.*s\n", btrfs_item_size(item), @@ -55,10 +55,12 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); fflush(stdout); for (i = 0; i < nr; i++) { - printf("\tkey %d (%Lu %u %Lu) block %Lu\n", + printf("\tkey %d (%Lu %Lu %u) block %Lu\n", i, - c->ptrs[i].key.objectid, c->ptrs[i].key.flags, - c->ptrs[i].key.offset, btrfs_node_blockptr(c, i)); + c->ptrs[i].key.objectid, + c->ptrs[i].key.offset, + c->ptrs[i].key.flags, + btrfs_node_blockptr(c, i)); fflush(stdout); } for (i = 0; i < nr; i++) { From 62e2749e03a855d98855f9ce032dbe72d5fad148 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 15 Mar 2007 12:56:47 -0400 Subject: [PATCH 0047/2985] Btrfs: Use a chunk of the key flags to record the item type. Add (untested and simple) directory item code Fix comp_keys to use the new key ordering Add btrfs_insert_empty_item Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/TODO | 1 + fs/btrfs/ctree.c | 51 ++++++++---- fs/btrfs/ctree.h | 182 ++++++++++++++++++++++++++++------------- fs/btrfs/dir-item.c | 102 +++++++++++++++++++++++ fs/btrfs/extent-tree.c | 17 ++-- fs/btrfs/hash.c | 86 +++++++++++++++++++ fs/btrfs/hasher.c | 23 ++++++ fs/btrfs/kerncompat.h | 1 + fs/btrfs/mkfs.c | 28 ++++--- fs/btrfs/print-tree.c | 36 +++++--- fs/btrfs/quick-test.c | 3 +- fs/btrfs/random-test.c | 10 ++- fs/btrfs/root-tree.c | 2 +- 14 files changed, 435 insertions(+), 109 deletions(-) create mode 100644 fs/btrfs/dir-item.c create mode 100644 fs/btrfs/hash.c create mode 100644 fs/btrfs/hasher.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 13ed6313ac85..ee78cdd08c04 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -3,7 +3,7 @@ CC=gcc CFLAGS = -g -Wall headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o \ - root-tree.o + root-tree.o dir-item.o hash.o # if you don't have sparse installed, use ls instead CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 8142be88d71c..249e2e90bea8 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,6 +8,7 @@ * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter +* Do actual block accounting * Port into the kernel * Add virtual filesystems, mountable snapshots * Get rid of struct ctree_path, limiting tree levels held at one time diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ece8de7f38ef..7edfbd468042 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -107,14 +107,14 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 1; if (k1.objectid < k2->objectid) return -1; - if (k1.flags > k2->flags) - return 1; - if (k1.flags < k2->flags) - return -1; if (k1.offset > k2->offset) return 1; if (k1.offset < k2->offset) return -1; + if (k1.flags > k2->flags) + return 1; + if (k1.flags < k2->flags) + return -1; return 0; } @@ -1122,8 +1122,8 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, - void *data, int data_size) +int btrfs_insert_empty_item(struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 data_size) { int ret = 0; int slot; @@ -1132,7 +1132,6 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, struct btrfs_buffer *leaf_buf; u32 nritems; unsigned int data_end; - struct btrfs_path path; struct btrfs_disk_key disk_key; btrfs_cpu_key_to_disk(&disk_key, cpu_key); @@ -1140,17 +1139,16 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, /* create a root if there isn't one */ if (!root->node) BUG(); - btrfs_init_path(&path); - ret = btrfs_search_slot(root, cpu_key, &path, data_size, 1); + ret = btrfs_search_slot(root, cpu_key, path, data_size, 1); if (ret == 0) { - btrfs_release_path(root, &path); + btrfs_release_path(root, path); return -EEXIST; } if (ret < 0) goto out; - slot_orig = path.slots[0]; - leaf_buf = path.nodes[0]; + slot_orig = path->slots[0]; + leaf_buf = path->nodes[0]; leaf = &leaf_buf->leaf; nritems = btrfs_header_nritems(&leaf->header); @@ -1160,7 +1158,7 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, sizeof(struct btrfs_item) + data_size) BUG(); - slot = path.slots[0]; + slot = path->slots[0]; BUG_ON(slot < 0); if (slot != nritems) { int i; @@ -1186,23 +1184,42 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, data_end, old_data - data_end); data_end = old_data; } - /* copy the new data in */ + /* setup the item for the new data */ memcpy(&leaf->items[slot].key, &disk_key, sizeof(struct btrfs_disk_key)); btrfs_set_item_offset(leaf->items + slot, data_end - data_size); btrfs_set_item_size(leaf->items + slot, data_size); - memcpy(btrfs_leaf_data(leaf) + data_end - data_size, data, data_size); btrfs_set_header_nritems(&leaf->header, nritems + 1); ret = 0; if (slot == 0) - ret = fixup_low_keys(root, &path, &disk_key, 1); + ret = fixup_low_keys(root, path, &disk_key, 1); BUG_ON(list_empty(&leaf_buf->dirty)); if (btrfs_leaf_free_space(root, leaf) < 0) BUG(); - check_leaf(root, &path, 0); + check_leaf(root, path, 0); out: + return ret; +} + +/* + * Given a key and some data, insert an item into the tree. + * This does all the path init required, making room in the tree if needed. + */ +int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, + void *data, u32 data_size) +{ + int ret = 0; + struct btrfs_path path; + u8 *ptr; + + btrfs_init_path(&path); + ret = btrfs_insert_empty_item(root, &path, cpu_key, data_size); + if (!ret) { + ptr = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], u8); + memcpy(ptr, data, data_size); + } btrfs_release_path(root, &path); return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 73ebc824924a..e8a26fd8ea9f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -58,39 +58,6 @@ struct btrfs_header { #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) struct btrfs_buffer; - -struct btrfs_root_item { - __le64 blocknr; - __le32 flags; - __le64 block_limit; - __le64 blocks_used; - __le32 refs; -}; - -/* - * in ram representation of the tree. extent_root is used for all allocations - * and for the extent tree extent_root root. current_insert is used - * only for the extent tree. - */ -struct btrfs_root { - struct btrfs_buffer *node; - struct btrfs_buffer *commit_root; - struct btrfs_root *extent_root; - struct btrfs_root *tree_root; - struct btrfs_key current_insert; - struct btrfs_key last_insert; - int fp; - struct radix_tree_root cache_radix; - struct radix_tree_root pinned_radix; - struct list_head trans; - struct list_head cache; - int cache_size; - int ref_cows; - struct btrfs_root_item root_item; - struct btrfs_key root_key; - u32 blocksize; -}; - /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -108,8 +75,7 @@ struct btrfs_super_block { } __attribute__ ((__packed__)); /* - * A leaf is full of items. The exact type of item is defined by - * the key flags parameter. offset and size tell us where to find + * A leaf is full of items. offset and size tell us where to find * the item in the leaf (relative to the start of the data area) */ struct btrfs_item { @@ -144,15 +110,6 @@ struct btrfs_node { struct btrfs_key_ptr ptrs[]; } __attribute__ ((__packed__)); -/* - * items in the extent btree are used to record the objectid of the - * owner of the block and the number of references - */ -struct btrfs_extent_item { - __le32 refs; - __le64 owner; -} __attribute__ ((__packed__)); - /* * btrfs_paths remember the path taken from the root down to the leaf. * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point @@ -166,6 +123,94 @@ struct btrfs_path { int slots[BTRFS_MAX_LEVEL]; }; +/* + * items in the extent btree are used to record the objectid of the + * owner of the block and the number of references + */ +struct btrfs_extent_item { + __le32 refs; + __le64 owner; +} __attribute__ ((__packed__)); + +struct btrfs_dir_item { + __le64 objectid; + __le16 flags; + u8 type; +} __attribute__ ((__packed__)); + +struct btrfs_root_item { + __le64 blocknr; + __le32 flags; + __le64 block_limit; + __le64 blocks_used; + __le32 refs; +}; + +/* + * in ram representation of the tree. extent_root is used for all allocations + * and for the extent tree extent_root root. current_insert is used + * only for the extent tree. + */ +struct btrfs_root { + struct btrfs_buffer *node; + struct btrfs_buffer *commit_root; + struct btrfs_root *extent_root; + struct btrfs_root *tree_root; + struct btrfs_key current_insert; + struct btrfs_key last_insert; + int fp; + struct radix_tree_root cache_radix; + struct radix_tree_root pinned_radix; + struct list_head trans; + struct list_head cache; + int cache_size; + int ref_cows; + struct btrfs_root_item root_item; + struct btrfs_key root_key; + u32 blocksize; +}; + + +/* the lower bits in the key flags defines the item type */ +#define BTRFS_KEY_TYPE_MAX 256 +#define BTRFS_KEY_TYPE_MASK (BTRFS_KEY_TYPE_MAX - 1) +#define BTRFS_INODE_ITEM_KEY 1 +#define BTRFS_DIR_ITEM_KEY 2 +#define BTRFS_ROOT_ITEM_KEY 3 +#define BTRFS_EXTENT_ITEM_KEY 4 +#define BTRFS_STRING_ITEM_KEY 5 + +static inline u64 btrfs_dir_objectid(struct btrfs_dir_item *d) +{ + return le64_to_cpu(d->objectid); +} + +static inline void btrfs_set_dir_objectid(struct btrfs_dir_item *d, u64 val) +{ + d->objectid = cpu_to_le64(val); +} + +static inline u16 btrfs_dir_flags(struct btrfs_dir_item *d) +{ + return le16_to_cpu(d->flags); +} + +static inline void btrfs_set_dir_flags(struct btrfs_dir_item *d, u16 val) +{ + d->flags = cpu_to_le16(val); +} + +static inline u8 btrfs_dir_type(struct btrfs_dir_item *d) +{ + return d->type; +} + +static inline void btrfs_set_dir_type(struct btrfs_dir_item *d, u8 val) +{ + d->type = val; +} + + static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) { return le64_to_cpu(ei->owner); @@ -238,39 +283,65 @@ static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, disk->objectid = cpu_to_le64(cpu->objectid); } -static inline u64 btrfs_key_objectid(struct btrfs_disk_key *disk) +static inline u64 btrfs_disk_key_objectid(struct btrfs_disk_key *disk) { return le64_to_cpu(disk->objectid); } -static inline void btrfs_set_key_objectid(struct btrfs_disk_key *disk, - u64 val) +static inline void btrfs_set_disk_key_objectid(struct btrfs_disk_key *disk, + u64 val) { disk->objectid = cpu_to_le64(val); } -static inline u64 btrfs_key_offset(struct btrfs_disk_key *disk) +static inline u64 btrfs_disk_key_offset(struct btrfs_disk_key *disk) { return le64_to_cpu(disk->offset); } -static inline void btrfs_set_key_offset(struct btrfs_disk_key *disk, - u64 val) +static inline void btrfs_set_disk_key_offset(struct btrfs_disk_key *disk, + u64 val) { disk->offset = cpu_to_le64(val); } -static inline u32 btrfs_key_flags(struct btrfs_disk_key *disk) +static inline u32 btrfs_disk_key_flags(struct btrfs_disk_key *disk) { return le32_to_cpu(disk->flags); } -static inline void btrfs_set_key_flags(struct btrfs_disk_key *disk, - u32 val) +static inline void btrfs_set_disk_key_flags(struct btrfs_disk_key *disk, + u32 val) { disk->flags = cpu_to_le32(val); } +static inline u32 btrfs_key_type(struct btrfs_key *key) +{ + return key->flags & BTRFS_KEY_TYPE_MASK; +} + +static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key) +{ + return le32_to_cpu(key->flags) & BTRFS_KEY_TYPE_MASK; +} + +static inline void btrfs_set_key_type(struct btrfs_key *key, u32 type) +{ + BUG_ON(type >= BTRFS_KEY_TYPE_MAX); + key->flags = (key->flags & ~((u64)BTRFS_KEY_TYPE_MASK)) | type; +} + +static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, u32 type) +{ + u32 flags = btrfs_disk_key_flags(key); + BUG_ON(type >= BTRFS_KEY_TYPE_MAX); + flags = (flags & ~((u64)BTRFS_KEY_TYPE_MASK)) | type; + btrfs_set_disk_key_flags(key, flags); +} + + + static inline u64 btrfs_header_blocknr(struct btrfs_header *h) { return le64_to_cpu(h->blocknr); @@ -407,7 +478,6 @@ static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) { return (u8 *)l->items; } - /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -422,7 +492,9 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path); int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *key, - void *data, int data_size); + void *data, u32 data_size); +int btrfs_insert_empty_item(struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 data_size); int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c new file mode 100644 index 000000000000..2a888e97e1a2 --- /dev/null +++ b/fs/btrfs/dir-item.c @@ -0,0 +1,102 @@ +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "hash.h" + +int btrfs_insert_dir_item(struct btrfs_root *root, char *name, int name_len, + u64 dir, u64 objectid, u8 type) +{ + int ret = 0; + struct btrfs_path path; + struct btrfs_dir_item *dir_item; + char *name_ptr; + struct btrfs_key key; + u32 data_size; + + key.objectid = dir; + key.flags = 0; + ret = btrfs_name_hash(name, name_len, &key.offset); + BUG_ON(ret); + btrfs_init_path(&path); + data_size = sizeof(*dir_item) + name_len; + ret = btrfs_insert_empty_item(root, &path, &key, data_size); + if (ret) + goto out; + + dir_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + struct btrfs_dir_item); + btrfs_set_dir_objectid(dir_item, objectid); + btrfs_set_dir_type(dir_item, type); + btrfs_set_dir_flags(dir_item, 0); + name_ptr = (char *)(dir_item + 1); + memcpy(name_ptr, name, name_len); +out: + btrfs_release_path(root, &path); + return ret; +} + +int btrfs_del_dir_item(struct btrfs_root *root, u64 dir, char *name, + int name_len) +{ + int ret = 0; + struct btrfs_path path; + struct btrfs_key key; + + key.objectid = dir; + key.flags = 0; + ret = btrfs_name_hash(name, name_len, &key.offset); + BUG_ON(ret); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &key, &path, 0, 1); + if (ret) + goto out; + ret = btrfs_del_item(root, &path); +out: + btrfs_release_path(root, &path); + return ret; +} + +int btrfs_lookup_dir_item(struct btrfs_root *root, u64 dir, char *name, + int name_len, u64 *objectid) +{ + int ret = 0; + struct btrfs_path path; + struct btrfs_dir_item *dir_item; + char *name_ptr; + struct btrfs_key key; + u32 item_len; + struct btrfs_item *item; + + key.objectid = dir; + key.flags = 0; + ret = btrfs_name_hash(name, name_len, &key.offset); + BUG_ON(ret); + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &key, &path, 0, 0); + if (ret) + goto out; + + dir_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + struct btrfs_dir_item); + + item = path.nodes[0]->leaf.items + path.slots[0]; + item_len = btrfs_item_size(item); + if (item_len != name_len + sizeof(struct btrfs_dir_item)) { + BUG(); + ret = 1; + goto out; + } + name_ptr = (char *)(dir_item + 1); + if (memcmp(name_ptr, name, name_len)) { + BUG(); + ret = 1; + goto out; + } + *objectid = btrfs_dir_objectid(dir_item); +out: + btrfs_release_path(root, &path); + return ret; +} diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d4f1ec328399..c81e14162ef1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -35,6 +35,7 @@ static int inc_block_ref(struct btrfs_root *root, u64 blocknr) btrfs_init_path(&path); key.objectid = blocknr; key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = 1; ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 1); if (ret != 0) @@ -61,8 +62,9 @@ static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs) struct btrfs_extent_item *item; btrfs_init_path(&path); key.objectid = blocknr; - key.flags = 0; key.offset = 1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 0); if (ret != 0) BUG(); @@ -123,6 +125,7 @@ static int finish_current_insert(struct btrfs_root *extent_root) btrfs_header_parentid(&extent_root->node->node.header)); ins.offset = 1; ins.flags = 0; + btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); for (i = 0; i < extent_root->current_insert.flags; i++) { ins.objectid = extent_root->current_insert.objectid + i; @@ -149,6 +152,7 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) key.objectid = blocknr; key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; find_free_extent(root, 0, 0, (u64)-1, &ins); @@ -228,7 +232,6 @@ static int run_pending(struct btrfs_root *extent_root) */ int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) { - struct btrfs_key key; struct btrfs_root *extent_root = root->extent_root; struct btrfs_buffer *t; int pending_ret; @@ -240,9 +243,6 @@ int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) CTREE_EXTENT_PENDING_DEL); return 0; } - key.objectid = blocknr; - key.flags = 0; - key.offset = num_blocks; ret = __free_extent(root, blocknr, num_blocks); pending_ret = run_pending(root->extent_root); return ret ? ret : pending_ret; @@ -252,7 +252,7 @@ int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) * walks the btree of allocated extents and find a hole of a given size. * The key ins is changed to record the hole: * ins->objectid == block start - * ins->flags = 0 + * ins->flags = BTRFS_EXTENT_ITEM_KEY * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ @@ -275,11 +275,14 @@ static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks, total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3; if (root->last_insert.objectid > search_start) search_start = root->last_insert.objectid; + + ins->flags = 0; + btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + check_failed: btrfs_init_path(&path); ins->objectid = search_start; ins->offset = 0; - ins->flags = 0; start_found = 0; ret = btrfs_search_slot(root, ins, &path, 0, 0); if (ret < 0) diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c new file mode 100644 index 000000000000..92c37eece1f2 --- /dev/null +++ b/fs/btrfs/hash.c @@ -0,0 +1,86 @@ +/* + * Original copy from: + * linux/fs/ext3/hash.c + * + * Copyright (C) 2002 by Theodore Ts'o + * + * This file is released under the GPL v2. + * + * This file may be redistributed under the terms of the GNU Public + * License. + */ + +#include "kerncompat.h" +#define DELTA 0x9E3779B9 + +static void TEA_transform(__u32 buf[2], __u32 const in[]) +{ + __u32 sum = 0; + __u32 b0 = buf[0], b1 = buf[1]; + __u32 a = in[0], b = in[1], c = in[2], d = in[3]; + int n = 16; + + do { + sum += DELTA; + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); + } while(--n); + + buf[0] += b0; + buf[1] += b1; +} + +static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) +{ + __u32 pad, val; + int i; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > num*4) + len = num * 4; + for (i=0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + val = msg[i] + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +int btrfs_name_hash(const char *name, int len, u64 *hash_result) +{ + __u32 hash; + __u32 minor_hash = 0; + const char *p; + __u32 in[8], buf[2]; + + /* Initialize the default seed for the hash checksum functions */ + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + p = name; + while (len > 0) { + str2hashbuf(p, len, in, 4); + TEA_transform(buf, in); + len -= 16; + p += 16; + } + hash = buf[0]; + minor_hash = buf[1]; + *hash_result = buf[0]; + *hash_result <<= 32; + *hash_result |= buf[1]; + return 0; +} diff --git a/fs/btrfs/hasher.c b/fs/btrfs/hasher.c new file mode 100644 index 000000000000..96702da4329c --- /dev/null +++ b/fs/btrfs/hasher.c @@ -0,0 +1,23 @@ +#include +#include +#include +#include "kerncompat.h" +#include "hash.h" + +int main() { + u64 result; + int ret; + char line[255]; + char *p; + while(1) { + p = fgets(line, 255, stdin); + if (!p) + break; + if (strlen(line) == 0) + continue; + ret = btrfs_name_hash(line, strlen(line), &result); + BUG_ON(ret); + printf("hash returns %Lu\n", result); + } + return 0; +} diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h index 56239b922ba6..105d3f584089 100644 --- a/fs/btrfs/kerncompat.h +++ b/fs/btrfs/kerncompat.h @@ -21,6 +21,7 @@ #endif typedef unsigned int u32; +typedef u32 __u32; typedef unsigned long long u64; typedef unsigned char u8; typedef unsigned short u16; diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index 317d20ce759d..9aa900811c33 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -50,9 +50,10 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) itemoff = __BTRFS_LEAF_DATA_SIZE(blocksize) - sizeof(root_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_item_size(&item, sizeof(root_item)); - btrfs_set_key_objectid(&item.key, BTRFS_EXTENT_TREE_OBJECTID); - btrfs_set_key_offset(&item.key, 0); - btrfs_set_key_flags(&item.key, 0); + btrfs_set_disk_key_objectid(&item.key, BTRFS_EXTENT_TREE_OBJECTID); + btrfs_set_disk_key_offset(&item.key, 0); + btrfs_set_disk_key_flags(&item.key, 0); + btrfs_set_disk_key_type(&item.key, BTRFS_ROOT_ITEM_KEY); memcpy(empty_leaf->items, &item, sizeof(item)); memcpy(btrfs_leaf_data(empty_leaf) + itemoff, &root_item, sizeof(root_item)); @@ -60,7 +61,7 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) btrfs_set_root_blocknr(&root_item, start_block + 3); itemoff = itemoff - sizeof(root_item); btrfs_set_item_offset(&item, itemoff); - btrfs_set_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); memcpy(empty_leaf->items + 1, &item, sizeof(item)); memcpy(btrfs_leaf_data(empty_leaf) + itemoff, &root_item, sizeof(root_item)); @@ -73,9 +74,10 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) btrfs_set_header_nritems(&empty_leaf->header, 4); /* item1, reserve blocks 0-16 */ - btrfs_set_key_objectid(&item.key, 0); - btrfs_set_key_offset(&item.key, start_block + 1); - btrfs_set_key_flags(&item.key, 0); + btrfs_set_disk_key_objectid(&item.key, 0); + btrfs_set_disk_key_offset(&item.key, start_block + 1); + btrfs_set_disk_key_flags(&item.key, 0); + btrfs_set_disk_key_type(&item.key, BTRFS_EXTENT_ITEM_KEY); itemoff = __BTRFS_LEAF_DATA_SIZE(blocksize) - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); @@ -87,8 +89,8 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) &extent_item, btrfs_item_size(&item)); /* item2, give block 17 to the root */ - btrfs_set_key_objectid(&item.key, start_block + 1); - btrfs_set_key_offset(&item.key, 1); + btrfs_set_disk_key_objectid(&item.key, start_block + 1); + btrfs_set_disk_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_extent_owner(&extent_item, BTRFS_ROOT_TREE_OBJECTID); @@ -97,8 +99,8 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) &extent_item, btrfs_item_size(&item)); /* item3, give block 18 to the extent root */ - btrfs_set_key_objectid(&item.key, start_block + 2); - btrfs_set_key_offset(&item.key, 1); + btrfs_set_disk_key_objectid(&item.key, start_block + 2); + btrfs_set_disk_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_extent_owner(&extent_item, BTRFS_EXTENT_TREE_OBJECTID); @@ -107,8 +109,8 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) &extent_item, btrfs_item_size(&item)); /* item4, give block 19 to the FS root */ - btrfs_set_key_objectid(&item.key, start_block + 3); - btrfs_set_key_offset(&item.key, 1); + btrfs_set_disk_key_objectid(&item.key, start_block + 3); + btrfs_set_disk_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); btrfs_set_extent_owner(&extent_item, BTRFS_FS_TREE_OBJECTID); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 4c710190343d..f2745b247473 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -12,27 +12,41 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_item *item; struct btrfs_extent_item *ei; struct btrfs_root_item *ri; + u32 type; printf("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(root, l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; + type = btrfs_disk_key_type(&item->key); printf("\titem %d key (%Lu %Lu %u) itemoff %d itemsize %d\n", i, - btrfs_key_objectid(&item->key), - btrfs_key_offset(&item->key), - btrfs_key_flags(&item->key), + btrfs_disk_key_objectid(&item->key), + btrfs_disk_key_offset(&item->key), + btrfs_disk_key_flags(&item->key), btrfs_item_offset(item), btrfs_item_size(item)); - printf("\t\titem data %.*s\n", btrfs_item_size(item), - btrfs_leaf_data(l) + btrfs_item_offset(item)); - ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); - printf("\t\textent data refs %u owner %Lu\n", - btrfs_extent_refs(ei), btrfs_extent_owner(ei)); - ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - printf("\t\troot data blocknr %Lu refs %u\n", - btrfs_root_blocknr(ri), btrfs_root_refs(ri)); + switch (type) { + case BTRFS_INODE_ITEM_KEY: + break; + case BTRFS_DIR_ITEM_KEY: + break; + case BTRFS_ROOT_ITEM_KEY: + ri = btrfs_item_ptr(l, i, struct btrfs_root_item); + printf("\t\troot data blocknr %Lu refs %u\n", + btrfs_root_blocknr(ri), btrfs_root_refs(ri)); + break; + case BTRFS_EXTENT_ITEM_KEY: + ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); + printf("\t\textent data refs %u owner %Lu\n", + btrfs_extent_refs(ei), btrfs_extent_owner(ei)); + break; + case BTRFS_STRING_ITEM_KEY: + printf("\t\titem data %.*s\n", btrfs_item_size(item), + btrfs_leaf_data(l) + btrfs_item_offset(item)); + break; + }; fflush(stdout); } } diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index d9287cad35c9..3a2571617121 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -30,6 +30,8 @@ int main(int ac, char **av) { root = open_ctree("dbfile", &super); srand(55); + ins.flags = 0; + btrfs_set_key_type(&ins, BTRFS_STRING_ITEM_KEY); for (i = 0; i < run_size; i++) { buf = malloc(64); num = next_key(i, max_key); @@ -39,7 +41,6 @@ int main(int ac, char **av) { fprintf(stderr, "insert %d:%d\n", num, i); ins.objectid = num; ins.offset = 0; - ins.flags = 0; ret = btrfs_insert_item(root, &ins, buf, strlen(buf)); if (!ret) tree_size++; diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index a8ef0478991b..f05135f13f9a 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -18,6 +18,7 @@ static int setup_key(struct radix_tree_root *root, struct btrfs_key *key, int ret; key->flags = 0; + btrfs_set_key_type(key, BTRFS_STRING_ITEM_KEY); key->offset = 0; again: ret = radix_tree_gang_lookup(root, (void **)res, num, 2); @@ -157,6 +158,7 @@ static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, key.offset = 0; key.flags = 0; + btrfs_set_key_type(&key, BTRFS_STRING_ITEM_KEY); key.objectid = (unsigned long)-1; while(nr-- >= 0) { btrfs_init_path(&path); @@ -173,7 +175,8 @@ static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, path.slots[0] -= 1; } slot = path.slots[0]; - found=btrfs_key_objectid(&path.nodes[0]->leaf.items[slot].key); + found = btrfs_disk_key_objectid( + &path.nodes[0]->leaf.items[slot].key); ret = btrfs_del_item(root, &path); count++; if (ret) { @@ -257,6 +260,7 @@ static int fill_radix(struct btrfs_root *root, struct radix_tree_root *radix) key.offset = 0; key.flags = 0; + btrfs_set_key_type(&key, BTRFS_STRING_ITEM_KEY); key.objectid = (unsigned long)-1; while(1) { btrfs_init_path(&path); @@ -274,8 +278,8 @@ static int fill_radix(struct btrfs_root *root, struct radix_tree_root *radix) slot -= 1; } for (i = slot; i >= 0; i--) { - found = btrfs_key_objectid(&path.nodes[0]-> - leaf.items[i].key); + found = btrfs_disk_key_objectid(&path.nodes[0]-> + leaf.items[i].key); radix_tree_preload(GFP_KERNEL); ret = radix_tree_insert(radix, found, (void *)found); if (ret) { diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 03aa7c2c6336..a24b47274037 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -27,7 +27,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, l = &path.nodes[0]->leaf; BUG_ON(path.slots[0] == 0); slot = path.slots[0] - 1; - if (btrfs_key_objectid(&l->items[slot].key) != objectid) { + if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { ret = 1; goto out; } From 24af03ea5e765d57860c975efcdcd41ec7f63fd9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 15 Mar 2007 12:57:43 -0400 Subject: [PATCH 0048/2985] Btrfs: add hash.h Signed-off-by: Chris Mason --- fs/btrfs/hash.h | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 fs/btrfs/hash.h diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h new file mode 100644 index 000000000000..e56c4670093d --- /dev/null +++ b/fs/btrfs/hash.h @@ -0,0 +1,4 @@ +#ifndef __HASH__ +#define __HASH__ +int btrfs_name_hash(const char *name, int len, u64 *hash_result); +#endif From 1d4f6404de26df49eb8452f8fdf7672b59f407fc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 15 Mar 2007 15:18:43 -0400 Subject: [PATCH 0049/2985] Btrfs: directory testing code and dir item fixes Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 5 +- fs/btrfs/ctree.h | 72 ++++---- fs/btrfs/dir-item.c | 58 ++---- fs/btrfs/dir-test.c | 404 ++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/print-tree.c | 12 ++ 5 files changed, 477 insertions(+), 74 deletions(-) create mode 100644 fs/btrfs/dir-test.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index ee78cdd08c04..ea3e1493baa9 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,4 +1,3 @@ - CC=gcc CFLAGS = -g -Wall headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h @@ -15,7 +14,7 @@ check=sparse $(CHECKFLAGS) $(check) $< $(CC) $(CFLAGS) -c $< -all: tester debug-tree quick-test +all: tester debug-tree quick-test dir-test debug-tree: $(objects) debug-tree.o gcc $(CFLAGS) -o debug-tree $(objects) debug-tree.o @@ -23,6 +22,8 @@ debug-tree: $(objects) debug-tree.o tester: $(objects) random-test.o gcc $(CFLAGS) -o tester $(objects) random-test.o +dir-test: $(objects) dir-test.o + gcc $(CFLAGS) -o dir-test $(objects) dir-test.o quick-test: $(objects) quick-test.o gcc $(CFLAGS) -o quick-test $(objects) quick-test.o diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e8a26fd8ea9f..7a3492d5888e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -180,37 +180,6 @@ struct btrfs_root { #define BTRFS_EXTENT_ITEM_KEY 4 #define BTRFS_STRING_ITEM_KEY 5 -static inline u64 btrfs_dir_objectid(struct btrfs_dir_item *d) -{ - return le64_to_cpu(d->objectid); -} - -static inline void btrfs_set_dir_objectid(struct btrfs_dir_item *d, u64 val) -{ - d->objectid = cpu_to_le64(val); -} - -static inline u16 btrfs_dir_flags(struct btrfs_dir_item *d) -{ - return le16_to_cpu(d->flags); -} - -static inline void btrfs_set_dir_flags(struct btrfs_dir_item *d, u16 val) -{ - d->flags = cpu_to_le16(val); -} - -static inline u8 btrfs_dir_type(struct btrfs_dir_item *d) -{ - return d->type; -} - -static inline void btrfs_set_dir_type(struct btrfs_dir_item *d, u8 val) -{ - d->type = val; -} - - static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) { return le64_to_cpu(ei->owner); @@ -267,6 +236,41 @@ static inline void btrfs_set_item_size(struct btrfs_item *item, u16 val) item->size = cpu_to_le16(val); } +static inline u64 btrfs_dir_objectid(struct btrfs_dir_item *d) +{ + return le64_to_cpu(d->objectid); +} + +static inline void btrfs_set_dir_objectid(struct btrfs_dir_item *d, u64 val) +{ + d->objectid = cpu_to_le64(val); +} + +static inline u16 btrfs_dir_flags(struct btrfs_dir_item *d) +{ + return le16_to_cpu(d->flags); +} + +static inline void btrfs_set_dir_flags(struct btrfs_dir_item *d, u16 val) +{ + d->flags = cpu_to_le16(val); +} + +static inline u8 btrfs_dir_type(struct btrfs_dir_item *d) +{ + return d->type; +} + +static inline void btrfs_set_dir_type(struct btrfs_dir_item *d, u8 val) +{ + d->type = val; +} + +static inline u32 btrfs_dir_name_len(struct btrfs_item *i) +{ + return btrfs_item_size(i) - sizeof(struct btrfs_dir_item); +} + static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, struct btrfs_disk_key *disk) { @@ -506,4 +510,10 @@ int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item); int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); +int btrfs_insert_dir_item(struct btrfs_root *root, char *name, int name_len, + u64 dir, u64 objectid, u8 type); +int btrfs_lookup_dir_item(struct btrfs_root *root, struct btrfs_path *path, + u64 dir, char *name, int name_len, int mod); +int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, + char *name, int name_len); #endif diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 2a888e97e1a2..8043b2ef10d1 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -18,6 +18,7 @@ int btrfs_insert_dir_item(struct btrfs_root *root, char *name, int name_len, key.objectid = dir; key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); btrfs_init_path(&path); @@ -38,65 +39,40 @@ out: return ret; } -int btrfs_del_dir_item(struct btrfs_root *root, u64 dir, char *name, - int name_len) +int btrfs_lookup_dir_item(struct btrfs_root *root, struct btrfs_path *path, + u64 dir, char *name, int name_len, int mod) { - int ret = 0; - struct btrfs_path path; + int ret; struct btrfs_key key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; key.objectid = dir; key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); - btrfs_init_path(&path); - ret = btrfs_search_slot(root, &key, &path, 0, 1); - if (ret) - goto out; - ret = btrfs_del_item(root, &path); -out: - btrfs_release_path(root, &path); + ret = btrfs_search_slot(root, &key, path, ins_len, cow); return ret; } -int btrfs_lookup_dir_item(struct btrfs_root *root, u64 dir, char *name, - int name_len, u64 *objectid) +int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, + char *name, int name_len) { - int ret = 0; - struct btrfs_path path; + struct btrfs_item *item; struct btrfs_dir_item *dir_item; char *name_ptr; - struct btrfs_key key; u32 item_len; - struct btrfs_item *item; - - key.objectid = dir; - key.flags = 0; - ret = btrfs_name_hash(name, name_len, &key.offset); - BUG_ON(ret); - btrfs_init_path(&path); - ret = btrfs_search_slot(root, &key, &path, 0, 0); - if (ret) - goto out; - - dir_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], - struct btrfs_dir_item); - - item = path.nodes[0]->leaf.items + path.slots[0]; + item = path->nodes[0]->leaf.items + path->slots[0]; item_len = btrfs_item_size(item); if (item_len != name_len + sizeof(struct btrfs_dir_item)) { - BUG(); - ret = 1; - goto out; + return 0; } + dir_item = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0], + struct btrfs_dir_item); name_ptr = (char *)(dir_item + 1); if (memcmp(name_ptr, name, name_len)) { - BUG(); - ret = 1; - goto out; + return 0; } - *objectid = btrfs_dir_objectid(dir_item); -out: - btrfs_release_path(root, &path); - return ret; + return 1; } diff --git a/fs/btrfs/dir-test.c b/fs/btrfs/dir-test.c new file mode 100644 index 000000000000..b482b8f49f8a --- /dev/null +++ b/fs/btrfs/dir-test.c @@ -0,0 +1,404 @@ +#include +#include +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" +#include "hash.h" + +int keep_running = 1; +struct btrfs_super_block super; +static u64 dir_oid = 44556; +static u64 file_oid = 33778; + +static int find_num(struct radix_tree_root *root, unsigned long *num_ret, + int exists) +{ + unsigned long num = rand(); + unsigned long res[2]; + int ret; + +again: + ret = radix_tree_gang_lookup(root, (void **)res, num, 2); + if (exists) { + if (ret == 0) + return -1; + num = res[0]; + } else if (ret != 0 && num == res[0]) { + num++; + if (ret > 1 && num == res[1]) { + num++; + goto again; + } + } + *num_ret = num; + return 0; +} + +static int ins_one(struct btrfs_root *root, struct radix_tree_root *radix) +{ + int ret; + char buf[128]; + unsigned long oid; + struct btrfs_path path; + + find_num(radix, &oid, 0); + sprintf(buf, "str-%lu", oid); + + ret = btrfs_insert_dir_item(root, buf, strlen(buf), dir_oid, file_oid, + 1); + if (ret) + goto error; + + radix_tree_preload(GFP_KERNEL); + ret = radix_tree_insert(radix, oid, (void *)oid); + radix_tree_preload_end(); + if (ret) + goto error; + return ret; +error: + if (ret != -EEXIST) + goto fatal; + + /* + * if we got an EEXIST, it may be due to hash collision, double + * check + */ + btrfs_init_path(&path); + ret = btrfs_lookup_dir_item(root, &path, dir_oid, buf, strlen(buf), 0); + if (ret) + goto fatal_release; + if (!btrfs_match_dir_item_name(root, &path, buf, strlen(buf))) { + struct btrfs_dir_item *di; + char *found; + u32 found_len; + u64 myhash; + u64 foundhash; + + di = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + struct btrfs_dir_item); + found = (char *)(di + 1); + found_len = btrfs_dir_name_len(path.nodes[0]->leaf.items + + path.slots[0]); + btrfs_name_hash(buf, strlen(buf), &myhash); + btrfs_name_hash(found, found_len, &foundhash); + if (myhash != foundhash) + goto fatal_release; + btrfs_release_path(root, &path); + return 0; + } +fatal_release: + btrfs_release_path(root, &path); +fatal: + printf("failed to insert %lu ret %d\n", oid, ret); + return -1; +} + +static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) +{ + int ret; + char buf[128]; + unsigned long oid; + + ret = find_num(radix, &oid, 1); + if (ret < 0) + return 0; + sprintf(buf, "str-%lu", oid); + + ret = btrfs_insert_dir_item(root, buf, strlen(buf), dir_oid, file_oid, + 1); + if (ret != -EEXIST) { + printf("insert on %s gave us %d\n", buf, ret); + return 1; + } + return 0; +} + +static int del_one(struct btrfs_root *root, struct radix_tree_root *radix) +{ + int ret; + char buf[128]; + unsigned long oid; + struct btrfs_path path; + unsigned long *ptr; + + ret = find_num(radix, &oid, 1); + if (ret < 0) + return 0; + sprintf(buf, "str-%lu", oid); + btrfs_init_path(&path); + ret = btrfs_lookup_dir_item(root, &path, dir_oid, buf, strlen(buf), -1); + if (ret) + goto out_release; + ret = btrfs_del_item(root, &path); + if (ret) + goto out_release; + btrfs_release_path(root, &path); + ptr = radix_tree_delete(radix, oid); + if (!ptr) { + ret = -5555; + goto out; + } + return 0; +out_release: + btrfs_release_path(root, &path); +out: + printf("failed to delete %lu %d\n", oid, ret); + return -1; +} + +static int lookup_item(struct btrfs_root *root, struct radix_tree_root *radix) +{ + struct btrfs_path path; + char buf[128]; + int ret; + unsigned long oid; + + ret = find_num(radix, &oid, 1); + if (ret < 0) + return 0; + sprintf(buf, "str-%lu", oid); + btrfs_init_path(&path); + ret = btrfs_lookup_dir_item(root, &path, dir_oid, buf, strlen(buf), 0); + btrfs_release_path(root, &path); + if (ret) { + printf("unable to find key %lu\n", oid); + return -1; + } + return 0; +} + +static int lookup_enoent(struct btrfs_root *root, struct radix_tree_root *radix) +{ + struct btrfs_path path; + char buf[128]; + int ret; + unsigned long oid; + + ret = find_num(radix, &oid, 0); + if (ret < 0) + return 0; + sprintf(buf, "str-%lu", oid); + btrfs_init_path(&path); + ret = btrfs_lookup_dir_item(root, &path, dir_oid, buf, strlen(buf), 0); + btrfs_release_path(root, &path); + if (!ret) { + printf("able to find key that should not exist %lu\n", oid); + return -1; + } + return 0; +} + +static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, + int nr) +{ + struct btrfs_path path; + struct btrfs_key key; + unsigned long found = 0; + u32 found_len; + int ret; + int slot; + int *ptr; + int count = 0; + char buf[128]; + struct btrfs_dir_item *di; + + key.offset = (u64)-1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + key.objectid = dir_oid; + while(nr-- >= 0) { + btrfs_init_path(&path); + ret = btrfs_search_slot(root, &key, &path, -1, 1); + if (ret < 0) { + btrfs_release_path(root, &path); + return ret; + } + if (ret != 0) { + if (path.slots[0] == 0) { + btrfs_release_path(root, &path); + break; + } + path.slots[0] -= 1; + } + slot = path.slots[0]; + di = btrfs_item_ptr(&path.nodes[0]->leaf, slot, + struct btrfs_dir_item); + found_len = btrfs_dir_name_len(path.nodes[0]->leaf.items + + slot); + memcpy(buf, (char *)(di + 1), found_len); + BUG_ON(found_len > 128); + buf[found_len] = '\0'; + found = atoi(buf + 4); + ret = btrfs_del_item(root, &path); + count++; + if (ret) { + fprintf(stderr, + "failed to remove %lu from tree\n", + found); + return -1; + } + btrfs_release_path(root, &path); + ptr = radix_tree_delete(radix, found); + if (!ptr) + goto error; + if (!keep_running) + break; + } + return 0; +error: + fprintf(stderr, "failed to delete from the radix %lu\n", found); + return -1; +} + +static int fill_tree(struct btrfs_root *root, struct radix_tree_root *radix, + int count) +{ + int i; + int ret = 0; + for (i = 0; i < count; i++) { + ret = ins_one(root, radix); + if (ret) { + fprintf(stderr, "fill failed\n"); + goto out; + } + if (i % 1000 == 0) { + ret = btrfs_commit_transaction(root, &super); + if (ret) { + fprintf(stderr, "fill commit failed\n"); + return ret; + } + } + if (i && i % 10000 == 0) { + printf("bigfill %d\n", i); + } + if (!keep_running) + break; + } +out: + return ret; +} + +static int bulk_op(struct btrfs_root *root, struct radix_tree_root *radix) +{ + int ret; + int nr = rand() % 5000; + static int run_nr = 0; + + /* do the bulk op much less frequently */ + if (run_nr++ % 100) + return 0; + ret = empty_tree(root, radix, nr); + if (ret) + return ret; + ret = fill_tree(root, radix, nr); + if (ret) + return ret; + return 0; +} + + +int (*ops[])(struct btrfs_root *root, struct radix_tree_root *radix) = + { ins_one, insert_dup, del_one, lookup_item, + lookup_enoent, bulk_op }; + +void sigstopper(int ignored) +{ + keep_running = 0; + fprintf(stderr, "caught exit signal, stopping\n"); +} + +int print_usage(void) +{ + printf("usage: tester [-ih] [-c count] [-f count]\n"); + printf("\t -c count -- iteration count after filling\n"); + printf("\t -f count -- run this many random inserts before starting\n"); + printf("\t -i -- only do initial fill\n"); + printf("\t -h -- this help text\n"); + exit(1); +} +int main(int ac, char **av) +{ + RADIX_TREE(radix, GFP_KERNEL); + struct btrfs_root *root; + int i; + int ret; + int count; + int op; + int iterations = 20000; + int init_fill_count = 800000; + int err = 0; + int initial_only = 0; + radix_tree_init(); + + printf("removing old tree\n"); + unlink("dbfile"); + root = open_ctree("dbfile", &super); + + signal(SIGTERM, sigstopper); + signal(SIGINT, sigstopper); + + for (i = 1 ; i < ac ; i++) { + if (strcmp(av[i], "-i") == 0) { + initial_only = 1; + } else if (strcmp(av[i], "-c") == 0) { + iterations = atoi(av[i+1]); + i++; + } else if (strcmp(av[i], "-f") == 0) { + init_fill_count = atoi(av[i+1]); + i++; + } else { + print_usage(); + } + } + printf("initial fill\n"); + ret = fill_tree(root, &radix, init_fill_count); + printf("starting run\n"); + if (ret) { + err = ret; + goto out; + } + if (initial_only == 1) { + goto out; + } + for (i = 0; i < iterations; i++) { + op = rand() % ARRAY_SIZE(ops); + count = rand() % 128; + if (i % 2000 == 0) { + printf("%d\n", i); + fflush(stdout); + } + if (i && i % 5000 == 0) { + printf("open & close, root level %d nritems %d\n", + btrfs_header_level(&root->node->node.header), + btrfs_header_nritems(&root->node->node.header)); + close_ctree(root, &super); + root = open_ctree("dbfile", &super); + } + while(count--) { + ret = ops[op](root, &radix); + if (ret) { + fprintf(stderr, "op %d failed %d:%d\n", + op, i, iterations); + btrfs_print_tree(root, root->node); + fprintf(stderr, "op %d failed %d:%d\n", + op, i, iterations); + err = ret; + goto out; + } + if (ops[op] == bulk_op) + break; + if (keep_running == 0) { + err = 0; + goto out; + } + } + } +out: + close_ctree(root, &super); + return err; +} + diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index f2745b247473..ad244d658117 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -12,7 +12,10 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_item *item; struct btrfs_extent_item *ei; struct btrfs_root_item *ri; + struct btrfs_dir_item *di; u32 type; + u32 namelen; + printf("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(root, l)); @@ -31,6 +34,15 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) case BTRFS_INODE_ITEM_KEY: break; case BTRFS_DIR_ITEM_KEY: + namelen = btrfs_item_size(l->items + i) - sizeof(*di); + di = btrfs_item_ptr(l, i, struct btrfs_dir_item); + printf("\t\tdir oid %Lu flags %u type %u\n", + btrfs_dir_objectid(di), + btrfs_dir_flags(di), + btrfs_dir_type(di)); + printf("\t\tname %.*s\n", + namelen, (char *)(di + 1)); + break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); From 1e1d27017c5986c1ea81181506042cf9cba3f6ea Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 15 Mar 2007 19:03:33 -0400 Subject: [PATCH 0050/2985] Btrfs: add inode item Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/TODO | 1 + fs/btrfs/ctree.h | 175 ++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/file-item.c | 7 ++ fs/btrfs/inode-item.c | 37 +++++++++ 5 files changed, 215 insertions(+), 7 deletions(-) create mode 100644 fs/btrfs/file-item.c create mode 100644 fs/btrfs/inode-item.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index ea3e1493baa9..36964f54bb47 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -2,7 +2,7 @@ CC=gcc CFLAGS = -g -Wall headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o \ - root-tree.o dir-item.o hash.o + root-tree.o dir-item.o hash.o file-item.o inode-item.o # if you don't have sparse installed, use ls instead CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 249e2e90bea8..2ae4b3aae1e5 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -9,6 +9,7 @@ * Add simple tree locking (semaphore per tree) * Make allocator smarter * Do actual block accounting +* Check compat and incompat flags on the inode * Port into the kernel * Add virtual filesystems, mountable snapshots * Get rid of struct ctree_path, limiting tree levels held at one time diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7a3492d5888e..dbf3917833fe 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -132,6 +132,37 @@ struct btrfs_extent_item { __le64 owner; } __attribute__ ((__packed__)); +struct btrfs_inode_timespec { + __le32 sec; + __le32 nsec; +} __attribute__ ((__packed__)); + +/* + * there is no padding here on purpose. If you want to extent the inode, + * make a new item type + */ +struct btrfs_inode_item { + __le64 generation; + __le64 size; + __le64 nblocks; + __le32 nlink; + __le32 uid; + __le32 gid; + __le32 mode; + __le32 rdev; + __le16 flags; + __le16 compat_flags; + struct btrfs_inode_timespec atime; + struct btrfs_inode_timespec ctime; + struct btrfs_inode_timespec mtime; + struct btrfs_inode_timespec otime; +} __attribute__ ((__packed__)); + +/* inline data is just a blob of bytes */ +struct btrfs_inline_data_item { + u8 data; +} __attribute__ ((__packed__)); + struct btrfs_dir_item { __le64 objectid; __le16 flags; @@ -170,15 +201,149 @@ struct btrfs_root { u32 blocksize; }; - /* the lower bits in the key flags defines the item type */ #define BTRFS_KEY_TYPE_MAX 256 #define BTRFS_KEY_TYPE_MASK (BTRFS_KEY_TYPE_MAX - 1) + +/* + * inode items have the data typically returned from stat and store other + * info about object characteristics. There is one for every file and dir in + * the FS + */ #define BTRFS_INODE_ITEM_KEY 1 + +/* + * dir items are the name -> inode pointers in a directory. There is one + * for every name in a directory. + */ #define BTRFS_DIR_ITEM_KEY 2 -#define BTRFS_ROOT_ITEM_KEY 3 -#define BTRFS_EXTENT_ITEM_KEY 4 -#define BTRFS_STRING_ITEM_KEY 5 +/* + * inline data is file data that fits in the btree. + */ +#define BTRFS_INLINE_DATA_KEY 3 +/* + * extent data is for data that can't fit in the btree. It points to + * a (hopefully) huge chunk of disk + */ +#define BTRFS_EXTENT_DATA_KEY 4 +/* + * root items point to tree roots. There are typically in the root + * tree used by the super block to find all the other trees + */ +#define BTRFS_ROOT_ITEM_KEY 5 +/* + * extent items are in the extent map tree. These record which blocks + * are used, and how many references there are to each block + */ +#define BTRFS_EXTENT_ITEM_KEY 6 +/* + * string items are for debugging. They just store a short string of + * data in the FS + */ +#define BTRFS_STRING_ITEM_KEY 7 + +static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) +{ + return le64_to_cpu(i->generation); +} + +static inline void btrfs_set_inode_generation(struct btrfs_inode_item *i, + u64 val) +{ + i->generation = cpu_to_le64(val); +} + +static inline u64 btrfs_inode_size(struct btrfs_inode_item *i) +{ + return le64_to_cpu(i->size); +} + +static inline void btrfs_set_inode_size(struct btrfs_inode_item *i, u64 val) +{ + i->size = cpu_to_le64(val); +} + +static inline u64 btrfs_inode_nblocks(struct btrfs_inode_item *i) +{ + return le64_to_cpu(i->nblocks); +} + +static inline void btrfs_set_inode_nblocks(struct btrfs_inode_item *i, u64 val) +{ + i->nblocks = cpu_to_le64(val); +} + +static inline u32 btrfs_inode_nlink(struct btrfs_inode_item *i) +{ + return le32_to_cpu(i->nlink); +} + +static inline void btrfs_set_inode_nlink(struct btrfs_inode_item *i, u32 val) +{ + i->nlink = cpu_to_le32(val); +} + +static inline u32 btrfs_inode_uid(struct btrfs_inode_item *i) +{ + return le32_to_cpu(i->uid); +} + +static inline void btrfs_set_inode_uid(struct btrfs_inode_item *i, u32 val) +{ + i->uid = cpu_to_le32(val); +} + +static inline u32 btrfs_inode_gid(struct btrfs_inode_item *i) +{ + return le32_to_cpu(i->gid); +} + +static inline void btrfs_set_inode_gid(struct btrfs_inode_item *i, u32 val) +{ + i->gid = cpu_to_le32(val); +} + +static inline u32 btrfs_inode_mode(struct btrfs_inode_item *i) +{ + return le32_to_cpu(i->mode); +} + +static inline void btrfs_set_inode_mode(struct btrfs_inode_item *i, u32 val) +{ + i->mode = cpu_to_le32(val); +} + +static inline u32 btrfs_inode_rdev(struct btrfs_inode_item *i) +{ + return le32_to_cpu(i->rdev); +} + +static inline void btrfs_set_inode_rdev(struct btrfs_inode_item *i, u32 val) +{ + i->rdev = cpu_to_le32(val); +} + +static inline u16 btrfs_inode_flags(struct btrfs_inode_item *i) +{ + return le16_to_cpu(i->flags); +} + +static inline void btrfs_set_inode_flags(struct btrfs_inode_item *i, u16 val) +{ + i->flags = cpu_to_le16(val); +} + +static inline u16 btrfs_inode_compat_flags(struct btrfs_inode_item *i) +{ + return le16_to_cpu(i->compat_flags); +} + +static inline void btrfs_set_inode_compat_flags(struct btrfs_inode_item *i, + u16 val) +{ + i->compat_flags = cpu_to_le16(val); +} + static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) { @@ -344,8 +509,6 @@ static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, u32 type) btrfs_set_disk_key_flags(key, flags); } - - static inline u64 btrfs_header_blocknr(struct btrfs_header *h) { return le64_to_cpu(h->blocknr); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c new file mode 100644 index 000000000000..f5b36c732c5e --- /dev/null +++ b/fs/btrfs/file-item.c @@ -0,0 +1,7 @@ +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" + diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c new file mode 100644 index 000000000000..4908f9afdf86 --- /dev/null +++ b/fs/btrfs/inode-item.c @@ -0,0 +1,37 @@ +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" + +int btrfs_insert_inode(struct btrfs_root *root, u64 objectid, + struct btrfs_inode_item *inode_item) +{ + struct btrfs_path path; + struct btrfs_key key; + int ret; + key.objectid = objectid; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + + btrfs_init_path(&path); + ret = btrfs_insert_item(root, &key, inode_item, sizeof(*inode_item)); + btrfs_release_path(root, &path); + return ret; +} + +int btrfs_lookup_inode(struct btrfs_root *root, struct btrfs_path *path, + u64 objectid, int mod) +{ + struct btrfs_key key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + + key.objectid = objectid; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + return btrfs_search_slot(root, &key, path, ins_len, cow); +} From a8a2ee0c600a213d13170c2f4d7bd0b304bbec19 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 16 Mar 2007 08:46:49 -0400 Subject: [PATCH 0051/2985] Btrfs: add a name_len to dir items, reorder key Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 8 ++++---- fs/btrfs/ctree.h | 14 ++++++++++---- fs/btrfs/dir-item.c | 16 ++++++---------- fs/btrfs/dir-test.c | 6 ++---- fs/btrfs/print-tree.c | 8 ++++---- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7edfbd468042..68f66b320a88 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -107,14 +107,14 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 1; if (k1.objectid < k2->objectid) return -1; - if (k1.offset > k2->offset) - return 1; - if (k1.offset < k2->offset) - return -1; if (k1.flags > k2->flags) return 1; if (k1.flags < k2->flags) return -1; + if (k1.offset > k2->offset) + return 1; + if (k1.offset < k2->offset) + return -1; return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index dbf3917833fe..7c66b647ea24 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -26,14 +26,14 @@ */ struct btrfs_disk_key { __le64 objectid; - __le64 offset; __le32 flags; + __le64 offset; } __attribute__ ((__packed__)); struct btrfs_key { u64 objectid; - u64 offset; u32 flags; + u64 offset; } __attribute__ ((__packed__)); /* @@ -166,6 +166,7 @@ struct btrfs_inline_data_item { struct btrfs_dir_item { __le64 objectid; __le16 flags; + __le16 name_len; u8 type; } __attribute__ ((__packed__)); @@ -431,9 +432,14 @@ static inline void btrfs_set_dir_type(struct btrfs_dir_item *d, u8 val) d->type = val; } -static inline u32 btrfs_dir_name_len(struct btrfs_item *i) +static inline u16 btrfs_dir_name_len(struct btrfs_dir_item *d) { - return btrfs_item_size(i) - sizeof(struct btrfs_dir_item); + return le16_to_cpu(d->name_len); +} + +static inline void btrfs_set_dir_name_len(struct btrfs_dir_item *d, u16 val) +{ + d->name_len = cpu_to_le16(val); } static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 8043b2ef10d1..a42a67b99753 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -32,6 +32,7 @@ int btrfs_insert_dir_item(struct btrfs_root *root, char *name, int name_len, btrfs_set_dir_objectid(dir_item, objectid); btrfs_set_dir_type(dir_item, type); btrfs_set_dir_flags(dir_item, 0); + btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); memcpy(name_ptr, name, name_len); out: @@ -59,20 +60,15 @@ int btrfs_lookup_dir_item(struct btrfs_root *root, struct btrfs_path *path, int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, char *name, int name_len) { - struct btrfs_item *item; struct btrfs_dir_item *dir_item; char *name_ptr; - u32 item_len; - item = path->nodes[0]->leaf.items + path->slots[0]; - item_len = btrfs_item_size(item); - if (item_len != name_len + sizeof(struct btrfs_dir_item)) { - return 0; - } + dir_item = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0], struct btrfs_dir_item); - name_ptr = (char *)(dir_item + 1); - if (memcmp(name_ptr, name, name_len)) { + if (btrfs_dir_name_len(dir_item) != name_len) + return 0; + name_ptr = (char *)(dir_item + 1); + if (memcmp(name_ptr, name, name_len)) return 0; - } return 1; } diff --git a/fs/btrfs/dir-test.c b/fs/btrfs/dir-test.c index b482b8f49f8a..f73aa7623398 100644 --- a/fs/btrfs/dir-test.c +++ b/fs/btrfs/dir-test.c @@ -81,8 +81,7 @@ error: di = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], struct btrfs_dir_item); found = (char *)(di + 1); - found_len = btrfs_dir_name_len(path.nodes[0]->leaf.items + - path.slots[0]); + found_len = btrfs_dir_name_len(di); btrfs_name_hash(buf, strlen(buf), &myhash); btrfs_name_hash(found, found_len, &foundhash); if (myhash != foundhash) @@ -227,8 +226,7 @@ static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, slot = path.slots[0]; di = btrfs_item_ptr(&path.nodes[0]->leaf, slot, struct btrfs_dir_item); - found_len = btrfs_dir_name_len(path.nodes[0]->leaf.items + - slot); + found_len = btrfs_dir_name_len(di); memcpy(buf, (char *)(di + 1), found_len); BUG_ON(found_len > 128); buf[found_len] = '\0'; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index ad244d658117..0bb5c38427c9 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -23,11 +23,11 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) for (i = 0 ; i < nr ; i++) { item = l->items + i; type = btrfs_disk_key_type(&item->key); - printf("\titem %d key (%Lu %Lu %u) itemoff %d itemsize %d\n", + printf("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n", i, btrfs_disk_key_objectid(&item->key), - btrfs_disk_key_offset(&item->key), btrfs_disk_key_flags(&item->key), + btrfs_disk_key_offset(&item->key), btrfs_item_offset(item), btrfs_item_size(item)); switch (type) { @@ -81,11 +81,11 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); fflush(stdout); for (i = 0; i < nr; i++) { - printf("\tkey %d (%Lu %Lu %u) block %Lu\n", + printf("\tkey %d (%Lu %u %Lu) block %Lu\n", i, c->ptrs[i].key.objectid, - c->ptrs[i].key.offset, c->ptrs[i].key.flags, + c->ptrs[i].key.offset, btrfs_node_blockptr(c, i)); fflush(stdout); } From 88fd146c27da0f34c512f47e2b3776a0762ecd81 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 16 Mar 2007 08:56:18 -0400 Subject: [PATCH 0052/2985] Btrfs: pin freed blocks from the FS tree too Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 14 +++++++------- fs/btrfs/ctree.h | 3 ++- fs/btrfs/extent-tree.c | 28 +++++++++++++++++----------- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 68f66b320a88..13128b5ed65c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -55,13 +55,13 @@ static int btrfs_cow_block(struct btrfs_root *root, root->node = cow; cow->count++; if (buf != root->commit_root) - btrfs_free_extent(root, buf->blocknr, 1); + btrfs_free_extent(root, buf->blocknr, 1, 1); btrfs_block_release(root, buf); } else { btrfs_set_node_blockptr(&parent->node, parent_slot, cow->blocknr); BUG_ON(list_empty(&parent->dirty)); - btrfs_free_extent(root, buf->blocknr, 1); + btrfs_free_extent(root, buf->blocknr, 1, 1); } btrfs_block_release(root, buf); return 0; @@ -311,7 +311,7 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, /* once for the root ptr */ btrfs_block_release(root, mid_buf); clean_tree_block(root, mid_buf); - return btrfs_free_extent(root, blocknr, 1); + return btrfs_free_extent(root, blocknr, 1, 1); } parent = &parent_buf->node; @@ -352,7 +352,7 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, wret = del_ptr(root, path, level + 1, pslot + 1); if (wret) ret = wret; - wret = btrfs_free_extent(root, blocknr, 1); + wret = btrfs_free_extent(root, blocknr, 1, 1); if (wret) ret = wret; } else { @@ -388,7 +388,7 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, wret = del_ptr(root, path, level + 1, pslot); if (wret) ret = wret; - wret = btrfs_free_extent(root, blocknr, 1); + wret = btrfs_free_extent(root, blocknr, 1, 1); if (wret) ret = wret; } else { @@ -1310,7 +1310,7 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) wret = del_ptr(root, path, 1, path->slots[1]); if (wret) ret = wret; - wret = btrfs_free_extent(root, leaf_buf->blocknr, 1); + wret = btrfs_free_extent(root, leaf_buf->blocknr, 1, 1); if (wret) ret = wret; } @@ -1348,7 +1348,7 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) if (wret) ret = wret; btrfs_block_release(root, leaf_buf); - wret = btrfs_free_extent(root, blocknr, 1); + wret = btrfs_free_extent(root, blocknr, 1, 1); if (wret) ret = wret; } else { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7c66b647ea24..d15a2ed95076 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -658,7 +658,8 @@ static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root); int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf); -int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks); +int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, + int pin); int btrfs_search_slot(struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c81e14162ef1..4a40282b45f7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -95,6 +95,7 @@ int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf) int btrfs_finish_extent_commit(struct btrfs_root *root) { unsigned long gang[8]; + u64 first = 0; int ret; int i; @@ -104,11 +105,13 @@ int btrfs_finish_extent_commit(struct btrfs_root *root) ARRAY_SIZE(gang)); if (!ret) break; + if (!first) + first = gang[0]; for (i = 0; i < ret; i++) { radix_tree_delete(&root->pinned_radix, gang[i]); } } - root->last_insert.objectid = 0; + root->last_insert.objectid = first; root->last_insert.offset = 0; return 0; } @@ -140,7 +143,8 @@ static int finish_current_insert(struct btrfs_root *extent_root) /* * remove an extent from the root, returns 0 on success */ -static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) +static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, + int pin) { struct btrfs_path path; struct btrfs_key key; @@ -150,6 +154,7 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) struct btrfs_key ins; u32 refs; + BUG_ON(pin && num_blocks != 1); key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -170,7 +175,7 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); if (refs == 0) { - if (!root->ref_cows) { + if (pin) { int err; radix_tree_preload(GFP_KERNEL); err = radix_tree_insert(&extent_root->pinned_radix, @@ -179,8 +184,7 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) radix_tree_preload_end(); } ret = btrfs_del_item(extent_root, &path); - if (root != extent_root && - extent_root->last_insert.objectid > blocknr) + if (!pin && extent_root->last_insert.objectid > blocknr) extent_root->last_insert.objectid = blocknr; if (ret) BUG(); @@ -208,7 +212,8 @@ static int del_pending_extents(struct btrfs_root *extent_root) if (!ret) break; for (i = 0; i < ret; i++) { - ret = __free_extent(extent_root, gang[i]->blocknr, 1); + ret = __free_extent(extent_root, + gang[i]->blocknr, 1, 1); radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, CTREE_EXTENT_PENDING_DEL); @@ -230,7 +235,8 @@ static int run_pending(struct btrfs_root *extent_root) /* * remove an extent from the root, returns 0 on success */ -int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) +int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, + int pin) { struct btrfs_root *extent_root = root->extent_root; struct btrfs_buffer *t; @@ -243,7 +249,7 @@ int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks) CTREE_EXTENT_PENDING_DEL); return 0; } - ret = __free_extent(root, blocknr, num_blocks); + ret = __free_extent(root, blocknr, num_blocks, pin); pending_ret = run_pending(root->extent_root); return ret ? ret : pending_ret; } @@ -451,7 +457,7 @@ static int walk_down_tree(struct btrfs_root *root, ret = lookup_block_ref(root, blocknr, &refs); if (refs != 1 || *level == 1) { path->slots[*level]++; - ret = btrfs_free_extent(root, blocknr, 1); + ret = btrfs_free_extent(root, blocknr, 1, 1); BUG_ON(ret); continue; } @@ -464,7 +470,7 @@ static int walk_down_tree(struct btrfs_root *root, path->slots[*level] = 0; } out: - ret = btrfs_free_extent(root, path->nodes[*level]->blocknr, 1); + ret = btrfs_free_extent(root, path->nodes[*level]->blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -492,7 +498,7 @@ static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, return 0; } else { ret = btrfs_free_extent(root, - path->nodes[*level]->blocknr, 1); + path->nodes[*level]->blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; From e089f05c18ab36ed5fa7e2319052e03ab800d518 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 16 Mar 2007 16:20:31 -0400 Subject: [PATCH 0053/2985] Btrfs: transaction handles everywhere Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 4 +- fs/btrfs/TODO | 2 + fs/btrfs/ctree.c | 204 +++++++++++++++++++++-------------------- fs/btrfs/ctree.h | 63 ++++++++----- fs/btrfs/debug-tree.c | 1 + fs/btrfs/dir-item.c | 20 ++-- fs/btrfs/dir-test.c | 70 ++++++++------ fs/btrfs/disk-io.c | 60 +++++++----- fs/btrfs/disk-io.h | 16 ++-- fs/btrfs/extent-tree.c | 135 +++++++++++++++------------ fs/btrfs/inode-item.c | 15 +-- fs/btrfs/print-tree.h | 4 +- fs/btrfs/quick-test.c | 23 +++-- fs/btrfs/random-test.c | 62 +++++++------ fs/btrfs/root-tree.c | 23 +++-- fs/btrfs/transaction.h | 27 ++++++ 16 files changed, 426 insertions(+), 303 deletions(-) create mode 100644 fs/btrfs/transaction.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 36964f54bb47..76bcd87d8575 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,5 +1,5 @@ CC=gcc -CFLAGS = -g -Wall +CFLAGS = -g -Wall -Werror headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o \ root-tree.o dir-item.o hash.o file-item.o inode-item.o @@ -14,7 +14,7 @@ check=sparse $(CHECKFLAGS) $(check) $< $(CC) $(CFLAGS) -c $< -all: tester debug-tree quick-test dir-test +all: tester debug-tree quick-test dir-test tags debug-tree: $(objects) debug-tree.o gcc $(CFLAGS) -o debug-tree $(objects) debug-tree.o diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 2ae4b3aae1e5..e9894999322e 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,6 +8,8 @@ * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter +* Only pin blocks allocated in this transaction +* Add transaction handles * Do actual block accounting * Check compat and incompat flags on the inode * Port into the kernel diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 13128b5ed65c..17a3ff2f1828 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -6,17 +6,18 @@ #include "disk-io.h" #include "print-tree.h" -static int split_node(struct btrfs_root *root, struct btrfs_path *path, - int level); -static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, - int data_size); -static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst, - struct btrfs_buffer *src); -static int balance_node_right(struct btrfs_root *root, - struct btrfs_buffer *dst_buf, +static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int level); +static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size); +static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_buffer *dst, struct btrfs_buffer + *src); +static int balance_node_right(struct btrfs_trans_handle *trans, struct + btrfs_root *root, struct btrfs_buffer *dst_buf, struct btrfs_buffer *src_buf); -static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, - int slot); +static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path, int level, int slot); inline void btrfs_init_path(struct btrfs_path *p) { @@ -34,11 +35,10 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } -static int btrfs_cow_block(struct btrfs_root *root, - struct btrfs_buffer *buf, - struct btrfs_buffer *parent, - int parent_slot, - struct btrfs_buffer **cow_ret) +static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_buffer *buf, struct btrfs_buffer + *parent, int parent_slot, struct btrfs_buffer + **cow_ret) { struct btrfs_buffer *cow; @@ -46,22 +46,22 @@ static int btrfs_cow_block(struct btrfs_root *root, *cow_ret = buf; return 0; } - cow = btrfs_alloc_free_block(root); + cow = btrfs_alloc_free_block(trans, root); memcpy(&cow->node, &buf->node, root->blocksize); btrfs_set_header_blocknr(&cow->node.header, cow->blocknr); *cow_ret = cow; - btrfs_inc_ref(root, buf); + btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; cow->count++; if (buf != root->commit_root) - btrfs_free_extent(root, buf->blocknr, 1, 1); + btrfs_free_extent(trans, root, buf->blocknr, 1, 1); btrfs_block_release(root, buf); } else { btrfs_set_node_blockptr(&parent->node, parent_slot, cow->blocknr); BUG_ON(list_empty(&parent->dirty)); - btrfs_free_extent(root, buf->blocknr, 1, 1); + btrfs_free_extent(trans, root, buf->blocknr, 1, 1); } btrfs_block_release(root, buf); return 0; @@ -266,8 +266,8 @@ static struct btrfs_buffer *read_node_slot(struct btrfs_root *root, return read_tree_block(root, btrfs_node_blockptr(node, slot)); } -static int balance_level(struct btrfs_root *root, struct btrfs_path *path, - int level) +static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int level) { struct btrfs_buffer *right_buf; struct btrfs_buffer *mid_buf; @@ -310,8 +310,8 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, btrfs_block_release(root, mid_buf); /* once for the root ptr */ btrfs_block_release(root, mid_buf); - clean_tree_block(root, mid_buf); - return btrfs_free_extent(root, blocknr, 1, 1); + clean_tree_block(trans, root, mid_buf); + return btrfs_free_extent(trans, root, blocknr, 1, 1); } parent = &parent_buf->node; @@ -324,11 +324,11 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, /* first, try to make some room in the middle buffer */ if (left_buf) { - btrfs_cow_block(root, left_buf, parent_buf, - pslot - 1, &left_buf); + btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, + &left_buf); left = &left_buf->node; orig_slot += btrfs_header_nritems(&left->header); - wret = push_node_left(root, left_buf, mid_buf); + wret = push_node_left(trans, root, left_buf, mid_buf); if (wret < 0) ret = wret; } @@ -337,22 +337,23 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, * then try to empty the right most buffer into the middle */ if (right_buf) { - btrfs_cow_block(root, right_buf, parent_buf, - pslot + 1, &right_buf); + btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1, + &right_buf); right = &right_buf->node; - wret = push_node_left(root, mid_buf, right_buf); + wret = push_node_left(trans, root, mid_buf, right_buf); if (wret < 0) ret = wret; if (btrfs_header_nritems(&right->header) == 0) { u64 blocknr = right_buf->blocknr; btrfs_block_release(root, right_buf); - clean_tree_block(root, right_buf); + clean_tree_block(trans, root, right_buf); right_buf = NULL; right = NULL; - wret = del_ptr(root, path, level + 1, pslot + 1); + wret = del_ptr(trans, root, path, level + 1, pslot + + 1); if (wret) ret = wret; - wret = btrfs_free_extent(root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, blocknr, 1, 1); if (wret) ret = wret; } else { @@ -373,7 +374,7 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, * right */ BUG_ON(!left_buf); - wret = balance_node_right(root, mid_buf, left_buf); + wret = balance_node_right(trans, root, mid_buf, left_buf); if (wret < 0) ret = wret; BUG_ON(wret == 1); @@ -382,13 +383,13 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, /* we've managed to empty the middle node, drop it */ u64 blocknr = mid_buf->blocknr; btrfs_block_release(root, mid_buf); - clean_tree_block(root, mid_buf); + clean_tree_block(trans, root, mid_buf); mid_buf = NULL; mid = NULL; - wret = del_ptr(root, path, level + 1, pslot); + wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) ret = wret; - wret = btrfs_free_extent(root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, blocknr, 1, 1); if (wret) ret = wret; } else { @@ -438,8 +439,9 @@ static int balance_level(struct btrfs_root *root, struct btrfs_path *path, * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if * possible) */ -int btrfs_search_slot(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_path *p, int ins_len, int cow) +int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_path *p, int + ins_len, int cow) { struct btrfs_buffer *b; struct btrfs_buffer *cow_buf; @@ -455,8 +457,9 @@ again: level = btrfs_header_level(&b->node.header); if (cow) { int wret; - wret = btrfs_cow_block(root, b, p->nodes[level + 1], - p->slots[level + 1], &cow_buf); + wret = btrfs_cow_block(trans, root, b, p->nodes[level + + 1], p->slots[level + 1], + &cow_buf); b = cow_buf; } BUG_ON(!cow && ins_len); @@ -472,7 +475,7 @@ again: p->slots[level] = slot; if (ins_len > 0 && btrfs_header_nritems(&c->header) == BTRFS_NODEPTRS_PER_BLOCK(root)) { - int sret = split_node(root, p, level); + int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); if (sret) return sret; @@ -480,7 +483,8 @@ again: c = &b->node; slot = p->slots[level]; } else if (ins_len < 0) { - int sret = balance_level(root, p, level); + int sret = balance_level(trans, root, p, + level); if (sret) return sret; b = p->nodes[level]; @@ -496,7 +500,7 @@ again: p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, l) < sizeof(struct btrfs_item) + ins_len) { - int sret = split_leaf(root, p, ins_len); + int sret = split_leaf(trans, root, p, ins_len); BUG_ON(sret > 0); if (sret) return sret; @@ -519,9 +523,9 @@ again: * If this fails to write a tree block, it returns -1, but continues * fixing up the blocks in ram so the tree is consistent. */ -static int fixup_low_keys(struct btrfs_root *root, - struct btrfs_path *path, struct btrfs_disk_key *key, - int level) +static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, struct btrfs_disk_key + *key, int level) { int i; int ret = 0; @@ -546,8 +550,9 @@ static int fixup_low_keys(struct btrfs_root *root, * returns 0 if some ptrs were pushed left, < 0 if there was some horrible * error, and > 0 if there was no room in the left hand block. */ -static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst_buf, - struct btrfs_buffer *src_buf) +static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_buffer *dst_buf, struct + btrfs_buffer *src_buf) { struct btrfs_node *src = &src_buf->node; struct btrfs_node *dst = &dst_buf->node; @@ -589,8 +594,8 @@ static int push_node_left(struct btrfs_root *root, struct btrfs_buffer *dst_buf, * * this will only push up to 1/2 the contents of the left node over */ -static int balance_node_right(struct btrfs_root *root, - struct btrfs_buffer *dst_buf, +static int balance_node_right(struct btrfs_trans_handle *trans, struct + btrfs_root *root, struct btrfs_buffer *dst_buf, struct btrfs_buffer *src_buf) { struct btrfs_node *src = &src_buf->node; @@ -635,8 +640,8 @@ static int balance_node_right(struct btrfs_root *root, * * returns zero on success or < 0 on failure. */ -static int insert_new_root(struct btrfs_root *root, - struct btrfs_path *path, int level) +static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int level) { struct btrfs_buffer *t; struct btrfs_node *lower; @@ -646,7 +651,7 @@ static int insert_new_root(struct btrfs_root *root, BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(root); + t = btrfs_alloc_free_block(trans, root); c = &t->node; memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); @@ -679,9 +684,9 @@ static int insert_new_root(struct btrfs_root *root, * * returns zero on success and < 0 on any error */ -static int insert_ptr(struct btrfs_root *root, - struct btrfs_path *path, struct btrfs_disk_key *key, - u64 blocknr, int slot, int level) +static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, struct btrfs_disk_key + *key, u64 blocknr, int slot, int level) { struct btrfs_node *lower; int nritems; @@ -713,8 +718,8 @@ static int insert_ptr(struct btrfs_root *root, * * returns 0 on success and < 0 on failure */ -static int split_node(struct btrfs_root *root, struct btrfs_path *path, - int level) +static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int level) { struct btrfs_buffer *t; struct btrfs_node *c; @@ -729,12 +734,12 @@ static int split_node(struct btrfs_root *root, struct btrfs_path *path, c = &t->node; if (t == root->node) { /* trying to split the root, lets make a new one */ - ret = insert_new_root(root, path, level + 1); + ret = insert_new_root(trans, root, path, level + 1); if (ret) return ret; } c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(root); + split_buffer = btrfs_alloc_free_block(trans, root); split = &split_buffer->node; btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_blocknr(&split->header, split_buffer->blocknr); @@ -748,7 +753,7 @@ static int split_node(struct btrfs_root *root, struct btrfs_path *path, ret = 0; BUG_ON(list_empty(&t->dirty)); - wret = insert_ptr(root, path, &split->ptrs[0].key, + wret = insert_ptr(trans, root, path, &split->ptrs[0].key, split_buffer->blocknr, path->slots[level + 1] + 1, level + 1); if (wret) @@ -790,8 +795,8 @@ static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) * returns 1 if the push failed because the other node didn't have enough * room, 0 if everything worked out and < 0 if there were major errors. */ -static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, - int data_size) +static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size) { struct btrfs_buffer *left_buf = path->nodes[0]; struct btrfs_leaf *left = &left_buf->leaf; @@ -824,7 +829,7 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, return 1; } /* cow and double check */ - btrfs_cow_block(root, right_buf, upper, slot + 1, &right_buf); + btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf); right = &right_buf->leaf; free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -897,8 +902,8 @@ static int push_leaf_right(struct btrfs_root *root, struct btrfs_path *path, * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise */ -static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, - int data_size) +static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size) { struct btrfs_buffer *right_buf = path->nodes[0]; struct btrfs_leaf *right = &right_buf->leaf; @@ -931,7 +936,7 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, } /* cow and double check */ - btrfs_cow_block(root, t, path->nodes[1], slot - 1, &t); + btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); left = &t->leaf; free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -997,7 +1002,7 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(list_empty(&t->dirty)); BUG_ON(list_empty(&right_buf->dirty)); - wret = fixup_low_keys(root, path, &right->items[0].key, 1); + wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) ret = wret; @@ -1021,8 +1026,8 @@ static int push_leaf_left(struct btrfs_root *root, struct btrfs_path *path, * * returns 0 if all went well and < 0 on failure. */ -static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, - int data_size) +static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size) { struct btrfs_buffer *l_buf; struct btrfs_leaf *l; @@ -1038,11 +1043,11 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, int ret; int wret; - wret = push_leaf_left(root, path, data_size); + wret = push_leaf_left(trans, root, path, data_size); if (wret < 0) return wret; if (wret) { - wret = push_leaf_right(root, path, data_size); + wret = push_leaf_right(trans, root, path, data_size); if (wret < 0) return wret; } @@ -1055,14 +1060,14 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, return 0; if (!path->nodes[1]) { - ret = insert_new_root(root, path, 1); + ret = insert_new_root(trans, root, path, 1); if (ret) return ret; } slot = path->slots[0]; nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(root); + right_buffer = btrfs_alloc_free_block(trans, root); BUG_ON(!right_buffer); BUG_ON(mid == nritems); right = &right_buffer->leaf; @@ -1100,7 +1105,7 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, btrfs_set_header_nritems(&l->header, mid); ret = 0; - wret = insert_ptr(root, path, &right->items[0].key, + wret = insert_ptr(trans, root, path, &right->items[0].key, right_buffer->blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; @@ -1122,8 +1127,9 @@ static int split_leaf(struct btrfs_root *root, struct btrfs_path *path, * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int btrfs_insert_empty_item(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *cpu_key, u32 data_size) +int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, struct btrfs_key + *cpu_key, u32 data_size) { int ret = 0; int slot; @@ -1139,7 +1145,7 @@ int btrfs_insert_empty_item(struct btrfs_root *root, struct btrfs_path *path, /* create a root if there isn't one */ if (!root->node) BUG(); - ret = btrfs_search_slot(root, cpu_key, path, data_size, 1); + ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1); if (ret == 0) { btrfs_release_path(root, path); return -EEXIST; @@ -1193,7 +1199,7 @@ int btrfs_insert_empty_item(struct btrfs_root *root, struct btrfs_path *path, ret = 0; if (slot == 0) - ret = fixup_low_keys(root, path, &disk_key, 1); + ret = fixup_low_keys(trans, root, path, &disk_key, 1); BUG_ON(list_empty(&leaf_buf->dirty)); if (btrfs_leaf_free_space(root, leaf) < 0) @@ -1207,15 +1213,16 @@ out: * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, - void *data, u32 data_size) +int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *cpu_key, void *data, u32 + data_size) { int ret = 0; struct btrfs_path path; u8 *ptr; btrfs_init_path(&path); - ret = btrfs_insert_empty_item(root, &path, cpu_key, data_size); + ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size); if (!ret) { ptr = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], u8); memcpy(ptr, data, data_size); @@ -1231,8 +1238,8 @@ int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *cpu_key, * continuing all the way the root if required. The root is converted into * a leaf if all the nodes are emptied. */ -static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, - int slot) +static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path, int level, int slot) { struct btrfs_node *node; struct btrfs_buffer *parent = path->nodes[level]; @@ -1253,7 +1260,7 @@ static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, /* just turn the root into a leaf and break */ btrfs_set_header_level(&root->node->node.header, 0); } else if (slot == 0) { - wret = fixup_low_keys(root, path, &node->ptrs[0].key, + wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key, level + 1); if (wret) ret = wret; @@ -1266,7 +1273,8 @@ static int del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, * delete the item at the leaf level in path. If that empties * the leaf, remove it from the tree */ -int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) +int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path) { int slot; struct btrfs_leaf *leaf; @@ -1306,19 +1314,20 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) btrfs_set_header_level(&leaf->header, 0); BUG_ON(list_empty(&leaf_buf->dirty)); } else { - clean_tree_block(root, leaf_buf); - wret = del_ptr(root, path, 1, path->slots[1]); + clean_tree_block(trans, root, leaf_buf); + wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; - wret = btrfs_free_extent(root, leaf_buf->blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, + leaf_buf->blocknr, 1, 1); if (wret) ret = wret; } } else { int used = leaf_space_used(leaf, 0, nritems); if (slot == 0) { - wret = fixup_low_keys(root, path, - &leaf->items[0].key, 1); + wret = fixup_low_keys(trans, root, path, + &leaf->items[0].key, 1); if (wret) ret = wret; } @@ -1332,23 +1341,24 @@ int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path) */ slot = path->slots[1]; leaf_buf->count++; - wret = push_leaf_left(root, path, 1); + wret = push_leaf_left(trans, root, path, 1); if (wret < 0) ret = wret; if (path->nodes[0] == leaf_buf && btrfs_header_nritems(&leaf->header)) { - wret = push_leaf_right(root, path, 1); + wret = push_leaf_right(trans, root, path, 1); if (wret < 0) ret = wret; } if (btrfs_header_nritems(&leaf->header) == 0) { u64 blocknr = leaf_buf->blocknr; - clean_tree_block(root, leaf_buf); - wret = del_ptr(root, path, 1, slot); + clean_tree_block(trans, root, leaf_buf); + wret = del_ptr(trans, root, path, 1, slot); if (wret) ret = wret; btrfs_block_release(root, leaf_buf); - wret = btrfs_free_extent(root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, blocknr, + 1, 1); if (wret) ret = wret; } else { @@ -1401,5 +1411,3 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) } return 0; } - - diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d15a2ed95076..68f0af39777f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4,6 +4,8 @@ #include "list.h" #include "kerncompat.h" +struct btrfs_trans_handle; + #define BTRFS_MAGIC "_BtRfS_M" #define BTRFS_ROOT_TREE_OBJECTID 1 @@ -200,6 +202,7 @@ struct btrfs_root { struct btrfs_root_item root_item; struct btrfs_key root_key; u32 blocksize; + struct btrfs_trans_handle *running_transaction; }; /* the lower bits in the key flags defines the item type */ @@ -656,34 +659,46 @@ static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) -struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root); -int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf); -int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, - int pin); -int btrfs_search_slot(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_path *p, int ins_len, int cow); +struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf); +int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 blocknr, u64 num_blocks, int pin); +int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_path *p, int + ins_len, int cow); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); -int btrfs_del_item(struct btrfs_root *root, struct btrfs_path *path); -int btrfs_insert_item(struct btrfs_root *root, struct btrfs_key *key, - void *data, u32 data_size); -int btrfs_insert_empty_item(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *cpu_key, u32 data_size); +int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path); +int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, void *data, u32 data_size); +int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, struct btrfs_key + *cpu_key, u32 data_size); int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); -int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap); -int btrfs_finish_extent_commit(struct btrfs_root *root); -int btrfs_del_root(struct btrfs_root *root, struct btrfs_key *key); -int btrfs_insert_root(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_root_item *item); -int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_root_item *item); -int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, - struct btrfs_root_item *item, struct btrfs_key *key); -int btrfs_insert_dir_item(struct btrfs_root *root, char *name, int name_len, - u64 dir, u64 objectid, u8 type); -int btrfs_lookup_dir_item(struct btrfs_root *root, struct btrfs_path *path, - u64 dir, char *name, int name_len, int mod); +int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_buffer *snap); +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct + btrfs_root *root); +int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_key *key); +int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_root_item + *item); +int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_root_item + *item); +int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct + btrfs_root_item *item, struct btrfs_key *key); +int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, char *name, int name_len, u64 dir, u64 + objectid, u8 type); +int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, u64 dir, char *name, + int name_len, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, char *name, int name_len); #endif diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c index de45fb4dfddd..91dea7a0a476 100644 --- a/fs/btrfs/debug-tree.c +++ b/fs/btrfs/debug-tree.c @@ -5,6 +5,7 @@ #include "ctree.h" #include "disk-io.h" #include "print-tree.h" +#include "transaction.h" int main(int ac, char **av) { struct btrfs_super_block super; diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index a42a67b99753..949c4e526798 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -5,9 +5,11 @@ #include "ctree.h" #include "disk-io.h" #include "hash.h" +#include "transaction.h" -int btrfs_insert_dir_item(struct btrfs_root *root, char *name, int name_len, - u64 dir, u64 objectid, u8 type) +int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, char *name, int name_len, u64 dir, u64 + objectid, u8 type) { int ret = 0; struct btrfs_path path; @@ -23,7 +25,7 @@ int btrfs_insert_dir_item(struct btrfs_root *root, char *name, int name_len, BUG_ON(ret); btrfs_init_path(&path); data_size = sizeof(*dir_item) + name_len; - ret = btrfs_insert_empty_item(root, &path, &key, data_size); + ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size); if (ret) goto out; @@ -40,8 +42,9 @@ out: return ret; } -int btrfs_lookup_dir_item(struct btrfs_root *root, struct btrfs_path *path, - u64 dir, char *name, int name_len, int mod) +int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, u64 dir, char *name, + int name_len, int mod) { int ret; struct btrfs_key key; @@ -53,12 +56,13 @@ int btrfs_lookup_dir_item(struct btrfs_root *root, struct btrfs_path *path, btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); - ret = btrfs_search_slot(root, &key, path, ins_len, cow); + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); return ret; } -int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, - char *name, int name_len) +int btrfs_match_dir_item_name(struct btrfs_root *root, + struct btrfs_path *path, char + *name, int name_len) { struct btrfs_dir_item *dir_item; char *name_ptr; diff --git a/fs/btrfs/dir-test.c b/fs/btrfs/dir-test.c index f73aa7623398..e908c0c588cc 100644 --- a/fs/btrfs/dir-test.c +++ b/fs/btrfs/dir-test.c @@ -8,6 +8,7 @@ #include "disk-io.h" #include "print-tree.h" #include "hash.h" +#include "transaction.h" int keep_running = 1; struct btrfs_super_block super; @@ -38,7 +39,8 @@ again: return 0; } -static int ins_one(struct btrfs_root *root, struct radix_tree_root *radix) +static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix) { int ret; char buf[128]; @@ -48,8 +50,8 @@ static int ins_one(struct btrfs_root *root, struct radix_tree_root *radix) find_num(radix, &oid, 0); sprintf(buf, "str-%lu", oid); - ret = btrfs_insert_dir_item(root, buf, strlen(buf), dir_oid, file_oid, - 1); + ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid, + file_oid, 1); if (ret) goto error; @@ -68,7 +70,8 @@ error: * check */ btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(root, &path, dir_oid, buf, strlen(buf), 0); + ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, + strlen(buf), 0); if (ret) goto fatal_release; if (!btrfs_match_dir_item_name(root, &path, buf, strlen(buf))) { @@ -96,7 +99,8 @@ fatal: return -1; } -static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) +static int insert_dup(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix) { int ret; char buf[128]; @@ -107,8 +111,8 @@ static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) return 0; sprintf(buf, "str-%lu", oid); - ret = btrfs_insert_dir_item(root, buf, strlen(buf), dir_oid, file_oid, - 1); + ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid, + file_oid, 1); if (ret != -EEXIST) { printf("insert on %s gave us %d\n", buf, ret); return 1; @@ -116,7 +120,8 @@ static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) return 0; } -static int del_one(struct btrfs_root *root, struct radix_tree_root *radix) +static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix) { int ret; char buf[128]; @@ -129,10 +134,11 @@ static int del_one(struct btrfs_root *root, struct radix_tree_root *radix) return 0; sprintf(buf, "str-%lu", oid); btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(root, &path, dir_oid, buf, strlen(buf), -1); + ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, + strlen(buf), -1); if (ret) goto out_release; - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); if (ret) goto out_release; btrfs_release_path(root, &path); @@ -149,7 +155,8 @@ out: return -1; } -static int lookup_item(struct btrfs_root *root, struct radix_tree_root *radix) +static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix) { struct btrfs_path path; char buf[128]; @@ -161,7 +168,8 @@ static int lookup_item(struct btrfs_root *root, struct radix_tree_root *radix) return 0; sprintf(buf, "str-%lu", oid); btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(root, &path, dir_oid, buf, strlen(buf), 0); + ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, + strlen(buf), 0); btrfs_release_path(root, &path); if (ret) { printf("unable to find key %lu\n", oid); @@ -170,7 +178,8 @@ static int lookup_item(struct btrfs_root *root, struct radix_tree_root *radix) return 0; } -static int lookup_enoent(struct btrfs_root *root, struct radix_tree_root *radix) +static int lookup_enoent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix) { struct btrfs_path path; char buf[128]; @@ -182,7 +191,8 @@ static int lookup_enoent(struct btrfs_root *root, struct radix_tree_root *radix) return 0; sprintf(buf, "str-%lu", oid); btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(root, &path, dir_oid, buf, strlen(buf), 0); + ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, + strlen(buf), 0); btrfs_release_path(root, &path); if (!ret) { printf("able to find key that should not exist %lu\n", oid); @@ -191,8 +201,8 @@ static int lookup_enoent(struct btrfs_root *root, struct radix_tree_root *radix) return 0; } -static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, - int nr) +static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix, int nr) { struct btrfs_path path; struct btrfs_key key; @@ -211,7 +221,7 @@ static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, key.objectid = dir_oid; while(nr-- >= 0) { btrfs_init_path(&path); - ret = btrfs_search_slot(root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret < 0) { btrfs_release_path(root, &path); return ret; @@ -231,7 +241,7 @@ static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, BUG_ON(found_len > 128); buf[found_len] = '\0'; found = atoi(buf + 4); - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); count++; if (ret) { fprintf(stderr, @@ -252,19 +262,19 @@ error: return -1; } -static int fill_tree(struct btrfs_root *root, struct radix_tree_root *radix, - int count) +static int fill_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix, int count) { int i; int ret = 0; for (i = 0; i < count; i++) { - ret = ins_one(root, radix); + ret = ins_one(trans, root, radix); if (ret) { fprintf(stderr, "fill failed\n"); goto out; } if (i % 1000 == 0) { - ret = btrfs_commit_transaction(root, &super); + ret = btrfs_commit_transaction(trans, root, &super); if (ret) { fprintf(stderr, "fill commit failed\n"); return ret; @@ -280,7 +290,8 @@ out: return ret; } -static int bulk_op(struct btrfs_root *root, struct radix_tree_root *radix) +static int bulk_op(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix) { int ret; int nr = rand() % 5000; @@ -289,17 +300,18 @@ static int bulk_op(struct btrfs_root *root, struct radix_tree_root *radix) /* do the bulk op much less frequently */ if (run_nr++ % 100) return 0; - ret = empty_tree(root, radix, nr); + ret = empty_tree(trans, root, radix, nr); if (ret) return ret; - ret = fill_tree(root, radix, nr); + ret = fill_tree(trans, root, radix, nr); if (ret) return ret; return 0; } -int (*ops[])(struct btrfs_root *root, struct radix_tree_root *radix) = +int (*ops[])(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct + radix_tree_root *radix) = { ins_one, insert_dup, del_one, lookup_item, lookup_enoent, bulk_op }; @@ -330,11 +342,13 @@ int main(int ac, char **av) int init_fill_count = 800000; int err = 0; int initial_only = 0; + struct btrfs_trans_handle *trans; radix_tree_init(); printf("removing old tree\n"); unlink("dbfile"); root = open_ctree("dbfile", &super); + trans = btrfs_start_transaction(root, 1); signal(SIGTERM, sigstopper); signal(SIGINT, sigstopper); @@ -353,7 +367,7 @@ int main(int ac, char **av) } } printf("initial fill\n"); - ret = fill_tree(root, &radix, init_fill_count); + ret = fill_tree(trans, root, &radix, init_fill_count); printf("starting run\n"); if (ret) { err = ret; @@ -377,7 +391,7 @@ int main(int ac, char **av) root = open_ctree("dbfile", &super); } while(count--) { - ret = ops[op](root, &radix); + ret = ops[op](trans, root, &radix); if (ret) { fprintf(stderr, "op %d failed %d:%d\n", op, i, iterations); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8d9457b5aef5..25ce07908ee3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -9,6 +9,7 @@ #include "radix-tree.h" #include "ctree.h" #include "disk-io.h" +#include "transaction.h" static int allocated_blocks = 0; int cache_max = 10000; @@ -107,7 +108,8 @@ struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) return buf; } -int dirty_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) +int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf) { if (!list_empty(&buf->dirty)) return 0; @@ -116,7 +118,8 @@ int dirty_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) return 0; } -int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) +int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf) { if (!list_empty(&buf->dirty)) { list_del_init(&buf->dirty); @@ -125,7 +128,8 @@ int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) return 0; } -int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) +int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf) { u64 blocknr = buf->blocknr; loff_t offset = blocknr * root->blocksize; @@ -139,7 +143,8 @@ int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) return 0; } -static int __commit_transaction(struct btrfs_root *root) +static int __commit_transaction(struct btrfs_trans_handle *trans, struct + btrfs_root *root) { struct btrfs_buffer *b; int ret = 0; @@ -147,7 +152,7 @@ static int __commit_transaction(struct btrfs_root *root) while(!list_empty(&root->trans)) { b = list_entry(root->trans.next, struct btrfs_buffer, dirty); list_del_init(&b->dirty); - wret = write_tree_block(root, b); + wret = write_tree_block(trans, root, b); if (wret) ret = wret; btrfs_block_release(root, b); @@ -155,8 +160,9 @@ static int __commit_transaction(struct btrfs_root *root) return ret; } -static int commit_extent_and_tree_roots(struct btrfs_root *tree_root, - struct btrfs_root *extent_root) +static int commit_extent_and_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_root *tree_root, struct + btrfs_root *extent_root) { int ret; u64 old_extent_block; @@ -167,24 +173,24 @@ static int commit_extent_and_tree_roots(struct btrfs_root *tree_root, break; btrfs_set_root_blocknr(&extent_root->root_item, extent_root->node->blocknr); - ret = btrfs_update_root(tree_root, + ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); BUG_ON(ret); } - __commit_transaction(extent_root); - __commit_transaction(tree_root); + __commit_transaction(trans, extent_root); + __commit_transaction(trans, tree_root); return 0; } -int btrfs_commit_transaction(struct btrfs_root *root, - struct btrfs_super_block *s) +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct + btrfs_root *root, struct btrfs_super_block *s) { int ret = 0; struct btrfs_buffer *snap = root->commit_root; struct btrfs_key snap_key; - ret = __commit_transaction(root); + ret = __commit_transaction(trans, root); BUG_ON(ret); if (root->commit_root == root->node) @@ -194,23 +200,24 @@ int btrfs_commit_transaction(struct btrfs_root *root, root->root_key.offset++; btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); - ret = btrfs_insert_root(root->tree_root, &root->root_key, + ret = btrfs_insert_root(trans, root->tree_root, &root->root_key, &root->root_item); BUG_ON(ret); - ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root); + ret = commit_extent_and_tree_roots(trans, root->tree_root, + root->extent_root); BUG_ON(ret); - write_ctree_super(root, s); - btrfs_finish_extent_commit(root->extent_root); - btrfs_finish_extent_commit(root->tree_root); + write_ctree_super(trans, root, s); + btrfs_finish_extent_commit(trans, root->extent_root); + btrfs_finish_extent_commit(trans, root->tree_root); root->commit_root = root->node; root->node->count++; - ret = btrfs_drop_snapshot(root, snap); + ret = btrfs_drop_snapshot(trans, root, snap); BUG_ON(ret); - ret = btrfs_del_root(root->tree_root, &snap_key); + ret = btrfs_del_root(trans, root->tree_root, &snap_key); BUG_ON(ret); return ret; @@ -312,7 +319,8 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) return root; } -int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s) +int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_super_block *s) { int ret; btrfs_set_super_root(s, root->tree_root->node->blocknr); @@ -338,10 +346,14 @@ static int drop_cache(struct btrfs_root *root) int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) { int ret; - btrfs_commit_transaction(root, s); - ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root); + struct btrfs_trans_handle *trans; + + trans = root->running_transaction; + btrfs_commit_transaction(trans, root, s); + ret = commit_extent_and_tree_roots(trans, root->tree_root, + root->extent_root); BUG_ON(ret); - write_ctree_super(root, s); + write_ctree_super(trans, root, s); drop_cache(root->extent_root); drop_cache(root->tree_root); drop_cache(root); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 5771bb90acb2..24a9e77c8311 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -15,15 +15,19 @@ struct btrfs_buffer { struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr); -int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf); -int dirty_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf); -int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf); -int btrfs_commit_transaction(struct btrfs_root *root, - struct btrfs_super_block *s); +int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf); +int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf); +int clean_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_buffer *buf); +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_super_block *s); struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); -int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s); +int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_super_block *s); int mkfs(int fd, u64 num_blocks, u32 blocksize); #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4a40282b45f7..c29b92d440e0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5,12 +5,15 @@ #include "ctree.h" #include "disk-io.h" #include "print-tree.h" +#include "transaction.h" -static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks, - u64 search_start, u64 search_end, - struct btrfs_key *ins); -static int finish_current_insert(struct btrfs_root *extent_root); -static int run_pending(struct btrfs_root *extent_root); +static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *orig_root, u64 num_blocks, u64 search_start, u64 + search_end, struct btrfs_key *ins); +static int finish_current_insert(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root); +static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root + *extent_root); /* * pending extents are blocks that we're trying to allocate in the extent @@ -21,7 +24,8 @@ static int run_pending(struct btrfs_root *extent_root); */ #define CTREE_EXTENT_PENDING_DEL 0 -static int inc_block_ref(struct btrfs_root *root, u64 blocknr) +static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 blocknr) { struct btrfs_path path; int ret; @@ -31,13 +35,13 @@ static int inc_block_ref(struct btrfs_root *root, u64 blocknr) struct btrfs_key ins; u32 refs; - find_free_extent(root->extent_root, 0, 0, (u64)-1, &ins); + find_free_extent(trans, root->extent_root, 0, 0, (u64)-1, &ins); btrfs_init_path(&path); key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = 1; - ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 1); + ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 1); if (ret != 0) BUG(); BUG_ON(ret != 0); @@ -48,12 +52,13 @@ static int inc_block_ref(struct btrfs_root *root, u64 blocknr) BUG_ON(list_empty(&path.nodes[0]->dirty)); btrfs_release_path(root->extent_root, &path); - finish_current_insert(root->extent_root); - run_pending(root->extent_root); + finish_current_insert(trans, root->extent_root); + run_pending(trans, root->extent_root); return 0; } -static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs) +static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 blocknr, u32 *refs) { struct btrfs_path path; int ret; @@ -65,7 +70,7 @@ static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs) key.offset = 1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - ret = btrfs_search_slot(root->extent_root, &key, &path, 0, 0); + ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 0); if (ret != 0) BUG(); l = &path.nodes[0]->leaf; @@ -75,7 +80,8 @@ static int lookup_block_ref(struct btrfs_root *root, u64 blocknr, u32 *refs) return 0; } -int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf) +int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf) { u64 blocknr; int i; @@ -87,12 +93,13 @@ int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf) for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) { blocknr = btrfs_node_blockptr(&buf->node, i); - inc_block_ref(root, blocknr); + inc_block_ref(trans, root, blocknr); } return 0; } -int btrfs_finish_extent_commit(struct btrfs_root *root) +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct + btrfs_root *root) { unsigned long gang[8]; u64 first = 0; @@ -116,7 +123,8 @@ int btrfs_finish_extent_commit(struct btrfs_root *root) return 0; } -static int finish_current_insert(struct btrfs_root *extent_root) +static int finish_current_insert(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root) { struct btrfs_key ins; struct btrfs_extent_item extent_item; @@ -132,8 +140,8 @@ static int finish_current_insert(struct btrfs_root *extent_root) for (i = 0; i < extent_root->current_insert.flags; i++) { ins.objectid = extent_root->current_insert.objectid + i; - ret = btrfs_insert_item(extent_root, &ins, &extent_item, - sizeof(extent_item)); + ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, + sizeof(extent_item)); BUG_ON(ret); } extent_root->current_insert.offset = 0; @@ -143,8 +151,8 @@ static int finish_current_insert(struct btrfs_root *extent_root) /* * remove an extent from the root, returns 0 on success */ -static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, - int pin) +static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_path path; struct btrfs_key key; @@ -160,9 +168,9 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; - find_free_extent(root, 0, 0, (u64)-1, &ins); + find_free_extent(trans, root, 0, 0, (u64)-1, &ins); btrfs_init_path(&path); - ret = btrfs_search_slot(extent_root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1); if (ret) { printf("failed to find %Lu\n", key.objectid); btrfs_print_tree(extent_root, extent_root->node); @@ -183,14 +191,14 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, BUG_ON(err); radix_tree_preload_end(); } - ret = btrfs_del_item(extent_root, &path); + ret = btrfs_del_item(trans, extent_root, &path); if (!pin && extent_root->last_insert.objectid > blocknr) extent_root->last_insert.objectid = blocknr; if (ret) BUG(); } btrfs_release_path(extent_root, &path); - finish_current_insert(extent_root); + finish_current_insert(trans, extent_root); return ret; } @@ -198,7 +206,8 @@ static int __free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, * find all the blocks marked as pending in the radix tree and remove * them from the extent map */ -static int del_pending_extents(struct btrfs_root *extent_root) +static int del_pending_extents(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root) { int ret; struct btrfs_buffer *gang[4]; @@ -212,7 +221,7 @@ static int del_pending_extents(struct btrfs_root *extent_root) if (!ret) break; for (i = 0; i < ret; i++) { - ret = __free_extent(extent_root, + ret = __free_extent(trans, extent_root, gang[i]->blocknr, 1, 1); radix_tree_tag_clear(&extent_root->cache_radix, gang[i]->blocknr, @@ -223,11 +232,12 @@ static int del_pending_extents(struct btrfs_root *extent_root) return 0; } -static int run_pending(struct btrfs_root *extent_root) +static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root + *extent_root) { while(radix_tree_tagged(&extent_root->cache_radix, CTREE_EXTENT_PENDING_DEL)) - del_pending_extents(extent_root); + del_pending_extents(trans, extent_root); return 0; } @@ -235,8 +245,8 @@ static int run_pending(struct btrfs_root *extent_root) /* * remove an extent from the root, returns 0 on success */ -int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, - int pin) +int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_root *extent_root = root->extent_root; struct btrfs_buffer *t; @@ -249,8 +259,8 @@ int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, CTREE_EXTENT_PENDING_DEL); return 0; } - ret = __free_extent(root, blocknr, num_blocks, pin); - pending_ret = run_pending(root->extent_root); + ret = __free_extent(trans, root, blocknr, num_blocks, pin); + pending_ret = run_pending(trans, root->extent_root); return ret ? ret : pending_ret; } @@ -262,9 +272,9 @@ int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks, * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ -static int find_free_extent(struct btrfs_root *orig_root, u64 num_blocks, - u64 search_start, u64 search_end, - struct btrfs_key *ins) +static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *orig_root, u64 num_blocks, u64 search_start, u64 + search_end, struct btrfs_key *ins) { struct btrfs_path path; struct btrfs_key key; @@ -290,7 +300,7 @@ check_failed: ins->objectid = search_start; ins->offset = 0; start_found = 0; - ret = btrfs_search_slot(root, ins, &path, 0, 0); + ret = btrfs_search_slot(trans, root, ins, &path, 0, 0); if (ret < 0) goto error; @@ -367,9 +377,9 @@ error: * * returns 0 if everything worked, non-zero otherwise. */ -static int alloc_extent(struct btrfs_root *root, u64 num_blocks, - u64 search_start, u64 search_end, u64 owner, - struct btrfs_key *ins) +static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 num_blocks, u64 search_start, u64 + search_end, u64 owner, struct btrfs_key *ins) { int ret; int pending_ret; @@ -389,16 +399,16 @@ static int alloc_extent(struct btrfs_root *root, u64 num_blocks, extent_root->current_insert.flags++; return 0; } - ret = find_free_extent(root, num_blocks, search_start, + ret = find_free_extent(trans, root, num_blocks, search_start, search_end, ins); if (ret) return ret; - ret = btrfs_insert_item(extent_root, ins, &extent_item, - sizeof(extent_item)); + ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, + sizeof(extent_item)); - finish_current_insert(extent_root); - pending_ret = run_pending(extent_root); + finish_current_insert(trans, extent_root); + pending_ret = run_pending(trans, extent_root); if (ret) return ret; if (pending_ret) @@ -410,13 +420,14 @@ static int alloc_extent(struct btrfs_root *root, u64 num_blocks, * helper function to allocate a block for a given tree * returns the tree buffer or NULL. */ -struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root) +struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { struct btrfs_key ins; int ret; struct btrfs_buffer *buf; - ret = alloc_extent(root, 1, 0, (unsigned long)-1, + ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1, btrfs_header_parentid(&root->node->node.header), &ins); if (ret) { @@ -424,7 +435,7 @@ struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root) return NULL; } buf = find_tree_block(root, ins.objectid); - dirty_tree_block(root, buf); + dirty_tree_block(trans, root, buf); return buf; } @@ -432,8 +443,8 @@ struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root) * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. */ -static int walk_down_tree(struct btrfs_root *root, - struct btrfs_path *path, int *level) +static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int *level) { struct btrfs_buffer *next; struct btrfs_buffer *cur; @@ -441,7 +452,8 @@ static int walk_down_tree(struct btrfs_root *root, int ret; u32 refs; - ret = lookup_block_ref(root, path->nodes[*level]->blocknr, &refs); + ret = lookup_block_ref(trans, root, path->nodes[*level]->blocknr, + &refs); BUG_ON(ret); if (refs > 1) goto out; @@ -454,10 +466,10 @@ static int walk_down_tree(struct btrfs_root *root, btrfs_header_nritems(&cur->node.header)) break; blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]); - ret = lookup_block_ref(root, blocknr, &refs); + ret = lookup_block_ref(trans, root, blocknr, &refs); if (refs != 1 || *level == 1) { path->slots[*level]++; - ret = btrfs_free_extent(root, blocknr, 1, 1); + ret = btrfs_free_extent(trans, root, blocknr, 1, 1); BUG_ON(ret); continue; } @@ -470,7 +482,8 @@ static int walk_down_tree(struct btrfs_root *root, path->slots[*level] = 0; } out: - ret = btrfs_free_extent(root, path->nodes[*level]->blocknr, 1, 1); + ret = btrfs_free_extent(trans, root, path->nodes[*level]->blocknr, 1, + 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -483,8 +496,8 @@ out: * to find the first node higher up where we haven't yet gone through * all the slots */ -static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, - int *level) +static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int *level) { int i; int slot; @@ -497,8 +510,9 @@ static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, *level = i; return 0; } else { - ret = btrfs_free_extent(root, - path->nodes[*level]->blocknr, 1, 1); + ret = btrfs_free_extent(trans, root, + path->nodes[*level]->blocknr, + 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; @@ -513,7 +527,8 @@ static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, * the tree freeing any blocks that have a ref count of zero after being * decremented. */ -int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap) +int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_buffer *snap) { int ret = 0; int wret; @@ -529,13 +544,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap) path.nodes[level] = snap; path.slots[level] = 0; while(1) { - wret = walk_down_tree(root, &path, &level); + wret = walk_down_tree(trans, root, &path, &level); if (wret > 0) break; if (wret < 0) ret = wret; - wret = walk_up_tree(root, &path, &level); + wret = walk_up_tree(trans, root, &path, &level); if (wret > 0) break; if (wret < 0) diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 4908f9afdf86..7caeb11e8756 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -4,9 +4,11 @@ #include "radix-tree.h" #include "ctree.h" #include "disk-io.h" +#include "transaction.h" -int btrfs_insert_inode(struct btrfs_root *root, u64 objectid, - struct btrfs_inode_item *inode_item) +int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 objectid, struct btrfs_inode_item + *inode_item) { struct btrfs_path path; struct btrfs_key key; @@ -17,13 +19,14 @@ int btrfs_insert_inode(struct btrfs_root *root, u64 objectid, key.offset = 0; btrfs_init_path(&path); - ret = btrfs_insert_item(root, &key, inode_item, sizeof(*inode_item)); + ret = btrfs_insert_item(trans, root, &key, inode_item, + sizeof(*inode_item)); btrfs_release_path(root, &path); return ret; } -int btrfs_lookup_inode(struct btrfs_root *root, struct btrfs_path *path, - u64 objectid, int mod) +int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, u64 objectid, int mod) { struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; @@ -33,5 +36,5 @@ int btrfs_lookup_inode(struct btrfs_root *root, struct btrfs_path *path, key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - return btrfs_search_slot(root, &key, path, ins_len, cow); + return btrfs_search_slot(trans, root, &key, path, ins_len, cow); } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 9236abda2a74..0882ca904eca 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,3 +1,5 @@ - +#ifndef __PRINT_TREE_ +#define __PRINT_TREE_ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t); +#endif diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index 3a2571617121..d676577185d5 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -5,6 +5,7 @@ #include "ctree.h" #include "disk-io.h" #include "print-tree.h" +#include "transaction.h" /* for testing only */ int next_key(int i, int max_key) { @@ -25,10 +26,12 @@ int main(int ac, char **av) { struct btrfs_path path; struct btrfs_super_block super; struct btrfs_root *root; + struct btrfs_trans_handle *trans; radix_tree_init(); root = open_ctree("dbfile", &super); + trans = btrfs_start_transaction(root, 1); srand(55); ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_STRING_ITEM_KEY); @@ -41,12 +44,12 @@ int main(int ac, char **av) { fprintf(stderr, "insert %d:%d\n", num, i); ins.objectid = num; ins.offset = 0; - ret = btrfs_insert_item(root, &ins, buf, strlen(buf)); + ret = btrfs_insert_item(trans, root, &ins, buf, strlen(buf)); if (!ret) tree_size++; free(buf); if (i == run_size - 5) { - btrfs_commit_transaction(root, &super); + btrfs_commit_transaction(trans, root, &super); } } @@ -61,7 +64,7 @@ int main(int ac, char **av) { btrfs_init_path(&path); if (i % 10000 == 0) fprintf(stderr, "search %d:%d\n", num, i); - ret = btrfs_search_slot(root, &ins, &path, 0, 0); + ret = btrfs_search_slot(trans, root, &ins, &path, 0, 0); if (ret) { btrfs_print_tree(root, root->node); printf("unable to find %d\n", num); @@ -83,11 +86,11 @@ int main(int ac, char **av) { num = next_key(i, max_key); ins.objectid = num; btrfs_init_path(&path); - ret = btrfs_search_slot(root, &ins, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &ins, &path, -1, 1); if (!ret) { if (i % 10000 == 0) fprintf(stderr, "del %d:%d\n", num, i); - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); if (ret != 0) BUG(); tree_size--; @@ -104,7 +107,7 @@ int main(int ac, char **av) { ins.objectid = num; if (i % 10000 == 0) fprintf(stderr, "insert %d:%d\n", num, i); - ret = btrfs_insert_item(root, &ins, buf, strlen(buf)); + ret = btrfs_insert_item(trans, root, &ins, buf, strlen(buf)); if (!ret) tree_size++; free(buf); @@ -119,7 +122,7 @@ int main(int ac, char **av) { btrfs_init_path(&path); if (i % 10000 == 0) fprintf(stderr, "search %d:%d\n", num, i); - ret = btrfs_search_slot(root, &ins, &path, 0, 0); + ret = btrfs_search_slot(trans, root, &ins, &path, 0, 0); if (ret) { btrfs_print_tree(root, root->node); printf("unable to find %d\n", num); @@ -134,7 +137,7 @@ int main(int ac, char **av) { int slot; ins.objectid = (u64)-1; btrfs_init_path(&path); - ret = btrfs_search_slot(root, &ins, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &ins, &path, -1, 1); if (ret == 0) BUG(); @@ -150,7 +153,7 @@ int main(int ac, char **av) { btrfs_disk_key_to_cpu(&last, &leaf->items[slot].key); if (tree_size % 10000 == 0) printf("big del %d:%d\n", tree_size, i); - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); if (ret != 0) { printf("del_item returned %d\n", ret); BUG(); @@ -165,7 +168,7 @@ int main(int ac, char **av) { printf("map before commit\n"); btrfs_print_tree(root->extent_root, root->extent_root->node); */ - btrfs_commit_transaction(root, &super); + btrfs_commit_transaction(trans, root, &super); printf("tree size is now %d\n", tree_size); printf("root %p commit root %p\n", root->node, root->commit_root); printf("map tree\n"); diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c index f05135f13f9a..3a38ae7a886d 100644 --- a/fs/btrfs/random-test.c +++ b/fs/btrfs/random-test.c @@ -6,6 +6,7 @@ #include "ctree.h" #include "disk-io.h" #include "print-tree.h" +#include "transaction.h" int keep_running = 1; struct btrfs_super_block super; @@ -37,7 +38,8 @@ again: return 0; } -static int ins_one(struct btrfs_root *root, struct radix_tree_root *radix) +static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix) { struct btrfs_path path; struct btrfs_key key; @@ -47,7 +49,7 @@ static int ins_one(struct btrfs_root *root, struct radix_tree_root *radix) btrfs_init_path(&path); ret = setup_key(radix, &key, 0); sprintf(buf, "str-%Lu\n", key.objectid); - ret = btrfs_insert_item(root, &key, buf, strlen(buf)); + ret = btrfs_insert_item(trans, root, &key, buf, strlen(buf)); if (ret) goto error; oid = (unsigned long)key.objectid; @@ -62,7 +64,8 @@ error: return -1; } -static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) +static int insert_dup(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix) { struct btrfs_path path; struct btrfs_key key; @@ -73,7 +76,7 @@ static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) if (ret < 0) return 0; sprintf(buf, "str-%Lu\n", key.objectid); - ret = btrfs_insert_item(root, &key, buf, strlen(buf)); + ret = btrfs_insert_item(trans, root, &key, buf, strlen(buf)); if (ret != -EEXIST) { printf("insert on %Lu gave us %d\n", key.objectid, ret); return 1; @@ -81,7 +84,8 @@ static int insert_dup(struct btrfs_root *root, struct radix_tree_root *radix) return 0; } -static int del_one(struct btrfs_root *root, struct radix_tree_root *radix) +static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix) { struct btrfs_path path; struct btrfs_key key; @@ -91,10 +95,10 @@ static int del_one(struct btrfs_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - ret = btrfs_search_slot(root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret) goto error; - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); btrfs_release_path(root, &path); if (ret != 0) goto error; @@ -107,7 +111,8 @@ error: return -1; } -static int lookup_item(struct btrfs_root *root, struct radix_tree_root *radix) +static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix) { struct btrfs_path path; struct btrfs_key key; @@ -116,7 +121,7 @@ static int lookup_item(struct btrfs_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 1); if (ret < 0) return 0; - ret = btrfs_search_slot(root, &key, &path, 0, 1); + ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); btrfs_release_path(root, &path); if (ret) goto error; @@ -126,7 +131,8 @@ error: return -1; } -static int lookup_enoent(struct btrfs_root *root, struct radix_tree_root *radix) +static int lookup_enoent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix) { struct btrfs_path path; struct btrfs_key key; @@ -135,7 +141,7 @@ static int lookup_enoent(struct btrfs_root *root, struct radix_tree_root *radix) ret = setup_key(radix, &key, 0); if (ret < 0) return ret; - ret = btrfs_search_slot(root, &key, &path, 0, 0); + ret = btrfs_search_slot(trans, root, &key, &path, 0, 0); btrfs_release_path(root, &path); if (ret <= 0) goto error; @@ -145,8 +151,8 @@ error: return -1; } -static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, - int nr) +static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct radix_tree_root *radix, int nr) { struct btrfs_path path; struct btrfs_key key; @@ -162,7 +168,7 @@ static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, key.objectid = (unsigned long)-1; while(nr-- >= 0) { btrfs_init_path(&path); - ret = btrfs_search_slot(root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); if (ret < 0) { btrfs_release_path(root, &path); return ret; @@ -177,7 +183,7 @@ static int empty_tree(struct btrfs_root *root, struct radix_tree_root *radix, slot = path.slots[0]; found = btrfs_disk_key_objectid( &path.nodes[0]->leaf.items[slot].key); - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); count++; if (ret) { fprintf(stderr, @@ -198,19 +204,19 @@ error: return -1; } -static int fill_tree(struct btrfs_root *root, struct radix_tree_root *radix, - int count) +static int fill_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix, int count) { int i; int ret = 0; for (i = 0; i < count; i++) { - ret = ins_one(root, radix); + ret = ins_one(trans, root, radix); if (ret) { fprintf(stderr, "fill failed\n"); goto out; } if (i % 1000 == 0) { - ret = btrfs_commit_transaction(root, &super); + ret = btrfs_commit_transaction(trans, root, &super); if (ret) { fprintf(stderr, "fill commit failed\n"); return ret; @@ -226,7 +232,8 @@ out: return ret; } -static int bulk_op(struct btrfs_root *root, struct radix_tree_root *radix) +static int bulk_op(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct radix_tree_root *radix) { int ret; int nr = rand() % 5000; @@ -235,17 +242,18 @@ static int bulk_op(struct btrfs_root *root, struct radix_tree_root *radix) /* do the bulk op much less frequently */ if (run_nr++ % 100) return 0; - ret = empty_tree(root, radix, nr); + ret = empty_tree(trans, root, radix, nr); if (ret) return ret; - ret = fill_tree(root, radix, nr); + ret = fill_tree(trans, root, radix, nr); if (ret) return ret; return 0; } -int (*ops[])(struct btrfs_root *root, struct radix_tree_root *radix) = +int (*ops[])(struct btrfs_trans_handle *, + struct btrfs_root *root, struct radix_tree_root *radix) = { ins_one, insert_dup, del_one, lookup_item, lookup_enoent, bulk_op }; @@ -264,7 +272,7 @@ static int fill_radix(struct btrfs_root *root, struct radix_tree_root *radix) key.objectid = (unsigned long)-1; while(1) { btrfs_init_path(&path); - ret = btrfs_search_slot(root, &key, &path, 0, 0); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (ret < 0) { btrfs_release_path(root, &path); return ret; @@ -325,6 +333,7 @@ int main(int ac, char **av) int init_fill_count = 800000; int err = 0; int initial_only = 0; + struct btrfs_trans_handle *trans; radix_tree_init(); root = open_ctree("dbfile", &super); fill_radix(root, &radix); @@ -346,7 +355,8 @@ int main(int ac, char **av) } } printf("initial fill\n"); - ret = fill_tree(root, &radix, init_fill_count); + trans = btrfs_start_transaction(root, 1); + ret = fill_tree(trans, root, &radix, init_fill_count); printf("starting run\n"); if (ret) { err = ret; @@ -370,7 +380,7 @@ int main(int ac, char **av) root = open_ctree("dbfile", &super); } while(count--) { - ret = ops[op](root, &radix); + ret = ops[op](trans, root, &radix); if (ret) { fprintf(stderr, "op %d failed %d:%d\n", op, i, iterations); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a24b47274037..9cccecc0f431 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -20,7 +20,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, search_key.offset = (u32)-1; btrfs_init_path(&path); - ret = btrfs_search_slot(root, &search_key, &path, 0, 0); + ret = btrfs_search_slot(NULL, root, &search_key, &path, 0, 0); if (ret < 0) goto out; BUG_ON(ret == 0); @@ -40,8 +40,9 @@ out: return ret; } -int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_root_item *item) +int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_root_item + *item) { struct btrfs_path path; struct btrfs_leaf *l; @@ -49,7 +50,7 @@ int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key, int slot; btrfs_init_path(&path); - ret = btrfs_search_slot(root, key, &path, 0, 1); + ret = btrfs_search_slot(trans, root, key, &path, 0, 1); if (ret < 0) goto out; BUG_ON(ret != 0); @@ -62,26 +63,28 @@ out: return ret; } -int btrfs_insert_root(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_root_item *item) +int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_key *key, struct btrfs_root_item + *item) { int ret; - ret = btrfs_insert_item(root, key, item, sizeof(*item)); + ret = btrfs_insert_item(trans, root, key, item, sizeof(*item)); BUG_ON(ret); return ret; } -int btrfs_del_root(struct btrfs_root *root, struct btrfs_key *key) +int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_key *key) { struct btrfs_path path; int ret; btrfs_init_path(&path); - ret = btrfs_search_slot(root, key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, key, &path, -1, 1); if (ret < 0) goto out; BUG_ON(ret != 0); - ret = btrfs_del_item(root, &path); + ret = btrfs_del_item(trans, root, &path); out: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h new file mode 100644 index 000000000000..3adb6e69fe43 --- /dev/null +++ b/fs/btrfs/transaction.h @@ -0,0 +1,27 @@ +#ifndef __TRANSACTION__ +#define __TRANSACTION__ + +struct btrfs_trans_handle { + u64 transid; + unsigned long blocks_reserved; + unsigned long blocks_used; +}; + +static inline struct btrfs_trans_handle * +btrfs_start_transaction(struct btrfs_root *root, int num_blocks) +{ + struct btrfs_trans_handle *h = malloc(sizeof(*h)); + h->transid = root->root_key.offset; + h->blocks_reserved = num_blocks; + h->blocks_used = 0; + return h; +} + +static inline void btrfs_free_transaction(struct btrfs_root *root, + struct btrfs_trans_handle *handle) +{ + memset(handle, 0, sizeof(*handle)); + free(handle); +} + +#endif From 4068947886f2d2a4c192941397bacd014a04b804 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 17 Mar 2007 14:29:23 -0400 Subject: [PATCH 0054/2985] Btrfs: minor comments Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 17a3ff2f1828..32922643b5bc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -294,6 +294,10 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root parent_buf = path->nodes[level + 1]; pslot = path->slots[level + 1]; + /* + * deal with the case where there is only one pointer in the root + * by promoting the node below to a root + */ if (!parent_buf) { struct btrfs_buffer *child; u64 blocknr = mid_buf->blocknr; @@ -1043,6 +1047,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root int ret; int wret; + /* first try to make some room by pushing left and right */ wret = push_leaf_left(trans, root, path, data_size); if (wret < 0) return wret; From 631d7d950a2f2706f5c86858402c1ec50b9801f1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Mar 2007 09:13:49 -0400 Subject: [PATCH 0055/2985] Btrfs: add transaction.h to the Makefile Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 76bcd87d8575..7953b41848c1 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,6 +1,7 @@ CC=gcc CFLAGS = -g -Wall -Werror -headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h +headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h \ + transaction.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o \ root-tree.o dir-item.o hash.o file-item.o inode-item.o From 9f5fae2fe6dc35b46bf56183f11398451851cb3f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Mar 2007 14:38:32 -0400 Subject: [PATCH 0056/2985] Btrfs: Add inode map, and the start of file extent items Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/ctree.h | 129 ++++++++++++++++++++++++++---- fs/btrfs/debug-tree.c | 9 ++- fs/btrfs/dir-test.c | 91 +++++++++++++++++---- fs/btrfs/disk-io.c | 174 ++++++++++++++++++++++------------------- fs/btrfs/extent-tree.c | 105 +++++++++++++------------ fs/btrfs/file-item.c | 6 ++ fs/btrfs/inode-map.c | 136 ++++++++++++++++++++++++++++++++ fs/btrfs/mkfs.c | 44 +++++++++-- fs/btrfs/print-tree.c | 13 ++- fs/btrfs/quick-test.c | 3 +- 11 files changed, 541 insertions(+), 172 deletions(-) create mode 100644 fs/btrfs/inode-map.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 7953b41848c1..636c63efde6f 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -3,7 +3,8 @@ CFLAGS = -g -Wall -Werror headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h \ transaction.h objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o \ - root-tree.o dir-item.o hash.o file-item.o inode-item.o + root-tree.o dir-item.o hash.o file-item.o inode-item.o \ + inode-map.o \ # if you don't have sparse installed, use ls instead CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 68f0af39777f..68e3da274f17 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -10,7 +10,8 @@ struct btrfs_trans_handle; #define BTRFS_ROOT_TREE_OBJECTID 1 #define BTRFS_EXTENT_TREE_OBJECTID 2 -#define BTRFS_FS_TREE_OBJECTID 3 +#define BTRFS_INODE_MAP_OBJECTID 3 +#define BTRFS_FS_TREE_OBJECTID 4 /* * the key defines the order in the tree, and so it also defines (optimal) @@ -178,6 +179,49 @@ struct btrfs_root_item { __le64 block_limit; __le64 blocks_used; __le32 refs; +} __attribute__ ((__packed__)); + +struct btrfs_file_extent_item { + /* + * disk space consumed by the extent, checksum blocks are included + * in these numbers + */ + __le64 disk_blocknr; + __le64 disk_num_blocks; + /* + * the logical offset in file bytes (no csums) + * this extent record is for. This allows a file extent to point + * into the middle of an existing extent on disk, sharing it + * between two snapshots (useful if some bytes in the middle of the + * extent have changed + */ + __le64 offset; + /* + * the logical number of file blocks (no csums included) + */ + __le64 num_blocks; +} __attribute__ ((__packed__)); + +struct btrfs_inode_map_item { + struct btrfs_disk_key key; +} __attribute__ ((__packed__)); + +struct btrfs_fs_info { + struct btrfs_root *fs_root; + struct btrfs_root *extent_root; + struct btrfs_root *tree_root; + struct btrfs_root *inode_root; + struct btrfs_key current_insert; + struct btrfs_key last_insert; + struct radix_tree_root cache_radix; + struct radix_tree_root pinned_radix; + struct list_head trans; + struct list_head cache; + u64 last_inode_alloc; + u64 last_inode_alloc_dirid; + int cache_size; + int fp; + struct btrfs_trans_handle *running_transaction; }; /* @@ -188,21 +232,12 @@ struct btrfs_root_item { struct btrfs_root { struct btrfs_buffer *node; struct btrfs_buffer *commit_root; - struct btrfs_root *extent_root; - struct btrfs_root *tree_root; - struct btrfs_key current_insert; - struct btrfs_key last_insert; - int fp; - struct radix_tree_root cache_radix; - struct radix_tree_root pinned_radix; - struct list_head trans; - struct list_head cache; - int cache_size; - int ref_cows; struct btrfs_root_item root_item; struct btrfs_key root_key; + struct btrfs_fs_info *fs_info; u32 blocksize; - struct btrfs_trans_handle *running_transaction; + int ref_cows; + u32 type; }; /* the lower bits in the key flags defines the item type */ @@ -240,11 +275,17 @@ struct btrfs_root { * are used, and how many references there are to each block */ #define BTRFS_EXTENT_ITEM_KEY 6 + +/* + * the inode map records which inode numbers are in use and where + * they actually live on disk + */ +#define BTRFS_INODE_MAP_ITEM_KEY 7 /* * string items are for debugging. They just store a short string of * data in the FS */ -#define BTRFS_STRING_ITEM_KEY 7 +#define BTRFS_STRING_ITEM_KEY 8 static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) { @@ -654,6 +695,57 @@ static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) { return (u8 *)l->items; } + +static inline u64 btrfs_file_extent_disk_blocknr(struct btrfs_file_extent_item + *e) +{ + return le64_to_cpu(e->disk_blocknr); +} + +static inline void btrfs_set_file_extent_disk_blocknr(struct + btrfs_file_extent_item + *e, u64 val) +{ + e->disk_blocknr = cpu_to_le64(val); +} + +static inline u64 btrfs_file_extent_disk_num_blocks(struct + btrfs_file_extent_item *e) +{ + return le64_to_cpu(e->disk_num_blocks); +} + +static inline void btrfs_set_file_extent_disk_num_blocks(struct + btrfs_file_extent_item + *e, u64 val) +{ + e->disk_num_blocks = cpu_to_le64(val); +} + +static inline u64 btrfs_file_extent_offset(struct btrfs_file_extent_item *e) +{ + return le64_to_cpu(e->offset); +} + +static inline void btrfs_set_file_extent_offset(struct btrfs_file_extent_item + *e, u64 val) +{ + e->offset = cpu_to_le64(val); +} + +static inline u64 btrfs_file_extent_num_blocks(struct btrfs_file_extent_item + *e) +{ + return le64_to_cpu(e->num_blocks); +} + +static inline void btrfs_set_file_extent_num_blocks(struct + btrfs_file_extent_item *e, + u64 val) +{ + e->num_blocks = cpu_to_le64(val); +} + /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -701,4 +793,13 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int name_len, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, char *name, int name_len); +int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, + struct btrfs_root *fs_root, + u64 dirid, u64 *objectid); +int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, struct btrfs_key *location); +int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u64 objectid, int mod); #endif diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c index 91dea7a0a476..d9f36efc3dd1 100644 --- a/fs/btrfs/debug-tree.c +++ b/fs/btrfs/debug-tree.c @@ -15,8 +15,13 @@ int main(int ac, char **av) { printf("fs tree\n"); btrfs_print_tree(root, root->node); printf("map tree\n"); - btrfs_print_tree(root->extent_root, root->extent_root->node); + btrfs_print_tree(root->fs_info->extent_root, + root->fs_info->extent_root->node); + printf("inode tree\n"); + btrfs_print_tree(root->fs_info->inode_root, + root->fs_info->inode_root->node); printf("root tree\n"); - btrfs_print_tree(root->tree_root, root->tree_root->node); + btrfs_print_tree(root->fs_info->tree_root, + root->fs_info->tree_root->node); return 0; } diff --git a/fs/btrfs/dir-test.c b/fs/btrfs/dir-test.c index e908c0c588cc..56f06c3ba573 100644 --- a/fs/btrfs/dir-test.c +++ b/fs/btrfs/dir-test.c @@ -45,13 +45,26 @@ static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, int ret; char buf[128]; unsigned long oid; + u64 objectid; struct btrfs_path path; + struct btrfs_key inode_map; find_num(radix, &oid, 0); sprintf(buf, "str-%lu", oid); + ret = btrfs_find_free_objectid(trans, root, dir_oid + 1, &objectid); + if (ret) + goto error; + + inode_map.objectid = objectid; + inode_map.flags = 0; + inode_map.offset = 0; + + ret = btrfs_insert_inode_map(trans, root, objectid, &inode_map); + if (ret) + goto error; ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid, - file_oid, 1); + objectid, 1); if (ret) goto error; @@ -120,6 +133,53 @@ static int insert_dup(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +static int del_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct radix_tree_root *radix, + unsigned long radix_index, + struct btrfs_path *path) +{ + int ret; + unsigned long *ptr; + u64 file_objectid; + struct btrfs_dir_item *di; + struct btrfs_path map_path; + + /* find the inode number of the file */ + di = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0], + struct btrfs_dir_item); + file_objectid = btrfs_dir_objectid(di); + + /* delete the directory item */ + ret = btrfs_del_item(trans, root, path); + if (ret) + goto out; + + /* delete the inode mapping */ + btrfs_init_path(&map_path); + ret = btrfs_lookup_inode_map(trans, root, &map_path, file_objectid, -1); + if (ret) + goto out_release; + ret = btrfs_del_item(trans, root->fs_info->inode_root, &map_path); + if (ret) + goto out_release; + + if (root->fs_info->last_inode_alloc > file_objectid) + root->fs_info->last_inode_alloc = file_objectid; + btrfs_release_path(root, &map_path); + ptr = radix_tree_delete(radix, radix_index); + if (!ptr) { + ret = -5555; + goto out; + } + return 0; +out_release: + btrfs_release_path(root, &map_path); +out: + printf("failed to delete %lu %d\n", radix_index, ret); + return -1; +} + static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct radix_tree_root *radix) { @@ -127,7 +187,6 @@ static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, char buf[128]; unsigned long oid; struct btrfs_path path; - unsigned long *ptr; ret = find_num(radix, &oid, 1); if (ret < 0) @@ -138,19 +197,14 @@ static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, strlen(buf), -1); if (ret) goto out_release; - ret = btrfs_del_item(trans, root, &path); + + ret = del_dir_item(trans, root, radix, oid, &path); if (ret) goto out_release; btrfs_release_path(root, &path); - ptr = radix_tree_delete(radix, oid); - if (!ptr) { - ret = -5555; - goto out; - } - return 0; + return ret; out_release: btrfs_release_path(root, &path); -out: printf("failed to delete %lu %d\n", oid, ret); return -1; } @@ -162,6 +216,8 @@ static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root char buf[128]; int ret; unsigned long oid; + u64 objectid; + struct btrfs_dir_item *di; ret = find_num(radix, &oid, 1); if (ret < 0) @@ -170,6 +226,14 @@ static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_init_path(&path); ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, strlen(buf), 0); + if (!ret) { + di = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + struct btrfs_dir_item); + objectid = btrfs_dir_objectid(di); + btrfs_release_path(root, &path); + btrfs_init_path(&path); + ret = btrfs_lookup_inode_map(trans, root, &path, objectid, 0); + } btrfs_release_path(root, &path); if (ret) { printf("unable to find key %lu\n", oid); @@ -210,7 +274,6 @@ static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root u32 found_len; int ret; int slot; - int *ptr; int count = 0; char buf[128]; struct btrfs_dir_item *di; @@ -241,7 +304,7 @@ static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(found_len > 128); buf[found_len] = '\0'; found = atoi(buf + 4); - ret = btrfs_del_item(trans, root, &path); + ret = del_dir_item(trans, root, radix, found, &path); count++; if (ret) { fprintf(stderr, @@ -250,14 +313,10 @@ static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root return -1; } btrfs_release_path(root, &path); - ptr = radix_tree_delete(radix, found); - if (!ptr) - goto error; if (!keep_running) break; } return 0; -error: fprintf(stderr, "failed to delete from the radix %lu\n", found); return -1; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 25ce07908ee3..1849a99690c8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,15 +28,15 @@ static int free_some_buffers(struct btrfs_root *root) { struct list_head *node, *next; struct btrfs_buffer *b; - if (root->cache_size < cache_max) + if (root->fs_info->cache_size < cache_max) return 0; - list_for_each_safe(node, next, &root->cache) { + list_for_each_safe(node, next, &root->fs_info->cache) { b = list_entry(node, struct btrfs_buffer, cache); if (b->count == 1) { BUG_ON(!list_empty(&b->dirty)); list_del_init(&b->cache); btrfs_block_release(root, b); - if (root->cache_size < cache_max) + if (root->fs_info->cache_size < cache_max) break; } } @@ -57,10 +57,10 @@ struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) INIT_LIST_HEAD(&buf->dirty); free_some_buffers(root); radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(&root->cache_radix, blocknr, buf); + ret = radix_tree_insert(&root->fs_info->cache_radix, blocknr, buf); radix_tree_preload_end(); - list_add_tail(&buf->cache, &root->cache); - root->cache_size++; + list_add_tail(&buf->cache, &root->fs_info->cache); + root->fs_info->cache_size++; if (ret) { free(buf); return NULL; @@ -71,7 +71,7 @@ struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) { struct btrfs_buffer *buf; - buf = radix_tree_lookup(&root->cache_radix, blocknr); + buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); if (buf) { buf->count++; } else { @@ -90,14 +90,15 @@ struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) struct btrfs_buffer *buf; int ret; - buf = radix_tree_lookup(&root->cache_radix, blocknr); + buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); if (buf) { buf->count++; } else { buf = alloc_tree_block(root, blocknr); if (!buf) return NULL; - ret = pread(root->fp, &buf->node, root->blocksize, offset); + ret = pread(root->fs_info->fp, &buf->node, root->blocksize, + offset); if (ret != root->blocksize) { free(buf); return NULL; @@ -113,7 +114,7 @@ int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, { if (!list_empty(&buf->dirty)) return 0; - list_add_tail(&buf->dirty, &root->trans); + list_add_tail(&buf->dirty, &root->fs_info->trans); buf->count++; return 0; } @@ -137,7 +138,7 @@ int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); - ret = pwrite(root->fp, &buf->node, root->blocksize, offset); + ret = pwrite(root->fs_info->fp, &buf->node, root->blocksize, offset); if (ret != root->blocksize) return ret; return 0; @@ -149,8 +150,9 @@ static int __commit_transaction(struct btrfs_trans_handle *trans, struct struct btrfs_buffer *b; int ret = 0; int wret; - while(!list_empty(&root->trans)) { - b = list_entry(root->trans.next, struct btrfs_buffer, dirty); + while(!list_empty(&root->fs_info->trans)) { + b = list_entry(root->fs_info->trans.next, struct btrfs_buffer, + dirty); list_del_init(&b->dirty); wret = write_tree_block(trans, root, b); if (wret) @@ -160,13 +162,21 @@ static int __commit_transaction(struct btrfs_trans_handle *trans, struct return ret; } -static int commit_extent_and_tree_roots(struct btrfs_trans_handle *trans, - struct btrfs_root *tree_root, struct - btrfs_root *extent_root) +static int commit_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { int ret; u64 old_extent_block; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *inode_root = fs_info->inode_root; + btrfs_set_root_blocknr(&inode_root->root_item, + inode_root->node->blocknr); + ret = btrfs_update_root(trans, tree_root, + &inode_root->root_key, + &inode_root->root_item); + BUG_ON(ret); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); if (old_extent_block == extent_root->node->blocknr) @@ -178,8 +188,6 @@ static int commit_extent_and_tree_roots(struct btrfs_trans_handle *trans, &extent_root->root_item); BUG_ON(ret); } - __commit_transaction(trans, extent_root); - __commit_transaction(trans, tree_root); return 0; } @@ -190,9 +198,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct struct btrfs_buffer *snap = root->commit_root; struct btrfs_key snap_key; - ret = __commit_transaction(trans, root); - BUG_ON(ret); - if (root->commit_root == root->node) return 0; @@ -200,54 +205,55 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct root->root_key.offset++; btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); - ret = btrfs_insert_root(trans, root->tree_root, &root->root_key, - &root->root_item); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, + &root->root_key, &root->root_item); BUG_ON(ret); - ret = commit_extent_and_tree_roots(trans, root->tree_root, - root->extent_root); + ret = commit_tree_roots(trans, root->fs_info); + BUG_ON(ret); + + ret = __commit_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root, s); - btrfs_finish_extent_commit(trans, root->extent_root); - btrfs_finish_extent_commit(trans, root->tree_root); + btrfs_finish_extent_commit(trans, root->fs_info->extent_root); + btrfs_finish_extent_commit(trans, root->fs_info->tree_root); root->commit_root = root->node; root->node->count++; ret = btrfs_drop_snapshot(trans, root, snap); BUG_ON(ret); - ret = btrfs_del_root(trans, root->tree_root, &snap_key); + ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); BUG_ON(ret); return ret; } static int __setup_root(struct btrfs_super_block *super, - struct btrfs_root *root, u64 objectid, int fp) + struct btrfs_root *root, + struct btrfs_fs_info *fs_info, + u64 objectid, int fp) { - INIT_LIST_HEAD(&root->trans); - INIT_LIST_HEAD(&root->cache); - root->cache_size = 0; - root->fp = fp; root->node = NULL; root->commit_root = NULL; root->blocksize = btrfs_super_blocksize(super); root->ref_cows = 0; - memset(&root->current_insert, 0, sizeof(root->current_insert)); - memset(&root->last_insert, 0, sizeof(root->last_insert)); + root->fs_info = fs_info; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); return 0; } static int find_and_setup_root(struct btrfs_super_block *super, - struct btrfs_root *tree_root, u64 objectid, + struct btrfs_root *tree_root, + struct btrfs_fs_info *fs_info, + u64 objectid, struct btrfs_root *root, int fp) { int ret; - __setup_root(super, root, objectid, fp); + __setup_root(super, root, fs_info, objectid, fp); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -263,29 +269,31 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); + struct btrfs_root *inode_root = malloc(sizeof(struct btrfs_root)); + struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info)); int fp; int ret; - root->extent_root = extent_root; - root->tree_root = tree_root; - - extent_root->extent_root = extent_root; - extent_root->tree_root = tree_root; - - tree_root->extent_root = extent_root; - tree_root->tree_root = tree_root; - fp = open(filename, O_CREAT | O_RDWR, 0600); if (fp < 0) { free(root); return NULL; } - INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); - INIT_RADIX_TREE(&root->pinned_radix, GFP_KERNEL); - INIT_RADIX_TREE(&extent_root->pinned_radix, GFP_KERNEL); - INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); - INIT_RADIX_TREE(&tree_root->pinned_radix, GFP_KERNEL); - INIT_RADIX_TREE(&tree_root->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); + INIT_LIST_HEAD(&fs_info->trans); + INIT_LIST_HEAD(&fs_info->cache); + fs_info->cache_size = 0; + fs_info->fp = fp; + fs_info->running_transaction = NULL; + fs_info->fs_root = root; + fs_info->tree_root = tree_root; + fs_info->extent_root = extent_root; + fs_info->inode_root = inode_root; + fs_info->last_inode_alloc = 0; + fs_info->last_inode_alloc_dirid = 0; + memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); + memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); ret = pread(fp, super, sizeof(struct btrfs_super_block), BTRFS_SUPER_INFO_OFFSET); @@ -301,16 +309,20 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) } BUG_ON(ret < 0); - __setup_root(super, tree_root, BTRFS_ROOT_TREE_OBJECTID, fp); + __setup_root(super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID, fp); tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(super, tree_root, BTRFS_EXTENT_TREE_OBJECTID, - extent_root, fp); + ret = find_and_setup_root(super, tree_root, fs_info, + BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp); BUG_ON(ret); - ret = find_and_setup_root(super, tree_root, BTRFS_FS_TREE_OBJECTID, - root, fp); + ret = find_and_setup_root(super, tree_root, fs_info, + BTRFS_INODE_MAP_OBJECTID, inode_root, fp); + BUG_ON(ret); + + ret = find_and_setup_root(super, tree_root, fs_info, + BTRFS_FS_TREE_OBJECTID, root, fp); BUG_ON(ret); root->commit_root = root->node; @@ -323,8 +335,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s) { int ret; - btrfs_set_super_root(s, root->tree_root->node->blocknr); - ret = pwrite(root->fp, s, sizeof(*s), + btrfs_set_super_root(s, root->fs_info->tree_root->node->blocknr); + ret = pwrite(root->fs_info->fp, s, sizeof(*s), BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(*s)) { fprintf(stderr, "failed to write new super block err %d\n", ret); @@ -335,9 +347,10 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root static int drop_cache(struct btrfs_root *root) { - while(!list_empty(&root->cache)) { - struct btrfs_buffer *b = list_entry(root->cache.next, - struct btrfs_buffer, cache); + while(!list_empty(&root->fs_info->cache)) { + struct btrfs_buffer *b = list_entry(root->fs_info->cache.next, + struct btrfs_buffer, + cache); list_del_init(&b->cache); btrfs_block_release(root, b); } @@ -348,26 +361,28 @@ int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) int ret; struct btrfs_trans_handle *trans; - trans = root->running_transaction; + trans = root->fs_info->running_transaction; btrfs_commit_transaction(trans, root, s); - ret = commit_extent_and_tree_roots(trans, root->tree_root, - root->extent_root); + ret = commit_tree_roots(trans, root->fs_info); + BUG_ON(ret); + ret = __commit_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root, s); - drop_cache(root->extent_root); - drop_cache(root->tree_root); drop_cache(root); - BUG_ON(!list_empty(&root->trans)); - BUG_ON(!list_empty(&root->extent_root->trans)); - BUG_ON(!list_empty(&root->tree_root->trans)); + BUG_ON(!list_empty(&root->fs_info->trans)); - close(root->fp); + close(root->fs_info->fp); if (root->node) btrfs_block_release(root, root->node); - if (root->extent_root->node) - btrfs_block_release(root->extent_root, root->extent_root->node); - if (root->tree_root->node) - btrfs_block_release(root->tree_root, root->tree_root->node); + if (root->fs_info->extent_root->node) + btrfs_block_release(root->fs_info->extent_root, + root->fs_info->extent_root->node); + if (root->fs_info->inode_root->node) + btrfs_block_release(root->fs_info->inode_root, + root->fs_info->inode_root->node); + if (root->fs_info->tree_root->node) + btrfs_block_release(root->fs_info->tree_root, + root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); free(root); printf("on close %d blocks are allocated\n", allocated_blocks); @@ -382,15 +397,16 @@ void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf) if (buf->count == 0) { BUG_ON(!list_empty(&buf->cache)); BUG_ON(!list_empty(&buf->dirty)); - if (!radix_tree_lookup(&root->cache_radix, buf->blocknr)) + if (!radix_tree_lookup(&root->fs_info->cache_radix, + buf->blocknr)) BUG(); - radix_tree_delete(&root->cache_radix, buf->blocknr); + radix_tree_delete(&root->fs_info->cache_radix, buf->blocknr); memset(buf, 0, sizeof(*buf)); free(buf); BUG_ON(allocated_blocks == 0); allocated_blocks--; - BUG_ON(root->cache_size == 0); - root->cache_size--; + BUG_ON(root->fs_info->cache_size == 0); + root->fs_info->cache_size--; } } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c29b92d440e0..09eeeb4d9d28 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -35,13 +35,15 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key ins; u32 refs; - find_free_extent(trans, root->extent_root, 0, 0, (u64)-1, &ins); + find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, + &ins); btrfs_init_path(&path); key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = 1; - ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 1); + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, + 0, 1); if (ret != 0) BUG(); BUG_ON(ret != 0); @@ -51,9 +53,9 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_extent_refs(item, refs + 1); BUG_ON(list_empty(&path.nodes[0]->dirty)); - btrfs_release_path(root->extent_root, &path); - finish_current_insert(trans, root->extent_root); - run_pending(trans, root->extent_root); + btrfs_release_path(root->fs_info->extent_root, &path); + finish_current_insert(trans, root->fs_info->extent_root); + run_pending(trans, root->fs_info->extent_root); return 0; } @@ -70,13 +72,14 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root key.offset = 1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 0); + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, + 0, 0); if (ret != 0) BUG(); l = &path.nodes[0]->leaf; item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); - btrfs_release_path(root->extent_root, &path); + btrfs_release_path(root->fs_info->extent_root, &path); return 0; } @@ -107,19 +110,20 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct int i; while(1) { - ret = radix_tree_gang_lookup(&root->pinned_radix, - (void **)gang, 0, - ARRAY_SIZE(gang)); + ret = radix_tree_gang_lookup(&root->fs_info->pinned_radix, + (void **)gang, 0, + ARRAY_SIZE(gang)); if (!ret) break; if (!first) first = gang[0]; for (i = 0; i < ret; i++) { - radix_tree_delete(&root->pinned_radix, gang[i]); + radix_tree_delete(&root->fs_info->pinned_radix, + gang[i]); } } - root->last_insert.objectid = first; - root->last_insert.offset = 0; + root->fs_info->last_insert.objectid = first; + root->fs_info->last_insert.offset = 0; return 0; } @@ -138,13 +142,14 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); - for (i = 0; i < extent_root->current_insert.flags; i++) { - ins.objectid = extent_root->current_insert.objectid + i; + for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) { + ins.objectid = extent_root->fs_info->current_insert.objectid + + i; ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, sizeof(extent_item)); BUG_ON(ret); } - extent_root->current_insert.offset = 0; + extent_root->fs_info->current_insert.offset = 0; return 0; } @@ -156,7 +161,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root { struct btrfs_path path; struct btrfs_key key; - struct btrfs_root *extent_root = root->extent_root; + struct btrfs_root *extent_root = root->fs_info->extent_root; int ret; struct btrfs_extent_item *ei; struct btrfs_key ins; @@ -186,14 +191,16 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (pin) { int err; radix_tree_preload(GFP_KERNEL); - err = radix_tree_insert(&extent_root->pinned_radix, - blocknr, (void *)blocknr); + err = radix_tree_insert( + &extent_root->fs_info->pinned_radix, + blocknr, (void *)blocknr); BUG_ON(err); radix_tree_preload_end(); } ret = btrfs_del_item(trans, extent_root, &path); - if (!pin && extent_root->last_insert.objectid > blocknr) - extent_root->last_insert.objectid = blocknr; + if (!pin && extent_root->fs_info->last_insert.objectid > + blocknr) + extent_root->fs_info->last_insert.objectid = blocknr; if (ret) BUG(); } @@ -214,18 +221,19 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int i; while(1) { - ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix, - (void **)gang, 0, - ARRAY_SIZE(gang), - CTREE_EXTENT_PENDING_DEL); + ret = radix_tree_gang_lookup_tag( + &extent_root->fs_info->cache_radix, + (void **)gang, 0, + ARRAY_SIZE(gang), + CTREE_EXTENT_PENDING_DEL); if (!ret) break; for (i = 0; i < ret; i++) { ret = __free_extent(trans, extent_root, gang[i]->blocknr, 1, 1); - radix_tree_tag_clear(&extent_root->cache_radix, - gang[i]->blocknr, - CTREE_EXTENT_PENDING_DEL); + radix_tree_tag_clear(&extent_root->fs_info->cache_radix, + gang[i]->blocknr, + CTREE_EXTENT_PENDING_DEL); btrfs_block_release(extent_root, gang[i]); } } @@ -235,8 +243,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root) { - while(radix_tree_tagged(&extent_root->cache_radix, - CTREE_EXTENT_PENDING_DEL)) + while(radix_tree_tagged(&extent_root->fs_info->cache_radix, + CTREE_EXTENT_PENDING_DEL)) del_pending_extents(trans, extent_root); return 0; } @@ -248,19 +256,19 @@ static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin) { - struct btrfs_root *extent_root = root->extent_root; + struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_buffer *t; int pending_ret; int ret; if (root == extent_root) { t = find_tree_block(root, blocknr); - radix_tree_tag_set(&root->cache_radix, blocknr, + radix_tree_tag_set(&root->fs_info->cache_radix, blocknr, CTREE_EXTENT_PENDING_DEL); return 0; } ret = __free_extent(trans, root, blocknr, num_blocks, pin); - pending_ret = run_pending(trans, root->extent_root); + pending_ret = run_pending(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } @@ -285,12 +293,12 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 test_block; int start_found; struct btrfs_leaf *l; - struct btrfs_root * root = orig_root->extent_root; + struct btrfs_root * root = orig_root->fs_info->extent_root; int total_needed = num_blocks; total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3; - if (root->last_insert.objectid > search_start) - search_start = root->last_insert.objectid; + if (root->fs_info->last_insert.objectid > search_start) + search_start = root->fs_info->last_insert.objectid; ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); @@ -353,16 +361,17 @@ check_pending: BUG_ON(ins->objectid < search_start); for (test_block = ins->objectid; test_block < ins->objectid + total_needed; test_block++) { - if (radix_tree_lookup(&root->pinned_radix, test_block)) { + if (radix_tree_lookup(&root->fs_info->pinned_radix, + test_block)) { search_start = test_block + 1; goto check_failed; } } - BUG_ON(root->current_insert.offset); - root->current_insert.offset = total_needed - num_blocks; - root->current_insert.objectid = ins->objectid + num_blocks; - root->current_insert.flags = 0; - root->last_insert.objectid = ins->objectid; + BUG_ON(root->fs_info->current_insert.offset); + root->fs_info->current_insert.offset = total_needed - num_blocks; + root->fs_info->current_insert.objectid = ins->objectid + num_blocks; + root->fs_info->current_insert.flags = 0; + root->fs_info->last_insert.objectid = ins->objectid; ins->offset = num_blocks; return 0; error: @@ -383,20 +392,20 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root { int ret; int pending_ret; - struct btrfs_root *extent_root = root->extent_root; + struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_extent_item extent_item; btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, owner); if (root == extent_root) { - BUG_ON(extent_root->current_insert.offset == 0); + BUG_ON(extent_root->fs_info->current_insert.offset == 0); BUG_ON(num_blocks != 1); - BUG_ON(extent_root->current_insert.flags == - extent_root->current_insert.offset); + BUG_ON(extent_root->fs_info->current_insert.flags == + extent_root->fs_info->current_insert.offset); ins->offset = 1; - ins->objectid = extent_root->current_insert.objectid + - extent_root->current_insert.flags++; + ins->objectid = extent_root->fs_info->current_insert.objectid + + extent_root->fs_info->current_insert.flags++; return 0; } ret = find_free_extent(trans, root, num_blocks, search_start, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f5b36c732c5e..24cfd6d85240 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -4,4 +4,10 @@ #include "radix-tree.h" #include "ctree.h" #include "disk-io.h" +#include "transaction.h" +int btrfs_create_file(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 dirid, u64 *objectid) +{ + return 0; +} diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c new file mode 100644 index 000000000000..f412b3392135 --- /dev/null +++ b/fs/btrfs/inode-map.c @@ -0,0 +1,136 @@ +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" + +/* + * walks the btree of allocated inodes and find a hole. + */ +int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, + struct btrfs_root *fs_root, + u64 dirid, u64 *objectid) +{ + struct btrfs_path path; + struct btrfs_key key; + int ret; + u64 hole_size = 0; + int slot = 0; + u64 last_ino; + int start_found; + struct btrfs_leaf *l; + struct btrfs_root *root = fs_root->fs_info->inode_root; + struct btrfs_key search_key; + u64 search_start = dirid; + + if (fs_root->fs_info->last_inode_alloc_dirid == dirid) + search_start = fs_root->fs_info->last_inode_alloc; + + search_key.objectid = search_start; + search_key.flags = 0; + btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); + search_key.offset = 0; + + btrfs_init_path(&path); + start_found = 0; + ret = btrfs_search_slot(trans, root, &search_key, &path, 0, 0); + if (ret < 0) + goto error; + + if (path.slots[0] > 0) + path.slots[0]--; + + while (1) { + l = &path.nodes[0]->leaf; + slot = path.slots[0]; + if (slot >= btrfs_header_nritems(&l->header)) { + ret = btrfs_next_leaf(root, &path); + if (ret == 0) + continue; + if (ret < 0) + goto error; + if (!start_found) { + *objectid = search_start; + start_found = 1; + goto found; + } + *objectid = last_ino > search_start ? + last_ino : search_start; + goto found; + } + btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + if (key.objectid >= search_start) { + if (start_found) { + if (last_ino < search_start) + last_ino = search_start; + hole_size = key.objectid - last_ino; + if (hole_size > 0) { + *objectid = last_ino; + goto found; + } + } + } + start_found = 1; + last_ino = key.objectid + 1; + path.slots[0]++; + } + // FIXME -ENOSPC +found: + root->fs_info->last_inode_alloc = *objectid; + root->fs_info->last_inode_alloc_dirid = dirid; + btrfs_release_path(root, &path); + BUG_ON(*objectid < search_start); + return 0; +error: + btrfs_release_path(root, &path); + return ret; +} + +int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, + struct btrfs_root *fs_root, + u64 objectid, struct btrfs_key *location) +{ + int ret = 0; + struct btrfs_path path; + struct btrfs_inode_map_item *inode_item; + struct btrfs_key key; + struct btrfs_root *inode_root = fs_root->fs_info->inode_root; + + key.objectid = objectid; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INODE_MAP_ITEM_KEY); + key.offset = 0; + btrfs_init_path(&path); + ret = btrfs_insert_empty_item(trans, inode_root, &path, &key, + sizeof(struct btrfs_inode_map_item)); + if (ret) + goto out; + + inode_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + struct btrfs_inode_map_item); + btrfs_cpu_key_to_disk(&inode_item->key, location); +out: + btrfs_release_path(inode_root, &path); + return ret; +} + +int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, + struct btrfs_root *fs_root, struct btrfs_path *path, + u64 objectid, int mod) +{ + int ret; + struct btrfs_key key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + struct btrfs_root *inode_root = fs_root->fs_info->inode_root; + + key.objectid = objectid; + key.flags = 0; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_INODE_MAP_ITEM_KEY); + ret = btrfs_search_slot(trans, inode_root, &key, path, ins_len, cow); + return ret; +} + diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index 9aa900811c33..1cac5ab114dd 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -42,7 +42,7 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) btrfs_set_header_parentid(&empty_leaf->header, BTRFS_ROOT_TREE_OBJECTID); btrfs_set_header_blocknr(&empty_leaf->header, start_block + 1); - btrfs_set_header_nritems(&empty_leaf->header, 2); + btrfs_set_header_nritems(&empty_leaf->header, 3); /* create the items for the root tree */ btrfs_set_root_blocknr(&root_item, start_block + 2); @@ -61,8 +61,16 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) btrfs_set_root_blocknr(&root_item, start_block + 3); itemoff = itemoff - sizeof(root_item); btrfs_set_item_offset(&item, itemoff); - btrfs_set_disk_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&item.key, BTRFS_INODE_MAP_OBJECTID); memcpy(empty_leaf->items + 1, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + itemoff, + &root_item, sizeof(root_item)); + + btrfs_set_root_blocknr(&root_item, start_block + 4); + itemoff = itemoff - sizeof(root_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_disk_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); + memcpy(empty_leaf->items + 2, &item, sizeof(item)); memcpy(btrfs_leaf_data(empty_leaf) + itemoff, &root_item, sizeof(root_item)); ret = pwrite(fd, empty_leaf, blocksize, (start_block + 1) * blocksize); @@ -71,7 +79,7 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) btrfs_set_header_parentid(&empty_leaf->header, BTRFS_EXTENT_TREE_OBJECTID); btrfs_set_header_blocknr(&empty_leaf->header, start_block + 2); - btrfs_set_header_nritems(&empty_leaf->header, 4); + btrfs_set_header_nritems(&empty_leaf->header, 5); /* item1, reserve blocks 0-16 */ btrfs_set_disk_key_objectid(&item.key, 0); @@ -108,12 +116,12 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), &extent_item, btrfs_item_size(&item)); - /* item4, give block 19 to the FS root */ + /* item4, give block 19 to the inode map */ btrfs_set_disk_key_objectid(&item.key, start_block + 3); btrfs_set_disk_key_offset(&item.key, 1); itemoff = itemoff - sizeof(struct btrfs_extent_item); btrfs_set_item_offset(&item, itemoff); - btrfs_set_extent_owner(&extent_item, BTRFS_FS_TREE_OBJECTID); + btrfs_set_extent_owner(&extent_item, BTRFS_INODE_MAP_OBJECTID); memcpy(empty_leaf->items + 3, &item, sizeof(item)); memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), &extent_item, btrfs_item_size(&item)); @@ -121,11 +129,33 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) if (ret != blocksize) return -1; - /* finally create the FS root */ - btrfs_set_header_parentid(&empty_leaf->header, BTRFS_FS_TREE_OBJECTID); + /* item5, give block 20 to the FS root */ + btrfs_set_disk_key_objectid(&item.key, start_block + 4); + btrfs_set_disk_key_offset(&item.key, 1); + itemoff = itemoff - sizeof(struct btrfs_extent_item); + btrfs_set_item_offset(&item, itemoff); + btrfs_set_extent_owner(&extent_item, BTRFS_FS_TREE_OBJECTID); + memcpy(empty_leaf->items + 4, &item, sizeof(item)); + memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), + &extent_item, btrfs_item_size(&item)); + ret = pwrite(fd, empty_leaf, blocksize, (start_block + 2) * blocksize); + if (ret != blocksize) + return -1; + + /* create the inode map */ + btrfs_set_header_parentid(&empty_leaf->header, + BTRFS_INODE_MAP_OBJECTID); btrfs_set_header_blocknr(&empty_leaf->header, start_block + 3); btrfs_set_header_nritems(&empty_leaf->header, 0); ret = pwrite(fd, empty_leaf, blocksize, (start_block + 3) * blocksize); + if (ret != blocksize) + return -1; + + /* finally create the FS root */ + btrfs_set_header_parentid(&empty_leaf->header, BTRFS_FS_TREE_OBJECTID); + btrfs_set_header_blocknr(&empty_leaf->header, start_block + 4); + btrfs_set_header_nritems(&empty_leaf->header, 0); + ret = pwrite(fd, empty_leaf, blocksize, (start_block + 4) * blocksize); if (ret != blocksize) return -1; return 0; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 0bb5c38427c9..f250e5fad773 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -13,8 +13,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_extent_item *ei; struct btrfs_root_item *ri; struct btrfs_dir_item *di; + struct btrfs_inode_map_item *mi; u32 type; - u32 namelen; printf("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, @@ -34,15 +34,13 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) case BTRFS_INODE_ITEM_KEY: break; case BTRFS_DIR_ITEM_KEY: - namelen = btrfs_item_size(l->items + i) - sizeof(*di); di = btrfs_item_ptr(l, i, struct btrfs_dir_item); printf("\t\tdir oid %Lu flags %u type %u\n", btrfs_dir_objectid(di), btrfs_dir_flags(di), btrfs_dir_type(di)); printf("\t\tname %.*s\n", - namelen, (char *)(di + 1)); - + btrfs_dir_name_len(di),(char *)(di + 1)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); @@ -54,6 +52,13 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) printf("\t\textent data refs %u owner %Lu\n", btrfs_extent_refs(ei), btrfs_extent_owner(ei)); break; + case BTRFS_INODE_MAP_ITEM_KEY: + mi = btrfs_item_ptr(l, i, struct btrfs_inode_map_item); + printf("\t\tinode map key %Lu %u %Lu\n", + btrfs_disk_key_objectid(&mi->key), + btrfs_disk_key_flags(&mi->key), + btrfs_disk_key_offset(&mi->key)); + break; case BTRFS_STRING_ITEM_KEY: printf("\t\titem data %.*s\n", btrfs_item_size(item), btrfs_leaf_data(l) + btrfs_item_offset(item)); diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c index d676577185d5..ef12cf888783 100644 --- a/fs/btrfs/quick-test.c +++ b/fs/btrfs/quick-test.c @@ -172,7 +172,8 @@ int main(int ac, char **av) { printf("tree size is now %d\n", tree_size); printf("root %p commit root %p\n", root->node, root->commit_root); printf("map tree\n"); - btrfs_print_tree(root->extent_root, root->extent_root->node); + btrfs_print_tree(root->fs_info->extent_root, + root->fs_info->extent_root->node); close_ctree(root, &super); return 0; } From 313a013bb927cfba849b1bb746e9112ba856e43a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Mar 2007 14:51:12 -0400 Subject: [PATCH 0057/2985] Btrfs: Update TODO Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index e9894999322e..c5d67bbd4d90 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -4,7 +4,6 @@ * make a real mkfs and superblock * Do checksumming * Define FS objects in terms of different item types -* add inode tree * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter From 293ffd5fd340428276fbbd24ce7b98bf6728466b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Mar 2007 15:57:25 -0400 Subject: [PATCH 0058/2985] Btrfs: change dir-test to insert inode_items Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 ++++++ fs/btrfs/dir-test.c | 41 +++++++++++++++++++++++++++++++---------- fs/btrfs/disk-io.c | 2 ++ fs/btrfs/print-tree.c | 5 +++++ 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 68e3da274f17..b30b2ce72455 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -219,6 +219,7 @@ struct btrfs_fs_info { struct list_head cache; u64 last_inode_alloc; u64 last_inode_alloc_dirid; + u64 generation; int cache_size; int fp; struct btrfs_trans_handle *running_transaction; @@ -802,4 +803,9 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, int mod); +int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 objectid, struct btrfs_inode_item + *inode_item); +int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, u64 objectid, int mod); #endif diff --git a/fs/btrfs/dir-test.c b/fs/btrfs/dir-test.c index 56f06c3ba573..8fc77c83a351 100644 --- a/fs/btrfs/dir-test.c +++ b/fs/btrfs/dir-test.c @@ -39,6 +39,13 @@ again: return 0; } +static void initial_inode_init(struct btrfs_root *root, + struct btrfs_inode_item *inode_item) +{ + memset(inode_item, 0, sizeof(*inode_item)); + btrfs_set_inode_generation(inode_item, root->fs_info->generation); +} + static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct radix_tree_root *radix) { @@ -48,6 +55,7 @@ static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid; struct btrfs_path path; struct btrfs_key inode_map; + struct btrfs_inode_item inode_item; find_num(radix, &oid, 0); sprintf(buf, "str-%lu", oid); @@ -61,6 +69,11 @@ static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, inode_map.offset = 0; ret = btrfs_insert_inode_map(trans, root, objectid, &inode_map); + if (ret) + goto error; + + initial_inode_init(root, &inode_item); + ret = btrfs_insert_inode(trans, root, objectid, &inode_item); if (ret) goto error; ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid, @@ -143,7 +156,6 @@ static int del_dir_item(struct btrfs_trans_handle *trans, unsigned long *ptr; u64 file_objectid; struct btrfs_dir_item *di; - struct btrfs_path map_path; /* find the inode number of the file */ di = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0], @@ -153,20 +165,31 @@ static int del_dir_item(struct btrfs_trans_handle *trans, /* delete the directory item */ ret = btrfs_del_item(trans, root, path); if (ret) - goto out; + goto out_release; + btrfs_release_path(root, path); - /* delete the inode mapping */ - btrfs_init_path(&map_path); - ret = btrfs_lookup_inode_map(trans, root, &map_path, file_objectid, -1); + /* delete the inode */ + btrfs_init_path(path); + ret = btrfs_lookup_inode(trans, root, path, file_objectid, -1); if (ret) goto out_release; - ret = btrfs_del_item(trans, root->fs_info->inode_root, &map_path); + ret = btrfs_del_item(trans, root, path); + if (ret) + goto out_release; + btrfs_release_path(root, path); + + /* delete the inode mapping */ + btrfs_init_path(path); + ret = btrfs_lookup_inode_map(trans, root, path, file_objectid, -1); + if (ret) + goto out_release; + ret = btrfs_del_item(trans, root->fs_info->inode_root, path); if (ret) goto out_release; if (root->fs_info->last_inode_alloc > file_objectid) root->fs_info->last_inode_alloc = file_objectid; - btrfs_release_path(root, &map_path); + btrfs_release_path(root, path); ptr = radix_tree_delete(radix, radix_index); if (!ptr) { ret = -5555; @@ -174,7 +197,7 @@ static int del_dir_item(struct btrfs_trans_handle *trans, } return 0; out_release: - btrfs_release_path(root, &map_path); + btrfs_release_path(root, path); out: printf("failed to delete %lu %d\n", radix_index, ret); return -1; @@ -201,7 +224,6 @@ static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = del_dir_item(trans, root, radix, oid, &path); if (ret) goto out_release; - btrfs_release_path(root, &path); return ret; out_release: btrfs_release_path(root, &path); @@ -312,7 +334,6 @@ static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root found); return -1; } - btrfs_release_path(root, &path); if (!keep_running) break; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1849a99690c8..bacaa38ea82e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -226,6 +226,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); BUG_ON(ret); + root->fs_info->generation = root->root_key.offset + 1; return ret; } @@ -328,6 +329,7 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) root->commit_root = root->node; root->node->count++; root->ref_cows = 1; + root->fs_info->generation = root->root_key.offset + 1; return root; } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index f250e5fad773..f53b99da12f3 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -14,6 +14,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_root_item *ri; struct btrfs_dir_item *di; struct btrfs_inode_map_item *mi; + struct btrfs_inode_item *ii; u32 type; printf("leaf %Lu total ptrs %d free space %d\n", @@ -32,6 +33,10 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) btrfs_item_size(item)); switch (type) { case BTRFS_INODE_ITEM_KEY: + ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); + printf("\t\tinode generation %Lu size %Lu\n", + btrfs_inode_generation(ii), + btrfs_inode_size(ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); From 1261ec42b3d3a3ad878bd172144940e3ac710749 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Mar 2007 20:35:03 -0400 Subject: [PATCH 0059/2985] Btrfs: Better block record keeping, real mkfs Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 7 +++- fs/btrfs/ctree.h | 1 + fs/btrfs/debug-tree.c | 13 +++++- fs/btrfs/dir-test.c | 2 - fs/btrfs/disk-io.c | 11 ++--- fs/btrfs/extent-tree.c | 24 ++++++++--- fs/btrfs/mkfs.c | 95 +++++++++++++++++++++++++++++++++++++++++- 7 files changed, 134 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 636c63efde6f..0720169b6d66 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -2,7 +2,7 @@ CC=gcc CFLAGS = -g -Wall -Werror headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h \ transaction.h -objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o \ +objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ root-tree.o dir-item.o hash.o file-item.o inode-item.o \ inode-map.o \ @@ -16,7 +16,10 @@ check=sparse $(CHECKFLAGS) $(check) $< $(CC) $(CFLAGS) -c $< -all: tester debug-tree quick-test dir-test tags +all: tester debug-tree quick-test dir-test tags mkfs.btrfs + +mkfs.btrfs: $(objects) mkfs.o + gcc $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o debug-tree: $(objects) debug-tree.o gcc $(CFLAGS) -o debug-tree $(objects) debug-tree.o diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b30b2ce72455..1a4d1d6fa401 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -223,6 +223,7 @@ struct btrfs_fs_info { int cache_size; int fp; struct btrfs_trans_handle *running_transaction; + struct btrfs_super_block *disk_super; }; /* diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c index d9f36efc3dd1..fd07969600c6 100644 --- a/fs/btrfs/debug-tree.c +++ b/fs/btrfs/debug-tree.c @@ -10,8 +10,17 @@ int main(int ac, char **av) { struct btrfs_super_block super; struct btrfs_root *root; + + if (ac != 2) { + fprintf(stderr, "usage: %s device\n", av[0]); + exit(1); + } radix_tree_init(); - root = open_ctree("dbfile", &super); + root = open_ctree(av[1], &super); + if (!root) { + fprintf(stderr, "unable to open %s\n", av[1]); + exit(1); + } printf("fs tree\n"); btrfs_print_tree(root, root->node); printf("map tree\n"); @@ -23,5 +32,7 @@ int main(int ac, char **av) { printf("root tree\n"); btrfs_print_tree(root->fs_info->tree_root, root->fs_info->tree_root->node); + printf("total blocks %Lu\n", btrfs_super_total_blocks(&super)); + printf("blocks used %Lu\n", btrfs_super_blocks_used(&super)); return 0; } diff --git a/fs/btrfs/dir-test.c b/fs/btrfs/dir-test.c index 8fc77c83a351..b673982a1f3c 100644 --- a/fs/btrfs/dir-test.c +++ b/fs/btrfs/dir-test.c @@ -425,8 +425,6 @@ int main(int ac, char **av) struct btrfs_trans_handle *trans; radix_tree_init(); - printf("removing old tree\n"); - unlink("dbfile"); root = open_ctree("dbfile", &super); trans = btrfs_start_transaction(root, 1); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bacaa38ea82e..0322c55162cb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -293,20 +293,15 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; + fs_info->disk_super = super; memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); ret = pread(fp, super, sizeof(struct btrfs_super_block), BTRFS_SUPER_INFO_OFFSET); if (ret == 0 || btrfs_super_root(super) == 0) { - printf("making new FS!\n"); - ret = mkfs(fp, 0, 1024); - if (ret) - return NULL; - ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET); - if (ret != sizeof(struct btrfs_super_block)) - return NULL; + BUG(); + return NULL; } BUG_ON(ret < 0); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 09eeeb4d9d28..9bc4ad38876d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -134,6 +134,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct struct btrfs_extent_item extent_item; int i; int ret; + u64 super_blocks_used; + struct btrfs_fs_info *info = extent_root->fs_info; btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, @@ -145,6 +147,9 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) { ins.objectid = extent_root->fs_info->current_insert.objectid + i; + super_blocks_used = btrfs_super_blocks_used(info->disk_super); + btrfs_set_super_blocks_used(info->disk_super, + super_blocks_used + 1); ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, sizeof(extent_item)); BUG_ON(ret); @@ -161,7 +166,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root { struct btrfs_path path; struct btrfs_key key; - struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_root *extent_root = info->extent_root; int ret; struct btrfs_extent_item *ei; struct btrfs_key ins; @@ -188,15 +194,18 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); if (refs == 0) { + u64 super_blocks_used; if (pin) { int err; radix_tree_preload(GFP_KERNEL); - err = radix_tree_insert( - &extent_root->fs_info->pinned_radix, - blocknr, (void *)blocknr); + err = radix_tree_insert(&info->pinned_radix, + blocknr, (void *)blocknr); BUG_ON(err); radix_tree_preload_end(); } + super_blocks_used = btrfs_super_blocks_used(info->disk_super); + btrfs_set_super_blocks_used(info->disk_super, + super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, &path); if (!pin && extent_root->fs_info->last_insert.objectid > blocknr) @@ -392,7 +401,9 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root { int ret; int pending_ret; - struct btrfs_root *extent_root = root->fs_info->extent_root; + u64 super_blocks_used; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; btrfs_set_extent_refs(&extent_item, 1); @@ -413,6 +424,9 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) return ret; + super_blocks_used = btrfs_super_blocks_used(info->disk_super); + btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + + num_blocks); ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, sizeof(extent_item)); diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c index 1cac5ab114dd..f7efc8a5fb1a 100644 --- a/fs/btrfs/mkfs.c +++ b/fs/btrfs/mkfs.c @@ -1,4 +1,8 @@ #define _XOPEN_SOURCE 500 +#ifndef __CHECKER__ +#include +#include +#endif #include #include #include @@ -10,6 +14,17 @@ #include "ctree.h" #include "disk-io.h" +#ifdef __CHECKER__ +#define BLKGETSIZE64 0 +static inline int ioctl(int fd, int define, u64 *size) { return 0; } +#endif + +#if 0 +#if defined(__linux__) && defined(_IOR) && !defined(BLKGETSIZE64) +# define BLKGETSIZE64 _IOR(0x12, 114, __u64) +#endif +#endif + int mkfs(int fd, u64 num_blocks, u32 blocksize) { struct btrfs_super_block super; @@ -27,7 +42,7 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) strcpy((char *)(&super.magic), BTRFS_MAGIC); btrfs_set_super_blocksize(&super, blocksize); btrfs_set_super_total_blocks(&super, num_blocks); - btrfs_set_super_blocks_used(&super, 0); + btrfs_set_super_blocks_used(&super, start_block + 5); block = malloc(blocksize); memset(block, 0, blocksize); @@ -160,3 +175,81 @@ int mkfs(int fd, u64 num_blocks, u32 blocksize) return -1; return 0; } + +u64 device_size(int fd, struct stat *st) +{ + u64 size; + if (S_ISREG(st->st_mode)) { + return st->st_size; + } + if (!S_ISBLK(st->st_mode)) { + return 0; + } + if (ioctl(fd, BLKGETSIZE64, &size) >= 0) { + return size; + } + return 0; +} + +int main(int ac, char **av) +{ + char *file; + u64 block_count = 0; + int fd; + struct stat st; + int ret; + int i; + char *buf = malloc(4096); + if (ac >= 2) { + file = av[1]; + if (ac == 3) { + block_count = atoi(av[2]); + if (!block_count) { + fprintf(stderr, "error finding block count\n"); + exit(1); + } + } + } else { + fprintf(stderr, "usage: mkfs.btrfs file [block count]\n"); + exit(1); + } + fd = open(file, O_RDWR); + if (fd < 0) { + fprintf(stderr, "unable to open %s\n", file); + exit(1); + } + ret = fstat(fd, &st); + if (ret < 0) { + fprintf(stderr, "unable to stat %s\n", file); + exit(1); + } + if (block_count == 0) { + block_count = device_size(fd, &st); + if (block_count == 0) { + fprintf(stderr, "unable to find %s size\n", file); + exit(1); + } + } + block_count /= 4096; + if (block_count < 256) { + fprintf(stderr, "device %s is too small\n", file); + exit(1); + } + memset(buf, 0, 4096); + for(i = 0; i < 6; i++) { + ret = write(fd, buf, 4096); + if (ret != 4096) { + fprintf(stderr, "unable to zero fill device\n"); + exit(1); + } + } + ret = mkfs(fd, block_count, 4096); + if (ret) { + fprintf(stderr, "error during mkfs %d\n", ret); + exit(1); + } + printf("fs created on %s blocksize %d blocks %Lu\n", + file, 4096, block_count); + return 0; +} + From 2e635a278354a1a7951e16cfea4c247d6d0e7c99 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 21 Mar 2007 11:12:56 -0400 Subject: [PATCH 0060/2985] Btrfs: initial move to kernel module land Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 50 +-- fs/btrfs/ctree.c | 6 +- fs/btrfs/ctree.h | 15 +- fs/btrfs/debug-tree.c | 38 -- fs/btrfs/dir-item.c | 12 +- fs/btrfs/dir-test.c | 494 ------------------------ fs/btrfs/disk-io.c | 17 +- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent-tree.c | 10 +- fs/btrfs/file-item.c | 6 +- fs/btrfs/hash.c | 1 - fs/btrfs/hasher.c | 23 -- fs/btrfs/inode-item.c | 5 +- fs/btrfs/inode-map.c | 5 +- fs/btrfs/kerncompat.h | 96 ----- fs/btrfs/list.h | 418 --------------------- fs/btrfs/mkfs.c | 255 ------------- fs/btrfs/print-tree.c | 30 +- fs/btrfs/quick-test.c | 179 --------- fs/btrfs/radix-tree.c | 836 ----------------------------------------- fs/btrfs/radix-tree.h | 73 ---- fs/btrfs/random-test.c | 405 -------------------- fs/btrfs/root-tree.c | 5 +- fs/btrfs/super.c | 205 ++++++++++ 24 files changed, 274 insertions(+), 2911 deletions(-) delete mode 100644 fs/btrfs/debug-tree.c delete mode 100644 fs/btrfs/dir-test.c delete mode 100644 fs/btrfs/hasher.c delete mode 100644 fs/btrfs/kerncompat.h delete mode 100644 fs/btrfs/list.h delete mode 100644 fs/btrfs/mkfs.c delete mode 100644 fs/btrfs/quick-test.c delete mode 100644 fs/btrfs/radix-tree.c delete mode 100644 fs/btrfs/radix-tree.h delete mode 100644 fs/btrfs/random-test.c create mode 100644 fs/btrfs/super.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 0720169b6d66..99e45a54ebd6 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,40 +1,20 @@ -CC=gcc -CFLAGS = -g -Wall -Werror -headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h \ - transaction.h -objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ - root-tree.o dir-item.o hash.o file-item.o inode-item.o \ - inode-map.o \ +ifneq ($(KERNELRELEASE),) +# kbuild part of makefile -# if you don't have sparse installed, use ls instead -CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ - -Wcontext -Wcast-truncate -Wuninitialized -Wshadow -Wundef -check=sparse $(CHECKFLAGS) -#check=ls +obj-m := btrfs.o +btrfs-y := super.o -.c.o: - $(check) $< - $(CC) $(CFLAGS) -c $< +#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ +# root-tree.o dir-item.o hash.o file-item.o inode-item.o \ +# inode-map.o \ -all: tester debug-tree quick-test dir-test tags mkfs.btrfs - -mkfs.btrfs: $(objects) mkfs.o - gcc $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o - -debug-tree: $(objects) debug-tree.o - gcc $(CFLAGS) -o debug-tree $(objects) debug-tree.o - -tester: $(objects) random-test.o - gcc $(CFLAGS) -o tester $(objects) random-test.o - -dir-test: $(objects) dir-test.o - gcc $(CFLAGS) -o dir-test $(objects) dir-test.o -quick-test: $(objects) quick-test.o - gcc $(CFLAGS) -o quick-test $(objects) quick-test.o - -$(objects): $(headers) - -clean : - rm debug-tree tester *.o +else +# Normal Makefile +KERNELDIR := /lib/modules/`uname -r`/build +all:: + $(MAKE) -C $(KERNELDIR) M=`pwd` modules +clean:: + rm *.o btrfs.ko +endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 32922643b5bc..9fbd07c37fde 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,10 +1,6 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" #include "disk-io.h" -#include "print-tree.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1a4d1d6fa401..ae8518cb94bf 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,9 +1,6 @@ #ifndef __BTRFS__ #define __BTRFS__ -#include "list.h" -#include "kerncompat.h" - struct btrfs_trans_handle; #define BTRFS_MAGIC "_BtRfS_M" @@ -75,6 +72,7 @@ struct btrfs_super_block { __le64 root; __le64 total_blocks; __le64 blocks_used; + __le64 root_dir_objectid; } __attribute__ ((__packed__)); /* @@ -693,6 +691,17 @@ static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s, s->blocksize = cpu_to_le32(val); } +static inline u64 btrfs_super_root_dir(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->root_dir_objectid); +} + +static inline void btrfs_set_super_root_dir(struct btrfs_super_block *s, u64 + val) +{ + s->root_dir_objectid = cpu_to_le64(val); +} + static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) { return (u8 *)l->items; diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c deleted file mode 100644 index fd07969600c6..000000000000 --- a/fs/btrfs/debug-tree.c +++ /dev/null @@ -1,38 +0,0 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" -#include "ctree.h" -#include "disk-io.h" -#include "print-tree.h" -#include "transaction.h" - -int main(int ac, char **av) { - struct btrfs_super_block super; - struct btrfs_root *root; - - if (ac != 2) { - fprintf(stderr, "usage: %s device\n", av[0]); - exit(1); - } - radix_tree_init(); - root = open_ctree(av[1], &super); - if (!root) { - fprintf(stderr, "unable to open %s\n", av[1]); - exit(1); - } - printf("fs tree\n"); - btrfs_print_tree(root, root->node); - printf("map tree\n"); - btrfs_print_tree(root->fs_info->extent_root, - root->fs_info->extent_root->node); - printf("inode tree\n"); - btrfs_print_tree(root->fs_info->inode_root, - root->fs_info->inode_root->node); - printf("root tree\n"); - btrfs_print_tree(root->fs_info->tree_root, - root->fs_info->tree_root->node); - printf("total blocks %Lu\n", btrfs_super_total_blocks(&super)); - printf("blocks used %Lu\n", btrfs_super_blocks_used(&super)); - return 0; -} diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 949c4e526798..4d8083d92fa0 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -1,7 +1,4 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" #include "disk-io.h" #include "hash.h" @@ -21,7 +18,12 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - ret = btrfs_name_hash(name, name_len, &key.offset); + if (name_len == 1 && *name == '.') + key.offset = 1; + else if (name_len == 2 && name[0] == '.' && name[1] == '.') + key.offset = 2; + else + ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); btrfs_init_path(&path); data_size = sizeof(*dir_item) + name_len; diff --git a/fs/btrfs/dir-test.c b/fs/btrfs/dir-test.c deleted file mode 100644 index b673982a1f3c..000000000000 --- a/fs/btrfs/dir-test.c +++ /dev/null @@ -1,494 +0,0 @@ -#include -#include -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" -#include "ctree.h" -#include "disk-io.h" -#include "print-tree.h" -#include "hash.h" -#include "transaction.h" - -int keep_running = 1; -struct btrfs_super_block super; -static u64 dir_oid = 44556; -static u64 file_oid = 33778; - -static int find_num(struct radix_tree_root *root, unsigned long *num_ret, - int exists) -{ - unsigned long num = rand(); - unsigned long res[2]; - int ret; - -again: - ret = radix_tree_gang_lookup(root, (void **)res, num, 2); - if (exists) { - if (ret == 0) - return -1; - num = res[0]; - } else if (ret != 0 && num == res[0]) { - num++; - if (ret > 1 && num == res[1]) { - num++; - goto again; - } - } - *num_ret = num; - return 0; -} - -static void initial_inode_init(struct btrfs_root *root, - struct btrfs_inode_item *inode_item) -{ - memset(inode_item, 0, sizeof(*inode_item)); - btrfs_set_inode_generation(inode_item, root->fs_info->generation); -} - -static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix) -{ - int ret; - char buf[128]; - unsigned long oid; - u64 objectid; - struct btrfs_path path; - struct btrfs_key inode_map; - struct btrfs_inode_item inode_item; - - find_num(radix, &oid, 0); - sprintf(buf, "str-%lu", oid); - - ret = btrfs_find_free_objectid(trans, root, dir_oid + 1, &objectid); - if (ret) - goto error; - - inode_map.objectid = objectid; - inode_map.flags = 0; - inode_map.offset = 0; - - ret = btrfs_insert_inode_map(trans, root, objectid, &inode_map); - if (ret) - goto error; - - initial_inode_init(root, &inode_item); - ret = btrfs_insert_inode(trans, root, objectid, &inode_item); - if (ret) - goto error; - ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid, - objectid, 1); - if (ret) - goto error; - - radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(radix, oid, (void *)oid); - radix_tree_preload_end(); - if (ret) - goto error; - return ret; -error: - if (ret != -EEXIST) - goto fatal; - - /* - * if we got an EEXIST, it may be due to hash collision, double - * check - */ - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, - strlen(buf), 0); - if (ret) - goto fatal_release; - if (!btrfs_match_dir_item_name(root, &path, buf, strlen(buf))) { - struct btrfs_dir_item *di; - char *found; - u32 found_len; - u64 myhash; - u64 foundhash; - - di = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], - struct btrfs_dir_item); - found = (char *)(di + 1); - found_len = btrfs_dir_name_len(di); - btrfs_name_hash(buf, strlen(buf), &myhash); - btrfs_name_hash(found, found_len, &foundhash); - if (myhash != foundhash) - goto fatal_release; - btrfs_release_path(root, &path); - return 0; - } -fatal_release: - btrfs_release_path(root, &path); -fatal: - printf("failed to insert %lu ret %d\n", oid, ret); - return -1; -} - -static int insert_dup(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix) -{ - int ret; - char buf[128]; - unsigned long oid; - - ret = find_num(radix, &oid, 1); - if (ret < 0) - return 0; - sprintf(buf, "str-%lu", oid); - - ret = btrfs_insert_dir_item(trans, root, buf, strlen(buf), dir_oid, - file_oid, 1); - if (ret != -EEXIST) { - printf("insert on %s gave us %d\n", buf, ret); - return 1; - } - return 0; -} - -static int del_dir_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct radix_tree_root *radix, - unsigned long radix_index, - struct btrfs_path *path) -{ - int ret; - unsigned long *ptr; - u64 file_objectid; - struct btrfs_dir_item *di; - - /* find the inode number of the file */ - di = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0], - struct btrfs_dir_item); - file_objectid = btrfs_dir_objectid(di); - - /* delete the directory item */ - ret = btrfs_del_item(trans, root, path); - if (ret) - goto out_release; - btrfs_release_path(root, path); - - /* delete the inode */ - btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, path, file_objectid, -1); - if (ret) - goto out_release; - ret = btrfs_del_item(trans, root, path); - if (ret) - goto out_release; - btrfs_release_path(root, path); - - /* delete the inode mapping */ - btrfs_init_path(path); - ret = btrfs_lookup_inode_map(trans, root, path, file_objectid, -1); - if (ret) - goto out_release; - ret = btrfs_del_item(trans, root->fs_info->inode_root, path); - if (ret) - goto out_release; - - if (root->fs_info->last_inode_alloc > file_objectid) - root->fs_info->last_inode_alloc = file_objectid; - btrfs_release_path(root, path); - ptr = radix_tree_delete(radix, radix_index); - if (!ptr) { - ret = -5555; - goto out; - } - return 0; -out_release: - btrfs_release_path(root, path); -out: - printf("failed to delete %lu %d\n", radix_index, ret); - return -1; -} - -static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix) -{ - int ret; - char buf[128]; - unsigned long oid; - struct btrfs_path path; - - ret = find_num(radix, &oid, 1); - if (ret < 0) - return 0; - sprintf(buf, "str-%lu", oid); - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, - strlen(buf), -1); - if (ret) - goto out_release; - - ret = del_dir_item(trans, root, radix, oid, &path); - if (ret) - goto out_release; - return ret; -out_release: - btrfs_release_path(root, &path); - printf("failed to delete %lu %d\n", oid, ret); - return -1; -} - -static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix) -{ - struct btrfs_path path; - char buf[128]; - int ret; - unsigned long oid; - u64 objectid; - struct btrfs_dir_item *di; - - ret = find_num(radix, &oid, 1); - if (ret < 0) - return 0; - sprintf(buf, "str-%lu", oid); - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, - strlen(buf), 0); - if (!ret) { - di = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], - struct btrfs_dir_item); - objectid = btrfs_dir_objectid(di); - btrfs_release_path(root, &path); - btrfs_init_path(&path); - ret = btrfs_lookup_inode_map(trans, root, &path, objectid, 0); - } - btrfs_release_path(root, &path); - if (ret) { - printf("unable to find key %lu\n", oid); - return -1; - } - return 0; -} - -static int lookup_enoent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix) -{ - struct btrfs_path path; - char buf[128]; - int ret; - unsigned long oid; - - ret = find_num(radix, &oid, 0); - if (ret < 0) - return 0; - sprintf(buf, "str-%lu", oid); - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(trans, root, &path, dir_oid, buf, - strlen(buf), 0); - btrfs_release_path(root, &path); - if (!ret) { - printf("able to find key that should not exist %lu\n", oid); - return -1; - } - return 0; -} - -static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix, int nr) -{ - struct btrfs_path path; - struct btrfs_key key; - unsigned long found = 0; - u32 found_len; - int ret; - int slot; - int count = 0; - char buf[128]; - struct btrfs_dir_item *di; - - key.offset = (u64)-1; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - key.objectid = dir_oid; - while(nr-- >= 0) { - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); - if (ret < 0) { - btrfs_release_path(root, &path); - return ret; - } - if (ret != 0) { - if (path.slots[0] == 0) { - btrfs_release_path(root, &path); - break; - } - path.slots[0] -= 1; - } - slot = path.slots[0]; - di = btrfs_item_ptr(&path.nodes[0]->leaf, slot, - struct btrfs_dir_item); - found_len = btrfs_dir_name_len(di); - memcpy(buf, (char *)(di + 1), found_len); - BUG_ON(found_len > 128); - buf[found_len] = '\0'; - found = atoi(buf + 4); - ret = del_dir_item(trans, root, radix, found, &path); - count++; - if (ret) { - fprintf(stderr, - "failed to remove %lu from tree\n", - found); - return -1; - } - if (!keep_running) - break; - } - return 0; - fprintf(stderr, "failed to delete from the radix %lu\n", found); - return -1; -} - -static int fill_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix, int count) -{ - int i; - int ret = 0; - for (i = 0; i < count; i++) { - ret = ins_one(trans, root, radix); - if (ret) { - fprintf(stderr, "fill failed\n"); - goto out; - } - if (i % 1000 == 0) { - ret = btrfs_commit_transaction(trans, root, &super); - if (ret) { - fprintf(stderr, "fill commit failed\n"); - return ret; - } - } - if (i && i % 10000 == 0) { - printf("bigfill %d\n", i); - } - if (!keep_running) - break; - } -out: - return ret; -} - -static int bulk_op(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix) -{ - int ret; - int nr = rand() % 5000; - static int run_nr = 0; - - /* do the bulk op much less frequently */ - if (run_nr++ % 100) - return 0; - ret = empty_tree(trans, root, radix, nr); - if (ret) - return ret; - ret = fill_tree(trans, root, radix, nr); - if (ret) - return ret; - return 0; -} - - -int (*ops[])(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct - radix_tree_root *radix) = - { ins_one, insert_dup, del_one, lookup_item, - lookup_enoent, bulk_op }; - -void sigstopper(int ignored) -{ - keep_running = 0; - fprintf(stderr, "caught exit signal, stopping\n"); -} - -int print_usage(void) -{ - printf("usage: tester [-ih] [-c count] [-f count]\n"); - printf("\t -c count -- iteration count after filling\n"); - printf("\t -f count -- run this many random inserts before starting\n"); - printf("\t -i -- only do initial fill\n"); - printf("\t -h -- this help text\n"); - exit(1); -} -int main(int ac, char **av) -{ - RADIX_TREE(radix, GFP_KERNEL); - struct btrfs_root *root; - int i; - int ret; - int count; - int op; - int iterations = 20000; - int init_fill_count = 800000; - int err = 0; - int initial_only = 0; - struct btrfs_trans_handle *trans; - radix_tree_init(); - - root = open_ctree("dbfile", &super); - trans = btrfs_start_transaction(root, 1); - - signal(SIGTERM, sigstopper); - signal(SIGINT, sigstopper); - - for (i = 1 ; i < ac ; i++) { - if (strcmp(av[i], "-i") == 0) { - initial_only = 1; - } else if (strcmp(av[i], "-c") == 0) { - iterations = atoi(av[i+1]); - i++; - } else if (strcmp(av[i], "-f") == 0) { - init_fill_count = atoi(av[i+1]); - i++; - } else { - print_usage(); - } - } - printf("initial fill\n"); - ret = fill_tree(trans, root, &radix, init_fill_count); - printf("starting run\n"); - if (ret) { - err = ret; - goto out; - } - if (initial_only == 1) { - goto out; - } - for (i = 0; i < iterations; i++) { - op = rand() % ARRAY_SIZE(ops); - count = rand() % 128; - if (i % 2000 == 0) { - printf("%d\n", i); - fflush(stdout); - } - if (i && i % 5000 == 0) { - printf("open & close, root level %d nritems %d\n", - btrfs_header_level(&root->node->node.header), - btrfs_header_nritems(&root->node->node.header)); - close_ctree(root, &super); - root = open_ctree("dbfile", &super); - } - while(count--) { - ret = ops[op](trans, root, &radix); - if (ret) { - fprintf(stderr, "op %d failed %d:%d\n", - op, i, iterations); - btrfs_print_tree(root, root->node); - fprintf(stderr, "op %d failed %d:%d\n", - op, i, iterations); - err = ret; - goto out; - } - if (ops[op] == bulk_op) - break; - if (keep_running == 0) { - err = 0; - goto out; - } - } - } -out: - close_ctree(root, &super); - return err; -} - diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0322c55162cb..05637f9fd7c7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -266,20 +266,25 @@ static int find_and_setup_root(struct btrfs_super_block *super, } struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) +{ + int fp; + + fp = open(filename, O_CREAT | O_RDWR, 0600); + if (fp < 0) { + return NULL; + } + return open_ctree_fd(fp, super); +} + +struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) { struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *inode_root = malloc(sizeof(struct btrfs_root)); struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info)); - int fp; int ret; - fp = open(filename, O_CREAT | O_RDWR, 0600); - if (fp < 0) { - free(root); - return NULL; - } INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 24a9e77c8311..d888cf5c350b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -24,6 +24,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s); struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); +struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super); int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9bc4ad38876d..53a7550b5c1e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1,7 +1,5 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include +#include #include "ctree.h" #include "disk-io.h" #include "print-tree.h" @@ -183,9 +181,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_init_path(&path); ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1); if (ret) { - printf("failed to find %Lu\n", key.objectid); + printk("failed to find %Lu\n", key.objectid); btrfs_print_tree(extent_root, extent_root->node); - printf("failed to find %Lu\n", key.objectid); + printk("failed to find %Lu\n", key.objectid); BUG(); } ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 24cfd6d85240..8e1e5b4ccfaf 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1,9 +1,5 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" -#include "disk-io.h" #include "transaction.h" int btrfs_create_file(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 92c37eece1f2..6c2a71a46c7d 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -10,7 +10,6 @@ * License. */ -#include "kerncompat.h" #define DELTA 0x9E3779B9 static void TEA_transform(__u32 buf[2], __u32 const in[]) diff --git a/fs/btrfs/hasher.c b/fs/btrfs/hasher.c deleted file mode 100644 index 96702da4329c..000000000000 --- a/fs/btrfs/hasher.c +++ /dev/null @@ -1,23 +0,0 @@ -#include -#include -#include -#include "kerncompat.h" -#include "hash.h" - -int main() { - u64 result; - int ret; - char line[255]; - char *p; - while(1) { - p = fgets(line, 255, stdin); - if (!p) - break; - if (strlen(line) == 0) - continue; - ret = btrfs_name_hash(line, strlen(line), &result); - BUG_ON(ret); - printf("hash returns %Lu\n", result); - } - return 0; -} diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 7caeb11e8756..8d8c26a6c1a0 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -1,7 +1,4 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index f412b3392135..c7fda3bf7b21 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -1,7 +1,4 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h deleted file mode 100644 index 105d3f584089..000000000000 --- a/fs/btrfs/kerncompat.h +++ /dev/null @@ -1,96 +0,0 @@ -#ifndef __KERNCOMPAT -#define __KERNCOMPAT -#define gfp_t int -#define get_cpu_var(p) (p) -#define __get_cpu_var(p) (p) -#define BITS_PER_LONG 64 -#define __GFP_BITS_SHIFT 20 -#define __GFP_BITS_MASK ((int)((1 << __GFP_BITS_SHIFT) - 1)) -#define GFP_KERNEL 0 -#define __read_mostly -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#define PAGE_SHIFT 12 -#define ULONG_MAX (~0UL) -#define BUG() abort() -#ifdef __CHECKER__ -#define __force __attribute__((force)) -#define __bitwise__ __attribute__((bitwise)) -#else -#define __force -#define __bitwise__ -#endif - -typedef unsigned int u32; -typedef u32 __u32; -typedef unsigned long long u64; -typedef unsigned char u8; -typedef unsigned short u16; - -typedef unsigned long pgoff_t; - -#include -#include -#include - -struct vma_shared { int prio_tree_node; }; -struct vm_area_struct { - unsigned long vm_pgoff; - unsigned long vm_start; - unsigned long vm_end; - struct vma_shared shared; -}; - -struct page { - unsigned long index; -}; - -static inline void preempt_enable(void) { do {; } while(0);} -static inline void preempt_disable(void) { do {; } while(0);} - -static inline void __set_bit(int bit, unsigned long *map) { - unsigned long *p = map + bit / BITS_PER_LONG; - bit = bit & (BITS_PER_LONG -1); - *p |= 1UL << bit; -} - -static inline int test_bit(int bit, unsigned long *map) { - unsigned long *p = map + bit / BITS_PER_LONG; - bit = bit & (BITS_PER_LONG -1); - return *p & (1UL << bit) ? 1 : 0; -} - -static inline void __clear_bit(int bit, unsigned long *map) { - unsigned long *p = map + bit / BITS_PER_LONG; - bit = bit & (BITS_PER_LONG -1); - *p &= ~(1UL << bit); -} -#define BUG_ON(c) do { if (c) abort(); } while (0) - -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - __builtin_offsetof(type,member) );}) - -#define ENOMEM 5 -#define EEXIST 6 - -#define __CHECK_ENDIAN__ -#ifdef __CHECK_ENDIAN__ -#define __bitwise __bitwise__ -#else -#define __bitwise -#endif - -typedef u16 __bitwise __le16; -typedef u16 __bitwise __be16; -typedef u32 __bitwise __le32; -typedef u32 __bitwise __be32; -typedef u64 __bitwise __le64; -typedef u64 __bitwise __be64; - -#define cpu_to_le64(x) ((__force __le64)(u64)(x)) -#define le64_to_cpu(x) ((__force u64)(__le64)(x)) -#define cpu_to_le32(x) ((__force __le32)(u32)(x)) -#define le32_to_cpu(x) ((__force u32)(__le32)(x)) -#define cpu_to_le16(x) ((__force __le16)(u16)(x)) -#define le16_to_cpu(x) ((__force u16)(__le16)(x)) -#endif diff --git a/fs/btrfs/list.h b/fs/btrfs/list.h deleted file mode 100644 index 1aafafb13370..000000000000 --- a/fs/btrfs/list.h +++ /dev/null @@ -1,418 +0,0 @@ -#ifndef _LINUX_LIST_H -#define _LINUX_LIST_H - -#define LIST_POISON1 ((void *) 0x00100100) -#define LIST_POISON2 ((void *) 0x00200200) - -/* - * Simple doubly linked list implementation. - * - * Some of the internal functions ("__xxx") are useful when - * manipulating whole lists rather than single entries, as - * sometimes we already know the next/prev entries and we can - * generate better code by using them directly rather than - * using the generic single-entry routines. - */ - -struct list_head { - struct list_head *next, *prev; -}; - -#define LIST_HEAD_INIT(name) { &(name), &(name) } - -#define LIST_HEAD(name) \ - struct list_head name = LIST_HEAD_INIT(name) - -static inline void INIT_LIST_HEAD(struct list_head *list) -{ - list->next = list; - list->prev = list; -} - -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -#ifndef CONFIG_DEBUG_LIST -static inline void __list_add(struct list_head *new, - struct list_head *prev, - struct list_head *next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} -#else -extern void __list_add(struct list_head *new, - struct list_head *prev, - struct list_head *next); -#endif - -/** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - */ -#ifndef CONFIG_DEBUG_LIST -static inline void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} -#else -extern void list_add(struct list_head *new, struct list_head *head); -#endif - - -/** - * list_add_tail - add a new entry - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - */ -static inline void list_add_tail(struct list_head *new, struct list_head *head) -{ - __list_add(new, head->prev, head); -} - -/* - * Delete a list entry by making the prev/next entries - * point to each other. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_del(struct list_head * prev, struct list_head * next) -{ - next->prev = prev; - prev->next = next; -} - -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty on entry does not return true after this, the entry is - * in an undefined state. - */ -#ifndef CONFIG_DEBUG_LIST -static inline void list_del(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - entry->next = LIST_POISON1; - entry->prev = LIST_POISON2; -} -#else -extern void list_del(struct list_head *entry); -#endif - -/** - * list_replace - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * Note: if 'old' was empty, it will be overwritten. - */ -static inline void list_replace(struct list_head *old, - struct list_head *new) -{ - new->next = old->next; - new->next->prev = new; - new->prev = old->prev; - new->prev->next = new; -} - -static inline void list_replace_init(struct list_head *old, - struct list_head *new) -{ - list_replace(old, new); - INIT_LIST_HEAD(old); -} -/** - * list_del_init - deletes entry from list and reinitialize it. - * @entry: the element to delete from the list. - */ -static inline void list_del_init(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - INIT_LIST_HEAD(entry); -} - -/** - * list_move - delete from one list and add as another's head - * @list: the entry to move - * @head: the head that will precede our entry - */ -static inline void list_move(struct list_head *list, struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add(list, head); -} - -/** - * list_move_tail - delete from one list and add as another's tail - * @list: the entry to move - * @head: the head that will follow our entry - */ -static inline void list_move_tail(struct list_head *list, - struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add_tail(list, head); -} - -/** - * list_is_last - tests whether @list is the last entry in list @head - * @list: the entry to test - * @head: the head of the list - */ -static inline int list_is_last(const struct list_head *list, - const struct list_head *head) -{ - return list->next == head; -} - -/** - * list_empty - tests whether a list is empty - * @head: the list to test. - */ -static inline int list_empty(const struct list_head *head) -{ - return head->next == head; -} - -/** - * list_empty_careful - tests whether a list is empty and not being modified - * @head: the list to test - * - * Description: - * tests whether a list is empty _and_ checks that no other CPU might be - * in the process of modifying either member (next or prev) - * - * NOTE: using list_empty_careful() without synchronization - * can only be safe if the only activity that can happen - * to the list entry is list_del_init(). Eg. it cannot be used - * if another CPU could re-list_add() it. - */ -static inline int list_empty_careful(const struct list_head *head) -{ - struct list_head *next = head->next; - return (next == head) && (next == head->prev); -} - -static inline void __list_splice(struct list_head *list, - struct list_head *head) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - first->prev = head; - head->next = first; - - last->next = at; - at->prev = last; -} - -/** - * list_splice - join two lists - * @list: the new list to add. - * @head: the place to add it in the first list. - */ -static inline void list_splice(struct list_head *list, struct list_head *head) -{ - if (!list_empty(list)) - __list_splice(list, head); -} - -/** - * list_splice_init - join two lists and reinitialise the emptied list. - * @list: the new list to add. - * @head: the place to add it in the first list. - * - * The list at @list is reinitialised - */ -static inline void list_splice_init(struct list_head *list, - struct list_head *head) -{ - if (!list_empty(list)) { - __list_splice(list, head); - INIT_LIST_HEAD(list); - } -} - -/** - * list_entry - get the struct for this entry - * @ptr: the &struct list_head pointer. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. - */ -#define list_entry(ptr, type, member) \ - container_of(ptr, type, member) - -/** - * list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each(pos, head) \ - for (pos = (head)->next; prefetch(pos->next), pos != (head); \ - pos = pos->next) - -/** - * __list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This variant differs from list_for_each() in that it's the - * simplest possible list iteration code, no prefetching is done. - * Use this for code that knows the list to be very short (empty - * or 1 entry) most of the time. - */ -#define __list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); pos = pos->next) - -/** - * list_for_each_prev - iterate over a list backwards - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each_prev(pos, head) \ - for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \ - pos = pos->prev) - -/** - * list_for_each_safe - iterate over a list safe against removal of list entry - * @pos: the &struct list_head to use as a loop cursor. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - */ -#define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) - -/** - * list_for_each_entry - iterate over list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_reverse - iterate backwards over list of given type. - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_reverse(pos, head, member) \ - for (pos = list_entry((head)->prev, typeof(*pos), member); \ - prefetch(pos->member.prev), &pos->member != (head); \ - pos = list_entry(pos->member.prev, typeof(*pos), member)) - -/** - * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue - * @pos: the type * to use as a start point - * @head: the head of the list - * @member: the name of the list_struct within the struct. - * - * Prepares a pos entry for use as a start point in list_for_each_entry_continue. - */ -#define list_prepare_entry(pos, head, member) \ - ((pos) ? : list_entry(head, typeof(*pos), member)) - -/** - * list_for_each_entry_continue - continue iteration over list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Continue to iterate over list of given type, continuing after - * the current position. - */ -#define list_for_each_entry_continue(pos, head, member) \ - for (pos = list_entry(pos->member.next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_from - iterate over list of given type from the current point - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type, continuing from current position. - */ -#define list_for_each_entry_from(pos, head, member) \ - for (; prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_safe(pos, n, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_continue - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type, continuing after current point, - * safe against removal of list entry. - */ -#define list_for_each_entry_safe_continue(pos, n, head, member) \ - for (pos = list_entry(pos->member.next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_from - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type from current point, safe against - * removal of list entry. - */ -#define list_for_each_entry_safe_from(pos, n, head, member) \ - for (n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_reverse - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate backwards over list of given type, safe against removal - * of list entry. - */ -#define list_for_each_entry_safe_reverse(pos, n, head, member) \ - for (pos = list_entry((head)->prev, typeof(*pos), member), \ - n = list_entry(pos->member.prev, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.prev, typeof(*n), member)) - -#endif diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c deleted file mode 100644 index f7efc8a5fb1a..000000000000 --- a/fs/btrfs/mkfs.c +++ /dev/null @@ -1,255 +0,0 @@ -#define _XOPEN_SOURCE 500 -#ifndef __CHECKER__ -#include -#include -#endif -#include -#include -#include -#include -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" -#include "ctree.h" -#include "disk-io.h" - -#ifdef __CHECKER__ -#define BLKGETSIZE64 0 -static inline int ioctl(int fd, int define, u64 *size) { return 0; } -#endif - -#if 0 -#if defined(__linux__) && defined(_IOR) && !defined(BLKGETSIZE64) -# define BLKGETSIZE64 _IOR(0x12, 114, __u64) -#endif -#endif - -int mkfs(int fd, u64 num_blocks, u32 blocksize) -{ - struct btrfs_super_block super; - struct btrfs_leaf *empty_leaf; - struct btrfs_root_item root_item; - struct btrfs_item item; - struct btrfs_extent_item extent_item; - char *block; - int ret; - u32 itemoff; - u32 start_block = BTRFS_SUPER_INFO_OFFSET / blocksize; - - btrfs_set_super_blocknr(&super, start_block); - btrfs_set_super_root(&super, start_block + 1); - strcpy((char *)(&super.magic), BTRFS_MAGIC); - btrfs_set_super_blocksize(&super, blocksize); - btrfs_set_super_total_blocks(&super, num_blocks); - btrfs_set_super_blocks_used(&super, start_block + 5); - - block = malloc(blocksize); - memset(block, 0, blocksize); - BUG_ON(sizeof(super) > blocksize); - memcpy(block, &super, sizeof(super)); - ret = pwrite(fd, block, blocksize, BTRFS_SUPER_INFO_OFFSET); - BUG_ON(ret != blocksize); - - /* create the tree of root objects */ - empty_leaf = malloc(blocksize); - memset(empty_leaf, 0, blocksize); - btrfs_set_header_parentid(&empty_leaf->header, - BTRFS_ROOT_TREE_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf->header, start_block + 1); - btrfs_set_header_nritems(&empty_leaf->header, 3); - - /* create the items for the root tree */ - btrfs_set_root_blocknr(&root_item, start_block + 2); - btrfs_set_root_refs(&root_item, 1); - itemoff = __BTRFS_LEAF_DATA_SIZE(blocksize) - sizeof(root_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_item_size(&item, sizeof(root_item)); - btrfs_set_disk_key_objectid(&item.key, BTRFS_EXTENT_TREE_OBJECTID); - btrfs_set_disk_key_offset(&item.key, 0); - btrfs_set_disk_key_flags(&item.key, 0); - btrfs_set_disk_key_type(&item.key, BTRFS_ROOT_ITEM_KEY); - memcpy(empty_leaf->items, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + itemoff, - &root_item, sizeof(root_item)); - - btrfs_set_root_blocknr(&root_item, start_block + 3); - itemoff = itemoff - sizeof(root_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_disk_key_objectid(&item.key, BTRFS_INODE_MAP_OBJECTID); - memcpy(empty_leaf->items + 1, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + itemoff, - &root_item, sizeof(root_item)); - - btrfs_set_root_blocknr(&root_item, start_block + 4); - itemoff = itemoff - sizeof(root_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_disk_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID); - memcpy(empty_leaf->items + 2, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + itemoff, - &root_item, sizeof(root_item)); - ret = pwrite(fd, empty_leaf, blocksize, (start_block + 1) * blocksize); - - /* create the items for the extent tree */ - btrfs_set_header_parentid(&empty_leaf->header, - BTRFS_EXTENT_TREE_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf->header, start_block + 2); - btrfs_set_header_nritems(&empty_leaf->header, 5); - - /* item1, reserve blocks 0-16 */ - btrfs_set_disk_key_objectid(&item.key, 0); - btrfs_set_disk_key_offset(&item.key, start_block + 1); - btrfs_set_disk_key_flags(&item.key, 0); - btrfs_set_disk_key_type(&item.key, BTRFS_EXTENT_ITEM_KEY); - itemoff = __BTRFS_LEAF_DATA_SIZE(blocksize) - - sizeof(struct btrfs_extent_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_item_size(&item, sizeof(struct btrfs_extent_item)); - btrfs_set_extent_refs(&extent_item, 1); - btrfs_set_extent_owner(&extent_item, 0); - memcpy(empty_leaf->items, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), - &extent_item, btrfs_item_size(&item)); - - /* item2, give block 17 to the root */ - btrfs_set_disk_key_objectid(&item.key, start_block + 1); - btrfs_set_disk_key_offset(&item.key, 1); - itemoff = itemoff - sizeof(struct btrfs_extent_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_extent_owner(&extent_item, BTRFS_ROOT_TREE_OBJECTID); - memcpy(empty_leaf->items + 1, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), - &extent_item, btrfs_item_size(&item)); - - /* item3, give block 18 to the extent root */ - btrfs_set_disk_key_objectid(&item.key, start_block + 2); - btrfs_set_disk_key_offset(&item.key, 1); - itemoff = itemoff - sizeof(struct btrfs_extent_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_extent_owner(&extent_item, BTRFS_EXTENT_TREE_OBJECTID); - memcpy(empty_leaf->items + 2, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), - &extent_item, btrfs_item_size(&item)); - - /* item4, give block 19 to the inode map */ - btrfs_set_disk_key_objectid(&item.key, start_block + 3); - btrfs_set_disk_key_offset(&item.key, 1); - itemoff = itemoff - sizeof(struct btrfs_extent_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_extent_owner(&extent_item, BTRFS_INODE_MAP_OBJECTID); - memcpy(empty_leaf->items + 3, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), - &extent_item, btrfs_item_size(&item)); - ret = pwrite(fd, empty_leaf, blocksize, (start_block + 2) * blocksize); - if (ret != blocksize) - return -1; - - /* item5, give block 20 to the FS root */ - btrfs_set_disk_key_objectid(&item.key, start_block + 4); - btrfs_set_disk_key_offset(&item.key, 1); - itemoff = itemoff - sizeof(struct btrfs_extent_item); - btrfs_set_item_offset(&item, itemoff); - btrfs_set_extent_owner(&extent_item, BTRFS_FS_TREE_OBJECTID); - memcpy(empty_leaf->items + 4, &item, sizeof(item)); - memcpy(btrfs_leaf_data(empty_leaf) + btrfs_item_offset(&item), - &extent_item, btrfs_item_size(&item)); - ret = pwrite(fd, empty_leaf, blocksize, (start_block + 2) * blocksize); - if (ret != blocksize) - return -1; - - /* create the inode map */ - btrfs_set_header_parentid(&empty_leaf->header, - BTRFS_INODE_MAP_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf->header, start_block + 3); - btrfs_set_header_nritems(&empty_leaf->header, 0); - ret = pwrite(fd, empty_leaf, blocksize, (start_block + 3) * blocksize); - if (ret != blocksize) - return -1; - - /* finally create the FS root */ - btrfs_set_header_parentid(&empty_leaf->header, BTRFS_FS_TREE_OBJECTID); - btrfs_set_header_blocknr(&empty_leaf->header, start_block + 4); - btrfs_set_header_nritems(&empty_leaf->header, 0); - ret = pwrite(fd, empty_leaf, blocksize, (start_block + 4) * blocksize); - if (ret != blocksize) - return -1; - return 0; -} - -u64 device_size(int fd, struct stat *st) -{ - u64 size; - if (S_ISREG(st->st_mode)) { - return st->st_size; - } - if (!S_ISBLK(st->st_mode)) { - return 0; - } - if (ioctl(fd, BLKGETSIZE64, &size) >= 0) { - return size; - } - return 0; -} - -int main(int ac, char **av) -{ - char *file; - u64 block_count = 0; - int fd; - struct stat st; - int ret; - int i; - char *buf = malloc(4096); - if (ac >= 2) { - file = av[1]; - if (ac == 3) { - block_count = atoi(av[2]); - if (!block_count) { - fprintf(stderr, "error finding block count\n"); - exit(1); - } - } - } else { - fprintf(stderr, "usage: mkfs.btrfs file [block count]\n"); - exit(1); - } - fd = open(file, O_RDWR); - if (fd < 0) { - fprintf(stderr, "unable to open %s\n", file); - exit(1); - } - ret = fstat(fd, &st); - if (ret < 0) { - fprintf(stderr, "unable to stat %s\n", file); - exit(1); - } - if (block_count == 0) { - block_count = device_size(fd, &st); - if (block_count == 0) { - fprintf(stderr, "unable to find %s size\n", file); - exit(1); - } - } - block_count /= 4096; - if (block_count < 256) { - fprintf(stderr, "device %s is too small\n", file); - exit(1); - } - memset(buf, 0, 4096); - for(i = 0; i < 6; i++) { - ret = write(fd, buf, 4096); - if (ret != 4096) { - fprintf(stderr, "unable to zero fill device\n"); - exit(1); - } - } - ret = mkfs(fd, block_count, 4096); - if (ret) { - fprintf(stderr, "error during mkfs %d\n", ret); - exit(1); - } - printf("fs created on %s blocksize %d blocks %Lu\n", - file, 4096, block_count); - return 0; -} - diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index f53b99da12f3..aa2d3fac8804 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -1,7 +1,4 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" #include "disk-io.h" @@ -17,14 +14,14 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_inode_item *ii; u32 type; - printf("leaf %Lu total ptrs %d free space %d\n", + printk("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(root, l)); fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; type = btrfs_disk_key_type(&item->key); - printf("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n", + printk("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n", i, btrfs_disk_key_objectid(&item->key), btrfs_disk_key_flags(&item->key), @@ -34,38 +31,39 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); - printf("\t\tinode generation %Lu size %Lu\n", + printk("\t\tinode generation %Lu size %Lu mode %o\n", btrfs_inode_generation(ii), - btrfs_inode_size(ii)); + btrfs_inode_size(ii), + btrfs_inode_mode(ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); - printf("\t\tdir oid %Lu flags %u type %u\n", + printk("\t\tdir oid %Lu flags %u type %u\n", btrfs_dir_objectid(di), btrfs_dir_flags(di), btrfs_dir_type(di)); - printf("\t\tname %.*s\n", + printk("\t\tname %.*s\n", btrfs_dir_name_len(di),(char *)(di + 1)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - printf("\t\troot data blocknr %Lu refs %u\n", + printk("\t\troot data blocknr %Lu refs %u\n", btrfs_root_blocknr(ri), btrfs_root_refs(ri)); break; case BTRFS_EXTENT_ITEM_KEY: ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); - printf("\t\textent data refs %u owner %Lu\n", + printk("\t\textent data refs %u owner %Lu\n", btrfs_extent_refs(ei), btrfs_extent_owner(ei)); break; case BTRFS_INODE_MAP_ITEM_KEY: mi = btrfs_item_ptr(l, i, struct btrfs_inode_map_item); - printf("\t\tinode map key %Lu %u %Lu\n", + printk("\t\tinode map key %Lu %u %Lu\n", btrfs_disk_key_objectid(&mi->key), btrfs_disk_key_flags(&mi->key), btrfs_disk_key_offset(&mi->key)); break; case BTRFS_STRING_ITEM_KEY: - printf("\t\titem data %.*s\n", btrfs_item_size(item), + printk("\t\titem data %.*s\n", btrfs_item_size(item), btrfs_leaf_data(l) + btrfs_item_offset(item)); break; }; @@ -86,12 +84,12 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) btrfs_print_leaf(root, (struct btrfs_leaf *)c); return; } - printf("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, + printk("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, btrfs_header_level(&c->header), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); fflush(stdout); for (i = 0; i < nr; i++) { - printf("\tkey %d (%Lu %u %Lu) block %Lu\n", + printk("\tkey %d (%Lu %u %Lu) block %Lu\n", i, c->ptrs[i].key.objectid, c->ptrs[i].key.flags, diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c deleted file mode 100644 index ef12cf888783..000000000000 --- a/fs/btrfs/quick-test.c +++ /dev/null @@ -1,179 +0,0 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" -#include "ctree.h" -#include "disk-io.h" -#include "print-tree.h" -#include "transaction.h" - -/* for testing only */ -int next_key(int i, int max_key) { - return rand() % max_key; - // return i; -} - -int main(int ac, char **av) { - struct btrfs_key ins; - struct btrfs_key last = { (u64)-1, 0, 0}; - char *buf; - int i; - int num; - int ret; - int run_size = 100000; - int max_key = 100000000; - int tree_size = 0; - struct btrfs_path path; - struct btrfs_super_block super; - struct btrfs_root *root; - struct btrfs_trans_handle *trans; - - radix_tree_init(); - - root = open_ctree("dbfile", &super); - trans = btrfs_start_transaction(root, 1); - srand(55); - ins.flags = 0; - btrfs_set_key_type(&ins, BTRFS_STRING_ITEM_KEY); - for (i = 0; i < run_size; i++) { - buf = malloc(64); - num = next_key(i, max_key); - // num = i; - sprintf(buf, "string-%d", num); - if (i % 10000 == 0) - fprintf(stderr, "insert %d:%d\n", num, i); - ins.objectid = num; - ins.offset = 0; - ret = btrfs_insert_item(trans, root, &ins, buf, strlen(buf)); - if (!ret) - tree_size++; - free(buf); - if (i == run_size - 5) { - btrfs_commit_transaction(trans, root, &super); - } - - } - close_ctree(root, &super); - - root = open_ctree("dbfile", &super); - printf("starting search\n"); - srand(55); - for (i = 0; i < run_size; i++) { - num = next_key(i, max_key); - ins.objectid = num; - btrfs_init_path(&path); - if (i % 10000 == 0) - fprintf(stderr, "search %d:%d\n", num, i); - ret = btrfs_search_slot(trans, root, &ins, &path, 0, 0); - if (ret) { - btrfs_print_tree(root, root->node); - printf("unable to find %d\n", num); - exit(1); - } - btrfs_release_path(root, &path); - } - close_ctree(root, &super); - root = open_ctree("dbfile", &super); - printf("node %p level %d total ptrs %d free spc %lu\n", root->node, - btrfs_header_level(&root->node->node.header), - btrfs_header_nritems(&root->node->node.header), - BTRFS_NODEPTRS_PER_BLOCK(root) - - btrfs_header_nritems(&root->node->node.header)); - printf("all searches good, deleting some items\n"); - i = 0; - srand(55); - for (i = 0 ; i < run_size/4; i++) { - num = next_key(i, max_key); - ins.objectid = num; - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, &ins, &path, -1, 1); - if (!ret) { - if (i % 10000 == 0) - fprintf(stderr, "del %d:%d\n", num, i); - ret = btrfs_del_item(trans, root, &path); - if (ret != 0) - BUG(); - tree_size--; - } - btrfs_release_path(root, &path); - } - close_ctree(root, &super); - root = open_ctree("dbfile", &super); - srand(128); - for (i = 0; i < run_size; i++) { - buf = malloc(64); - num = next_key(i, max_key); - sprintf(buf, "string-%d", num); - ins.objectid = num; - if (i % 10000 == 0) - fprintf(stderr, "insert %d:%d\n", num, i); - ret = btrfs_insert_item(trans, root, &ins, buf, strlen(buf)); - if (!ret) - tree_size++; - free(buf); - } - close_ctree(root, &super); - root = open_ctree("dbfile", &super); - srand(128); - printf("starting search2\n"); - for (i = 0; i < run_size; i++) { - num = next_key(i, max_key); - ins.objectid = num; - btrfs_init_path(&path); - if (i % 10000 == 0) - fprintf(stderr, "search %d:%d\n", num, i); - ret = btrfs_search_slot(trans, root, &ins, &path, 0, 0); - if (ret) { - btrfs_print_tree(root, root->node); - printf("unable to find %d\n", num); - exit(1); - } - btrfs_release_path(root, &path); - } - printf("starting big long delete run\n"); - while(root->node && - btrfs_header_nritems(&root->node->node.header) > 0) { - struct btrfs_leaf *leaf; - int slot; - ins.objectid = (u64)-1; - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, &ins, &path, -1, 1); - if (ret == 0) - BUG(); - - leaf = &path.nodes[0]->leaf; - slot = path.slots[0]; - if (slot != btrfs_header_nritems(&leaf->header)) - BUG(); - while(path.slots[0] > 0) { - path.slots[0] -= 1; - slot = path.slots[0]; - leaf = &path.nodes[0]->leaf; - - btrfs_disk_key_to_cpu(&last, &leaf->items[slot].key); - if (tree_size % 10000 == 0) - printf("big del %d:%d\n", tree_size, i); - ret = btrfs_del_item(trans, root, &path); - if (ret != 0) { - printf("del_item returned %d\n", ret); - BUG(); - } - tree_size--; - } - btrfs_release_path(root, &path); - } - /* - printf("previous tree:\n"); - btrfs_print_tree(root, root->commit_root); - printf("map before commit\n"); - btrfs_print_tree(root->extent_root, root->extent_root->node); - */ - btrfs_commit_transaction(trans, root, &super); - printf("tree size is now %d\n", tree_size); - printf("root %p commit root %p\n", root->node, root->commit_root); - printf("map tree\n"); - btrfs_print_tree(root->fs_info->extent_root, - root->fs_info->extent_root->node); - close_ctree(root, &super); - return 0; -} diff --git a/fs/btrfs/radix-tree.c b/fs/btrfs/radix-tree.c deleted file mode 100644 index baa25ca1c2ac..000000000000 --- a/fs/btrfs/radix-tree.c +++ /dev/null @@ -1,836 +0,0 @@ -/* - * Copyright (C) 2001 Momchil Velikov - * Portions Copyright (C) 2001 Christoph Hellwig - * Copyright (C) 2005 SGI, Christoph Lameter - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "kerncompat.h" -#include "radix-tree.h" -#ifdef __KERNEL__ -#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) -#else -#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ -#endif - -#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) -#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) - -#define RADIX_TREE_TAG_LONGS \ - ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) - -struct radix_tree_node { - unsigned int count; - void *slots[RADIX_TREE_MAP_SIZE]; - unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; -}; - -struct radix_tree_path { - struct radix_tree_node *node; - int offset; -}; - -#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) -#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2) - -static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly; - -/* - * Per-cpu pool of preloaded nodes - */ -struct radix_tree_preload { - int nr; - struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; -}; -struct radix_tree_preload radix_tree_preloads = { 0, }; - -static inline gfp_t root_gfp_mask(struct radix_tree_root *root) -{ - return root->gfp_mask & __GFP_BITS_MASK; -} - -static int internal_nodes = 0; -/* - * This assumes that the caller has performed appropriate preallocation, and - * that the caller has pinned this thread of control to the current CPU. - */ -static struct radix_tree_node * -radix_tree_node_alloc(struct radix_tree_root *root) -{ - struct radix_tree_node *ret; - ret = malloc(sizeof(struct radix_tree_node)); - if (ret) { - memset(ret, 0, sizeof(struct radix_tree_node)); - internal_nodes++; - } - return ret; -} - -static inline void -radix_tree_node_free(struct radix_tree_node *node) -{ - internal_nodes--; - free(node); -} - -/* - * Load up this CPU's radix_tree_node buffer with sufficient objects to - * ensure that the addition of a single element in the tree cannot fail. On - * success, return zero, with preemption disabled. On error, return -ENOMEM - * with preemption not disabled. - */ -int radix_tree_preload(gfp_t gfp_mask) -{ - struct radix_tree_preload *rtp; - struct radix_tree_node *node; - int ret = -ENOMEM; - - preempt_disable(); - rtp = &__get_cpu_var(radix_tree_preloads); - while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { - preempt_enable(); - node = radix_tree_node_alloc(NULL); - if (node == NULL) - goto out; - preempt_disable(); - rtp = &__get_cpu_var(radix_tree_preloads); - if (rtp->nr < ARRAY_SIZE(rtp->nodes)) - rtp->nodes[rtp->nr++] = node; - else - radix_tree_node_free(node); - } - ret = 0; -out: - return ret; -} - -static inline void tag_set(struct radix_tree_node *node, unsigned int tag, - int offset) -{ - __set_bit(offset, node->tags[tag]); -} - -static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, - int offset) -{ - __clear_bit(offset, node->tags[tag]); -} - -static inline int tag_get(struct radix_tree_node *node, unsigned int tag, - int offset) -{ - return test_bit(offset, node->tags[tag]); -} - -static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) -{ - root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); -} - - -static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) -{ - root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); -} - -static inline void root_tag_clear_all(struct radix_tree_root *root) -{ - root->gfp_mask &= __GFP_BITS_MASK; -} - -static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) -{ - return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); -} - -/* - * Returns 1 if any slot in the node has this tag set. - * Otherwise returns 0. - */ -static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) -{ - int idx; - for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { - if (node->tags[tag][idx]) - return 1; - } - return 0; -} - -/* - * Return the maximum key which can be store into a - * radix tree with height HEIGHT. - */ -static inline unsigned long radix_tree_maxindex(unsigned int height) -{ - return height_to_maxindex[height]; -} - -/* - * Extend a radix tree so it can store key @index. - */ -static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) -{ - struct radix_tree_node *node; - unsigned int height; - int tag; - - /* Figure out what the height should be. */ - height = root->height + 1; - while (index > radix_tree_maxindex(height)) - height++; - - if (root->rnode == NULL) { - root->height = height; - goto out; - } - - do { - if (!(node = radix_tree_node_alloc(root))) - return -ENOMEM; - - /* Increase the height. */ - node->slots[0] = root->rnode; - - /* Propagate the aggregated tag info into the new root */ - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { - if (root_tag_get(root, tag)) - tag_set(node, tag, 0); - } - - node->count = 1; - root->rnode = node; - root->height++; - } while (height > root->height); -out: - return 0; -} - -/** - * radix_tree_insert - insert into a radix tree - * @root: radix tree root - * @index: index key - * @item: item to insert - * - * Insert an item into the radix tree at position @index. - */ -int radix_tree_insert(struct radix_tree_root *root, - unsigned long index, void *item) -{ - struct radix_tree_node *node = NULL, *slot; - unsigned int height, shift; - int offset; - int error; - - /* Make sure the tree is high enough. */ - if (index > radix_tree_maxindex(root->height)) { - error = radix_tree_extend(root, index); - if (error) - return error; - } - - slot = root->rnode; - height = root->height; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - - offset = 0; /* uninitialised var warning */ - while (height > 0) { - if (slot == NULL) { - /* Have to add a child node. */ - if (!(slot = radix_tree_node_alloc(root))) - return -ENOMEM; - if (node) { - node->slots[offset] = slot; - node->count++; - } else - root->rnode = slot; - } - - /* Go a level down */ - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - node = slot; - slot = node->slots[offset]; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } - - if (slot != NULL) - return -EEXIST; - - if (node) { - node->count++; - node->slots[offset] = item; - BUG_ON(tag_get(node, 0, offset)); - BUG_ON(tag_get(node, 1, offset)); - } else { - root->rnode = item; - BUG_ON(root_tag_get(root, 0)); - BUG_ON(root_tag_get(root, 1)); - } - - return 0; -} - -static inline void **__lookup_slot(struct radix_tree_root *root, - unsigned long index) -{ - unsigned int height, shift; - struct radix_tree_node **slot; - - height = root->height; - - if (index > radix_tree_maxindex(height)) - return NULL; - - if (height == 0 && root->rnode) - return (void **)&root->rnode; - - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = &root->rnode; - - while (height > 0) { - if (*slot == NULL) - return NULL; - - slot = (struct radix_tree_node **) - ((*slot)->slots + - ((index >> shift) & RADIX_TREE_MAP_MASK)); - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } - - return (void **)slot; -} - -/** - * radix_tree_lookup_slot - lookup a slot in a radix tree - * @root: radix tree root - * @index: index key - * - * Lookup the slot corresponding to the position @index in the radix tree - * @root. This is useful for update-if-exists operations. - */ -void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) -{ - return __lookup_slot(root, index); -} - -/** - * radix_tree_lookup - perform lookup operation on a radix tree - * @root: radix tree root - * @index: index key - * - * Lookup the item at the position @index in the radix tree @root. - */ -void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) -{ - void **slot; - - slot = __lookup_slot(root, index); - return slot != NULL ? *slot : NULL; -} - -/** - * radix_tree_tag_set - set a tag on a radix tree node - * @root: radix tree root - * @index: index key - * @tag: tag index - * - * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) - * corresponding to @index in the radix tree. From - * the root all the way down to the leaf node. - * - * Returns the address of the tagged item. Setting a tag on a not-present - * item is a bug. - */ -void *radix_tree_tag_set(struct radix_tree_root *root, - unsigned long index, unsigned int tag) -{ - unsigned int height, shift; - struct radix_tree_node *slot; - - height = root->height; - BUG_ON(index > radix_tree_maxindex(height)); - - slot = root->rnode; - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - - while (height > 0) { - int offset; - - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - if (!tag_get(slot, tag, offset)) - tag_set(slot, tag, offset); - slot = slot->slots[offset]; - BUG_ON(slot == NULL); - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } - - /* set the root's tag bit */ - if (slot && !root_tag_get(root, tag)) - root_tag_set(root, tag); - - return slot; -} - -/** - * radix_tree_tag_clear - clear a tag on a radix tree node - * @root: radix tree root - * @index: index key - * @tag: tag index - * - * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) - * corresponding to @index in the radix tree. If - * this causes the leaf node to have no tags set then clear the tag in the - * next-to-leaf node, etc. - * - * Returns the address of the tagged item on success, else NULL. ie: - * has the same return value and semantics as radix_tree_lookup(). - */ -void *radix_tree_tag_clear(struct radix_tree_root *root, - unsigned long index, unsigned int tag) -{ - struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; - struct radix_tree_node *slot = NULL; - unsigned int height, shift; - - height = root->height; - if (index > radix_tree_maxindex(height)) - goto out; - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - pathp->node = NULL; - slot = root->rnode; - - while (height > 0) { - int offset; - - if (slot == NULL) - goto out; - - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - pathp[1].offset = offset; - pathp[1].node = slot; - slot = slot->slots[offset]; - pathp++; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } - - if (slot == NULL) - goto out; - - while (pathp->node) { - if (!tag_get(pathp->node, tag, pathp->offset)) - goto out; - tag_clear(pathp->node, tag, pathp->offset); - if (any_tag_set(pathp->node, tag)) - goto out; - pathp--; - } - - /* clear the root's tag bit */ - if (root_tag_get(root, tag)) - root_tag_clear(root, tag); - -out: - return slot; -} - -#ifndef __KERNEL__ /* Only the test harness uses this at present */ -/** - * radix_tree_tag_get - get a tag on a radix tree node - * @root: radix tree root - * @index: index key - * @tag: tag index (< RADIX_TREE_MAX_TAGS) - * - * Return values: - * - * 0: tag not present or not set - * 1: tag set - */ -int radix_tree_tag_get(struct radix_tree_root *root, - unsigned long index, unsigned int tag) -{ - unsigned int height, shift; - struct radix_tree_node *slot; - int saw_unset_tag = 0; - - height = root->height; - if (index > radix_tree_maxindex(height)) - return 0; - - /* check the root's tag bit */ - if (!root_tag_get(root, tag)) - return 0; - - if (height == 0) - return 1; - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; - - for ( ; ; ) { - int offset; - - if (slot == NULL) - return 0; - - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - - /* - * This is just a debug check. Later, we can bale as soon as - * we see an unset tag. - */ - if (!tag_get(slot, tag, offset)) - saw_unset_tag = 1; - if (height == 1) { - int ret = tag_get(slot, tag, offset); - - BUG_ON(ret && saw_unset_tag); - return !!ret; - } - slot = slot->slots[offset]; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } -} -#endif - -static unsigned int -__lookup(struct radix_tree_root *root, void **results, unsigned long index, - unsigned int max_items, unsigned long *next_index) -{ - unsigned int nr_found = 0; - unsigned int shift, height; - struct radix_tree_node *slot; - unsigned long i; - - height = root->height; - if (height == 0) { - if (root->rnode && index == 0) - results[nr_found++] = root->rnode; - goto out; - } - - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; - - for ( ; height > 1; height--) { - - for (i = (index >> shift) & RADIX_TREE_MAP_MASK ; - i < RADIX_TREE_MAP_SIZE; i++) { - if (slot->slots[i] != NULL) - break; - index &= ~((1UL << shift) - 1); - index += 1UL << shift; - if (index == 0) - goto out; /* 32-bit wraparound */ - } - if (i == RADIX_TREE_MAP_SIZE) - goto out; - - shift -= RADIX_TREE_MAP_SHIFT; - slot = slot->slots[i]; - } - - /* Bottom level: grab some items */ - for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { - index++; - if (slot->slots[i]) { - results[nr_found++] = slot->slots[i]; - if (nr_found == max_items) - goto out; - } - } -out: - *next_index = index; - return nr_found; -} - -/** - * radix_tree_gang_lookup - perform multiple lookup on a radix tree - * @root: radix tree root - * @results: where the results of the lookup are placed - * @first_index: start the lookup from this key - * @max_items: place up to this many items at *results - * - * Performs an index-ascending scan of the tree for present items. Places - * them at *@results and returns the number of items which were placed at - * *@results. - * - * The implementation is naive. - */ -unsigned int -radix_tree_gang_lookup(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items) -{ - const unsigned long max_index = radix_tree_maxindex(root->height); - unsigned long cur_index = first_index; - unsigned int ret = 0; - - while (ret < max_items) { - unsigned int nr_found; - unsigned long next_index; /* Index of next search */ - - if (cur_index > max_index) - break; - nr_found = __lookup(root, results + ret, cur_index, - max_items - ret, &next_index); - ret += nr_found; - if (next_index == 0) - break; - cur_index = next_index; - } - return ret; -} - -/* - * FIXME: the two tag_get()s here should use find_next_bit() instead of - * open-coding the search. - */ -static unsigned int -__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index, - unsigned int max_items, unsigned long *next_index, unsigned int tag) -{ - unsigned int nr_found = 0; - unsigned int shift; - unsigned int height = root->height; - struct radix_tree_node *slot; - - if (height == 0) { - if (root->rnode && index == 0) - results[nr_found++] = root->rnode; - goto out; - } - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; - - do { - unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK; - - for ( ; i < RADIX_TREE_MAP_SIZE; i++) { - if (tag_get(slot, tag, i)) { - BUG_ON(slot->slots[i] == NULL); - break; - } - index &= ~((1UL << shift) - 1); - index += 1UL << shift; - if (index == 0) - goto out; /* 32-bit wraparound */ - } - if (i == RADIX_TREE_MAP_SIZE) - goto out; - height--; - if (height == 0) { /* Bottom level: grab some items */ - unsigned long j = index & RADIX_TREE_MAP_MASK; - - for ( ; j < RADIX_TREE_MAP_SIZE; j++) { - index++; - if (tag_get(slot, tag, j)) { - BUG_ON(slot->slots[j] == NULL); - results[nr_found++] = slot->slots[j]; - if (nr_found == max_items) - goto out; - } - } - } - shift -= RADIX_TREE_MAP_SHIFT; - slot = slot->slots[i]; - } while (height > 0); -out: - *next_index = index; - return nr_found; -} - -/** - * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree - * based on a tag - * @root: radix tree root - * @results: where the results of the lookup are placed - * @first_index: start the lookup from this key - * @max_items: place up to this many items at *results - * @tag: the tag index (< RADIX_TREE_MAX_TAGS) - * - * Performs an index-ascending scan of the tree for present items which - * have the tag indexed by @tag set. Places the items at *@results and - * returns the number of items which were placed at *@results. - */ -unsigned int -radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items, - unsigned int tag) -{ - const unsigned long max_index = radix_tree_maxindex(root->height); - unsigned long cur_index = first_index; - unsigned int ret = 0; - - /* check the root's tag bit */ - if (!root_tag_get(root, tag)) - return 0; - - while (ret < max_items) { - unsigned int nr_found; - unsigned long next_index; /* Index of next search */ - - if (cur_index > max_index) - break; - nr_found = __lookup_tag(root, results + ret, cur_index, - max_items - ret, &next_index, tag); - ret += nr_found; - if (next_index == 0) - break; - cur_index = next_index; - } - return ret; -} - -/** - * radix_tree_shrink - shrink height of a radix tree to minimal - * @root radix tree root - */ -static inline void radix_tree_shrink(struct radix_tree_root *root) -{ - /* try to shrink tree height */ - while (root->height > 0 && - root->rnode->count == 1 && - root->rnode->slots[0]) { - struct radix_tree_node *to_free = root->rnode; - - root->rnode = to_free->slots[0]; - root->height--; - /* must only free zeroed nodes into the slab */ - tag_clear(to_free, 0, 0); - tag_clear(to_free, 1, 0); - to_free->slots[0] = NULL; - to_free->count = 0; - radix_tree_node_free(to_free); - } -} - -/** - * radix_tree_delete - delete an item from a radix tree - * @root: radix tree root - * @index: index key - * - * Remove the item at @index from the radix tree rooted at @root. - * - * Returns the address of the deleted item, or NULL if it was not present. - */ -void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) -{ - struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; - struct radix_tree_node *slot = NULL; - unsigned int height, shift; - int tag; - int offset; - - height = root->height; - if (index > radix_tree_maxindex(height)) - goto out; - - slot = root->rnode; - if (height == 0 && root->rnode) { - root_tag_clear_all(root); - root->rnode = NULL; - goto out; - } - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - pathp->node = NULL; - - do { - if (slot == NULL) - goto out; - - pathp++; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - pathp->offset = offset; - pathp->node = slot; - slot = slot->slots[offset]; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } while (height > 0); - - if (slot == NULL) - goto out; - - /* - * Clear all tags associated with the just-deleted item - */ - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { - if (tag_get(pathp->node, tag, pathp->offset)) - radix_tree_tag_clear(root, index, tag); - } - - /* Now free the nodes we do not need anymore */ - while (pathp->node) { - pathp->node->slots[pathp->offset] = NULL; - pathp->node->count--; - - if (pathp->node->count) { - if (pathp->node == root->rnode) - radix_tree_shrink(root); - goto out; - } - - /* Node with zero slots in use so free it */ - radix_tree_node_free(pathp->node); - - pathp--; - } - root_tag_clear_all(root); - root->height = 0; - root->rnode = NULL; - -out: - return slot; -} - -/** - * radix_tree_tagged - test whether any items in the tree are tagged - * @root: radix tree root - * @tag: tag to test - */ -int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) -{ - return root_tag_get(root, tag); -} - -static unsigned long __maxindex(unsigned int height) -{ - unsigned int tmp = height * RADIX_TREE_MAP_SHIFT; - unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1; - - if (tmp >= RADIX_TREE_INDEX_BITS) - index = ~0UL; - return index; -} - -static void radix_tree_init_maxindex(void) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) - height_to_maxindex[i] = __maxindex(i); -} - -void radix_tree_init(void) -{ - radix_tree_init_maxindex(); -} diff --git a/fs/btrfs/radix-tree.h b/fs/btrfs/radix-tree.h deleted file mode 100644 index c3ce88137f77..000000000000 --- a/fs/btrfs/radix-tree.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (C) 2001 Momchil Velikov - * Portions Copyright (C) 2001 Christoph Hellwig - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#ifndef _LINUX_RADIX_TREE_H -#define _LINUX_RADIX_TREE_H - -#define RADIX_TREE_MAX_TAGS 2 - -/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */ -struct radix_tree_root { - unsigned int height; - gfp_t gfp_mask; - struct radix_tree_node *rnode; -}; - -#define RADIX_TREE_INIT(mask) { \ - .height = 0, \ - .gfp_mask = (mask), \ - .rnode = NULL, \ -} - -#define RADIX_TREE(name, mask) \ - struct radix_tree_root name = RADIX_TREE_INIT(mask) - -#define INIT_RADIX_TREE(root, mask) \ -do { \ - (root)->height = 0; \ - (root)->gfp_mask = (mask); \ - (root)->rnode = NULL; \ -} while (0) - -int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); -void *radix_tree_lookup(struct radix_tree_root *, unsigned long); -void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); -void *radix_tree_delete(struct radix_tree_root *, unsigned long); -unsigned int -radix_tree_gang_lookup(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items); -int radix_tree_preload(gfp_t gfp_mask); -void radix_tree_init(void); -void *radix_tree_tag_set(struct radix_tree_root *root, - unsigned long index, unsigned int tag); -void *radix_tree_tag_clear(struct radix_tree_root *root, - unsigned long index, unsigned int tag); -int radix_tree_tag_get(struct radix_tree_root *root, - unsigned long index, unsigned int tag); -unsigned int -radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items, - unsigned int tag); -int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); - -static inline void radix_tree_preload_end(void) -{ - preempt_enable(); -} - -#endif /* _LINUX_RADIX_TREE_H */ diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c deleted file mode 100644 index 3a38ae7a886d..000000000000 --- a/fs/btrfs/random-test.c +++ /dev/null @@ -1,405 +0,0 @@ -#include -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" -#include "ctree.h" -#include "disk-io.h" -#include "print-tree.h" -#include "transaction.h" - -int keep_running = 1; -struct btrfs_super_block super; - -static int setup_key(struct radix_tree_root *root, struct btrfs_key *key, - int exists) -{ - int num = rand(); - unsigned long res[2]; - int ret; - - key->flags = 0; - btrfs_set_key_type(key, BTRFS_STRING_ITEM_KEY); - key->offset = 0; -again: - ret = radix_tree_gang_lookup(root, (void **)res, num, 2); - if (exists) { - if (ret == 0) - return -1; - num = res[0]; - } else if (ret != 0 && num == res[0]) { - num++; - if (ret > 1 && num == res[1]) { - num++; - goto again; - } - } - key->objectid = num; - return 0; -} - -static int ins_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix) -{ - struct btrfs_path path; - struct btrfs_key key; - int ret; - char buf[128]; - unsigned long oid; - btrfs_init_path(&path); - ret = setup_key(radix, &key, 0); - sprintf(buf, "str-%Lu\n", key.objectid); - ret = btrfs_insert_item(trans, root, &key, buf, strlen(buf)); - if (ret) - goto error; - oid = (unsigned long)key.objectid; - radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(radix, oid, (void *)oid); - radix_tree_preload_end(); - if (ret) - goto error; - return ret; -error: - printf("failed to insert %Lu\n", key.objectid); - return -1; -} - -static int insert_dup(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix) -{ - struct btrfs_path path; - struct btrfs_key key; - int ret; - char buf[128]; - btrfs_init_path(&path); - ret = setup_key(radix, &key, 1); - if (ret < 0) - return 0; - sprintf(buf, "str-%Lu\n", key.objectid); - ret = btrfs_insert_item(trans, root, &key, buf, strlen(buf)); - if (ret != -EEXIST) { - printf("insert on %Lu gave us %d\n", key.objectid, ret); - return 1; - } - return 0; -} - -static int del_one(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix) -{ - struct btrfs_path path; - struct btrfs_key key; - int ret; - unsigned long *ptr; - btrfs_init_path(&path); - ret = setup_key(radix, &key, 1); - if (ret < 0) - return 0; - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); - if (ret) - goto error; - ret = btrfs_del_item(trans, root, &path); - btrfs_release_path(root, &path); - if (ret != 0) - goto error; - ptr = radix_tree_delete(radix, key.objectid); - if (!ptr) - goto error; - return 0; -error: - printf("failed to delete %Lu\n", key.objectid); - return -1; -} - -static int lookup_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix) -{ - struct btrfs_path path; - struct btrfs_key key; - int ret; - btrfs_init_path(&path); - ret = setup_key(radix, &key, 1); - if (ret < 0) - return 0; - ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); - btrfs_release_path(root, &path); - if (ret) - goto error; - return 0; -error: - printf("unable to find key %Lu\n", key.objectid); - return -1; -} - -static int lookup_enoent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix) -{ - struct btrfs_path path; - struct btrfs_key key; - int ret; - btrfs_init_path(&path); - ret = setup_key(radix, &key, 0); - if (ret < 0) - return ret; - ret = btrfs_search_slot(trans, root, &key, &path, 0, 0); - btrfs_release_path(root, &path); - if (ret <= 0) - goto error; - return 0; -error: - printf("able to find key that should not exist %Lu\n", key.objectid); - return -1; -} - -static int empty_tree(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct radix_tree_root *radix, int nr) -{ - struct btrfs_path path; - struct btrfs_key key; - unsigned long found = 0; - int ret; - int slot; - int *ptr; - int count = 0; - - key.offset = 0; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_STRING_ITEM_KEY); - key.objectid = (unsigned long)-1; - while(nr-- >= 0) { - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); - if (ret < 0) { - btrfs_release_path(root, &path); - return ret; - } - if (ret != 0) { - if (path.slots[0] == 0) { - btrfs_release_path(root, &path); - break; - } - path.slots[0] -= 1; - } - slot = path.slots[0]; - found = btrfs_disk_key_objectid( - &path.nodes[0]->leaf.items[slot].key); - ret = btrfs_del_item(trans, root, &path); - count++; - if (ret) { - fprintf(stderr, - "failed to remove %lu from tree\n", - found); - return -1; - } - btrfs_release_path(root, &path); - ptr = radix_tree_delete(radix, found); - if (!ptr) - goto error; - if (!keep_running) - break; - } - return 0; -error: - fprintf(stderr, "failed to delete from the radix %lu\n", found); - return -1; -} - -static int fill_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix, int count) -{ - int i; - int ret = 0; - for (i = 0; i < count; i++) { - ret = ins_one(trans, root, radix); - if (ret) { - fprintf(stderr, "fill failed\n"); - goto out; - } - if (i % 1000 == 0) { - ret = btrfs_commit_transaction(trans, root, &super); - if (ret) { - fprintf(stderr, "fill commit failed\n"); - return ret; - } - } - if (i && i % 10000 == 0) { - printf("bigfill %d\n", i); - } - if (!keep_running) - break; - } -out: - return ret; -} - -static int bulk_op(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *radix) -{ - int ret; - int nr = rand() % 5000; - static int run_nr = 0; - - /* do the bulk op much less frequently */ - if (run_nr++ % 100) - return 0; - ret = empty_tree(trans, root, radix, nr); - if (ret) - return ret; - ret = fill_tree(trans, root, radix, nr); - if (ret) - return ret; - return 0; -} - - -int (*ops[])(struct btrfs_trans_handle *, - struct btrfs_root *root, struct radix_tree_root *radix) = - { ins_one, insert_dup, del_one, lookup_item, - lookup_enoent, bulk_op }; - -static int fill_radix(struct btrfs_root *root, struct radix_tree_root *radix) -{ - struct btrfs_path path; - struct btrfs_key key; - unsigned long found; - int ret; - int slot; - int i; - - key.offset = 0; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_STRING_ITEM_KEY); - key.objectid = (unsigned long)-1; - while(1) { - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); - if (ret < 0) { - btrfs_release_path(root, &path); - return ret; - } - slot = path.slots[0]; - if (ret != 0) { - if (slot == 0) { - btrfs_release_path(root, &path); - break; - } - slot -= 1; - } - for (i = slot; i >= 0; i--) { - found = btrfs_disk_key_objectid(&path.nodes[0]-> - leaf.items[i].key); - radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(radix, found, (void *)found); - if (ret) { - fprintf(stderr, - "failed to insert %lu into radix\n", - found); - exit(1); - } - - radix_tree_preload_end(); - } - btrfs_release_path(root, &path); - key.objectid = found - 1; - if (key.objectid > found) - break; - } - return 0; -} -void sigstopper(int ignored) -{ - keep_running = 0; - fprintf(stderr, "caught exit signal, stopping\n"); -} - -int print_usage(void) -{ - printf("usage: tester [-ih] [-c count] [-f count]\n"); - printf("\t -c count -- iteration count after filling\n"); - printf("\t -f count -- run this many random inserts before starting\n"); - printf("\t -i -- only do initial fill\n"); - printf("\t -h -- this help text\n"); - exit(1); -} -int main(int ac, char **av) -{ - RADIX_TREE(radix, GFP_KERNEL); - struct btrfs_root *root; - int i; - int ret; - int count; - int op; - int iterations = 20000; - int init_fill_count = 800000; - int err = 0; - int initial_only = 0; - struct btrfs_trans_handle *trans; - radix_tree_init(); - root = open_ctree("dbfile", &super); - fill_radix(root, &radix); - - signal(SIGTERM, sigstopper); - signal(SIGINT, sigstopper); - - for (i = 1 ; i < ac ; i++) { - if (strcmp(av[i], "-i") == 0) { - initial_only = 1; - } else if (strcmp(av[i], "-c") == 0) { - iterations = atoi(av[i+1]); - i++; - } else if (strcmp(av[i], "-f") == 0) { - init_fill_count = atoi(av[i+1]); - i++; - } else { - print_usage(); - } - } - printf("initial fill\n"); - trans = btrfs_start_transaction(root, 1); - ret = fill_tree(trans, root, &radix, init_fill_count); - printf("starting run\n"); - if (ret) { - err = ret; - goto out; - } - if (initial_only == 1) { - goto out; - } - for (i = 0; i < iterations; i++) { - op = rand() % ARRAY_SIZE(ops); - count = rand() % 128; - if (i % 2000 == 0) { - printf("%d\n", i); - fflush(stdout); - } - if (i && i % 5000 == 0) { - printf("open & close, root level %d nritems %d\n", - btrfs_header_level(&root->node->node.header), - btrfs_header_nritems(&root->node->node.header)); - close_ctree(root, &super); - root = open_ctree("dbfile", &super); - } - while(count--) { - ret = ops[op](trans, root, &radix); - if (ret) { - fprintf(stderr, "op %d failed %d:%d\n", - op, i, iterations); - btrfs_print_tree(root, root->node); - fprintf(stderr, "op %d failed %d:%d\n", - op, i, iterations); - err = ret; - goto out; - } - if (ops[op] == bulk_op) - break; - if (keep_running == 0) { - err = 0; - goto out; - } - } - } -out: - close_ctree(root, &super); - return err; -} - diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 9cccecc0f431..52c83be4b307 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -1,7 +1,4 @@ -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include #include "ctree.h" #include "disk-io.h" #include "print-tree.h" diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c new file mode 100644 index 000000000000..4ae76044aeab --- /dev/null +++ b/fs/btrfs/super.c @@ -0,0 +1,205 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ctree.h" + +#define BTRFS_SUPER_MAGIC 0x9123682E +#if 0 +/* some random number */ + +static struct super_operations ramfs_ops; +static struct inode_operations ramfs_dir_inode_operations; + +static struct backing_dev_info ramfs_backing_dev_info = { + .ra_pages = 0, /* No readahead */ + .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | + BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | + BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, +}; + +struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) +{ + struct inode * inode = new_inode(sb); + + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blocks = 0; + inode->i_mapping->a_ops = &ramfs_aops; + inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + default: + init_special_inode(inode, mode, dev); + break; + case S_IFREG: + inode->i_op = &ramfs_file_inode_operations; + inode->i_fop = &ramfs_file_operations; + break; + case S_IFDIR: + inode->i_op = &ramfs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inc_nlink(inode); + break; + case S_IFLNK: + inode->i_op = &page_symlink_inode_operations; + break; + } + } + return inode; +} + +/* + * File creation. Allocate an inode, and we're done.. + */ +/* SMP-safe */ +static int +ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + struct inode * inode = ramfs_get_inode(dir->i_sb, mode, dev); + int error = -ENOSPC; + + if (inode) { + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + inode->i_mode |= S_ISGID; + } + d_instantiate(dentry, inode); + dget(dentry); /* Extra count - pin the dentry in core */ + error = 0; + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + } + return error; +} + +static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0); + if (!retval) + inc_nlink(dir); + return retval; +} + +static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) +{ + return ramfs_mknod(dir, dentry, mode | S_IFREG, 0); +} + +static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) +{ + struct inode *inode; + int error = -ENOSPC; + + inode = ramfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); + if (inode) { + int l = strlen(symname)+1; + error = page_symlink(inode, symname, l); + if (!error) { + if (dir->i_mode & S_ISGID) + inode->i_gid = dir->i_gid; + d_instantiate(dentry, inode); + dget(dentry); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + } else + iput(inode); + } + return error; +} + +static struct inode_operations ramfs_dir_inode_operations = { + .create = ramfs_create, + .lookup = simple_lookup, + .link = simple_link, + .unlink = simple_unlink, + .symlink = ramfs_symlink, + .mkdir = ramfs_mkdir, + .rmdir = simple_rmdir, + .mknod = ramfs_mknod, + .rename = simple_rename, +}; +#endif + +struct inode *btrfs_get_inode(struct super_block *sb, int mode, dev_t dev) +{ + struct inode * inode = new_inode(sb); + + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blocks = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + } + return inode; +} + +static struct super_operations btrfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + +static int btrfs_fill_super(struct super_block * sb, void * data, int silent) +{ + struct inode * inode; + struct dentry * root; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = BTRFS_SUPER_MAGIC; + sb->s_op = &btrfs_ops; + sb->s_time_gran = 1; + inode = btrfs_get_inode(sb, S_IFDIR | 0755, 0); + if (!inode) + return -ENOMEM; + + root = d_alloc_root(inode); + if (!root) { + iput(inode); + return -ENOMEM; + } + sb->s_root = root; + return 0; +} + +static int btrfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, + btrfs_fill_super, mnt); +} + +static struct file_system_type btrfs_fs_type = { + .owner = THIS_MODULE, + .name = "btrfs", + .get_sb = btrfs_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_btrfs_fs(void) +{ + printk("btrfs loaded!\n"); + return register_filesystem(&btrfs_fs_type); +} + +static void __exit exit_btrfs_fs(void) +{ + unregister_filesystem(&btrfs_fs_type); + printk("btrfs unloaded\n"); +} + +module_init(init_btrfs_fs) +module_exit(exit_btrfs_fs) + +MODULE_LICENSE("GPL"); From e20d96d64f9cf9288ffecc9ad4714e91c3b97ca8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 22 Mar 2007 12:13:20 -0400 Subject: [PATCH 0061/2985] Mountable btrfs, with readdir Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 5 +- fs/btrfs/ctree.c | 273 ++++++++++++++++++++------------------- fs/btrfs/ctree.h | 62 +++++++-- fs/btrfs/dir-item.c | 17 +-- fs/btrfs/disk-io.c | 283 ++++++++++++----------------------------- fs/btrfs/disk-io.h | 63 ++++----- fs/btrfs/extent-tree.c | 144 +++++++++++---------- fs/btrfs/hash.c | 9 ++ fs/btrfs/inode-map.c | 8 +- fs/btrfs/print-tree.c | 20 ++- fs/btrfs/print-tree.h | 2 +- fs/btrfs/root-tree.c | 4 +- fs/btrfs/super.c | 263 +++++++++++++++++++++++++++++++++++--- fs/btrfs/transaction.h | 4 +- 14 files changed, 659 insertions(+), 498 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 99e45a54ebd6..1a0fb7422393 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -2,7 +2,8 @@ ifneq ($(KERNELRELEASE),) # kbuild part of makefile obj-m := btrfs.o -btrfs-y := super.o +btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ + hash.o file-item.o inode-item.o inode-map.o disk-io.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ @@ -14,7 +15,7 @@ else KERNELDIR := /lib/modules/`uname -r`/build all:: - $(MAKE) -C $(KERNELDIR) M=`pwd` modules + $(MAKE) C=1 -C $(KERNELDIR) M=`pwd` modules clean:: rm *.o btrfs.ko endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9fbd07c37fde..e690e2bb47d2 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -7,11 +7,11 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size); static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *dst, struct btrfs_buffer + *root, struct buffer_head *dst, struct buffer_head *src); static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct btrfs_buffer *dst_buf, - struct btrfs_buffer *src_buf); + btrfs_root *root, struct buffer_head *dst_buf, + struct buffer_head *src_buf); static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); @@ -32,32 +32,34 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) } static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *buf, struct btrfs_buffer - *parent, int parent_slot, struct btrfs_buffer + *root, struct buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct buffer_head **cow_ret) { - struct btrfs_buffer *cow; + struct buffer_head *cow; + struct btrfs_node *cow_node; - if (!list_empty(&buf->dirty)) { + if (!buffer_dirty(buf)) { *cow_ret = buf; return 0; } cow = btrfs_alloc_free_block(trans, root); - memcpy(&cow->node, &buf->node, root->blocksize); - btrfs_set_header_blocknr(&cow->node.header, cow->blocknr); + cow_node = btrfs_buffer_node(cow); + memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); + btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); *cow_ret = cow; btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; - cow->count++; + get_bh(cow); if (buf != root->commit_root) - btrfs_free_extent(trans, root, buf->blocknr, 1, 1); + btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); btrfs_block_release(root, buf); } else { - btrfs_set_node_blockptr(&parent->node, parent_slot, - cow->blocknr); - BUG_ON(list_empty(&parent->dirty)); - btrfs_free_extent(trans, root, buf->blocknr, 1, 1); + btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, + cow->b_blocknr); + BUG_ON(!buffer_dirty(parent)); + btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); return 0; @@ -119,12 +121,12 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, { int i; struct btrfs_node *parent = NULL; - struct btrfs_node *node = &path->nodes[level]->node; + struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); int parent_slot; u32 nritems = btrfs_header_nritems(&node->header); if (path->nodes[level + 1]) - parent = &path->nodes[level + 1]->node; + parent = btrfs_buffer_node(path->nodes[level + 1]); parent_slot = path->slots[level + 1]; BUG_ON(nritems == 0); if (parent) { @@ -148,13 +150,13 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { int i; - struct btrfs_leaf *leaf = &path->nodes[level]->leaf; + struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]); struct btrfs_node *parent = NULL; int parent_slot; u32 nritems = btrfs_header_nritems(&leaf->header); if (path->nodes[level + 1]) - parent = &path->nodes[level + 1]->node; + parent = btrfs_buffer_node(path->nodes[level + 1]); parent_slot = path->slots[level + 1]; BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); @@ -250,11 +252,11 @@ static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) return -1; } -static struct btrfs_buffer *read_node_slot(struct btrfs_root *root, - struct btrfs_buffer *parent_buf, +static struct buffer_head *read_node_slot(struct btrfs_root *root, + struct buffer_head *parent_buf, int slot) { - struct btrfs_node *node = &parent_buf->node; + struct btrfs_node *node = btrfs_buffer_node(parent_buf); if (slot < 0) return NULL; if (slot >= btrfs_header_nritems(&node->header)) @@ -265,10 +267,10 @@ static struct btrfs_buffer *read_node_slot(struct btrfs_root *root, static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_buffer *right_buf; - struct btrfs_buffer *mid_buf; - struct btrfs_buffer *left_buf; - struct btrfs_buffer *parent_buf = NULL; + struct buffer_head *right_buf; + struct buffer_head *mid_buf; + struct buffer_head *left_buf; + struct buffer_head *parent_buf = NULL; struct btrfs_node *right = NULL; struct btrfs_node *mid; struct btrfs_node *left = NULL; @@ -283,7 +285,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root return 0; mid_buf = path->nodes[level]; - mid = &mid_buf->node; + mid = btrfs_buffer_node(mid_buf); orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) @@ -295,8 +297,8 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root * by promoting the node below to a root */ if (!parent_buf) { - struct btrfs_buffer *child; - u64 blocknr = mid_buf->blocknr; + struct buffer_head *child; + u64 blocknr = mid_buf->b_blocknr; if (btrfs_header_nritems(&mid->header) != 1) return 0; @@ -313,7 +315,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root clean_tree_block(trans, root, mid_buf); return btrfs_free_extent(trans, root, blocknr, 1, 1); } - parent = &parent_buf->node; + parent = btrfs_buffer_node(parent_buf); if (btrfs_header_nritems(&mid->header) > BTRFS_NODEPTRS_PER_BLOCK(root) / 4) @@ -326,7 +328,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (left_buf) { btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, &left_buf); - left = &left_buf->node; + left = btrfs_buffer_node(left_buf); orig_slot += btrfs_header_nritems(&left->header); wret = push_node_left(trans, root, left_buf, mid_buf); if (wret < 0) @@ -339,12 +341,12 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (right_buf) { btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1, &right_buf); - right = &right_buf->node; + right = btrfs_buffer_node(right_buf); wret = push_node_left(trans, root, mid_buf, right_buf); if (wret < 0) ret = wret; if (btrfs_header_nritems(&right->header) == 0) { - u64 blocknr = right_buf->blocknr; + u64 blocknr = right_buf->b_blocknr; btrfs_block_release(root, right_buf); clean_tree_block(trans, root, right_buf); right_buf = NULL; @@ -360,7 +362,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&parent->ptrs[pslot + 1].key, &right->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(list_empty(&parent_buf->dirty)); + BUG_ON(!buffer_dirty(parent_buf)); } } if (btrfs_header_nritems(&mid->header) == 1) { @@ -381,7 +383,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = mid_buf->blocknr; + u64 blocknr = mid_buf->b_blocknr; btrfs_block_release(root, mid_buf); clean_tree_block(trans, root, mid_buf); mid_buf = NULL; @@ -396,13 +398,13 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root /* update the parent key to reflect our changes */ memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(list_empty(&parent_buf->dirty)); + BUG_ON(!buffer_dirty(parent_buf)); } /* update the path */ if (left_buf) { if (btrfs_header_nritems(&left->header) > orig_slot) { - left_buf->count++; // released below + get_bh(left_buf); path->nodes[level] = left_buf; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; @@ -415,8 +417,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } /* double check we haven't messed things up */ check_block(root, path, level); - if (orig_ptr != btrfs_node_blockptr(&path->nodes[level]->node, - path->slots[level])) + if (orig_ptr != + btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), + path->slots[level])) BUG(); if (right_buf) @@ -443,8 +446,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { - struct btrfs_buffer *b; - struct btrfs_buffer *cow_buf; + struct buffer_head *b; + struct buffer_head *cow_buf; struct btrfs_node *c; int slot; int ret; @@ -452,18 +455,20 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root again: b = root->node; - b->count++; + get_bh(b); while (b) { - level = btrfs_header_level(&b->node.header); + c = btrfs_buffer_node(b); + level = btrfs_header_level(&c->header); if (cow) { int wret; - wret = btrfs_cow_block(trans, root, b, p->nodes[level + - 1], p->slots[level + 1], + wret = btrfs_cow_block(trans, root, b, + p->nodes[level + 1], + p->slots[level + 1], &cow_buf); b = cow_buf; } BUG_ON(!cow && ins_len); - c = &b->node; + c = btrfs_buffer_node(b); p->nodes[level] = b; ret = check_block(root, p, level); if (ret) @@ -480,7 +485,7 @@ again: if (sret) return sret; b = p->nodes[level]; - c = &b->node; + c = btrfs_buffer_node(b); slot = p->slots[level]; } else if (ins_len < 0) { int sret = balance_level(trans, root, p, @@ -490,7 +495,7 @@ again: b = p->nodes[level]; if (!b) goto again; - c = &b->node; + c = btrfs_buffer_node(b); slot = p->slots[level]; BUG_ON(btrfs_header_nritems(&c->header) == 1); } @@ -505,11 +510,9 @@ again: if (sret) return sret; } - BUG_ON(root->node->count == 1); return ret; } } - BUG_ON(root->node->count == 1); return 1; } @@ -534,9 +537,9 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root int tslot = path->slots[i]; if (!path->nodes[i]) break; - t = &path->nodes[i]->node; + t = btrfs_buffer_node(path->nodes[i]); memcpy(&t->ptrs[tslot].key, key, sizeof(*key)); - BUG_ON(list_empty(&path->nodes[i]->dirty)); + BUG_ON(!buffer_dirty(path->nodes[i])); if (tslot != 0) break; } @@ -551,11 +554,11 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root * error, and > 0 if there was no room in the left hand block. */ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *dst_buf, struct - btrfs_buffer *src_buf) + *root, struct buffer_head *dst_buf, struct + buffer_head *src_buf) { - struct btrfs_node *src = &src_buf->node; - struct btrfs_node *dst = &dst_buf->node; + struct btrfs_node *src = btrfs_buffer_node(src_buf); + struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int src_nritems; int dst_nritems; @@ -580,8 +583,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root } btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(list_empty(&src_buf->dirty)); - BUG_ON(list_empty(&dst_buf->dirty)); + BUG_ON(!buffer_dirty(src_buf)); + BUG_ON(!buffer_dirty(dst_buf)); return ret; } @@ -595,11 +598,11 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root * this will only push up to 1/2 the contents of the left node over */ static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct btrfs_buffer *dst_buf, - struct btrfs_buffer *src_buf) + btrfs_root *root, struct buffer_head *dst_buf, + struct buffer_head *src_buf) { - struct btrfs_node *src = &src_buf->node; - struct btrfs_node *dst = &dst_buf->node; + struct btrfs_node *src = btrfs_buffer_node(src_buf); + struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int max_push; int src_nritems; @@ -628,8 +631,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(list_empty(&src_buf->dirty)); - BUG_ON(list_empty(&dst_buf->dirty)); + BUG_ON(!buffer_dirty(src_buf)); + BUG_ON(!buffer_dirty(dst_buf)); return ret; } @@ -643,7 +646,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_buffer *t; + struct buffer_head *t; struct btrfs_node *lower; struct btrfs_node *c; struct btrfs_disk_key *lower_key; @@ -652,24 +655,24 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level-1] != root->node); t = btrfs_alloc_free_block(trans, root); - c = &t->node; + c = btrfs_buffer_node(t); memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); btrfs_set_header_level(&c->header, level); - btrfs_set_header_blocknr(&c->header, t->blocknr); + btrfs_set_header_blocknr(&c->header, t->b_blocknr); btrfs_set_header_parentid(&c->header, - btrfs_header_parentid(&root->node->node.header)); - lower = &path->nodes[level-1]->node; + btrfs_header_parentid(btrfs_buffer_header(root->node))); + lower = btrfs_buffer_node(path->nodes[level-1]); if (btrfs_is_leaf(lower)) lower_key = &((struct btrfs_leaf *)lower)->items[0].key; else lower_key = &lower->ptrs[0].key; memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); - btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->blocknr); + btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); /* the super has an extra ref to root->node */ btrfs_block_release(root, root->node); root->node = t; - t->count++; + get_bh(t); path->nodes[level] = t; path->slots[level] = 0; return 0; @@ -692,7 +695,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root int nritems; BUG_ON(!path->nodes[level]); - lower = &path->nodes[level]->node; + lower = btrfs_buffer_node(path->nodes[level]); nritems = btrfs_header_nritems(&lower->header); if (slot > nritems) BUG(); @@ -705,7 +708,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); - BUG_ON(list_empty(&path->nodes[level]->dirty)); + BUG_ON(!buffer_dirty(path->nodes[level])); return 0; } @@ -721,9 +724,9 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_buffer *t; + struct buffer_head *t; struct btrfs_node *c; - struct btrfs_buffer *split_buffer; + struct buffer_head *split_buffer; struct btrfs_node *split; int mid; int ret; @@ -731,7 +734,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root u32 c_nritems; t = path->nodes[level]; - c = &t->node; + c = btrfs_buffer_node(t); if (t == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); @@ -740,11 +743,11 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(&c->header); split_buffer = btrfs_alloc_free_block(trans, root); - split = &split_buffer->node; + split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); - btrfs_set_header_blocknr(&split->header, split_buffer->blocknr); + btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr); btrfs_set_header_parentid(&split->header, - btrfs_header_parentid(&root->node->node.header)); + btrfs_header_parentid(btrfs_buffer_header(root->node))); mid = (c_nritems + 1) / 2; memcpy(split->ptrs, c->ptrs + mid, (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); @@ -752,9 +755,9 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&c->header, mid); ret = 0; - BUG_ON(list_empty(&t->dirty)); + BUG_ON(!buffer_dirty(t)); wret = insert_ptr(trans, root, path, &split->ptrs[0].key, - split_buffer->blocknr, path->slots[level + 1] + 1, + split_buffer->b_blocknr, path->slots[level + 1] + 1, level + 1); if (wret) ret = wret; @@ -798,11 +801,12 @@ static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct btrfs_buffer *left_buf = path->nodes[0]; - struct btrfs_leaf *left = &left_buf->leaf; + struct buffer_head *left_buf = path->nodes[0]; + struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf); struct btrfs_leaf *right; - struct btrfs_buffer *right_buf; - struct btrfs_buffer *upper; + struct buffer_head *right_buf; + struct buffer_head *upper; + struct btrfs_node *upper_node; int slot; int i; int free_space; @@ -817,12 +821,13 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } upper = path->nodes[1]; - if (slot >= btrfs_header_nritems(&upper->node.header) - 1) { + upper_node = btrfs_buffer_node(upper); + if (slot >= btrfs_header_nritems(&upper_node->header) - 1) { return 1; } - right_buf = read_tree_block(root, btrfs_node_blockptr(&upper->node, - slot + 1)); - right = &right_buf->leaf; + right_buf = read_tree_block(root, + btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1)); + right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, right_buf); @@ -830,7 +835,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root } /* cow and double check */ btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf); - right = &right_buf->leaf; + right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, right_buf); @@ -881,11 +886,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root left_nritems -= push_items; btrfs_set_header_nritems(&left->header, left_nritems); - BUG_ON(list_empty(&left_buf->dirty)); - BUG_ON(list_empty(&right_buf->dirty)); - memcpy(&upper->node.ptrs[slot + 1].key, + BUG_ON(!buffer_dirty(left_buf)); + BUG_ON(!buffer_dirty(right_buf)); + memcpy(&upper_node->ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(list_empty(&upper->dirty)); + BUG_ON(!buffer_dirty(upper)); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { @@ -905,9 +910,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct btrfs_buffer *right_buf = path->nodes[0]; - struct btrfs_leaf *right = &right_buf->leaf; - struct btrfs_buffer *t; + struct buffer_head *right_buf = path->nodes[0]; + struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf); + struct buffer_head *t; struct btrfs_leaf *left; int slot; int i; @@ -926,9 +931,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root if (!path->nodes[1]) { return 1; } - t = read_tree_block(root, btrfs_node_blockptr(&path->nodes[1]->node, - slot - 1)); - left = &t->leaf; + t = read_tree_block(root, + btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1)); + left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, t); @@ -937,7 +942,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root /* cow and double check */ btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); - left = &t->leaf; + left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, t); @@ -999,8 +1004,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root push_space = btrfs_item_offset(right->items + i); } - BUG_ON(list_empty(&t->dirty)); - BUG_ON(list_empty(&right_buf->dirty)); + BUG_ON(!buffer_dirty(t)); + BUG_ON(!buffer_dirty(right_buf)); wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) @@ -1029,13 +1034,13 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct btrfs_buffer *l_buf; + struct buffer_head *l_buf; struct btrfs_leaf *l; u32 nritems; int mid; int slot; struct btrfs_leaf *right; - struct btrfs_buffer *right_buffer; + struct buffer_head *right_buffer; int space_needed = data_size + sizeof(struct btrfs_item); int data_copy_size; int rt_data_off; @@ -1053,7 +1058,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return wret; } l_buf = path->nodes[0]; - l = &l_buf->leaf; + l = btrfs_buffer_leaf(l_buf); /* did the pushes work? */ if (btrfs_leaf_free_space(root, l) >= @@ -1071,7 +1076,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root right_buffer = btrfs_alloc_free_block(trans, root); BUG_ON(!right_buffer); BUG_ON(mid == nritems); - right = &right_buffer->leaf; + right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); if (mid <= slot) { /* FIXME, just alloc a new leaf here */ @@ -1085,10 +1090,10 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BUG(); } btrfs_set_header_nritems(&right->header, nritems - mid); - btrfs_set_header_blocknr(&right->header, right_buffer->blocknr); + btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, - btrfs_header_parentid(&root->node->node.header)); + btrfs_header_parentid(btrfs_buffer_header(root->node))); data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(root, l); memcpy(right->items, l->items + mid, @@ -1107,11 +1112,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&l->header, mid); ret = 0; wret = insert_ptr(trans, root, path, &right->items[0].key, - right_buffer->blocknr, path->slots[1] + 1, 1); + right_buffer->b_blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; - BUG_ON(list_empty(&right_buffer->dirty)); - BUG_ON(list_empty(&l_buf->dirty)); + BUG_ON(!buffer_dirty(right_buffer)); + BUG_ON(!buffer_dirty(l_buf)); BUG_ON(path->slots[0] != slot); if (mid <= slot) { btrfs_block_release(root, path->nodes[0]); @@ -1136,7 +1141,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root int slot; int slot_orig; struct btrfs_leaf *leaf; - struct btrfs_buffer *leaf_buf; + struct buffer_head *leaf_buf; u32 nritems; unsigned int data_end; struct btrfs_disk_key disk_key; @@ -1156,7 +1161,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root slot_orig = path->slots[0]; leaf_buf = path->nodes[0]; - leaf = &leaf_buf->leaf; + leaf = btrfs_buffer_leaf(leaf_buf); nritems = btrfs_header_nritems(&leaf->header); data_end = leaf_data_end(root, leaf); @@ -1202,7 +1207,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root if (slot == 0) ret = fixup_low_keys(trans, root, path, &disk_key, 1); - BUG_ON(list_empty(&leaf_buf->dirty)); + BUG_ON(!buffer_dirty(leaf_buf)); if (btrfs_leaf_free_space(root, leaf) < 0) BUG(); check_leaf(root, path, 0); @@ -1225,7 +1230,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_init_path(&path); ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size); if (!ret) { - ptr = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], u8); + ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], u8); memcpy(ptr, data, data_size); } btrfs_release_path(root, &path); @@ -1243,12 +1249,12 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { struct btrfs_node *node; - struct btrfs_buffer *parent = path->nodes[level]; + struct buffer_head *parent = path->nodes[level]; u32 nritems; int ret = 0; int wret; - node = &parent->node; + node = btrfs_buffer_node(parent); nritems = btrfs_header_nritems(&node->header); if (slot != nritems -1) { memmove(node->ptrs + slot, node->ptrs + slot + 1, @@ -1257,16 +1263,17 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, nritems--; btrfs_set_header_nritems(&node->header, nritems); if (nritems == 0 && parent == root->node) { - BUG_ON(btrfs_header_level(&root->node->node.header) != 1); + struct btrfs_header *header = btrfs_buffer_header(root->node); + BUG_ON(btrfs_header_level(header) != 1); /* just turn the root into a leaf and break */ - btrfs_set_header_level(&root->node->node.header, 0); + btrfs_set_header_level(header, 0); } else if (slot == 0) { wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key, level + 1); if (wret) ret = wret; } - BUG_ON(list_empty(&parent->dirty)); + BUG_ON(!buffer_dirty(parent)); return ret; } @@ -1279,7 +1286,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, { int slot; struct btrfs_leaf *leaf; - struct btrfs_buffer *leaf_buf; + struct buffer_head *leaf_buf; int doff; int dsize; int ret = 0; @@ -1287,7 +1294,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 nritems; leaf_buf = path->nodes[0]; - leaf = &leaf_buf->leaf; + leaf = btrfs_buffer_leaf(leaf_buf); slot = path->slots[0]; doff = btrfs_item_offset(leaf->items + slot); dsize = btrfs_item_size(leaf->items + slot); @@ -1313,14 +1320,13 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (nritems == 0) { if (leaf_buf == root->node) { btrfs_set_header_level(&leaf->header, 0); - BUG_ON(list_empty(&leaf_buf->dirty)); } else { clean_tree_block(trans, root, leaf_buf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - leaf_buf->blocknr, 1, 1); + leaf_buf->b_blocknr, 1, 1); if (wret) ret = wret; } @@ -1332,7 +1338,6 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } - BUG_ON(list_empty(&leaf_buf->dirty)); /* delete the leaf if it is mostly empty */ if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { @@ -1341,7 +1346,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, * for possible call to del_ptr below */ slot = path->slots[1]; - leaf_buf->count++; + get_bh(leaf_buf); wret = push_leaf_left(trans, root, path, 1); if (wret < 0) ret = wret; @@ -1352,7 +1357,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = wret; } if (btrfs_header_nritems(&leaf->header) == 0) { - u64 blocknr = leaf_buf->blocknr; + u64 blocknr = leaf_buf->b_blocknr; clean_tree_block(trans, root, leaf_buf); wret = del_ptr(trans, root, path, 1, slot); if (wret) @@ -1380,19 +1385,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int slot; int level = 1; u64 blocknr; - struct btrfs_buffer *c; - struct btrfs_buffer *next = NULL; + struct buffer_head *c; + struct btrfs_node *c_node; + struct buffer_head *next = NULL; while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; slot = path->slots[level] + 1; c = path->nodes[level]; - if (slot >= btrfs_header_nritems(&c->node.header)) { + c_node = btrfs_buffer_node(c); + if (slot >= btrfs_header_nritems(&c_node->header)) { level++; continue; } - blocknr = btrfs_node_blockptr(&c->node, slot); + blocknr = btrfs_node_blockptr(c_node, slot); if (next) btrfs_block_release(root, next); next = read_tree_block(root, blocknr); @@ -1408,7 +1415,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (!level) break; next = read_tree_block(root, - btrfs_node_blockptr(&next->node, 0)); + btrfs_node_blockptr(btrfs_buffer_node(next), 0)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ae8518cb94bf..7748eecd9304 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,6 +1,9 @@ #ifndef __BTRFS__ #define __BTRFS__ +#include +#include + struct btrfs_trans_handle; #define BTRFS_MAGIC "_BtRfS_M" @@ -10,6 +13,12 @@ struct btrfs_trans_handle; #define BTRFS_INODE_MAP_OBJECTID 3 #define BTRFS_FS_TREE_OBJECTID 4 +/* + * we can actually store much bigger names, but lets not confuse the rest + * of linux + */ +#define BTRFS_NAME_LEN 255 + /* * the key defines the order in the tree, and so it also defines (optimal) * block layout. objectid corresonds to the inode number. The flags @@ -57,7 +66,7 @@ struct btrfs_header { #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) -struct btrfs_buffer; +struct buffer_head; /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -120,7 +129,7 @@ struct btrfs_node { * used while walking the tree. */ struct btrfs_path { - struct btrfs_buffer *nodes[BTRFS_MAX_LEVEL]; + struct buffer_head *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; }; @@ -211,17 +220,14 @@ struct btrfs_fs_info { struct btrfs_root *inode_root; struct btrfs_key current_insert; struct btrfs_key last_insert; - struct radix_tree_root cache_radix; struct radix_tree_root pinned_radix; - struct list_head trans; - struct list_head cache; u64 last_inode_alloc; u64 last_inode_alloc_dirid; u64 generation; - int cache_size; - int fp; struct btrfs_trans_handle *running_transaction; struct btrfs_super_block *disk_super; + struct buffer_head *sb_buffer; + struct super_block *sb; }; /* @@ -230,8 +236,8 @@ struct btrfs_fs_info { * only for the extent tree. */ struct btrfs_root { - struct btrfs_buffer *node; - struct btrfs_buffer *commit_root; + struct buffer_head *node; + struct buffer_head *commit_root; struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; @@ -389,6 +395,29 @@ static inline void btrfs_set_inode_compat_flags(struct btrfs_inode_item *i, i->compat_flags = cpu_to_le16(val); } +static inline u32 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) +{ + return le32_to_cpu(ts->sec); +} + +static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts, + u32 val) +{ + ts->sec = cpu_to_le32(val); +} + +static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts) +{ + return le32_to_cpu(ts->nsec); +} + +static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts, + u32 val) +{ + ts->nsec = cpu_to_le32(val); +} + + static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) { @@ -757,15 +786,20 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } +static inline struct btrfs_root *btrfs_sb(struct super_block *sb) +{ + return sb->s_fs_info; +} + /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) -struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, +struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf); + struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root @@ -783,7 +817,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *snap); + *root, struct buffer_head *snap); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -800,8 +834,8 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, char *name, int name_len, u64 dir, u64 objectid, u8 type); int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 dir, char *name, - int name_len, int mod); + *root, struct btrfs_path *path, u64 dir, + const char *name, int name_len, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, char *name, int name_len); int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 4d8083d92fa0..75d6e373e98d 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -18,12 +18,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - if (name_len == 1 && *name == '.') - key.offset = 1; - else if (name_len == 2 && name[0] == '.' && name[1] == '.') - key.offset = 2; - else - ret = btrfs_name_hash(name, name_len, &key.offset); + ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); btrfs_init_path(&path); data_size = sizeof(*dir_item) + name_len; @@ -31,7 +26,8 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) goto out; - dir_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], struct btrfs_dir_item); btrfs_set_dir_objectid(dir_item, objectid); btrfs_set_dir_type(dir_item, type); @@ -45,8 +41,8 @@ out: } int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 dir, char *name, - int name_len, int mod) + *root, struct btrfs_path *path, u64 dir, + const char *name, int name_len, int mod) { int ret; struct btrfs_key key; @@ -69,7 +65,8 @@ int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_dir_item *dir_item; char *name_ptr; - dir_item = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0], + dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_dir_item); if (btrfs_dir_name_len(dir_item) != name_len) return 0; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 05637f9fd7c7..df2061a735cd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,165 +1,67 @@ -#define _XOPEN_SOURCE 500 -#include -#include -#include -#include -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" -static int allocated_blocks = 0; -int cache_max = 10000; - -static int check_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) +static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { - if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) + struct btrfs_node *node = btrfs_buffer_node(buf); + if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) BUG(); - if (root->node && btrfs_header_parentid(&buf->node.header) != - btrfs_header_parentid(&root->node->node.header)) + if (root->node && btrfs_header_parentid(&node->header) != + btrfs_header_parentid(btrfs_buffer_header(root->node))) BUG(); return 0; } -static int free_some_buffers(struct btrfs_root *root) +struct buffer_head *alloc_tree_block(struct btrfs_root *root, u64 blocknr) { - struct list_head *node, *next; - struct btrfs_buffer *b; - if (root->fs_info->cache_size < cache_max) - return 0; - list_for_each_safe(node, next, &root->fs_info->cache) { - b = list_entry(node, struct btrfs_buffer, cache); - if (b->count == 1) { - BUG_ON(!list_empty(&b->dirty)); - list_del_init(&b->cache); - btrfs_block_release(root, b); - if (root->fs_info->cache_size < cache_max) - break; - } - } - return 0; + return sb_getblk(root->fs_info->sb, blocknr); } -struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr) { - struct btrfs_buffer *buf; - int ret; + return sb_getblk(root->fs_info->sb, blocknr); +} + +struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) +{ + struct buffer_head *buf = sb_bread(root->fs_info->sb, blocknr); - buf = malloc(sizeof(struct btrfs_buffer) + root->blocksize); if (!buf) return buf; - allocated_blocks++; - buf->blocknr = blocknr; - buf->count = 2; - INIT_LIST_HEAD(&buf->dirty); - free_some_buffers(root); - radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(&root->fs_info->cache_radix, blocknr, buf); - radix_tree_preload_end(); - list_add_tail(&buf->cache, &root->fs_info->cache); - root->fs_info->cache_size++; - if (ret) { - free(buf); - return NULL; - } - return buf; -} - -struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) -{ - struct btrfs_buffer *buf; - buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); - if (buf) { - buf->count++; - } else { - buf = alloc_tree_block(root, blocknr); - if (!buf) { - BUG(); - return NULL; - } - } - return buf; -} - -struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) -{ - loff_t offset = blocknr * root->blocksize; - struct btrfs_buffer *buf; - int ret; - - buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); - if (buf) { - buf->count++; - } else { - buf = alloc_tree_block(root, blocknr); - if (!buf) - return NULL; - ret = pread(root->fs_info->fp, &buf->node, root->blocksize, - offset); - if (ret != root->blocksize) { - free(buf); - return NULL; - } - } if (check_tree_block(root, buf)) BUG(); return buf; } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - if (!list_empty(&buf->dirty)) - return 0; - list_add_tail(&buf->dirty, &root->fs_info->trans); - buf->count++; + mark_buffer_dirty(buf); return 0; } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - if (!list_empty(&buf->dirty)) { - list_del_init(&buf->dirty); - btrfs_block_release(root, buf); - } + clear_buffer_dirty(buf); return 0; } int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - u64 blocknr = buf->blocknr; - loff_t offset = blocknr * root->blocksize; - int ret; - - if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) - BUG(); - ret = pwrite(root->fs_info->fp, &buf->node, root->blocksize, offset); - if (ret != root->blocksize) - return ret; + mark_buffer_dirty(buf); return 0; } static int __commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct btrfs_buffer *b; - int ret = 0; - int wret; - while(!list_empty(&root->fs_info->trans)) { - b = list_entry(root->fs_info->trans.next, struct btrfs_buffer, - dirty); - list_del_init(&b->dirty); - wret = write_tree_block(trans, root, b); - if (wret) - ret = wret; - btrfs_block_release(root, b); - } - return ret; + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + return 0; } static int commit_tree_roots(struct btrfs_trans_handle *trans, @@ -172,17 +74,17 @@ static int commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *inode_root = fs_info->inode_root; btrfs_set_root_blocknr(&inode_root->root_item, - inode_root->node->blocknr); + inode_root->node->b_blocknr); ret = btrfs_update_root(trans, tree_root, &inode_root->root_key, &inode_root->root_item); BUG_ON(ret); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == extent_root->node->blocknr) + if (old_extent_block == extent_root->node->b_blocknr) break; btrfs_set_root_blocknr(&extent_root->root_item, - extent_root->node->blocknr); + extent_root->node->b_blocknr); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -195,7 +97,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s) { int ret = 0; - struct btrfs_buffer *snap = root->commit_root; + struct buffer_head *snap = root->commit_root; struct btrfs_key snap_key; if (root->commit_root == root->node) @@ -204,7 +106,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct memcpy(&snap_key, &root->root_key, sizeof(snap_key)); root->root_key.offset++; - btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); + btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); BUG_ON(ret); @@ -220,7 +122,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_finish_extent_commit(trans, root->fs_info->tree_root); root->commit_root = root->node; - root->node->count++; + get_bh(root->node); ret = btrfs_drop_snapshot(trans, root, snap); BUG_ON(ret); @@ -234,7 +136,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct static int __setup_root(struct btrfs_super_block *super, struct btrfs_root *root, struct btrfs_fs_info *fs_info, - u64 objectid, int fp) + u64 objectid) { root->node = NULL; root->commit_root = NULL; @@ -250,11 +152,11 @@ static int find_and_setup_root(struct btrfs_super_block *super, struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, - struct btrfs_root *root, int fp) + struct btrfs_root *root) { int ret; - __setup_root(super, root, fs_info, objectid, fp); + __setup_root(super, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -265,32 +167,26 @@ static int find_and_setup_root(struct btrfs_super_block *super, return 0; } -struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) +struct btrfs_root *open_ctree(struct super_block *sb, + struct buffer_head *sb_buffer, + struct btrfs_super_block *disk_super) { - int fp; - - fp = open(filename, O_CREAT | O_RDWR, 0600); - if (fp < 0) { - return NULL; - } - return open_ctree_fd(fp, super); -} - -struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) -{ - struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *inode_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info)); + struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *inode_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), + GFP_NOFS); int ret; - INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL); + /* FIXME: don't be stupid */ + if (!btrfs_super_root(disk_super)) + return NULL; INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); - INIT_LIST_HEAD(&fs_info->trans); - INIT_LIST_HEAD(&fs_info->cache); - fs_info->cache_size = 0; - fs_info->fp = fp; fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -298,36 +194,31 @@ struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; - fs_info->disk_super = super; + fs_info->disk_super = disk_super; + fs_info->sb_buffer = sb_buffer; + fs_info->sb = sb; memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); - ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET); - if (ret == 0 || btrfs_super_root(super) == 0) { - BUG(); - return NULL; - } - BUG_ON(ret < 0); - - __setup_root(super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID, fp); - tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); + __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + tree_root->node = read_tree_block(tree_root, + btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(super, tree_root, fs_info, - BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); - ret = find_and_setup_root(super, tree_root, fs_info, - BTRFS_INODE_MAP_OBJECTID, inode_root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_INODE_MAP_OBJECTID, inode_root); BUG_ON(ret); - ret = find_and_setup_root(super, tree_root, fs_info, - BTRFS_FS_TREE_OBJECTID, root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_FS_TREE_OBJECTID, root); BUG_ON(ret); root->commit_root = root->node; - root->node->count++; + get_bh(root->node); root->ref_cows = 1; root->fs_info->generation = root->root_key.offset + 1; return root; @@ -336,8 +227,11 @@ struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s) { + return 0; +#if 0 int ret; - btrfs_set_super_root(s, root->fs_info->tree_root->node->blocknr); + btrfs_set_super_root(s, root->fs_info->tree_root->node->b_blocknr); + ret = pwrite(root->fs_info->fp, s, sizeof(*s), BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(*s)) { @@ -345,35 +239,38 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } return 0; +#endif } static int drop_cache(struct btrfs_root *root) { + return 0; +#if 0 while(!list_empty(&root->fs_info->cache)) { - struct btrfs_buffer *b = list_entry(root->fs_info->cache.next, - struct btrfs_buffer, + struct buffer_head *b = list_entry(root->fs_info->cache.next, + struct buffer_head, cache); list_del_init(&b->cache); btrfs_block_release(root, b); } return 0; +#endif } -int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) + +int close_ctree(struct btrfs_root *root) { int ret; struct btrfs_trans_handle *trans; trans = root->fs_info->running_transaction; - btrfs_commit_transaction(trans, root, s); + btrfs_commit_transaction(trans, root, root->fs_info->disk_super); ret = commit_tree_roots(trans, root->fs_info); BUG_ON(ret); ret = __commit_transaction(trans, root); BUG_ON(ret); - write_ctree_super(trans, root, s); + write_ctree_super(trans, root, root->fs_info->disk_super); drop_cache(root); - BUG_ON(!list_empty(&root->fs_info->trans)); - close(root->fs_info->fp); if (root->node) btrfs_block_release(root, root->node); if (root->fs_info->extent_root->node) @@ -386,29 +283,17 @@ int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) btrfs_block_release(root->fs_info->tree_root, root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); - free(root); - printf("on close %d blocks are allocated\n", allocated_blocks); + btrfs_block_release(root, root->fs_info->sb_buffer); + kfree(root->fs_info->extent_root); + kfree(root->fs_info->inode_root); + kfree(root->fs_info->tree_root); + kfree(root->fs_info); + kfree(root); return 0; } -void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf) +void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { - buf->count--; - if (buf->count < 0) - BUG(); - if (buf->count == 0) { - BUG_ON(!list_empty(&buf->cache)); - BUG_ON(!list_empty(&buf->dirty)); - if (!radix_tree_lookup(&root->fs_info->cache_radix, - buf->blocknr)) - BUG(); - radix_tree_delete(&root->fs_info->cache_radix, buf->blocknr); - memset(buf, 0, sizeof(*buf)); - free(buf); - BUG_ON(allocated_blocks == 0); - allocated_blocks--; - BUG_ON(root->fs_info->cache_size == 0); - root->fs_info->cache_size--; - } + brelse(buf); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index d888cf5c350b..7f4bb729b734 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -1,36 +1,41 @@ #ifndef __DISKIO__ #define __DISKIO__ -#include "list.h" -struct btrfs_buffer { - u64 blocknr; - int count; - struct list_head dirty; - struct list_head cache; - union { - struct btrfs_node node; - struct btrfs_leaf leaf; - }; -}; - -struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); -struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr); -int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf); -int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf); -int clean_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_buffer *buf); -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_super_block *s); -struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); -struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super); -int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); -void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); -int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_super_block *s); -int mkfs(int fd, u64 num_blocks, u32 blocksize); +#include #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) +static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) +{ + return (struct btrfs_node *)bh->b_data; +} + +static inline struct btrfs_leaf *btrfs_buffer_leaf(struct buffer_head *bh) +{ + return (struct btrfs_leaf *)bh->b_data; +} + +static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) +{ + return &((struct btrfs_node *)bh->b_data)->header; +} + +struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); +struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr); +int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct buffer_head *buf); +int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct buffer_head *buf); +int clean_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *buf); +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_super_block *s); +struct btrfs_root *open_ctree(struct super_block *sb, + struct buffer_head *sb_buffer, + struct btrfs_super_block *disk_super); +int close_ctree(struct btrfs_root *root); +void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); +int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_super_block *s); +int mkfs(int fd, u64 num_blocks, u32 blocksize); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 53a7550b5c1e..e3af2c035687 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10,9 +10,8 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root search_end, struct btrfs_key *ins); static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root - *extent_root); - +static int del_pending_extents(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root); /* * pending extents are blocks that we're trying to allocate in the extent * map while trying to grow the map because of other allocations. To avoid @@ -21,6 +20,7 @@ static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root * manner for deletes. */ #define CTREE_EXTENT_PENDING_DEL 0 +#define CTREE_EXTENT_PINNED 1 static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr) @@ -45,15 +45,14 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root if (ret != 0) BUG(); BUG_ON(ret != 0); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); - BUG_ON(list_empty(&path.nodes[0]->dirty)); btrfs_release_path(root->fs_info->extent_root, &path); finish_current_insert(trans, root->fs_info->extent_root); - run_pending(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); return 0; } @@ -74,7 +73,7 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root 0, 0); if (ret != 0) BUG(); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); btrfs_release_path(root->fs_info->extent_root, &path); @@ -82,18 +81,20 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { u64 blocknr; + struct btrfs_node *buf_node; int i; if (!root->ref_cows) return 0; - if (btrfs_is_leaf(&buf->node)) + buf_node = btrfs_buffer_node(buf); + if (btrfs_is_leaf(buf_node)) return 0; - for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) { - blocknr = btrfs_node_blockptr(&buf->node, i); + for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { + blocknr = btrfs_node_blockptr(buf_node, i); inc_block_ref(trans, root, blocknr); } return 0; @@ -108,9 +109,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct int i; while(1) { - ret = radix_tree_gang_lookup(&root->fs_info->pinned_radix, + ret = radix_tree_gang_lookup_tag(&root->fs_info->pinned_radix, (void **)gang, 0, - ARRAY_SIZE(gang)); + ARRAY_SIZE(gang), + CTREE_EXTENT_PINNED); if (!ret) break; if (!first) @@ -137,7 +139,7 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, - btrfs_header_parentid(&extent_root->node->node.header)); + btrfs_header_parentid(btrfs_buffer_header(extent_root->node))); ins.offset = 1; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); @@ -156,11 +158,24 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct return 0; } +static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) +{ + int err; + err = radix_tree_insert(&root->fs_info->pinned_radix, + blocknr, (void *)blocknr); + BUG_ON(err); + if (err) + return err; + radix_tree_tag_set(&root->fs_info->pinned_radix, blocknr, + tag); + return 0; +} + /* * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin) + *root, u64 blocknr, u64 num_blocks) { struct btrfs_path path; struct btrfs_key key; @@ -171,7 +186,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key ins; u32 refs; - BUG_ON(pin && num_blocks != 1); key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -186,26 +200,18 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root printk("failed to find %Lu\n", key.objectid); BUG(); } - ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + ei = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_extent_item); BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); if (refs == 0) { u64 super_blocks_used; - if (pin) { - int err; - radix_tree_preload(GFP_KERNEL); - err = radix_tree_insert(&info->pinned_radix, - blocknr, (void *)blocknr); - BUG_ON(err); - radix_tree_preload_end(); - } super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, &path); - if (!pin && extent_root->fs_info->last_insert.objectid > + if (extent_root->fs_info->last_insert.objectid > blocknr) extent_root->fs_info->last_insert.objectid = blocknr; if (ret) @@ -224,39 +230,32 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root) { int ret; - struct btrfs_buffer *gang[4]; + int wret; + int err = 0; + unsigned long gang[4]; int i; + struct radix_tree_root *radix = &extent_root->fs_info->pinned_radix; while(1) { ret = radix_tree_gang_lookup_tag( - &extent_root->fs_info->cache_radix, + &extent_root->fs_info->pinned_radix, (void **)gang, 0, ARRAY_SIZE(gang), CTREE_EXTENT_PENDING_DEL); if (!ret) break; for (i = 0; i < ret; i++) { - ret = __free_extent(trans, extent_root, - gang[i]->blocknr, 1, 1); - radix_tree_tag_clear(&extent_root->fs_info->cache_radix, - gang[i]->blocknr, + radix_tree_tag_set(radix, gang[i], CTREE_EXTENT_PINNED); + radix_tree_tag_clear(radix, gang[i], CTREE_EXTENT_PENDING_DEL); - btrfs_block_release(extent_root, gang[i]); + wret = __free_extent(trans, extent_root, gang[i], 1); + if (wret) + err = wret; } } - return 0; + return err; } -static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root - *extent_root) -{ - while(radix_tree_tagged(&extent_root->fs_info->cache_radix, - CTREE_EXTENT_PENDING_DEL)) - del_pending_extents(trans, extent_root); - return 0; -} - - /* * remove an extent from the root, returns 0 on success */ @@ -264,18 +263,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_buffer *t; + struct buffer_head *t; int pending_ret; int ret; if (root == extent_root) { t = find_tree_block(root, blocknr); - radix_tree_tag_set(&root->fs_info->cache_radix, blocknr, - CTREE_EXTENT_PENDING_DEL); + pin_down_block(root, blocknr, CTREE_EXTENT_PENDING_DEL); return 0; } - ret = __free_extent(trans, root, blocknr, num_blocks, pin); - pending_ret = run_pending(trans, root->fs_info->extent_root); + if (pin) { + ret = pin_down_block(root, blocknr, CTREE_EXTENT_PINNED); + BUG_ON(ret); + } + ret = __free_extent(trans, root, blocknr, num_blocks); + pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } @@ -296,14 +298,16 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int ret; u64 hole_size = 0; int slot = 0; - u64 last_block; + u64 last_block = 0; u64 test_block; int start_found; struct btrfs_leaf *l; struct btrfs_root * root = orig_root->fs_info->extent_root; int total_needed = num_blocks; + int level; - total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3; + level = btrfs_header_level(btrfs_buffer_header(root->node)); + total_needed += (level + 1) * 3; if (root->fs_info->last_insert.objectid > search_start) search_start = root->fs_info->last_insert.objectid; @@ -323,7 +327,7 @@ check_failed: path.slots[0]--; while (1) { - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { ret = btrfs_next_leaf(root, &path); @@ -429,7 +433,7 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root sizeof(extent_item)); finish_current_insert(trans, extent_root); - pending_ret = run_pending(trans, extent_root); + pending_ret = del_pending_extents(trans, extent_root); if (ret) return ret; if (pending_ret) @@ -441,16 +445,15 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root * helper function to allocate a block for a given tree * returns the tree buffer or NULL. */ -struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, +struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_key ins; int ret; - struct btrfs_buffer *buf; + struct buffer_head *buf; ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1, - btrfs_header_parentid(&root->node->node.header), - &ins); + btrfs_header_parentid(btrfs_buffer_header(root->node)), &ins); if (ret) { BUG(); return NULL; @@ -467,13 +470,13 @@ struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { - struct btrfs_buffer *next; - struct btrfs_buffer *cur; + struct buffer_head *next; + struct buffer_head *cur; u64 blocknr; int ret; u32 refs; - ret = lookup_block_ref(trans, root, path->nodes[*level]->blocknr, + ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr, &refs); BUG_ON(ret); if (refs > 1) @@ -484,9 +487,10 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root while(*level > 0) { cur = path->nodes[*level]; if (path->slots[*level] >= - btrfs_header_nritems(&cur->node.header)) + btrfs_header_nritems(btrfs_buffer_header(cur))) break; - blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]); + blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), + path->slots[*level]); ret = lookup_block_ref(trans, root, blocknr, &refs); if (refs != 1 || *level == 1) { path->slots[*level]++; @@ -499,12 +503,12 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(&next->node.header); + *level = btrfs_header_level(btrfs_buffer_header(next)); path->slots[*level] = 0; } out: - ret = btrfs_free_extent(trans, root, path->nodes[*level]->blocknr, 1, - 1); + ret = btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr, + 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -525,14 +529,14 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root int ret; for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < - btrfs_header_nritems(&path->nodes[i]->node.header)- 1) { + if (slot < btrfs_header_nritems( + btrfs_buffer_header(path->nodes[i])) - 1) { path->slots[i]++; *level = i; return 0; } else { ret = btrfs_free_extent(trans, root, - path->nodes[*level]->blocknr, + path->nodes[*level]->b_blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; @@ -549,7 +553,7 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root * decremented. */ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *snap) + *root, struct buffer_head *snap) { int ret = 0; int wret; @@ -560,7 +564,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_init_path(&path); - level = btrfs_header_level(&snap->node.header); + level = btrfs_header_level(btrfs_buffer_header(snap)); orig_level = level; path.nodes[level] = snap; path.slots[level] = 0; diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 6c2a71a46c7d..22519b8e0cf2 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -10,6 +10,7 @@ * License. */ +#include #define DELTA 0x9E3779B9 static void TEA_transform(__u32 buf[2], __u32 const in[]) @@ -63,6 +64,14 @@ int btrfs_name_hash(const char *name, int len, u64 *hash_result) const char *p; __u32 in[8], buf[2]; + if (len == 1 && *name == '.') { + *hash_result = 1; + return 0; + } else if (len == 2 && name[0] == '.' && name[1] == '.') { + *hash_result = 2; + return 0; + } + /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; buf[1] = 0xefcdab89; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index c7fda3bf7b21..c45aec258bd5 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -15,7 +15,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int ret; u64 hole_size = 0; int slot = 0; - u64 last_ino; + u64 last_ino = 0; int start_found; struct btrfs_leaf *l; struct btrfs_root *root = fs_root->fs_info->inode_root; @@ -40,7 +40,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, path.slots[0]--; while (1) { - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { ret = btrfs_next_leaf(root, &path); @@ -105,8 +105,8 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, if (ret) goto out; - inode_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], - struct btrfs_inode_map_item); + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); out: btrfs_release_path(inode_root, &path); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index aa2d3fac8804..c8ee938c1251 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -17,7 +17,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) printk("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(root, l)); - fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; type = btrfs_disk_key_type(&item->key); @@ -67,10 +66,10 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) btrfs_leaf_data(l) + btrfs_item_offset(item)); break; }; - fflush(stdout); } } -void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) + +void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t) { int i; u32 nr; @@ -78,16 +77,16 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) if (!t) return; - c = &t->node; + c = btrfs_buffer_node(t); nr = btrfs_header_nritems(&c->header); if (btrfs_is_leaf(c)) { btrfs_print_leaf(root, (struct btrfs_leaf *)c); return; } - printk("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, - btrfs_header_level(&c->header), nr, - (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); - fflush(stdout); + printk("node %Lu level %d total ptrs %d free spc %u\n", + btrfs_header_blocknr(&c->header), + btrfs_header_level(&c->header), nr, + (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { printk("\tkey %d (%Lu %u %Lu) block %Lu\n", i, @@ -95,12 +94,11 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) c->ptrs[i].key.flags, c->ptrs[i].key.offset, btrfs_node_blockptr(c, i)); - fflush(stdout); } for (i = 0; i < nr; i++) { - struct btrfs_buffer *next_buf = read_tree_block(root, + struct buffer_head *next_buf = read_tree_block(root, btrfs_node_blockptr(c, i)); - struct btrfs_node *next = &next_buf->node; + struct btrfs_node *next = btrfs_buffer_node(next_buf); if (btrfs_is_leaf(next) && btrfs_header_level(&c->header) != 1) BUG(); diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 0882ca904eca..396041a05cfa 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,5 +1,5 @@ #ifndef __PRINT_TREE_ #define __PRINT_TREE_ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); -void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t); +void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t); #endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 52c83be4b307..a4554c007ef7 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -21,7 +21,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, if (ret < 0) goto out; BUG_ON(ret == 0); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); BUG_ON(path.slots[0] == 0); slot = path.slots[0] - 1; if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { @@ -51,7 +51,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) goto out; BUG_ON(ret != 0); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; memcpy(btrfs_item_ptr(l, slot, struct btrfs_root_item), item, sizeof(*item)); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4ae76044aeab..ccc056aad692 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -8,13 +9,18 @@ #include #include #include "ctree.h" +#include "disk-io.h" #define BTRFS_SUPER_MAGIC 0x9123682E + +static struct inode_operations btrfs_dir_inode_operations; +static struct super_operations btrfs_super_ops; +static struct file_operations btrfs_dir_file_operations; + #if 0 /* some random number */ static struct super_operations ramfs_ops; -static struct inode_operations ramfs_dir_inode_operations; static struct backing_dev_info ramfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ @@ -129,46 +135,243 @@ static struct inode_operations ramfs_dir_inode_operations = { }; #endif -struct inode *btrfs_get_inode(struct super_block *sb, int mode, dev_t dev) +static void btrfs_read_locked_inode(struct inode *inode) { - struct inode * inode = new_inode(sb); - - if (inode) { - inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_blocks = 0; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + struct btrfs_path path; + struct btrfs_inode_item *inode_item; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + int ret; +printk("read locked inode %lu\n", inode->i_ino); + btrfs_init_path(&path); + ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); + if (ret) { + make_bad_inode(inode); + return; } - return inode; + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], + struct btrfs_inode_item); + +printk("found locked inode %lu\n", inode->i_ino); + inode->i_mode = btrfs_inode_mode(inode_item); + inode->i_nlink = btrfs_inode_nlink(inode_item); + inode->i_uid = btrfs_inode_uid(inode_item); + inode->i_gid = btrfs_inode_gid(inode_item); + inode->i_size = btrfs_inode_size(inode_item); + inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime); + inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime); + inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime); + inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime); + inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime); + inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); + inode->i_blocks = btrfs_inode_nblocks(inode_item); + inode->i_generation = btrfs_inode_generation(inode_item); +printk("about to release\n"); + btrfs_release_path(root, &path); + switch (inode->i_mode & S_IFMT) { +#if 0 + default: + init_special_inode(inode, inode->i_mode, + btrfs_inode_rdev(inode_item)); + break; +#endif + case S_IFREG: +printk("inode %lu now a file\n", inode->i_ino); + break; + case S_IFDIR: +printk("inode %lu now a directory\n", inode->i_ino); + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; + break; + case S_IFLNK: +printk("inode %lu now a link\n", inode->i_ino); + // inode->i_op = &page_symlink_inode_operations; + break; + } +printk("returning!\n"); + return; } -static struct super_operations btrfs_ops = { - .statfs = simple_statfs, - .drop_inode = generic_delete_inode, -}; +static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + ino_t *ino) +{ + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + struct btrfs_dir_item *di; + struct btrfs_path path; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + int ret; + + btrfs_init_path(&path); + ret = btrfs_lookup_dir_item(NULL, root, &path, dir->i_ino, name, + namelen, 0); + if (ret) { + *ino = 0; + goto out; + } + di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_dir_item); + *ino = btrfs_dir_objectid(di); +out: + btrfs_release_path(root, &path); + return ret; +} + +static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode * inode; + ino_t ino; + int ret; + + if (dentry->d_name.len > BTRFS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + ret = btrfs_inode_by_name(dir, dentry, &ino); + if (ret < 0) + return ERR_PTR(ret); + inode = NULL; + if (ino) { +printk("lookup on %.*s returns %lu\n", dentry->d_name.len, dentry->d_name.name, ino); + inode = iget(dir->i_sb, ino); + if (!inode) + return ERR_PTR(-EACCES); + } + return d_splice_alias(inode, dentry); +} + +static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_item *item; + struct btrfs_dir_item *di; + struct btrfs_key key; + struct btrfs_path path; + int ret; + u32 nritems; + struct btrfs_leaf *leaf; + int slot; + int advance; + unsigned char d_type = DT_UNKNOWN; + int over; + + key.objectid = inode->i_ino; +printk("readdir on dir %Lu pos %Lu\n", key.objectid, filp->f_pos); + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + key.offset = filp->f_pos; + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) { + goto err; + } +printk("first ret %d\n", ret); + advance = filp->f_pos > 0 && ret != 0; + while(1) { + leaf = btrfs_buffer_leaf(path.nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path.slots[0]; +printk("leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); + if (advance) { +printk("advancing!\n"); + if (slot == nritems -1) { + ret = btrfs_next_leaf(root, &path); + if (ret) + break; + leaf = btrfs_buffer_leaf(path.nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path.slots[0]; +printk("2leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); + } else { + slot++; + path.slots[0]++; + } + } + advance = 1; + item = leaf->items + slot; +printk("item key %Lu %u %Lu\n", btrfs_disk_key_objectid(&item->key), + btrfs_disk_key_flags(&item->key), btrfs_disk_key_offset(&item->key)); + if (btrfs_disk_key_objectid(&item->key) != key.objectid) + break; + if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY) + continue; + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); +printk("filldir name %.*s, objectid %Lu\n", btrfs_dir_name_len(di), + (const char *)(di + 1), btrfs_dir_objectid(di)); + over = filldir(dirent, (const char *)(di + 1), + btrfs_dir_name_len(di), + btrfs_disk_key_offset(&item->key), + btrfs_dir_objectid(di), d_type); + if (over) + break; + filp->f_pos = btrfs_disk_key_offset(&item->key) + 1; + } +printk("filldir all done\n"); + ret = 0; +err: + btrfs_release_path(root, &path); + return ret; +} + +static void btrfs_put_super (struct super_block * sb) +{ + struct btrfs_root *root = btrfs_sb(sb); + int ret; + + ret = close_ctree(root); + if (ret) { + printk("close ctree returns %d\n", ret); + } + sb->s_fs_info = NULL; +} static int btrfs_fill_super(struct super_block * sb, void * data, int silent) { struct inode * inode; - struct dentry * root; + struct dentry * root_dentry; + struct btrfs_super_block *disk_super; + struct buffer_head *bh; + struct btrfs_root *root; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = BTRFS_SUPER_MAGIC; - sb->s_op = &btrfs_ops; + sb->s_op = &btrfs_super_ops; sb->s_time_gran = 1; - inode = btrfs_get_inode(sb, S_IFDIR | 0755, 0); + + bh = sb_bread(sb, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); + if (!bh) { + printk("btrfs: unable to read on disk super\n"); + return -EIO; + } + disk_super = (struct btrfs_super_block *)bh->b_data; + root = open_ctree(sb, bh, disk_super); + sb->s_fs_info = root; + if (!root) { + printk("btrfs: open_ctree failed\n"); + return -EIO; + } + printk("read in super total blocks %Lu root %Lu\n", + btrfs_super_total_blocks(disk_super), + btrfs_super_root_dir(disk_super)); + + inode = iget_locked(sb, btrfs_super_root_dir(disk_super)); if (!inode) return -ENOMEM; + if (inode->i_state & I_NEW) { + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + } - root = d_alloc_root(inode); - if (!root) { + root_dentry = d_alloc_root(inode); + if (!root_dentry) { iput(inode); return -ENOMEM; } - sb->s_root = root; + sb->s_root = root_dentry; + return 0; } @@ -187,6 +390,24 @@ static struct file_system_type btrfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +static struct super_operations btrfs_super_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .put_super = btrfs_put_super, + .read_inode = btrfs_read_locked_inode, +}; + +static struct inode_operations btrfs_dir_inode_operations = { + .lookup = btrfs_lookup, +}; + +static struct file_operations btrfs_dir_file_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = btrfs_readdir, +}; + + static int __init init_btrfs_fs(void) { printk("btrfs loaded!\n"); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 3adb6e69fe43..9ab27b7491c5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -10,7 +10,7 @@ struct btrfs_trans_handle { static inline struct btrfs_trans_handle * btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { - struct btrfs_trans_handle *h = malloc(sizeof(*h)); + struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); h->transid = root->root_key.offset; h->blocks_reserved = num_blocks; h->blocks_used = 0; @@ -21,7 +21,7 @@ static inline void btrfs_free_transaction(struct btrfs_root *root, struct btrfs_trans_handle *handle) { memset(handle, 0, sizeof(*handle)); - free(handle); + kfree(handle); } #endif From 79154b1b5bcf87903db7ff16a30b360b78d6fe4f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 22 Mar 2007 15:59:16 -0400 Subject: [PATCH 0062/2985] Btrfs: transaction rework Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.h | 4 +- fs/btrfs/disk-io.c | 115 ++---------------------- fs/btrfs/disk-io.h | 8 +- fs/btrfs/super.c | 18 ---- fs/btrfs/transaction.c | 199 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.h | 35 ++++---- 7 files changed, 235 insertions(+), 146 deletions(-) create mode 100644 fs/btrfs/transaction.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 1a0fb7422393..be7d74cdca04 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -3,7 +3,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ - hash.o file-item.o inode-item.o inode-map.o disk-io.o + hash.o file-item.o inode-item.o inode-map.o disk-io.o transaction.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7748eecd9304..4c19a3f12afd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -5,6 +5,7 @@ #include struct btrfs_trans_handle; +struct btrfs_transaction; #define BTRFS_MAGIC "_BtRfS_M" @@ -224,10 +225,11 @@ struct btrfs_fs_info { u64 last_inode_alloc; u64 last_inode_alloc_dirid; u64 generation; - struct btrfs_trans_handle *running_transaction; + struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; struct buffer_head *sb_buffer; struct super_block *sb; + struct mutex trans_mutex; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index df2061a735cd..9cacca0c525c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -50,89 +50,6 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } -int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) -{ - mark_buffer_dirty(buf); - return 0; -} - -static int __commit_transaction(struct btrfs_trans_handle *trans, struct - btrfs_root *root) -{ - filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); - return 0; -} - -static int commit_tree_roots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - int ret; - u64 old_extent_block; - struct btrfs_root *tree_root = fs_info->tree_root; - struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_root *inode_root = fs_info->inode_root; - - btrfs_set_root_blocknr(&inode_root->root_item, - inode_root->node->b_blocknr); - ret = btrfs_update_root(trans, tree_root, - &inode_root->root_key, - &inode_root->root_item); - BUG_ON(ret); - while(1) { - old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == extent_root->node->b_blocknr) - break; - btrfs_set_root_blocknr(&extent_root->root_item, - extent_root->node->b_blocknr); - ret = btrfs_update_root(trans, tree_root, - &extent_root->root_key, - &extent_root->root_item); - BUG_ON(ret); - } - return 0; -} - -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct btrfs_super_block *s) -{ - int ret = 0; - struct buffer_head *snap = root->commit_root; - struct btrfs_key snap_key; - - if (root->commit_root == root->node) - return 0; - - memcpy(&snap_key, &root->root_key, sizeof(snap_key)); - root->root_key.offset++; - - btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, - &root->root_key, &root->root_item); - BUG_ON(ret); - - ret = commit_tree_roots(trans, root->fs_info); - BUG_ON(ret); - - ret = __commit_transaction(trans, root); - BUG_ON(ret); - - write_ctree_super(trans, root, s); - btrfs_finish_extent_commit(trans, root->fs_info->extent_root); - btrfs_finish_extent_commit(trans, root->fs_info->tree_root); - - root->commit_root = root->node; - get_bh(root->node); - ret = btrfs_drop_snapshot(trans, root, snap); - BUG_ON(ret); - - ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); - BUG_ON(ret); - root->fs_info->generation = root->root_key.offset + 1; - - return ret; -} - static int __setup_root(struct btrfs_super_block *super, struct btrfs_root *root, struct btrfs_fs_info *fs_info, @@ -197,6 +114,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->disk_super = disk_super; fs_info->sb_buffer = sb_buffer; fs_info->sb = sb; + mutex_init(&fs_info->trans_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); @@ -225,7 +143,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, } int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_super_block *s) + *root) { return 0; #if 0 @@ -242,34 +160,19 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root #endif } -static int drop_cache(struct btrfs_root *root) -{ - return 0; -#if 0 - while(!list_empty(&root->fs_info->cache)) { - struct buffer_head *b = list_entry(root->fs_info->cache.next, - struct buffer_head, - cache); - list_del_init(&b->cache); - btrfs_block_release(root, b); - } - return 0; -#endif -} - int close_ctree(struct btrfs_root *root) { int ret; struct btrfs_trans_handle *trans; - trans = root->fs_info->running_transaction; - btrfs_commit_transaction(trans, root, root->fs_info->disk_super); - ret = commit_tree_roots(trans, root->fs_info); + trans = btrfs_start_transaction(root, 1); + btrfs_commit_transaction(trans, root); + /* run commit again to drop the original snapshot */ + trans = btrfs_start_transaction(root, 1); + btrfs_commit_transaction(trans, root); + ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); - ret = __commit_transaction(trans, root); - BUG_ON(ret); - write_ctree_super(trans, root, root->fs_info->disk_super); - drop_cache(root); + write_ctree_super(NULL, root); if (root->node) btrfs_block_release(root, root->node); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 7f4bb729b734..099f7eea0ec7 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -28,14 +28,14 @@ int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_super_block *s); +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); struct btrfs_root *open_ctree(struct super_block *sb, struct buffer_head *sb_buffer, struct btrfs_super_block *disk_super); int close_ctree(struct btrfs_root *root); void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); -int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_super_block *s); +int write_ctree_super(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int mkfs(int fd, u64 num_blocks, u32 blocksize); #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ccc056aad692..620430825840 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -141,7 +141,6 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item *inode_item; struct btrfs_root *root = btrfs_sb(inode->i_sb); int ret; -printk("read locked inode %lu\n", inode->i_ino); btrfs_init_path(&path); ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); if (ret) { @@ -152,7 +151,6 @@ printk("read locked inode %lu\n", inode->i_ino); path.slots[0], struct btrfs_inode_item); -printk("found locked inode %lu\n", inode->i_ino); inode->i_mode = btrfs_inode_mode(inode_item); inode->i_nlink = btrfs_inode_nlink(inode_item); inode->i_uid = btrfs_inode_uid(inode_item); @@ -166,7 +164,6 @@ printk("found locked inode %lu\n", inode->i_ino); inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); -printk("about to release\n"); btrfs_release_path(root, &path); switch (inode->i_mode & S_IFMT) { #if 0 @@ -176,19 +173,15 @@ printk("about to release\n"); break; #endif case S_IFREG: -printk("inode %lu now a file\n", inode->i_ino); break; case S_IFDIR: -printk("inode %lu now a directory\n", inode->i_ino); inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; break; case S_IFLNK: -printk("inode %lu now a link\n", inode->i_ino); // inode->i_op = &page_symlink_inode_operations; break; } -printk("returning!\n"); return; } @@ -232,7 +225,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(ret); inode = NULL; if (ino) { -printk("lookup on %.*s returns %lu\n", dentry->d_name.len, dentry->d_name.name, ino); inode = iget(dir->i_sb, ino); if (!inode) return ERR_PTR(-EACCES); @@ -257,7 +249,6 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int over; key.objectid = inode->i_ino; -printk("readdir on dir %Lu pos %Lu\n", key.objectid, filp->f_pos); key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); key.offset = filp->f_pos; @@ -266,15 +257,12 @@ printk("readdir on dir %Lu pos %Lu\n", key.objectid, filp->f_pos); if (ret < 0) { goto err; } -printk("first ret %d\n", ret); advance = filp->f_pos > 0 && ret != 0; while(1) { leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; -printk("leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); if (advance) { -printk("advancing!\n"); if (slot == nritems -1) { ret = btrfs_next_leaf(root, &path); if (ret) @@ -282,7 +270,6 @@ printk("advancing!\n"); leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; -printk("2leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); } else { slot++; path.slots[0]++; @@ -290,15 +277,11 @@ printk("2leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slo } advance = 1; item = leaf->items + slot; -printk("item key %Lu %u %Lu\n", btrfs_disk_key_objectid(&item->key), - btrfs_disk_key_flags(&item->key), btrfs_disk_key_offset(&item->key)); if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY) continue; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); -printk("filldir name %.*s, objectid %Lu\n", btrfs_dir_name_len(di), - (const char *)(di + 1), btrfs_dir_objectid(di)); over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), btrfs_disk_key_offset(&item->key), @@ -307,7 +290,6 @@ printk("filldir name %.*s, objectid %Lu\n", btrfs_dir_name_len(di), break; filp->f_pos = btrfs_disk_key_offset(&item->key) + 1; } -printk("filldir all done\n"); ret = 0; err: btrfs_release_path(root, &path); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c new file mode 100644 index 000000000000..8dc1c170f10f --- /dev/null +++ b/fs/btrfs/transaction.c @@ -0,0 +1,199 @@ +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" + + +static void put_transaction(struct btrfs_transaction *transaction) +{ + transaction->use_count--; + if (transaction->use_count == 0) + kfree(transaction); +} + +static int join_transaction(struct btrfs_root *root) +{ + struct btrfs_transaction *cur_trans; + cur_trans = root->fs_info->running_transaction; + if (!cur_trans) { + cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS); + BUG_ON(!cur_trans); + root->fs_info->running_transaction = cur_trans; + cur_trans->num_writers = 0; + cur_trans->transid = root->root_key.offset + 1; + init_waitqueue_head(&cur_trans->writer_wait); + init_waitqueue_head(&cur_trans->commit_wait); + cur_trans->in_commit = 0; + cur_trans->use_count = 0; + cur_trans->commit_done = 0; + } + cur_trans->num_writers++; + return 0; +} + +struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, + int num_blocks) +{ + struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); + int ret; + + mutex_lock(&root->fs_info->trans_mutex); + ret = join_transaction(root); + BUG_ON(ret); + h->transid = root->fs_info->running_transaction->transid; + h->transaction = root->fs_info->running_transaction; + h->blocks_reserved = num_blocks; + h->blocks_used = 0; + root->fs_info->running_transaction->use_count++; + mutex_unlock(&root->fs_info->trans_mutex); + return h; +} + +int btrfs_end_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_transaction *cur_trans; + mutex_lock(&root->fs_info->trans_mutex); + cur_trans = root->fs_info->running_transaction; + WARN_ON(cur_trans->num_writers <= 1); + if (waitqueue_active(&cur_trans->writer_wait)) + wake_up(&cur_trans->writer_wait); + cur_trans->num_writers--; + put_transaction(cur_trans); + mutex_unlock(&root->fs_info->trans_mutex); + kfree(trans); + return 0; +} + + +int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + return 0; +} + +int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + u64 old_extent_block; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *inode_root = fs_info->inode_root; + + btrfs_set_root_blocknr(&inode_root->root_item, + inode_root->node->b_blocknr); + ret = btrfs_update_root(trans, tree_root, + &inode_root->root_key, + &inode_root->root_item); + BUG_ON(ret); + while(1) { + old_extent_block = btrfs_root_blocknr(&extent_root->root_item); + if (old_extent_block == extent_root->node->b_blocknr) + break; + btrfs_set_root_blocknr(&extent_root->root_item, + extent_root->node->b_blocknr); + ret = btrfs_update_root(trans, tree_root, + &extent_root->root_key, + &extent_root->root_item); + BUG_ON(ret); + } + return 0; +} + +static int wait_for_commit(struct btrfs_root *root, + struct btrfs_transaction *commit) +{ + DEFINE_WAIT(wait); + commit->use_count++; + while(!commit->commit_done) { + prepare_to_wait(&commit->commit_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (commit->commit_done) + break; + mutex_unlock(&root->fs_info->trans_mutex); + schedule(); + mutex_lock(&root->fs_info->trans_mutex); + } + finish_wait(&commit->commit_wait, &wait); + return 0; +} + +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret = 0; + struct buffer_head *snap = root->commit_root; + struct btrfs_key snap_key; + struct btrfs_transaction *cur_trans; + DEFINE_WAIT(wait); + + mutex_lock(&root->fs_info->trans_mutex); + if (trans->transaction->in_commit) { + cur_trans = trans->transaction; + trans->transaction->use_count++; + btrfs_end_transaction(trans, root); + ret = wait_for_commit(root, cur_trans); + BUG_ON(ret); + put_transaction(cur_trans); + mutex_unlock(&root->fs_info->trans_mutex); + return 0; + } + while (trans->transaction->num_writers > 1) { + prepare_to_wait(&trans->transaction->writer_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (trans->transaction->num_writers <= 1) + break; + mutex_unlock(&root->fs_info->trans_mutex); + schedule(); + mutex_lock(&root->fs_info->trans_mutex); + } + finish_wait(&trans->transaction->writer_wait, &wait); + + cur_trans = root->fs_info->running_transaction; + root->fs_info->running_transaction = NULL; + mutex_unlock(&root->fs_info->trans_mutex); + + memcpy(&snap_key, &root->root_key, sizeof(snap_key)); + root->root_key.offset++; + + if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { + btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, + &root->root_key, &root->root_item); + BUG_ON(ret); + } + + ret = btrfs_commit_tree_roots(trans, root); + BUG_ON(ret); + + ret = btrfs_write_and_wait_transaction(trans, root); + BUG_ON(ret); + + write_ctree_super(trans, root); + btrfs_finish_extent_commit(trans, root->fs_info->extent_root); + btrfs_finish_extent_commit(trans, root->fs_info->tree_root); + put_transaction(cur_trans); + kfree(trans); + + if (root->node != root->commit_root) { + trans = btrfs_start_transaction(root, 1); + root->commit_root = root->node; + get_bh(root->node); + ret = btrfs_drop_snapshot(trans, root, snap); + BUG_ON(ret); + + ret = btrfs_del_root(trans, root->fs_info->tree_root, + &snap_key); + BUG_ON(ret); + root->fs_info->generation = root->root_key.offset + 1; + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + } + + return ret; +} + diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 9ab27b7491c5..7e6c08a0accd 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -1,27 +1,30 @@ #ifndef __TRANSACTION__ #define __TRANSACTION__ +struct btrfs_transaction { + u64 transid; + unsigned long num_writers; + int in_commit; + int use_count; + int commit_done; + wait_queue_head_t writer_wait; + wait_queue_head_t commit_wait; +}; + struct btrfs_trans_handle { u64 transid; unsigned long blocks_reserved; unsigned long blocks_used; + struct btrfs_transaction *transaction; }; -static inline struct btrfs_trans_handle * -btrfs_start_transaction(struct btrfs_root *root, int num_blocks) -{ - struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); - h->transid = root->root_key.offset; - h->blocks_reserved = num_blocks; - h->blocks_used = 0; - return h; -} - -static inline void btrfs_free_transaction(struct btrfs_root *root, - struct btrfs_trans_handle *handle) -{ - memset(handle, 0, sizeof(*handle)); - kfree(handle); -} +int btrfs_end_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, + int num_blocks); +int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif From d571976292839cec05a2820b08f7629b145ed157 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 10:01:08 -0400 Subject: [PATCH 0063/2985] btrfs_create, btrfs_write_super, btrfs_sync_fs Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/ctree.c | 49 +++++++++------- fs/btrfs/ctree.h | 2 +- fs/btrfs/dir-item.c | 3 +- fs/btrfs/disk-io.c | 23 ++++---- fs/btrfs/extent-tree.c | 26 ++++++--- fs/btrfs/inode-map.c | 1 + fs/btrfs/root-tree.c | 1 + fs/btrfs/super.c | 129 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.c | 13 +++-- 10 files changed, 201 insertions(+), 47 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index c5d67bbd4d90..4b5bd05de94f 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -3,6 +3,7 @@ * Fix ENOSPC handling * make a real mkfs and superblock * Do checksumming +* Use a real check instead of mark_buffer_dirty * Define FS objects in terms of different item types * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e690e2bb47d2..052434a328ec 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -39,7 +39,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *cow; struct btrfs_node *cow_node; - if (!buffer_dirty(buf)) { + if (buffer_dirty(buf)) { *cow_ret = buf; return 0; } @@ -48,6 +48,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); *cow_ret = cow; + mark_buffer_dirty(cow); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; @@ -58,7 +59,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root } else { btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, cow->b_blocknr); - BUG_ON(!buffer_dirty(parent)); + mark_buffer_dirty(parent); btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); @@ -362,7 +363,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&parent->ptrs[pslot + 1].key, &right->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(!buffer_dirty(parent_buf)); + mark_buffer_dirty(parent_buf); } } if (btrfs_header_nritems(&mid->header) == 1) { @@ -398,7 +399,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root /* update the parent key to reflect our changes */ memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(!buffer_dirty(parent_buf)); + mark_buffer_dirty(parent_buf); } /* update the path */ @@ -539,7 +540,7 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root break; t = btrfs_buffer_node(path->nodes[i]); memcpy(&t->ptrs[tslot].key, key, sizeof(*key)); - BUG_ON(!buffer_dirty(path->nodes[i])); + mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; } @@ -583,8 +584,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root } btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(!buffer_dirty(src_buf)); - BUG_ON(!buffer_dirty(dst_buf)); + mark_buffer_dirty(src_buf); + mark_buffer_dirty(dst_buf); return ret; } @@ -631,8 +632,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(!buffer_dirty(src_buf)); - BUG_ON(!buffer_dirty(dst_buf)); + mark_buffer_dirty(src_buf); + mark_buffer_dirty(dst_buf); return ret; } @@ -669,6 +670,9 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root lower_key = &lower->ptrs[0].key; memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); + + mark_buffer_dirty(t); + /* the super has an extra ref to root->node */ btrfs_block_release(root, root->node); root->node = t; @@ -708,7 +712,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); - BUG_ON(!buffer_dirty(path->nodes[level])); + mark_buffer_dirty(path->nodes[level]); return 0; } @@ -755,7 +759,8 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&c->header, mid); ret = 0; - BUG_ON(!buffer_dirty(t)); + mark_buffer_dirty(t); + mark_buffer_dirty(split_buffer); wret = insert_ptr(trans, root, path, &split->ptrs[0].key, split_buffer->b_blocknr, path->slots[level + 1] + 1, level + 1); @@ -886,11 +891,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root left_nritems -= push_items; btrfs_set_header_nritems(&left->header, left_nritems); - BUG_ON(!buffer_dirty(left_buf)); - BUG_ON(!buffer_dirty(right_buf)); + mark_buffer_dirty(left_buf); + mark_buffer_dirty(right_buf); memcpy(&upper_node->ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(!buffer_dirty(upper)); + mark_buffer_dirty(upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { @@ -1004,8 +1009,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root push_space = btrfs_item_offset(right->items + i); } - BUG_ON(!buffer_dirty(t)); - BUG_ON(!buffer_dirty(right_buf)); + mark_buffer_dirty(t); + mark_buffer_dirty(right_buf); wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) @@ -1115,8 +1120,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root right_buffer->b_blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; - BUG_ON(!buffer_dirty(right_buffer)); - BUG_ON(!buffer_dirty(l_buf)); + mark_buffer_dirty(right_buffer); + mark_buffer_dirty(l_buf); BUG_ON(path->slots[0] != slot); if (mid <= slot) { btrfs_block_release(root, path->nodes[0]); @@ -1202,12 +1207,12 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_item_offset(leaf->items + slot, data_end - data_size); btrfs_set_item_size(leaf->items + slot, data_size); btrfs_set_header_nritems(&leaf->header, nritems + 1); + mark_buffer_dirty(leaf_buf); ret = 0; if (slot == 0) ret = fixup_low_keys(trans, root, path, &disk_key, 1); - BUG_ON(!buffer_dirty(leaf_buf)); if (btrfs_leaf_free_space(root, leaf) < 0) BUG(); check_leaf(root, path, 0); @@ -1233,6 +1238,7 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], u8); memcpy(ptr, data, data_size); + mark_buffer_dirty(path.nodes[0]); } btrfs_release_path(root, &path); return ret; @@ -1273,7 +1279,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } - BUG_ON(!buffer_dirty(parent)); + mark_buffer_dirty(parent); return ret; } @@ -1368,8 +1374,11 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } else { + mark_buffer_dirty(leaf_buf); btrfs_block_release(root, leaf_buf); } + } else { + mark_buffer_dirty(leaf_buf); } } return ret; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4c19a3f12afd..983e3cc9ae9f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -833,7 +833,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, char *name, int name_len, u64 dir, u64 + *root, const char *name, int name_len, u64 dir, u64 objectid, u8 type); int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 75d6e373e98d..f81cbcc83b66 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -5,7 +5,7 @@ #include "transaction.h" int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, char *name, int name_len, u64 dir, u64 + *root, const char *name, int name_len, u64 dir, u64 objectid, u8 type) { int ret = 0; @@ -35,6 +35,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); memcpy(name_ptr, name, name_len); + mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9cacca0c525c..8e1dcda0839c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -145,19 +145,20 @@ struct btrfs_root *open_ctree(struct super_block *sb, int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return 0; -#if 0 - int ret; - btrfs_set_super_root(s, root->fs_info->tree_root->node->b_blocknr); - - ret = pwrite(root->fs_info->fp, s, sizeof(*s), - BTRFS_SUPER_INFO_OFFSET); - if (ret != sizeof(*s)) { - fprintf(stderr, "failed to write new super block err %d\n", ret); - return ret; + struct buffer_head *bh = root->fs_info->sb_buffer; + btrfs_set_super_root(root->fs_info->disk_super, + root->fs_info->tree_root->node->b_blocknr); + lock_buffer(bh); + clear_buffer_dirty(bh); + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(WRITE, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + WARN_ON(1); + return -EIO; } return 0; -#endif } int close_ctree(struct btrfs_root *root) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e3af2c035687..2818f1c57170 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -49,6 +49,7 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); + mark_buffer_dirty(path.nodes[0]); btrfs_release_path(root->fs_info->extent_root, &path); finish_current_insert(trans, root->fs_info->extent_root); @@ -103,7 +104,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - unsigned long gang[8]; + struct buffer_head *gang[8]; u64 first = 0; int ret; int i; @@ -116,13 +117,15 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct if (!ret) break; if (!first) - first = gang[0]; + first = gang[0]->b_blocknr; for (i = 0; i < ret; i++) { radix_tree_delete(&root->fs_info->pinned_radix, - gang[i]); + gang[i]->b_blocknr); + brelse(gang[i]); } } - root->fs_info->last_insert.objectid = first; + if (root->fs_info->last_insert.objectid > first) + root->fs_info->last_insert.objectid = first; root->fs_info->last_insert.offset = 0; return 0; } @@ -161,8 +164,10 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) { int err; + struct buffer_head *bh = sb_getblk(root->fs_info->sb, blocknr); + BUG_ON(!bh); err = radix_tree_insert(&root->fs_info->pinned_radix, - blocknr, (void *)blocknr); + blocknr, bh); BUG_ON(err); if (err) return err; @@ -217,6 +222,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) BUG(); } + mark_buffer_dirty(path.nodes[0]); btrfs_release_path(extent_root, &path); finish_current_insert(trans, extent_root); return ret; @@ -232,7 +238,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int ret; int wret; int err = 0; - unsigned long gang[4]; + struct buffer_head *gang[4]; int i; struct radix_tree_root *radix = &extent_root->fs_info->pinned_radix; @@ -245,10 +251,12 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct if (!ret) break; for (i = 0; i < ret; i++) { - radix_tree_tag_set(radix, gang[i], CTREE_EXTENT_PINNED); - radix_tree_tag_clear(radix, gang[i], + radix_tree_tag_set(radix, gang[i]->b_blocknr, + CTREE_EXTENT_PINNED); + radix_tree_tag_clear(radix, gang[i]->b_blocknr, CTREE_EXTENT_PENDING_DEL); - wret = __free_extent(trans, extent_root, gang[i], 1); + wret = __free_extent(trans, extent_root, + gang[i]->b_blocknr, 1); if (wret) err = wret; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index c45aec258bd5..ad2d375b830d 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -108,6 +108,7 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); + mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(inode_root, &path); return ret; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a4554c007ef7..a821b5d1e233 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -55,6 +55,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root slot = path.slots[0]; memcpy(btrfs_item_ptr(l, slot, struct btrfs_root_item), item, sizeof(*item)); + mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 620430825840..6080a8133d71 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -10,6 +10,7 @@ #include #include "ctree.h" #include "disk-io.h" +#include "transaction.h" #define BTRFS_SUPER_MAGIC 0x9123682E @@ -357,6 +358,131 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) return 0; } +static void fill_inode_item(struct btrfs_inode_item *item, + struct inode *inode) +{ + btrfs_set_inode_uid(item, inode->i_uid); + btrfs_set_inode_gid(item, inode->i_gid); + btrfs_set_inode_size(item, inode->i_size); + btrfs_set_inode_mode(item, inode->i_mode); + btrfs_set_inode_nlink(item, inode->i_nlink); + btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); + btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); + btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); + btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); + btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); + btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); + btrfs_set_inode_nblocks(item, inode->i_blocks); + btrfs_set_inode_generation(item, inode->i_generation); +} + +static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, + struct inode *dir, int mode) +{ + struct inode *inode; + struct btrfs_inode_item inode_item; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct btrfs_key key; + int ret; + u64 objectid; + + inode = new_inode(dir->i_sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + BUG_ON(ret); + + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_mode = mode; + inode->i_ino = objectid; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + fill_inode_item(&inode_item, inode); + + + key.objectid = objectid; + key.flags = 0; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + ret = btrfs_insert_inode_map(trans, root, objectid, &key); + BUG_ON(ret); + + ret = btrfs_insert_inode(trans, root, objectid, &inode_item); + BUG_ON(ret); + + insert_inode_hash(inode); + // FIXME mark_inode_dirty(inode) + return inode; +} + +static int btrfs_add_link(struct btrfs_trans_handle *trans, + struct dentry *dentry, struct inode *inode) +{ + int ret; + ret = btrfs_insert_dir_item(trans, btrfs_sb(inode->i_sb), + dentry->d_name.name, dentry->d_name.len, + dentry->d_parent->d_inode->i_ino, + inode->i_ino, 0); + BUG_ON(ret); + return ret; +} + +static int btrfs_add_nondir(struct btrfs_trans_handle *trans, + struct dentry *dentry, struct inode *inode) +{ + int err = btrfs_add_link(trans, dentry, inode); + if (!err) { + d_instantiate(dentry, inode); + return 0; + } + inode_dec_link_count(inode); + iput(inode); + return err; +} + +static int btrfs_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct inode *inode; + int err; + + trans = btrfs_start_transaction(root, 1); + inode = btrfs_new_inode(trans, dir, mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + return err; + // FIXME mark the inode dirty + err = btrfs_add_nondir(trans, dentry, inode); + dir->i_sb->s_dirt = 1; + btrfs_end_transaction(trans, root); + return err; +} + +static void btrfs_write_super(struct super_block *sb) +{ + sb->s_dirt = 0; +printk("btrfs write_super!\n"); +} + +static int btrfs_sync_fs(struct super_block *sb, int wait) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root; + int ret; + sb->s_dirt = 0; + root = btrfs_sb(sb); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_commit_transaction(trans, root); + sb->s_dirt = 0; + BUG_ON(ret); +printk("btrfs sync_fs\n"); + return 0; +} + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -377,10 +503,13 @@ static struct super_operations btrfs_super_ops = { .drop_inode = generic_delete_inode, .put_super = btrfs_put_super, .read_inode = btrfs_read_locked_inode, + .write_super = btrfs_write_super, + .sync_fs = btrfs_sync_fs, }; static struct inode_operations btrfs_dir_inode_operations = { .lookup = btrfs_lookup, + .create = btrfs_create, }; static struct file_operations btrfs_dir_file_operations = { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8dc1c170f10f..4903b47c9780 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -25,7 +25,7 @@ static int join_transaction(struct btrfs_root *root) init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); cur_trans->in_commit = 0; - cur_trans->use_count = 0; + cur_trans->use_count = 1; cur_trans->commit_done = 0; } cur_trans->num_writers++; @@ -56,7 +56,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; - WARN_ON(cur_trans->num_writers <= 1); + WARN_ON(cur_trans->num_writers < 1); if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); cur_trans->num_writers--; @@ -155,10 +155,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; - mutex_unlock(&root->fs_info->trans_mutex); - memcpy(&snap_key, &root->root_key, sizeof(snap_key)); - root->root_key.offset++; + if (root->node != root->commit_root) { + memcpy(&snap_key, &root->root_key, sizeof(snap_key)); + root->root_key.offset++; + } + + mutex_unlock(&root->fs_info->trans_mutex); if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); From df2ce34c8801af8e00c7f82435fcb1e4b45a2759 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 11:00:45 -0400 Subject: [PATCH 0064/2985] Btrfs: properly set new buffers for new blocks up to date Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 +++++++-- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/super.c | 3 +++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e1dcda0839c..2f1eb4ae4e1d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -10,8 +10,13 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) BUG(); if (root->node && btrfs_header_parentid(&node->header) != - btrfs_header_parentid(btrfs_buffer_header(root->node))) - BUG(); + btrfs_header_parentid(btrfs_buffer_header(root->node))) { + printk("block %Lu parentids don't match buf %Lu, root %Lu\n", + buf->b_blocknr, + btrfs_header_parentid(&node->header), + btrfs_header_parentid(btrfs_buffer_header(root->node))); + WARN_ON(1); + } return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2818f1c57170..c86f0e6152f2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -467,7 +467,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return NULL; } buf = find_tree_block(root, ins.objectid); - dirty_tree_block(trans, root, buf); + set_buffer_uptodate(buf); return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6080a8133d71..8970e9a82156 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -473,7 +473,10 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_trans_handle *trans; struct btrfs_root *root; int ret; + sb->s_dirt = 0; + return 0; + root = btrfs_sb(sb); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); From 7f5c15160e1436a53d01f9190db11c2a3a4d788a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 15:56:19 -0400 Subject: [PATCH 0065/2985] Add generation number to btrfs_header, readdir fixes, hash collision fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 8 +++++++- fs/btrfs/ctree.h | 14 +++++++++++++- fs/btrfs/dir-item.c | 4 ++-- fs/btrfs/disk-io.c | 6 +----- fs/btrfs/super.c | 14 +++++++++----- 5 files changed, 32 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 052434a328ec..b33a6bfaf327 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,6 +1,7 @@ #include #include "ctree.h" #include "disk-io.h" +#include "transaction.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); @@ -39,7 +40,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *cow; struct btrfs_node *cow_node; - if (buffer_dirty(buf)) { + if (btrfs_header_generation(btrfs_buffer_header(buf)) == + trans->transid) { *cow_ret = buf; return 0; } @@ -47,6 +49,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root cow_node = btrfs_buffer_node(cow); memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); + btrfs_set_header_generation(&cow_node->header, trans->transid); *cow_ret = cow; mark_buffer_dirty(cow); btrfs_inc_ref(trans, root, buf); @@ -661,6 +664,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&c->header, 1); btrfs_set_header_level(&c->header, level); btrfs_set_header_blocknr(&c->header, t->b_blocknr); + btrfs_set_header_generation(&c->header, trans->transid); btrfs_set_header_parentid(&c->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); lower = btrfs_buffer_node(path->nodes[level-1]); @@ -750,6 +754,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr); + btrfs_set_header_generation(&split->header, trans->transid); btrfs_set_header_parentid(&split->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); mid = (c_nritems + 1) / 2; @@ -1096,6 +1101,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root } btrfs_set_header_nritems(&right->header, nritems - mid); btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); + btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 983e3cc9ae9f..96cec6352f12 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -52,6 +52,7 @@ struct btrfs_key { struct btrfs_header { u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ + __le64 generation; __le64 parentid; /* objectid of the tree root */ __le32 csum; __le32 ham; @@ -600,6 +601,17 @@ static inline void btrfs_set_header_blocknr(struct btrfs_header *h, u64 blocknr) h->blocknr = cpu_to_le64(blocknr); } +static inline u64 btrfs_header_generation(struct btrfs_header *h) +{ + return le64_to_cpu(h->generation); +} + +static inline void btrfs_set_header_generation(struct btrfs_header *h, + u64 val) +{ + h->generation = cpu_to_le64(val); +} + static inline u64 btrfs_header_parentid(struct btrfs_header *h) { return le64_to_cpu(h->parentid); @@ -839,7 +851,7 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, const char *name, int name_len, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, - char *name, int name_len); + const char *name, int name_len); int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, u64 dirid, u64 *objectid); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index f81cbcc83b66..c18f81797a88 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -60,8 +60,8 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root } int btrfs_match_dir_item_name(struct btrfs_root *root, - struct btrfs_path *path, char - *name, int name_len) + struct btrfs_path *path, + const char *name, int name_len) { struct btrfs_dir_item *dir_item; char *name_ptr; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2f1eb4ae4e1d..3dea757d4cbb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -11,11 +11,7 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) BUG(); if (root->node && btrfs_header_parentid(&node->header) != btrfs_header_parentid(btrfs_buffer_header(root->node))) { - printk("block %Lu parentids don't match buf %Lu, root %Lu\n", - buf->b_blocknr, - btrfs_header_parentid(&node->header), - btrfs_header_parentid(btrfs_buffer_header(root->node))); - WARN_ON(1); + BUG(); } return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8970e9a82156..a2db05594695 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -199,7 +199,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, btrfs_init_path(&path); ret = btrfs_lookup_dir_item(NULL, root, &path, dir->i_ino, name, namelen, 0); - if (ret) { + if (ret || !btrfs_match_dir_item_name(root, &path, name, namelen)) { *ino = 0; goto out; } @@ -247,7 +247,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int slot; int advance; unsigned char d_type = DT_UNKNOWN; - int over; + int over = 0; key.objectid = inode->i_ino; key.flags = 0; @@ -258,7 +258,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (ret < 0) { goto err; } - advance = filp->f_pos > 0 && ret != 0; + advance = 0; while(1) { leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); @@ -282,13 +282,17 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) break; if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY) continue; + if (btrfs_disk_key_offset(&item->key) < filp->f_pos) + continue; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), btrfs_disk_key_offset(&item->key), btrfs_dir_objectid(di), d_type); - if (over) + if (over) { + filp->f_pos = btrfs_disk_key_offset(&item->key); break; + } filp->f_pos = btrfs_disk_key_offset(&item->key) + 1; } ret = 0; @@ -425,7 +429,6 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, inode->i_ino, 0); - BUG_ON(ret); return ret; } @@ -466,6 +469,7 @@ static void btrfs_write_super(struct super_block *sb) { sb->s_dirt = 0; printk("btrfs write_super!\n"); + filemap_flush(sb->s_bdev->bd_inode->i_mapping); } static int btrfs_sync_fs(struct super_block *sb, int wait) From d561c025ee3a0aee53a225bedce0ee35fe12f335 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 19:47:49 -0400 Subject: [PATCH 0066/2985] Btrfs: very minimal locking Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 5 +++-- fs/btrfs/super.c | 26 +++++++++++++++++--------- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 96cec6352f12..68cafae6a850 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -231,6 +231,7 @@ struct btrfs_fs_info { struct buffer_head *sb_buffer; struct super_block *sb; struct mutex trans_mutex; + struct mutex fs_mutex; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3dea757d4cbb..e32ddff55b0e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -116,6 +116,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb_buffer = sb_buffer; fs_info->sb = sb; mutex_init(&fs_info->trans_mutex); + mutex_init(&fs_info->fs_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c86f0e6152f2..9583a9ae8b79 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -168,9 +168,10 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) BUG_ON(!bh); err = radix_tree_insert(&root->fs_info->pinned_radix, blocknr, bh); - BUG_ON(err); - if (err) + if (err && err != -EEXIST) { + BUG(); return err; + } radix_tree_tag_set(&root->fs_info->pinned_radix, blocknr, tag); return 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a2db05594695..0ca1080e0198 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -453,25 +453,21 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, struct inode *inode; int err; + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); inode = btrfs_new_inode(trans, dir, mode); err = PTR_ERR(inode); if (IS_ERR(inode)) - return err; + goto out_unlock; // FIXME mark the inode dirty err = btrfs_add_nondir(trans, dentry, inode); dir->i_sb->s_dirt = 1; btrfs_end_transaction(trans, root); +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); return err; } -static void btrfs_write_super(struct super_block *sb) -{ - sb->s_dirt = 0; -printk("btrfs write_super!\n"); - filemap_flush(sb->s_bdev->bd_inode->i_mapping); -} - static int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; @@ -479,17 +475,29 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) int ret; sb->s_dirt = 0; - return 0; + if (!wait) { + filemap_flush(sb->s_bdev->bd_inode->i_mapping); + return 0; + } + filemap_write_and_wait(sb->s_bdev->bd_inode->i_mapping); root = btrfs_sb(sb); + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; BUG_ON(ret); printk("btrfs sync_fs\n"); + mutex_unlock(&root->fs_info->fs_mutex); return 0; } +static void btrfs_write_super(struct super_block *sb) +{ + btrfs_sync_fs(sb, 1); +} + + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { From 78fae27ebf5bd35fb9b2e4213e486635eacfc0ad Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 25 Mar 2007 11:35:08 -0400 Subject: [PATCH 0067/2985] Btrfs: leak fixes, pinning fixes Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 30 +++++++++++++++++++++--------- fs/btrfs/super.c | 1 + fs/btrfs/transaction.c | 32 ++++++++++++++++++-------------- 3 files changed, 40 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9583a9ae8b79..369b960fce45 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -165,13 +165,23 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) { int err; struct buffer_head *bh = sb_getblk(root->fs_info->sb, blocknr); + struct btrfs_header *header; BUG_ON(!bh); + + header = btrfs_buffer_header(bh); + if (btrfs_header_generation(header) == + root->fs_info->running_transaction->transid) { + return 0; + } + err = radix_tree_insert(&root->fs_info->pinned_radix, blocknr, bh); if (err && err != -EEXIST) { BUG(); return err; } + if (err == -EEXIST) + brelse(bh); radix_tree_tag_set(&root->fs_info->pinned_radix, blocknr, tag); return 0; @@ -181,7 +191,7 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks) + *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_path path; struct btrfs_key key; @@ -213,12 +223,18 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_extent_refs(ei, refs); if (refs == 0) { u64 super_blocks_used; + + if (pin) { + ret = pin_down_block(root, blocknr, + CTREE_EXTENT_PINNED); + BUG_ON(ret); + } + super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, &path); - if (extent_root->fs_info->last_insert.objectid > - blocknr) + if (extent_root->fs_info->last_insert.objectid > blocknr) extent_root->fs_info->last_insert.objectid = blocknr; if (ret) BUG(); @@ -257,7 +273,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct radix_tree_tag_clear(radix, gang[i]->b_blocknr, CTREE_EXTENT_PENDING_DEL); wret = __free_extent(trans, extent_root, - gang[i]->b_blocknr, 1); + gang[i]->b_blocknr, 1, 0); if (wret) err = wret; } @@ -281,11 +297,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root pin_down_block(root, blocknr, CTREE_EXTENT_PENDING_DEL); return 0; } - if (pin) { - ret = pin_down_block(root, blocknr, CTREE_EXTENT_PINNED); - BUG_ON(ret); - } - ret = __free_extent(trans, root, blocknr, num_blocks); + ret = __free_extent(trans, root, blocknr, num_blocks, pin); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0ca1080e0198..094a66c267b4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -146,6 +146,7 @@ static void btrfs_read_locked_inode(struct inode *inode) ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); if (ret) { make_bad_inode(inode); + btrfs_release_path(root, &path); return; } inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4903b47c9780..46a596e345f0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -4,12 +4,15 @@ #include "disk-io.h" #include "transaction.h" - +static int total_trans = 0; static void put_transaction(struct btrfs_transaction *transaction) { transaction->use_count--; - if (transaction->use_count == 0) + if (transaction->use_count == 0) { + WARN_ON(total_trans == 0); + total_trans--; kfree(transaction); + } } static int join_transaction(struct btrfs_root *root) @@ -18,6 +21,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans = root->fs_info->running_transaction; if (!cur_trans) { cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS); + total_trans++; BUG_ON(!cur_trans); root->fs_info->running_transaction = cur_trans; cur_trans->num_writers = 0; @@ -108,7 +112,6 @@ static int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { DEFINE_WAIT(wait); - commit->use_count++; while(!commit->commit_done) { prepare_to_wait(&commit->commit_wait, &wait, TASK_UNINTERRUPTIBLE); @@ -126,7 +129,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret = 0; - struct buffer_head *snap = root->commit_root; + struct buffer_head *snap; struct btrfs_key snap_key; struct btrfs_transaction *cur_trans; DEFINE_WAIT(wait); @@ -153,15 +156,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } finish_wait(&trans->transaction->writer_wait, &wait); - cur_trans = root->fs_info->running_transaction; - root->fs_info->running_transaction = NULL; - if (root->node != root->commit_root) { memcpy(&snap_key, &root->root_key, sizeof(snap_key)); root->root_key.offset++; } - mutex_unlock(&root->fs_info->trans_mutex); if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); @@ -173,17 +172,24 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); + cur_trans = root->fs_info->running_transaction; + root->fs_info->running_transaction = NULL; + mutex_unlock(&root->fs_info->trans_mutex); + ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root); - btrfs_finish_extent_commit(trans, root->fs_info->extent_root); - btrfs_finish_extent_commit(trans, root->fs_info->tree_root); + btrfs_finish_extent_commit(trans, root); + mutex_lock(&root->fs_info->trans_mutex); put_transaction(cur_trans); + put_transaction(cur_trans); + mutex_unlock(&root->fs_info->trans_mutex); kfree(trans); if (root->node != root->commit_root) { trans = btrfs_start_transaction(root, 1); + snap = root->commit_root; root->commit_root = root->node; get_bh(root->node); ret = btrfs_drop_snapshot(trans, root, snap); @@ -191,10 +197,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); - BUG_ON(ret); - root->fs_info->generation = root->root_key.offset + 1; - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); + BUG_ON(ret); root->fs_info->generation = root->root_key.offset + 1; ret = btrfs_end_transaction(trans, root); BUG_ON(ret); + printk("at free, total trans %d\n", total_trans); } return ret; From 134e97313491c3a3d6bc3eca3b7c9c64408cbd08 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 25 Mar 2007 13:44:56 -0400 Subject: [PATCH 0068/2985] Btrfs: unlink and delete_inode Signed-off-by: Chris Mason --- fs/btrfs/super.c | 117 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 114 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 094a66c267b4..f96bd92155cc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -187,6 +187,104 @@ static void btrfs_read_locked_inode(struct inode *inode) return; } +static int btrfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct btrfs_path path; + struct btrfs_root *root; + struct btrfs_trans_handle *trans; + const char *name = dentry->d_name.name; + int name_len = dentry->d_name.len; + int ret; + u64 objectid; + struct btrfs_dir_item *di; + + btrfs_init_path(&path); + root = btrfs_sb(dir->i_sb); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + + ret = btrfs_lookup_dir_item(trans, root, &path, dir->i_ino, + name, name_len, -1); + if (ret < 0) + goto err; + if (ret > 0) { + ret = -ENOENT; + goto err; + } + di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_dir_item); + objectid = btrfs_dir_objectid(di); + + ret = btrfs_del_item(trans, root, &path); + BUG_ON(ret); + dentry->d_inode->i_ctime = dir->i_ctime; +err: + btrfs_release_path(root, &path); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + if (ret == 0) + inode_dec_link_count(dentry->d_inode); + return ret; +} + +static int btrfs_free_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + u64 objectid = inode->i_ino; + struct btrfs_path path; + struct btrfs_inode_map_item *map; + struct btrfs_key stat_data_key; + int ret; + clear_inode(inode); + btrfs_init_path(&path); + ret = btrfs_lookup_inode_map(trans, root, &path, objectid, -1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + btrfs_release_path(root, &path); + goto error; + } + map = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_inode_map_item); + btrfs_disk_key_to_cpu(&stat_data_key, &map->key); + ret = btrfs_del_item(trans, root->fs_info->inode_root, &path); + BUG_ON(ret); + btrfs_release_path(root, &path); + btrfs_init_path(&path); + + ret = btrfs_lookup_inode(trans, root, &path, objectid, -1); + BUG_ON(ret); + ret = btrfs_del_item(trans, root, &path); + BUG_ON(ret); + btrfs_release_path(root, &path); +error: + return ret; +} + +static void btrfs_delete_inode(struct inode *inode) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + truncate_inode_pages(&inode->i_data, 0); + if (is_bad_inode(inode)) { + goto no_delete; + } + inode->i_size = 0; + if (inode->i_blocks) + WARN_ON(1); + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_free_inode(trans, root, inode); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return; +no_delete: + clear_inode(inode); +} + + static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ino_t *ino) { @@ -272,6 +370,13 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; +#if 0 + page_cache_readahead( + inode->i_sb->s_bdev->bd_inode->i_mapping, + &filp->f_ra, filp, + path.nodes[0]->b_blocknr >> + (PAGE_CACHE_SHIFT - inode->i_blkbits), 1); +#endif } else { slot++; path.slots[0]++; @@ -441,8 +546,6 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, d_instantiate(dentry, inode); return 0; } - inode_dec_link_count(inode); - iput(inode); return err; } @@ -453,6 +556,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, struct btrfs_root *root = btrfs_sb(dir->i_sb); struct inode *inode; int err; + int drop_inode = 0; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -462,10 +566,16 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; // FIXME mark the inode dirty err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; dir->i_sb->s_dirt = 1; btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } return err; } @@ -516,7 +626,7 @@ static struct file_system_type btrfs_fs_type = { static struct super_operations btrfs_super_ops = { .statfs = simple_statfs, - .drop_inode = generic_delete_inode, + .delete_inode = btrfs_delete_inode, .put_super = btrfs_put_super, .read_inode = btrfs_read_locked_inode, .write_super = btrfs_write_super, @@ -526,6 +636,7 @@ static struct super_operations btrfs_super_ops = { static struct inode_operations btrfs_dir_inode_operations = { .lookup = btrfs_lookup, .create = btrfs_create, + .unlink = btrfs_unlink, }; static struct file_operations btrfs_dir_file_operations = { From dcea79152cfb7cf244bdd9c23f4291831ffd6adf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 25 Mar 2007 14:03:08 -0400 Subject: [PATCH 0069/2985] Btrfs: very simple readdir readahead Signed-off-by: Chris Mason --- fs/btrfs/super.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f96bd92155cc..e76fcd80db6a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -332,6 +332,34 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } +static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path) +{ + struct buffer_head *bh; + struct btrfs_node *node; + int i; + int nritems; + u64 objectid; + u64 item_objectid; + u64 blocknr; + int slot; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1]; + objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key); + nritems = btrfs_header_nritems(&node->header); + for (i = slot; i < nritems; i++) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid != objectid) + break; + blocknr = btrfs_node_blockptr(node, i); + bh = sb_getblk(root->fs_info->sb, blocknr); + ll_rw_block(READ, 1, &bh); + brelse(bh); + } + +} static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; @@ -358,6 +386,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) goto err; } advance = 0; + reada_leaves(root, &path); while(1) { leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); @@ -370,13 +399,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; -#if 0 - page_cache_readahead( - inode->i_sb->s_bdev->bd_inode->i_mapping, - &filp->f_ra, filp, - path.nodes[0]->b_blocknr >> - (PAGE_CACHE_SHIFT - inode->i_blkbits), 1); -#endif + if (path.nodes[1] && path.slots[1] == 0) + reada_leaves(root, &path); } else { slot++; path.slots[0]++; From f7922033efe957f79ae57f6026e93c8148e7f7ed Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 25 Mar 2007 20:17:36 -0400 Subject: [PATCH 0070/2985] btrfs_mkdir Signed-off-by: Chris Mason --- fs/btrfs/super.c | 184 +++++++++++++++++------------------------------ 1 file changed, 65 insertions(+), 119 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e76fcd80db6a..820d21ee4afd 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -18,124 +18,6 @@ static struct inode_operations btrfs_dir_inode_operations; static struct super_operations btrfs_super_ops; static struct file_operations btrfs_dir_file_operations; -#if 0 -/* some random number */ - -static struct super_operations ramfs_ops; - -static struct backing_dev_info ramfs_backing_dev_info = { - .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | - BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | - BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, -}; - -struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) -{ - struct inode * inode = new_inode(sb); - - if (inode) { - inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_blocks = 0; - inode->i_mapping->a_ops = &ramfs_aops; - inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - switch (mode & S_IFMT) { - default: - init_special_inode(inode, mode, dev); - break; - case S_IFREG: - inode->i_op = &ramfs_file_inode_operations; - inode->i_fop = &ramfs_file_operations; - break; - case S_IFDIR: - inode->i_op = &ramfs_dir_inode_operations; - inode->i_fop = &simple_dir_operations; - - /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inc_nlink(inode); - break; - case S_IFLNK: - inode->i_op = &page_symlink_inode_operations; - break; - } - } - return inode; -} - -/* - * File creation. Allocate an inode, and we're done.. - */ -/* SMP-safe */ -static int -ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) -{ - struct inode * inode = ramfs_get_inode(dir->i_sb, mode, dev); - int error = -ENOSPC; - - if (inode) { - if (dir->i_mode & S_ISGID) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - inode->i_mode |= S_ISGID; - } - d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ - error = 0; - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - } - return error; -} - -static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) -{ - int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0); - if (!retval) - inc_nlink(dir); - return retval; -} - -static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) -{ - return ramfs_mknod(dir, dentry, mode | S_IFREG, 0); -} - -static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) -{ - struct inode *inode; - int error = -ENOSPC; - - inode = ramfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); - if (inode) { - int l = strlen(symname)+1; - error = page_symlink(inode, symname, l); - if (!error) { - if (dir->i_mode & S_ISGID) - inode->i_gid = dir->i_gid; - d_instantiate(dentry, inode); - dget(dentry); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - } else - iput(inode); - } - return error; -} - -static struct inode_operations ramfs_dir_inode_operations = { - .create = ramfs_create, - .lookup = simple_lookup, - .link = simple_link, - .unlink = simple_unlink, - .symlink = ramfs_symlink, - .mkdir = ramfs_mkdir, - .rmdir = simple_rmdir, - .mknod = ramfs_mknod, - .rename = simple_rename, -}; -#endif - static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path path; @@ -533,9 +415,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_ino = objectid; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + /* FIXME do this on link */ + if (mode & S_IFDIR) + inode->i_size = 3; fill_inode_item(&inode_item, inode); - key.objectid = objectid; key.flags = 0; key.offset = 0; @@ -603,6 +487,67 @@ out_unlock: return err; } +static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) +{ + struct btrfs_root *root = btrfs_sb(inode->i_sb); + int ret; + char buf[2]; + buf[0] = '.'; + buf[1] = '.'; + + ret = btrfs_insert_dir_item(trans, root, buf, 1, inode->i_ino, + inode->i_ino, 1); + if (ret) + goto error; + ret = btrfs_insert_dir_item(trans, root, buf, 2, inode->i_ino, + dir->i_ino, 1); +error: + return ret; +} + +static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct inode *inode; + struct btrfs_trans_handle *trans; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + int err = 0; + int drop_on_err = 0; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out_unlock; + } + inode = btrfs_new_inode(trans, dir, S_IFDIR | mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_fail; + } + drop_on_err = 1; + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; + + err = btrfs_make_empty_dir(trans, inode, dir); + if (err) + goto out_fail; + err = btrfs_add_link(trans, dentry, inode); + if (err) + goto out_fail; + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + drop_on_err = 0; + +out_fail: + btrfs_end_transaction(trans, root); +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); + if (drop_on_err) + iput(inode); + return err; +} + static int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; @@ -661,6 +606,7 @@ static struct inode_operations btrfs_dir_inode_operations = { .lookup = btrfs_lookup, .create = btrfs_create, .unlink = btrfs_unlink, + .mkdir = btrfs_mkdir, }; static struct file_operations btrfs_dir_file_operations = { From 8ef97622caa2d5f78d1dc58ab918e2fbfa9b357a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Mar 2007 10:15:30 -0400 Subject: [PATCH 0071/2985] Btrfs: add a radix back bit tree Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/bit-radix.c | 107 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/bit-radix.h | 15 ++++++ fs/btrfs/ctree.h | 3 +- fs/btrfs/disk-io.c | 3 +- fs/btrfs/extent-tree.c | 91 +++++++++++++++-------------------- 6 files changed, 166 insertions(+), 56 deletions(-) create mode 100644 fs/btrfs/bit-radix.c create mode 100644 fs/btrfs/bit-radix.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index be7d74cdca04..5346f706b2c5 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -3,7 +3,8 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ - hash.o file-item.o inode-item.o inode-map.o disk-io.o transaction.o + hash.o file-item.o inode-item.o inode-map.o disk-io.o \ + transaction.o bit-radix.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c new file mode 100644 index 000000000000..43807cc3cce7 --- /dev/null +++ b/fs/btrfs/bit-radix.c @@ -0,0 +1,107 @@ +#include +#include "bit-radix.h" + +#define BIT_ARRAY_BYTES 256 +#define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8) + +int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) +{ + unsigned long *bits; + unsigned long slot; + int bit_slot; + int ret; + + slot = bit / BIT_RADIX_BITS_PER_ARRAY; + bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; + + bits = radix_tree_lookup(radix, slot); + if (!bits) { + bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOIO); + if (!bits) + return -ENOMEM; + memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); + bits[0] = slot; + ret = radix_tree_insert(radix, slot, bits); + if (ret) + return ret; + } + set_bit(bit_slot, bits + 1); + return 0; +} + +int test_radix_bit(struct radix_tree_root *radix, unsigned long bit) +{ + unsigned long *bits; + unsigned long slot; + int bit_slot; + + slot = bit / BIT_RADIX_BITS_PER_ARRAY; + bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; + + bits = radix_tree_lookup(radix, slot); + if (!bits) + return 0; + return test_bit(bit_slot, bits + 1); +} + +int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) +{ + unsigned long *bits; + unsigned long slot; + int bit_slot; + int i; + int empty = 1; + + slot = bit / BIT_RADIX_BITS_PER_ARRAY; + bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; + + bits = radix_tree_lookup(radix, slot); + if (!bits) + return 0; + clear_bit(bit_slot, bits + 1); + + for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) { + if (bits[i]) { + empty = 0; + break; + } + } + + if (empty) { + bits = radix_tree_delete(radix, slot); + BUG_ON(!bits); + } + return 0; +} + +int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, + int nr) +{ + unsigned long *bits; + unsigned long *gang[4]; + int found; + int ret; + int i; + int total_found = 0; + + ret = radix_tree_gang_lookup(radix, (void *)&gang, 0, ARRAY_SIZE(gang)); + for (i = 0; i < ret && nr > 0; i++) { + found = 0; + bits = gang[i]; + while(nr > 0) { + found = find_next_bit(bits + 1, + BIT_RADIX_BITS_PER_ARRAY, + found); + if (found < BIT_RADIX_BITS_PER_ARRAY) { + *retbits = bits[0] * + BIT_RADIX_BITS_PER_ARRAY + found; + retbits++; + nr--; + total_found++; + found++; + } else + break; + } + } + return total_found; +} diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h new file mode 100644 index 000000000000..56aad4c7d7f7 --- /dev/null +++ b/fs/btrfs/bit-radix.h @@ -0,0 +1,15 @@ +#ifndef __BIT_RADIX__ +#define __BIT_RADIX__ +#include + +int set_radix_bit(struct radix_tree_root *radix, unsigned long bit); +int test_radix_bit(struct radix_tree_root *radix, unsigned long bit); +int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit); +int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, + int nr); + +static inline void init_bit_radix(struct radix_tree_root *radix) +{ + INIT_RADIX_TREE(radix, GFP_NOFS); +} +#endif diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 68cafae6a850..0aa1052d9f67 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,8 +1,8 @@ #ifndef __BTRFS__ #define __BTRFS__ -#include #include +#include "bit-radix.h" struct btrfs_trans_handle; struct btrfs_transaction; @@ -222,6 +222,7 @@ struct btrfs_fs_info { struct btrfs_root *inode_root; struct btrfs_key current_insert; struct btrfs_key last_insert; + struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; u64 last_inode_alloc; u64 last_inode_alloc_dirid; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e32ddff55b0e..758a62aba063 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -104,7 +104,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, /* FIXME: don't be stupid */ if (!btrfs_super_root(disk_super)) return NULL; - INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); + init_bit_radix(&fs_info->pinned_radix); + init_bit_radix(&fs_info->pending_del_radix); fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 369b960fce45..b14104276eea 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1,5 +1,4 @@ #include -#include #include "ctree.h" #include "disk-io.h" #include "print-tree.h" @@ -12,15 +11,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -/* - * pending extents are blocks that we're trying to allocate in the extent - * map while trying to grow the map because of other allocations. To avoid - * recursing, they are tagged in the radix tree and cleaned up after - * other allocations are done. The pending tag is also used in the same - * manner for deletes. - */ -#define CTREE_EXTENT_PENDING_DEL 0 -#define CTREE_EXTENT_PINNED 1 static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr) @@ -104,24 +94,21 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct buffer_head *gang[8]; + unsigned long gang[8]; u64 first = 0; int ret; int i; + struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; while(1) { - ret = radix_tree_gang_lookup_tag(&root->fs_info->pinned_radix, - (void **)gang, 0, - ARRAY_SIZE(gang), - CTREE_EXTENT_PINNED); + ret = find_first_radix_bit(pinned_radix, gang, + ARRAY_SIZE(gang)); if (!ret) break; if (!first) - first = gang[0]->b_blocknr; + first = gang[0]; for (i = 0; i < ret; i++) { - radix_tree_delete(&root->fs_info->pinned_radix, - gang[i]->b_blocknr); - brelse(gang[i]); + clear_radix_bit(pinned_radix, gang[i]); } } if (root->fs_info->last_insert.objectid > first) @@ -161,29 +148,27 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct return 0; } -static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) +static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) { int err; - struct buffer_head *bh = sb_getblk(root->fs_info->sb, blocknr); struct btrfs_header *header; - BUG_ON(!bh); + struct buffer_head *bh; - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - root->fs_info->running_transaction->transid) { - return 0; - } - - err = radix_tree_insert(&root->fs_info->pinned_radix, - blocknr, bh); - if (err && err != -EEXIST) { - BUG(); - return err; - } - if (err == -EEXIST) + bh = sb_find_get_block(root->fs_info->sb, blocknr); + if (bh) { + header = btrfs_buffer_header(bh); + if (btrfs_header_generation(header) == + root->fs_info->running_transaction->transid) { + brelse(bh); + return 0; + } brelse(bh); - radix_tree_tag_set(&root->fs_info->pinned_radix, blocknr, - tag); + } + if (pending) + err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); + else + err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); + BUG_ON(err); return 0; } @@ -225,8 +210,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 super_blocks_used; if (pin) { - ret = pin_down_block(root, blocknr, - CTREE_EXTENT_PINNED); + ret = pin_down_block(root, blocknr, 0); BUG_ON(ret); } @@ -255,25 +239,26 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int ret; int wret; int err = 0; - struct buffer_head *gang[4]; + unsigned long gang[4]; int i; - struct radix_tree_root *radix = &extent_root->fs_info->pinned_radix; + struct radix_tree_root *pending_radix; + struct radix_tree_root *pinned_radix; + + pending_radix = &extent_root->fs_info->pending_del_radix; + pinned_radix = &extent_root->fs_info->pinned_radix; while(1) { - ret = radix_tree_gang_lookup_tag( - &extent_root->fs_info->pinned_radix, - (void **)gang, 0, - ARRAY_SIZE(gang), - CTREE_EXTENT_PENDING_DEL); + ret = find_first_radix_bit(pending_radix, gang, + ARRAY_SIZE(gang)); if (!ret) break; for (i = 0; i < ret; i++) { - radix_tree_tag_set(radix, gang[i]->b_blocknr, - CTREE_EXTENT_PINNED); - radix_tree_tag_clear(radix, gang[i]->b_blocknr, - CTREE_EXTENT_PENDING_DEL); + wret = set_radix_bit(pinned_radix, gang[i]); + BUG_ON(wret); + wret = clear_radix_bit(pending_radix, gang[i]); + BUG_ON(wret); wret = __free_extent(trans, extent_root, - gang[i]->b_blocknr, 1, 0); + gang[i], 1, 0); if (wret) err = wret; } @@ -294,7 +279,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (root == extent_root) { t = find_tree_block(root, blocknr); - pin_down_block(root, blocknr, CTREE_EXTENT_PENDING_DEL); + pin_down_block(root, blocknr, 1); return 0; } ret = __free_extent(trans, root, blocknr, num_blocks, pin); @@ -393,7 +378,7 @@ check_pending: BUG_ON(ins->objectid < search_start); for (test_block = ins->objectid; test_block < ins->objectid + total_needed; test_block++) { - if (radix_tree_lookup(&root->fs_info->pinned_radix, + if (test_radix_bit(&root->fs_info->pinned_radix, test_block)) { search_start = test_block + 1; goto check_failed; From b52d63fecef89191ed52ab223e9b3b9bd7d615a6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Mar 2007 11:01:09 -0400 Subject: [PATCH 0072/2985] Btrfs: TODO update Signed-off-by: Chris Mason --- fs/btrfs/TODO | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 4b5bd05de94f..454028b6bd46 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,11 +8,8 @@ * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter -* Only pin blocks allocated in this transaction -* Add transaction handles * Do actual block accounting * Check compat and incompat flags on the inode -* Port into the kernel * Add virtual filesystems, mountable snapshots * Get rid of struct ctree_path, limiting tree levels held at one time * Release From 4730a4bc5bf3861d2cb40999374b3ac0ee2a1e0d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Mar 2007 12:00:39 -0400 Subject: [PATCH 0073/2985] btrfs_dirty_inode Signed-off-by: Chris Mason --- fs/btrfs/super.c | 62 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 820d21ee4afd..905b093a85fe 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -242,6 +242,7 @@ static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path) } } + static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; @@ -392,6 +393,51 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_inode_generation(item, inode->i_generation); } +static int btrfs_update_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + struct btrfs_inode_item *inode_item; + struct btrfs_path path; + int ret; + + btrfs_init_path(&path); + + ret = btrfs_lookup_inode(trans, root, &path, inode->i_ino, 1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + goto failed; + } + + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], + struct btrfs_inode_item); + + fill_inode_item(inode_item, inode); + mark_buffer_dirty(path.nodes[0]); +failed: + btrfs_release_path(root, &path); + return 0; +} + +static int btrfs_write_inode(struct inode *inode, int wait) +{ + struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_trans_handle *trans; + int ret; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_update_inode(trans, root, inode); + if (wait) + btrfs_commit_transaction(trans, root); + else + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct inode *dir, int mode) { @@ -415,9 +461,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_ino = objectid; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - /* FIXME do this on link */ - if (mode & S_IFDIR) - inode->i_size = 3; fill_inode_item(&inode_item, inode); key.objectid = objectid; @@ -431,7 +474,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BUG_ON(ret); insert_inode_hash(inode); - // FIXME mark_inode_dirty(inode) return inode; } @@ -443,6 +485,12 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, inode->i_ino, 0); + if (ret == 0) { + dentry->d_parent->d_inode->i_size += dentry->d_name.len; + ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb), + dentry->d_parent->d_inode); + } + return ret; } @@ -502,6 +550,10 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, goto error; ret = btrfs_insert_dir_item(trans, root, buf, 2, inode->i_ino, dir->i_ino, 1); + if (ret) + goto error; + inode->i_size = 3; + ret = btrfs_update_inode(trans, root, inode); error: return ret; } @@ -536,7 +588,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; d_instantiate(dentry, inode); - mark_inode_dirty(inode); drop_on_err = 0; out_fail: @@ -600,6 +651,7 @@ static struct super_operations btrfs_super_ops = { .read_inode = btrfs_read_locked_inode, .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, + .write_inode = btrfs_write_inode, }; static struct inode_operations btrfs_dir_inode_operations = { From dee26a9f7aab7ffe1193cd1415b23a69426acc9f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Mar 2007 16:00:06 -0400 Subject: [PATCH 0074/2985] btrfs_get_block, file read/write Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/ctree.h | 26 +++++- fs/btrfs/extent-tree.c | 4 +- fs/btrfs/file-item.c | 52 +++++++++++- fs/btrfs/super.c | 188 +++++++++++++++++++++++++++++++++-------- 5 files changed, 230 insertions(+), 41 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 454028b6bd46..2ca301b289a6 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,6 +8,7 @@ * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter +* Make directory hashing work on 32 bit * Do actual block accounting * Check compat and incompat flags on the inode * Add virtual filesystems, mountable snapshots diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0aa1052d9f67..7b7120d3ab42 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -198,7 +198,7 @@ struct btrfs_file_extent_item { __le64 disk_blocknr; __le64 disk_num_blocks; /* - * the logical offset in file bytes (no csums) + * the logical offset in file blocks (no csums) * this extent record is for. This allows a file extent to point * into the middle of an existing extent on disk, sharing it * between two snapshots (useful if some bytes in the middle of the @@ -812,12 +812,19 @@ static inline struct btrfs_root *btrfs_sb(struct super_block *sb) ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) +/* extent-item.c */ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 num_blocks, u64 search_start, u64 + search_end, u64 owner, struct btrfs_key *ins); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct + btrfs_root *root); +/* ctree.c */ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); @@ -834,8 +841,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *snap); -int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct - btrfs_root *root); +/* root-item.c */ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key); int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root @@ -846,6 +852,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *item); int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); +/* dir-item.c */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, u64 objectid, u8 type); @@ -854,6 +861,7 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root const char *name, int name_len, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len); +/* inode-map.c */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, u64 dirid, u64 *objectid); @@ -863,9 +871,21 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, int mod); +/* inode-item.c */ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, struct btrfs_inode_item *inode_item); int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, int mod); + +/* file-item.c */ +int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 offset, + u64 num_blocks, u64 hint_block, + u64 *result); +int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid, + u64 blocknr, u64 num_blocks, int mod); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b14104276eea..82f6e9eed1d0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -403,7 +403,7 @@ error: * * returns 0 if everything worked, non-zero otherwise. */ -static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_blocks, u64 search_start, u64 search_end, u64 owner, struct btrfs_key *ins) { @@ -458,7 +458,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int ret; struct buffer_head *buf; - ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1, + ret = btrfs_alloc_extent(trans, root, 1, 0, (unsigned long)-1, btrfs_header_parentid(btrfs_buffer_header(root->node)), &ins); if (ret) { BUG(); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 8e1e5b4ccfaf..4a0367d702b7 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1,9 +1,57 @@ #include #include "ctree.h" +#include "disk-io.h" #include "transaction.h" -int btrfs_create_file(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 dirid, u64 *objectid) +int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 offset, + u64 num_blocks, u64 hint_block, + u64 *result) { + struct btrfs_key ins; + int ret = 0; + struct btrfs_file_extent_item *item; + struct btrfs_key file_key; + struct btrfs_path path; + + btrfs_init_path(&path); + ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, + (u64)-1, objectid, &ins); + BUG_ON(ret); + file_key.objectid = objectid; + file_key.offset = offset; + file_key.flags = 0; + btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); + + ret = btrfs_insert_empty_item(trans, root, &path, &file_key, + sizeof(*item)); + item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_disk_blocknr(item, ins.objectid); + btrfs_set_file_extent_disk_num_blocks(item, ins.offset); + btrfs_set_file_extent_offset(item, 0); + btrfs_set_file_extent_num_blocks(item, ins.offset); + mark_buffer_dirty(path.nodes[0]); + *result = ins.objectid; + btrfs_release_path(root, &path); return 0; } + +int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid, + u64 blocknr, u64 num_blocks, int mod) +{ + int ret; + struct btrfs_key file_key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + + file_key.objectid = objectid; + file_key.offset = blocknr; + file_key.flags = 0; + btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); + ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); + return ret; +} diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 905b093a85fe..2c2883f2856d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -17,6 +18,9 @@ static struct inode_operations btrfs_dir_inode_operations; static struct super_operations btrfs_super_ops; static struct file_operations btrfs_dir_file_operations; +static struct inode_operations btrfs_file_inode_operations; +static struct address_space_operations btrfs_aops; +static struct file_operations btrfs_file_operations; static void btrfs_read_locked_inode(struct inode *inode) { @@ -57,6 +61,9 @@ static void btrfs_read_locked_inode(struct inode *inode) break; #endif case S_IFREG: + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; break; case S_IFDIR: inode->i_op = &btrfs_dir_inode_operations; @@ -214,35 +221,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } -static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path) -{ - struct buffer_head *bh; - struct btrfs_node *node; - int i; - int nritems; - u64 objectid; - u64 item_objectid; - u64 blocknr; - int slot; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1]; - objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key); - nritems = btrfs_header_nritems(&node->header); - for (i = slot; i < nritems; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid != objectid) - break; - blocknr = btrfs_node_blockptr(node, i); - bh = sb_getblk(root->fs_info->sb, blocknr); - ll_rw_block(READ, 1, &bh); - brelse(bh); - } - -} - static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; @@ -269,21 +247,18 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) goto err; } advance = 0; - reada_leaves(root, &path); while(1) { leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; - if (advance) { - if (slot == nritems -1) { + if (advance || slot >= nritems) { + if (slot >= nritems -1) { ret = btrfs_next_leaf(root, &path); if (ret) break; leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; - if (path.nodes[1] && path.slots[1] == 0) - reada_leaves(root, &path); } else { slot++; path.slots[0]++; @@ -297,6 +272,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) continue; + + advance = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), @@ -524,6 +501,11 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, err = btrfs_add_nondir(trans, dentry, inode); if (err) drop_inode = 1; + else { + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + } dir->i_sb->s_dirt = 1; btrfs_end_transaction(trans, root); out_unlock: @@ -623,11 +605,124 @@ printk("btrfs sync_fs\n"); return 0; } +static int btrfs_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + int ret; + int err = 0; + u64 blocknr; + u64 extent_start = 0; + u64 extent_end = 0; + u64 objectid = inode->i_ino; + struct btrfs_path path; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_trans_handle *trans = NULL; + struct btrfs_file_extent_item *item; + struct btrfs_leaf *leaf; + struct btrfs_disk_key *found_key; + + btrfs_init_path(&path); + mutex_lock(&root->fs_info->fs_mutex); + if (create) + trans = btrfs_start_transaction(root, 1); + + + ret = btrfs_lookup_file_extent(trans, root, &path, + inode->i_ino, iblock, 1, 0); + if (ret < 0) { + btrfs_release_path(root, &path); + err = ret; + goto out; + } + + if (ret != 0) { + if (path.slots[0] == 0) { + btrfs_release_path(root, &path); + goto allocate; + } + path.slots[0]--; + } + + item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_file_extent_item); + leaf = btrfs_buffer_leaf(path.nodes[0]); + blocknr = btrfs_file_extent_disk_blocknr(item); + blocknr += btrfs_file_extent_offset(item); + + /* exact match found, use it */ + if (ret == 0) { + err = 0; + map_bh(result, inode->i_sb, blocknr); + btrfs_release_path(root, &path); + goto out; + } + + /* are we inside the extent that was found? */ + found_key = &leaf->items[path.slots[0]].key; + if (btrfs_disk_key_objectid(found_key) != objectid || + btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) { + extent_end = 0; + extent_start = 0; + btrfs_release_path(root, &path); + goto allocate; + } + + extent_start = btrfs_disk_key_offset(&leaf->items[path.slots[0]].key); + extent_start += btrfs_file_extent_offset(item); + extent_end = extent_start + btrfs_file_extent_num_blocks(item); + btrfs_release_path(root, &path); + if (iblock >= extent_start && iblock < extent_end) { + err = 0; + map_bh(result, inode->i_sb, blocknr + iblock - extent_start); + goto out; + } +allocate: + /* ok, create a new extent */ + if (!create) { + err = 0; + goto out; + } + ret = btrfs_alloc_file_extent(trans, root, objectid, iblock, + 1, extent_end, &blocknr); + if (ret) { + err = ret; + goto out; + } + map_bh(result, inode->i_sb, blocknr); + +out: + if (trans) + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return err; +} + +static int btrfs_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return block_prepare_write(page, from, to, btrfs_get_block); +} + static void btrfs_write_super(struct super_block *sb) { btrfs_sync_fs(sb, 1); } +static int btrfs_readpage(struct file *file, struct page *page) +{ + return mpage_readpage(page, btrfs_get_block); +} + +static int btrfs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, btrfs_get_block); +} + +static int btrfs_writepage(struct page *page, struct writeback_control *wbc) +{ + return block_write_full_page(page, btrfs_get_block, wbc); +} static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) @@ -667,6 +762,31 @@ static struct file_operations btrfs_dir_file_operations = { .readdir = btrfs_readdir, }; +static struct address_space_operations btrfs_aops = { + .readpage = btrfs_readpage, + .readpages = btrfs_readpages, + .writepage = btrfs_writepage, + .sync_page = block_sync_page, + .prepare_write = btrfs_prepare_write, + .commit_write = generic_commit_write, +}; + +static struct inode_operations btrfs_file_inode_operations = { + .truncate = NULL, +}; + +static struct file_operations btrfs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, + .mmap = generic_file_mmap, + .open = generic_file_open, + .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, +}; static int __init init_btrfs_fs(void) { From 6407bf6d7c449cbfb0a39d985194e265eda3baf4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Mar 2007 06:33:00 -0400 Subject: [PATCH 0075/2985] Btrfs: reference counts on data extents Signed-off-by: Chris Mason --- fs/btrfs/TODO | 3 ++ fs/btrfs/ctree.h | 9 +++-- fs/btrfs/extent-tree.c | 87 +++++++++++++++++++++++++++++++++--------- fs/btrfs/inode-map.c | 1 + fs/btrfs/super.c | 6 +-- 5 files changed, 82 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 2ca301b289a6..7aec75e9a37e 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,7 +8,10 @@ * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter +* make level a field in header +* add a block group to struct inode * Make directory hashing work on 32 bit +* Make sure nobh stuff is working properly for cows * Do actual block accounting * Check compat and incompat flags on the inode * Add virtual filesystems, mountable snapshots diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7b7120d3ab42..1a98952e0faf 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -9,10 +9,11 @@ struct btrfs_transaction; #define BTRFS_MAGIC "_BtRfS_M" -#define BTRFS_ROOT_TREE_OBJECTID 1 -#define BTRFS_EXTENT_TREE_OBJECTID 2 -#define BTRFS_INODE_MAP_OBJECTID 3 -#define BTRFS_FS_TREE_OBJECTID 4 +#define BTRFS_ROOT_TREE_OBJECTID 1ULL +#define BTRFS_EXTENT_TREE_OBJECTID 2ULL +#define BTRFS_INODE_MAP_OBJECTID 3ULL +#define BTRFS_FS_TREE_OBJECTID 4ULL +#define BTRFS_FIRST_FREE_OBJECTID 5ULL /* * we can actually store much bigger names, but lets not confuse the rest diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 82f6e9eed1d0..4d4fc48c0a31 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -13,7 +13,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr) + *root, u64 blocknr, u64 num_blocks) { struct btrfs_path path; int ret; @@ -29,7 +29,7 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = 1; + key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, 0, 1); if (ret != 0) @@ -48,7 +48,7 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root } static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u32 *refs) + *root, u64 blocknr, u64 num_blocks, u32 *refs) { struct btrfs_path path; int ret; @@ -57,7 +57,7 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item *item; btrfs_init_path(&path); key.objectid = blocknr; - key.offset = 1; + key.offset = num_blocks; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, @@ -76,17 +76,34 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, { u64 blocknr; struct btrfs_node *buf_node; + struct btrfs_leaf *buf_leaf; + struct btrfs_disk_key *key; + struct btrfs_file_extent_item *fi; int i; + int leaf; + int ret; if (!root->ref_cows) return 0; buf_node = btrfs_buffer_node(buf); - if (btrfs_is_leaf(buf_node)) - return 0; - + leaf = btrfs_is_leaf(buf_node); + buf_leaf = btrfs_buffer_leaf(buf); for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { - blocknr = btrfs_node_blockptr(buf_node, i); - inc_block_ref(trans, root, blocknr); + if (leaf) { + key = &buf_leaf->items[i].key; + if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf_leaf, i, + struct btrfs_file_extent_item); + ret = inc_block_ref(trans, root, + btrfs_file_extent_disk_blocknr(fi), + btrfs_file_extent_disk_num_blocks(fi)); + BUG_ON(ret); + } else { + blocknr = btrfs_node_blockptr(buf_node, i); + ret = inc_block_ref(trans, root, blocknr, 1); + BUG_ON(ret); + } } return 0; } @@ -469,6 +486,37 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } +static int drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *cur) +{ + struct btrfs_disk_key *key; + struct btrfs_leaf *leaf; + struct btrfs_file_extent_item *fi; + int i; + int nritems; + int ret; + + BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur))); + leaf = btrfs_buffer_leaf(cur); + nritems = btrfs_header_nritems(&leaf->header); + for (i = 0; i < nritems; i++) { + key = &leaf->items[i].key; + if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); + /* + * FIXME make sure to insert a trans record that + * repeats the snapshot del on crash + */ + ret = btrfs_free_extent(trans, root, + btrfs_file_extent_disk_blocknr(fi), + btrfs_file_extent_disk_num_blocks(fi), + 0); + BUG_ON(ret); + } + return 0; +} + /* * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. @@ -483,28 +531,33 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root u32 refs; ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr, - &refs); + 1, &refs); BUG_ON(ret); if (refs > 1) goto out; /* * walk down to the last node level and free all the leaves */ - while(*level > 0) { + while(*level >= 0) { cur = path->nodes[*level]; if (path->slots[*level] >= btrfs_header_nritems(btrfs_buffer_header(cur))) break; + if (*level == 0) { + ret = drop_leaf_ref(trans, root, cur); + BUG_ON(ret); + break; + } blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), path->slots[*level]); - ret = lookup_block_ref(trans, root, blocknr, &refs); - if (refs != 1 || *level == 1) { + ret = lookup_block_ref(trans, root, blocknr, 1, &refs); + BUG_ON(ret); + if (refs != 1) { path->slots[*level]++; ret = btrfs_free_extent(trans, root, blocknr, 1, 1); BUG_ON(ret); continue; } - BUG_ON(ret); next = read_tree_block(root, blocknr); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); @@ -513,8 +566,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[*level] = 0; } out: - ret = btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr, - 1, 1); + ret = btrfs_free_extent(trans, root, + path->nodes[*level]->b_blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -544,10 +597,10 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr, 1, 1); + BUG_ON(ret); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; - BUG_ON(ret); } } return 1; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index ad2d375b830d..1b2c5e043508 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -25,6 +25,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, if (fs_root->fs_info->last_inode_alloc_dirid == dirid) search_start = fs_root->fs_info->last_inode_alloc; + search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; search_key.flags = 0; btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2c2883f2856d..487da5a213c0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -700,7 +700,7 @@ out: static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { - return block_prepare_write(page, from, to, btrfs_get_block); + return nobh_prepare_write(page, from, to, btrfs_get_block); } static void btrfs_write_super(struct super_block *sb) @@ -721,7 +721,7 @@ static int btrfs_readpages(struct file *file, struct address_space *mapping, static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { - return block_write_full_page(page, btrfs_get_block, wbc); + return nobh_writepage(page, btrfs_get_block, wbc); } static int btrfs_get_sb(struct file_system_type *fs_type, @@ -768,7 +768,7 @@ static struct address_space_operations btrfs_aops = { .writepage = btrfs_writepage, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, - .commit_write = generic_commit_write, + .commit_write = nobh_commit_write, }; static struct inode_operations btrfs_file_inode_operations = { From 9a6f11ed8f421fb1cc7b37390e32316ff4701f5d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Mar 2007 09:06:38 -0400 Subject: [PATCH 0076/2985] Btrfs: split out level field in struct header Signed-off-by: Chris Mason --- fs/btrfs/TODO | 5 ----- fs/btrfs/ctree.c | 1 + fs/btrfs/ctree.h | 8 +++----- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 7aec75e9a37e..e9371385ccc0 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,12 +1,7 @@ * cleanup, add more error checking, get rid of BUG_ONs -* Make IO functions look more like the page cache * Fix ENOSPC handling -* make a real mkfs and superblock * Do checksumming -* Use a real check instead of mark_buffer_dirty -* Define FS objects in terms of different item types * Add block mapping tree (simple dm layer) -* Add simple tree locking (semaphore per tree) * Make allocator smarter * make level a field in header * add a block group to struct inode diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b33a6bfaf327..48c611948d11 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -753,6 +753,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root split_buffer = btrfs_alloc_free_block(trans, root); split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); + btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr); btrfs_set_header_generation(&split->header, trans->transid); btrfs_set_header_parentid(&split->header, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1a98952e0faf..a4ad39b58a4b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -59,7 +59,7 @@ struct btrfs_header { __le32 ham; __le16 nritems; __le16 flags; - /* generation flags to be added */ + u8 level; } __attribute__ ((__packed__)); #define BTRFS_MAX_LEVEL 8 @@ -648,15 +648,13 @@ static inline void btrfs_set_header_flags(struct btrfs_header *h, u16 val) static inline int btrfs_header_level(struct btrfs_header *h) { - return btrfs_header_flags(h) & (BTRFS_MAX_LEVEL - 1); + return h->level; } static inline void btrfs_set_header_level(struct btrfs_header *h, int level) { - u16 flags; BUG_ON(level > BTRFS_MAX_LEVEL); - flags = btrfs_header_flags(h) & ~(BTRFS_MAX_LEVEL - 1); - btrfs_set_header_flags(h, flags | level); + h->level = level; } static inline int btrfs_is_leaf(struct btrfs_node *n) From 71951f35a6e413f2bfbd41829af8cf10a890aeb6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Mar 2007 09:16:29 -0400 Subject: [PATCH 0077/2985] Btrfs: add generation field to file extent Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 13 +++++++++++++ fs/btrfs/file-item.c | 1 + 2 files changed, 14 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a4ad39b58a4b..c3fa12a6b59e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -192,6 +192,7 @@ struct btrfs_root_item { } __attribute__ ((__packed__)); struct btrfs_file_extent_item { + __le64 generation; /* * disk space consumed by the extent, checksum blocks are included * in these numbers @@ -764,6 +765,18 @@ static inline void btrfs_set_file_extent_disk_blocknr(struct e->disk_blocknr = cpu_to_le64(val); } +static inline u64 btrfs_file_extent_generation(struct btrfs_file_extent_item *e) +{ + return le64_to_cpu(e->generation); +} + +static inline void btrfs_set_file_extent_generation(struct + btrfs_file_extent_item *e, + u64 val) +{ + e->generation = cpu_to_le64(val); +} + static inline u64 btrfs_file_extent_disk_num_blocks(struct btrfs_file_extent_item *e) { diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 4a0367d702b7..09de270c5a52 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -32,6 +32,7 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_disk_num_blocks(item, ins.offset); btrfs_set_file_extent_offset(item, 0); btrfs_set_file_extent_num_blocks(item, ins.offset); + btrfs_set_file_extent_generation(item, trans->transid); mark_buffer_dirty(path.nodes[0]); *result = ins.objectid; btrfs_release_path(root, &path); From f4b9aa8d3b877d0a6044a6d6d9a44b29cab9e265 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Mar 2007 11:05:53 -0400 Subject: [PATCH 0078/2985] btrfs_truncate Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 25 +++++----- fs/btrfs/super.c | 107 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 114 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4d4fc48c0a31..176c6dc534ba 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -171,20 +171,21 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) struct btrfs_header *header; struct buffer_head *bh; - bh = sb_find_get_block(root->fs_info->sb, blocknr); - if (bh) { - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - root->fs_info->running_transaction->transid) { + if (!pending) { + bh = sb_find_get_block(root->fs_info->sb, blocknr); + if (bh) { + header = btrfs_buffer_header(bh); + if (btrfs_header_generation(header) == + root->fs_info->running_transaction->transid) { + brelse(bh); + return 0; + } brelse(bh); - return 0; } - brelse(bh); - } - if (pending) - err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); - else err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); + } else { + err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); + } BUG_ON(err); return 0; } @@ -223,6 +224,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); + mark_buffer_dirty(path.nodes[0]); if (refs == 0) { u64 super_blocks_used; @@ -240,7 +242,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) BUG(); } - mark_buffer_dirty(path.nodes[0]); btrfs_release_path(extent_root, &path); finish_current_insert(trans, extent_root); return ret; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 487da5a213c0..fd3d9d616ff9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -28,11 +28,15 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item *inode_item; struct btrfs_root *root = btrfs_sb(inode->i_sb); int ret; + btrfs_init_path(&path); + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); if (ret) { - make_bad_inode(inode); btrfs_release_path(root, &path); + mutex_unlock(&root->fs_info->fs_mutex); + make_bad_inode(inode); return; } inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), @@ -53,6 +57,7 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); btrfs_release_path(root, &path); + mutex_unlock(&root->fs_info->fs_mutex); switch (inode->i_mode & S_IFMT) { #if 0 default: @@ -151,20 +156,85 @@ error: return ret; } +static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + int ret; + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_disk_key *found_key; + struct btrfs_leaf *leaf; + struct btrfs_file_extent_item *fi; + u64 extent_start; + u64 extent_num_blocks; + + /* FIXME, add redo link to tree so we don't leak on crash */ + key.objectid = inode->i_ino; + key.offset = (u64)-1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + while(1) { + btrfs_init_path(&path); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + if (ret < 0) { + btrfs_release_path(root, &path); + goto error; + } + if (ret > 0) { + BUG_ON(path.slots[0] == 0); + path.slots[0]--; + } + leaf = btrfs_buffer_leaf(path.nodes[0]); + found_key = &leaf->items[path.slots[0]].key; + if (btrfs_disk_key_objectid(found_key) != inode->i_ino) + break; + if (btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) + break; + if (btrfs_disk_key_offset(found_key) < inode->i_size) + break; + /* FIXME: add extent truncation */ + if (btrfs_disk_key_offset(found_key) < inode->i_size) + break; + fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], + struct btrfs_file_extent_item); + extent_start = btrfs_file_extent_disk_blocknr(fi); + extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi); + key.offset = btrfs_disk_key_offset(found_key) - 1; + ret = btrfs_del_item(trans, root, &path); + BUG_ON(ret); + inode->i_blocks -= btrfs_file_extent_num_blocks(fi) >> 9; + btrfs_release_path(root, &path); + ret = btrfs_free_extent(trans, root, extent_start, + extent_num_blocks, 0); + BUG_ON(ret); + if (btrfs_disk_key_offset(found_key) == 0) + break; + } + btrfs_release_path(root, &path); + ret = 0; +error: + return ret; +} + static void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; struct btrfs_root *root = btrfs_sb(inode->i_sb); + int ret; + truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) { goto no_delete; } inode->i_size = 0; - if (inode->i_blocks) - WARN_ON(1); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + if (S_ISREG(inode->i_mode)) { + ret = btrfs_truncate_in_trans(trans, root, inode); + BUG_ON(ret); + } btrfs_free_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -173,7 +243,6 @@ no_delete: clear_inode(inode); } - static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ino_t *ino) { @@ -688,6 +757,8 @@ allocate: err = ret; goto out; } + inode->i_blocks += inode->i_sb->s_blocksize >> 9; + set_buffer_new(result); map_bh(result, inode->i_sb, blocknr); out: @@ -724,6 +795,30 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) return nobh_writepage(page, btrfs_get_block, wbc); } +static void btrfs_truncate(struct inode *inode) +{ + struct btrfs_root *root = btrfs_sb(inode->i_sb); + int ret; + struct btrfs_trans_handle *trans; + + if (!S_ISREG(inode->i_mode)) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + + nobh_truncate_page(inode->i_mapping, inode->i_size); + + /* FIXME, add redo link to tree so we don't leak on crash */ + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_truncate_in_trans(trans, root, inode); + BUG_ON(ret); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + mark_inode_dirty(inode); +} + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -772,7 +867,7 @@ static struct address_space_operations btrfs_aops = { }; static struct inode_operations btrfs_file_inode_operations = { - .truncate = NULL, + .truncate = btrfs_truncate, }; static struct file_operations btrfs_file_operations = { From 9773a788681db1f5c2701b7433737fdca61a14ba Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Mar 2007 11:26:26 -0400 Subject: [PATCH 0079/2985] Btrfs: byte offsets for file keys Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/file-item.c | 5 +++-- fs/btrfs/super.c | 7 +++++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c3fa12a6b59e..1897f3a65b4f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -899,5 +899,5 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, - u64 blocknr, u64 num_blocks, int mod); + u64 blocknr, int mod); #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 09de270c5a52..5230a44cb19b 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -26,6 +26,7 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, &path, &file_key, sizeof(*item)); + BUG_ON(ret); item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_disk_blocknr(item, ins.objectid); @@ -42,7 +43,7 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, - u64 blocknr, u64 num_blocks, int mod) + u64 offset, int mod) { int ret; struct btrfs_key file_key; @@ -50,7 +51,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int cow = mod != 0; file_key.objectid = objectid; - file_key.offset = blocknr; + file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index fd3d9d616ff9..f2f08189903d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -697,7 +697,8 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, ret = btrfs_lookup_file_extent(trans, root, &path, - inode->i_ino, iblock, 1, 0); + inode->i_ino, + iblock << inode->i_blkbits, 0); if (ret < 0) { btrfs_release_path(root, &path); err = ret; @@ -737,6 +738,7 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, } extent_start = btrfs_disk_key_offset(&leaf->items[path.slots[0]].key); + extent_start = extent_start >> inode->i_blkbits; extent_start += btrfs_file_extent_offset(item); extent_end = extent_start + btrfs_file_extent_num_blocks(item); btrfs_release_path(root, &path); @@ -751,7 +753,8 @@ allocate: err = 0; goto out; } - ret = btrfs_alloc_file_extent(trans, root, objectid, iblock, + ret = btrfs_alloc_file_extent(trans, root, objectid, + iblock << inode->i_blkbits, 1, extent_end, &blocknr); if (ret) { err = ret; From 5f443fd280c76a2170259cebaa4108c9daad6e1b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Mar 2007 13:42:32 -0400 Subject: [PATCH 0080/2985] btrfs_rmdir Signed-off-by: Chris Mason --- fs/btrfs/super.c | 106 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 93 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f2f08189903d..67659b6ce962 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -81,11 +81,12 @@ static void btrfs_read_locked_inode(struct inode *inode) return; } -static int btrfs_unlink(struct inode *dir, struct dentry *dentry) +static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, + struct dentry *dentry) { struct btrfs_path path; - struct btrfs_root *root; - struct btrfs_trans_handle *trans; const char *name = dentry->d_name.name; int name_len = dentry->d_name.len; int ret; @@ -93,10 +94,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) struct btrfs_dir_item *di; btrfs_init_path(&path); - root = btrfs_sb(dir->i_sb); - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_lookup_dir_item(trans, root, &path, dir->i_ino, name, name_len, -1); if (ret < 0) @@ -114,13 +111,98 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_ctime = dir->i_ctime; err: btrfs_release_path(root, &path); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); if (ret == 0) inode_dec_link_count(dentry->d_inode); return ret; } +static int btrfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct btrfs_root *root; + struct btrfs_trans_handle *trans; + int ret; + + root = btrfs_sb(dir->i_sb); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_unlink_trans(trans, root, dir, dentry); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + int err; + int ret; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_trans_handle *trans; + struct btrfs_disk_key *found_key; + struct btrfs_leaf *leaf; + + btrfs_init_path(&path); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + key.objectid = inode->i_ino; + key.offset = (u64)-1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + if (ret < 0) { + err = ret; + goto out; + } + + BUG_ON(ret == 0); + BUG_ON(path.slots[0] == 0); + path.slots[0]--; + leaf = btrfs_buffer_leaf(path.nodes[0]); + found_key = &leaf->items[path.slots[0]].key; + if (btrfs_disk_key_objectid(found_key) != inode->i_ino) { + err = -ENOENT; + goto out; + } + if (btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || + btrfs_disk_key_offset(found_key) != 2) { + err = -ENOTEMPTY; + goto out; + } + ret = btrfs_del_item(trans, root, &path); + BUG_ON(ret); + btrfs_release_path(root, &path); + key.offset = 1; + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + if (ret < 0) { + err = ret; + goto out; + } + if (ret > 0) { + err = -ENOTEMPTY; + goto out; + } + ret = btrfs_del_item(trans, root, &path); + if (ret) { + err = ret; + goto out; + } + btrfs_release_path(root, &path); + + /* now the directory is empty */ + err = btrfs_unlink_trans(trans, root, dir, dentry); + if (!err) { + inode->i_size = 0; + } +out: + mutex_unlock(&root->fs_info->fs_mutex); + ret = btrfs_end_transaction(trans, root); + if (ret && !err) + err = ret; + return err; +} + static int btrfs_free_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) @@ -191,9 +273,6 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, break; if (btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) break; - if (btrfs_disk_key_offset(found_key) < inode->i_size) - break; - /* FIXME: add extent truncation */ if (btrfs_disk_key_offset(found_key) < inode->i_size) break; fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), @@ -209,7 +288,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, ret = btrfs_free_extent(trans, root, extent_start, extent_num_blocks, 0); BUG_ON(ret); - if (btrfs_disk_key_offset(found_key) == 0) + if (key.offset + 1 == 0) break; } btrfs_release_path(root, &path); @@ -852,6 +931,7 @@ static struct inode_operations btrfs_dir_inode_operations = { .create = btrfs_create, .unlink = btrfs_unlink, .mkdir = btrfs_mkdir, + .rmdir = btrfs_rmdir, }; static struct file_operations btrfs_dir_file_operations = { From d98237b3ede7ab98892f7fa62201a13694c526e2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Mar 2007 13:57:48 -0400 Subject: [PATCH 0081/2985] Btrfs: use a btree inode instead of sb_getblk Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 174 ++++++++++++++++++++++++++++++++++++++--- fs/btrfs/disk-io.h | 5 +- fs/btrfs/extent-tree.c | 8 +- fs/btrfs/super.c | 8 +- fs/btrfs/transaction.c | 2 +- 6 files changed, 174 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1897f3a65b4f..b2ebed756c13 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -233,6 +233,7 @@ struct btrfs_fs_info { struct btrfs_super_block *disk_super; struct buffer_head *sb_buffer; struct super_block *sb; + struct inode *btree_inode; struct mutex trans_mutex; struct mutex fs_mutex; }; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 758a62aba063..970103f2cacd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,14 +1,17 @@ #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" + static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); - if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) + if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) { BUG(); + } if (root->node && btrfs_header_parentid(&node->header) != btrfs_header_parentid(btrfs_buffer_header(root->node))) { BUG(); @@ -16,25 +19,154 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) return 0; } -struct buffer_head *alloc_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) { - return sb_getblk(root->fs_info->sb, blocknr); + struct address_space *mapping = root->fs_info->btree_inode->i_mapping; + int blockbits = root->fs_info->sb->s_blocksize_bits; + unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); + struct page *page; + struct buffer_head *bh; + struct buffer_head *head; + struct buffer_head *ret = NULL; + + page = find_lock_page(mapping, index); + if (!page) + return NULL; + + if (!page_has_buffers(page)) + goto out_unlock; + + head = page_buffers(page); + bh = head; + do { + if (buffer_mapped(bh) && bh->b_blocknr == blocknr) { + ret = bh; + get_bh(bh); + goto out_unlock; + } + bh = bh->b_this_page; + } while (bh != head); +out_unlock: + unlock_page(page); + page_cache_release(page); + return ret; } -struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr) { - return sb_getblk(root->fs_info->sb, blocknr); + struct address_space *mapping = root->fs_info->btree_inode->i_mapping; + int blockbits = root->fs_info->sb->s_blocksize_bits; + unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); + struct page *page; + struct buffer_head *bh; + struct buffer_head *head; + struct buffer_head *ret = NULL; + u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); + page = grab_cache_page(mapping, index); + if (!page) + return NULL; + + wait_on_page_writeback(page); + if (!page_has_buffers(page)) + create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0); + head = page_buffers(page); + bh = head; + do { + if (!buffer_mapped(bh)) { + bh->b_bdev = root->fs_info->sb->s_bdev; + bh->b_blocknr = first_block; + set_buffer_mapped(bh); + } + if (bh->b_blocknr == blocknr) { + ret = bh; + get_bh(bh); + goto out_unlock; + } + bh = bh->b_this_page; + first_block++; + } while (bh != head); +out_unlock: + unlock_page(page); + page_cache_release(page); + return ret; } +static sector_t max_block(struct block_device *bdev) +{ + sector_t retval = ~((sector_t)0); + loff_t sz = i_size_read(bdev->bd_inode); + + if (sz) { + unsigned int size = block_size(bdev); + unsigned int sizebits = blksize_bits(size); + retval = (sz >> sizebits); + } + return retval; +} + +static int btree_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create) +{ + if (iblock >= max_block(inode->i_sb->s_bdev)) { + if (create) + return -EIO; + + /* + * for reads, we're just trying to fill a partial page. + * return a hole, they will have to call get_block again + * before they can fill it, and they will get -EIO at that + * time + */ + return 0; + } + bh->b_bdev = inode->i_sb->s_bdev; + bh->b_blocknr = iblock; + set_buffer_mapped(bh); + return 0; +} + +static int btree_writepage(struct page *page, struct writeback_control *wbc) +{ + return block_write_full_page(page, btree_get_block, wbc); +} + +static int btree_readpage(struct file * file, struct page * page) +{ + return block_read_full_page(page, btree_get_block); +} + +static struct address_space_operations btree_aops = { + .readpage = btree_readpage, + .writepage = btree_writepage, + .sync_page = block_sync_page, +}; + struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *buf = sb_bread(root->fs_info->sb, blocknr); + struct buffer_head *bh = NULL; - if (!buf) - return buf; - if (check_tree_block(root, buf)) + bh = btrfs_find_create_tree_block(root, blocknr); + if (!bh) + return bh; + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + goto fail; + } else { + unlock_buffer(bh); + } + if (check_tree_block(root, bh)) BUG(); - return buf; + return bh; +fail: + brelse(bh); + return NULL; + } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -101,11 +233,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); int ret; - /* FIXME: don't be stupid */ if (!btrfs_super_root(disk_super)) return NULL; init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); + sb_set_blocksize(sb, sb_buffer->b_size); fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -114,14 +246,30 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; fs_info->disk_super = disk_super; - fs_info->sb_buffer = sb_buffer; fs_info->sb = sb; + fs_info->btree_inode = new_inode(sb); + fs_info->btree_inode->i_ino = 1; + fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; + fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); + mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + + fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr); + + if (!fs_info->sb_buffer) + return NULL; + + brelse(sb_buffer); + sb_buffer = NULL; + disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + fs_info->disk_super = disk_super; + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); @@ -137,7 +285,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = find_and_setup_root(disk_super, tree_root, fs_info, BTRFS_FS_TREE_OBJECTID, root); BUG_ON(ret); - root->commit_root = root->node; get_bh(root->node); root->ref_cows = 1; @@ -191,6 +338,7 @@ int close_ctree(struct btrfs_root *root) root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); + iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 099f7eea0ec7..c2c38bda704d 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -21,7 +21,8 @@ static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) } struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); -struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr); +struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr); int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -37,5 +38,5 @@ int close_ctree(struct btrfs_root *root); void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int mkfs(int fd, u64 num_blocks, u32 blocksize); +struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 176c6dc534ba..b2ae8e768b6c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -172,8 +172,8 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) struct buffer_head *bh; if (!pending) { - bh = sb_find_get_block(root->fs_info->sb, blocknr); - if (bh) { + bh = btrfs_find_tree_block(root, blocknr); + if (bh && buffer_uptodate(bh)) { header = btrfs_buffer_header(bh); if (btrfs_header_generation(header) == root->fs_info->running_transaction->transid) { @@ -291,12 +291,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; - struct buffer_head *t; int pending_ret; int ret; if (root == extent_root) { - t = find_tree_block(root, blocknr); pin_down_block(root, blocknr, 1); return 0; } @@ -482,7 +480,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG(); return NULL; } - buf = find_tree_block(root, ins.objectid); + buf = btrfs_find_create_tree_block(root, ins.objectid); set_buffer_uptodate(buf); return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 67659b6ce962..8f07f462236a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -474,6 +474,8 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) disk_super = (struct btrfs_super_block *)bh->b_data; root = open_ctree(sb, bh, disk_super); sb->s_fs_info = root; + disk_super = root->fs_info->disk_super; + if (!root) { printk("btrfs: open_ctree failed\n"); return -EIO; @@ -734,15 +736,15 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_trans_handle *trans; struct btrfs_root *root; int ret; + root = btrfs_sb(sb); sb->s_dirt = 0; if (!wait) { - filemap_flush(sb->s_bdev->bd_inode->i_mapping); + filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } - filemap_write_and_wait(sb->s_bdev->bd_inode->i_mapping); + filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); - root = btrfs_sb(sb); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 46a596e345f0..b20fb53a0d27 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -74,7 +74,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); return 0; } From 87cbda5c1f54aba67b1e318a4a4824c9e9f16324 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Mar 2007 19:44:27 -0400 Subject: [PATCH 0082/2985] Btrfs: sha256 csums on metadata Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 8 ++++-- fs/btrfs/disk-io.c | 62 +++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/super.c | 4 +-- 3 files changed, 67 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b2ebed756c13..6ff87f44c5d8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -51,11 +51,11 @@ struct btrfs_key { * every tree block (leaf or node) starts with this header. */ struct btrfs_header { + __le32 csum[8]; u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; __le64 parentid; /* objectid of the tree root */ - __le32 csum; __le32 ham; __le16 nritems; __le16 flags; @@ -75,9 +75,10 @@ struct buffer_head; * it currently lacks any block count etc etc */ struct btrfs_super_block { + __le32 csum[8]; + /* the first 3 fields must match struct btrfs_header */ u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* this block number */ - __le32 csum; __le64 magic; __le32 blocksize; __le64 generation; @@ -217,6 +218,7 @@ struct btrfs_inode_map_item { struct btrfs_disk_key key; } __attribute__ ((__packed__)); +struct crypto_hash; struct btrfs_fs_info { struct btrfs_root *fs_root; struct btrfs_root *extent_root; @@ -236,6 +238,8 @@ struct btrfs_fs_info { struct inode *btree_inode; struct mutex trans_mutex; struct mutex fs_mutex; + struct crypto_hash *hash_tfm; + spinlock_t hash_lock; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 970103f2cacd..2afb7922b062 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -126,8 +128,51 @@ static int btree_get_block(struct inode *inode, sector_t iblock, return 0; } +static int csum_tree_block(struct btrfs_root * root, struct buffer_head *bh, + int verify) +{ + struct btrfs_node *node = btrfs_buffer_node(bh); + struct scatterlist sg; + struct crypto_hash *tfm = root->fs_info->hash_tfm; + struct hash_desc desc; + int ret; + char result[32]; + + desc.tfm = tfm; + desc.flags = 0; + sg_init_one(&sg, bh->b_data + 32, bh->b_size - 32); + spin_lock(&root->fs_info->hash_lock); + ret = crypto_hash_digest(&desc, &sg, bh->b_size - 32, result); + spin_unlock(&root->fs_info->hash_lock); + if (ret) { + printk("sha256 digest failed\n"); + } + if (verify) { + if (memcmp(node->header.csum, result, sizeof(result))) + printk("csum verify failed on %Lu\n", bh->b_blocknr); + return -EINVAL; + } else + memcpy(node->header.csum, result, sizeof(node->header.csum)); + return 0; +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { + struct buffer_head *bh; + struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); + struct buffer_head *head; + + if (!page_has_buffers(page)) { + create_empty_buffers(page, root->fs_info->sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + head = page_buffers(page); + bh = head; + do { + if (buffer_dirty(bh)) + csum_tree_block(root, bh, 0); + bh = bh->b_this_page; + } while (bh != head); return block_write_full_page(page, btree_get_block, wbc); } @@ -157,6 +202,7 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto fail; + csum_tree_block(root, bh, 1); } else { unlock_buffer(bh); } @@ -233,8 +279,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); int ret; - if (!btrfs_super_root(disk_super)) + if (!btrfs_super_root(disk_super)) { return NULL; + } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); sb_set_blocksize(sb, sb_buffer->b_size); @@ -252,6 +299,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); + fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); + if (!fs_info->hash_tfm) { + printk("failed to allocate sha256 hash\n"); + return NULL; + } + spin_lock_init(&fs_info->hash_lock); mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); @@ -262,9 +315,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr); - if (!fs_info->sb_buffer) + if (!fs_info->sb_buffer) { +printk("failed2\n"); return NULL; - + } brelse(sb_buffer); sb_buffer = NULL; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; @@ -300,6 +354,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root root->fs_info->tree_root->node->b_blocknr); lock_buffer(bh); clear_buffer_dirty(bh); + csum_tree_block(root, bh, 0); bh->b_end_io = end_buffer_write_sync; get_bh(bh); submit_bh(WRITE, bh); @@ -338,6 +393,7 @@ int close_ctree(struct btrfs_root *root) root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); + crypto_free_hash(root->fs_info->hash_tfm); iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8f07f462236a..151c1002d74e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -473,13 +473,13 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) } disk_super = (struct btrfs_super_block *)bh->b_data; root = open_ctree(sb, bh, disk_super); - sb->s_fs_info = root; - disk_super = root->fs_info->disk_super; if (!root) { printk("btrfs: open_ctree failed\n"); return -EIO; } + sb->s_fs_info = root; + disk_super = root->fs_info->disk_super; printk("read in super total blocks %Lu root %Lu\n", btrfs_super_total_blocks(disk_super), btrfs_super_root_dir(disk_super)); From 30ae8467483d7ab023b5e728bf7d74a575c78023 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 29 Mar 2007 09:59:15 -0400 Subject: [PATCH 0083/2985] Btrfs: btree address space fixes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2afb7922b062..f2c1dd40c087 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -69,7 +69,6 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, if (!page) return NULL; - wait_on_page_writeback(page); if (!page_has_buffers(page)) create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0); head = page_buffers(page); @@ -300,11 +299,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping->a_ops = &btree_aops; mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); - if (!fs_info->hash_tfm) { + spin_lock_init(&fs_info->hash_lock); + + if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { printk("failed to allocate sha256 hash\n"); return NULL; } - spin_lock_init(&fs_info->hash_lock); mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); @@ -394,6 +394,7 @@ int close_ctree(struct btrfs_root *root) btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); crypto_free_hash(root->fs_info->hash_tfm); + truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); From 75dfe3960e602e63ea42ac7a2a0520832b189ffa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 29 Mar 2007 11:56:46 -0400 Subject: [PATCH 0084/2985] btrfs_file_write -- first pass Signed-off-by: Chris Mason --- fs/btrfs/super.c | 222 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 213 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 151c1002d74e..7914b31f5bcd 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -755,7 +757,7 @@ printk("btrfs sync_fs\n"); return 0; } -static int btrfs_get_block(struct inode *inode, sector_t iblock, +static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { int ret; @@ -772,7 +774,6 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, struct btrfs_disk_key *found_key; btrfs_init_path(&path); - mutex_lock(&root->fs_info->fs_mutex); if (create) trans = btrfs_start_transaction(root, 1); @@ -848,6 +849,16 @@ allocate: out: if (trans) btrfs_end_transaction(trans, root); + return err; +} + +static int btrfs_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + int err; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_get_block_lock(inode, iblock, result, create); mutex_unlock(&root->fs_info->fs_mutex); return err; } @@ -855,8 +866,15 @@ out: static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { + WARN_ON(1); return nobh_prepare_write(page, from, to, btrfs_get_block); } +static int btrfs_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + WARN_ON(1); + return nobh_commit_write(file, page, from, to); +} static void btrfs_write_super(struct super_block *sb) { @@ -903,6 +921,196 @@ static void btrfs_truncate(struct inode *inode) mark_inode_dirty(inode); } +static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, + struct page **prepared_pages, + const char __user * buf) +{ + long page_fault = 0; + int i; + int offset = pos & (PAGE_CACHE_SIZE - 1); + + for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { + size_t count = min_t(size_t, + PAGE_CACHE_SIZE - offset, write_bytes); + struct page *page = prepared_pages[i]; + fault_in_pages_readable(buf, count); + + /* Copy data from userspace to the current page */ + kmap(page); + page_fault = __copy_from_user(page_address(page) + offset, + buf, count); + /* Flush processor's dcache for this page */ + flush_dcache_page(page); + kunmap(page); + buf += count; + write_bytes -= count; + + if (page_fault) + break; + } + return page_fault ? -EFAULT : 0; +} + +static void btrfs_drop_pages(struct page **pages, size_t num_pages) +{ + size_t i; + for (i = 0; i < num_pages; i++) { + if (!pages[i]) + break; + unlock_page(pages[i]); + mark_page_accessed(pages[i]); + page_cache_release(pages[i]); + } +} +static int dirty_and_release_pages(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct file *file, + struct page **pages, + size_t num_pages, + loff_t pos, + size_t write_bytes) +{ + int i; + int offset; + int err = 0; + int ret; + int this_write; + + for (i = 0; i < num_pages; i++) { + offset = pos & (PAGE_CACHE_SIZE -1); + this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + ret = nobh_commit_write(file, pages[i], offset, + offset + this_write); + pos += this_write; + if (ret) { + err = ret; + goto failed; + } + WARN_ON(this_write > write_bytes); + write_bytes -= this_write; + } +failed: + return err; +} + +static int prepare_pages(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct file *file, + struct page **pages, + size_t num_pages, + loff_t pos, + size_t write_bytes) +{ + int i; + unsigned long index = pos >> PAGE_CACHE_SHIFT; + struct inode *inode = file->f_path.dentry->d_inode; + int offset; + int err = 0; + int ret; + int this_write; + loff_t isize = i_size_read(inode); + + memset(pages, 0, num_pages * sizeof(struct page *)); + + for (i = 0; i < num_pages; i++) { + pages[i] = grab_cache_page(inode->i_mapping, index + i); + if (!pages[i]) { + err = -ENOMEM; + goto failed_release; + } + offset = pos & (PAGE_CACHE_SIZE -1); + this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + ret = nobh_prepare_write(pages[i], offset, + offset + this_write, + btrfs_get_block_lock); + pos += this_write; + if (ret) { + err = ret; + goto failed_truncate; + } + WARN_ON(this_write > write_bytes); + write_bytes -= this_write; + } + return 0; + +failed_release: + btrfs_drop_pages(pages, num_pages); + return err; + +failed_truncate: + btrfs_drop_pages(pages, num_pages); + if (pos > isize) + vmtruncate(inode, isize); + return err; +} + +static ssize_t btrfs_file_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + loff_t pos; + size_t num_written = 0; + int err = 0; + int ret = 0; + struct btrfs_trans_handle *trans; + struct inode *inode = file->f_path.dentry->d_inode; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct page *pages[1]; + + if (file->f_flags & O_DIRECT) + return -EINVAL; + pos = *ppos; + + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + current->backing_dev_info = inode->i_mapping->backing_dev_info; + err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + if (err) + goto out; + if (count == 0) + goto out; + err = remove_suid(file->f_path.dentry); + if (err) + goto out; + file_update_time(file); + mutex_lock(&inode->i_mutex); + while(count > 0) { + size_t offset = pos & (PAGE_CACHE_SIZE - 1); + size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); + size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + + ret = prepare_pages(trans, root, file, pages, num_pages, + pos, write_bytes); + BUG_ON(ret); + ret = btrfs_copy_from_user(pos, num_pages, + write_bytes, pages, buf); + BUG_ON(ret); + + mutex_unlock(&root->fs_info->fs_mutex); + + ret = dirty_and_release_pages(trans, root, file, pages, + num_pages, pos, write_bytes); + BUG_ON(ret); + btrfs_drop_pages(pages, num_pages); + + ret = btrfs_end_transaction(trans, root); + + buf += write_bytes; + count -= write_bytes; + pos += write_bytes; + num_written += write_bytes; + + balance_dirty_pages_ratelimited(inode->i_mapping); + cond_resched(); + } + mutex_unlock(&inode->i_mutex); +out: + *ppos = pos; + current->backing_dev_info = NULL; + return num_written ? num_written : err; +} + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -948,7 +1156,7 @@ static struct address_space_operations btrfs_aops = { .writepage = btrfs_writepage, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, - .commit_write = nobh_commit_write, + .commit_write = btrfs_commit_write, }; static struct inode_operations btrfs_file_inode_operations = { @@ -958,14 +1166,10 @@ static struct inode_operations btrfs_file_inode_operations = { static struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .aio_read = generic_file_aio_read, + .write = btrfs_file_write, .mmap = generic_file_mmap, .open = generic_file_open, - .sendfile = generic_file_sendfile, - .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, }; static int __init init_btrfs_fs(void) From f254e52c1ce550fdaa0d31f5e068f0d67c2485d4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 29 Mar 2007 15:15:27 -0400 Subject: [PATCH 0085/2985] Btrfs: verify csums on read Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 9 +-- fs/btrfs/ctree.h | 47 +++++++---- fs/btrfs/disk-io.c | 37 ++++++--- fs/btrfs/disk-io.h | 2 + fs/btrfs/file-item.c | 60 ++++++++++++++ fs/btrfs/super.c | 184 ++++++++++++++++++++++++++++++++++++------- 6 files changed, 279 insertions(+), 60 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 48c611948d11..1eb333e2e18b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -109,14 +109,14 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 1; if (k1.objectid < k2->objectid) return -1; - if (k1.flags > k2->flags) - return 1; - if (k1.flags < k2->flags) - return -1; if (k1.offset > k2->offset) return 1; if (k1.offset < k2->offset) return -1; + if (k1.flags > k2->flags) + return 1; + if (k1.flags < k2->flags) + return -1; return 0; } @@ -1165,7 +1165,6 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root BUG(); ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1); if (ret == 0) { - btrfs_release_path(root, path); return -EEXIST; } if (ret < 0) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6ff87f44c5d8..df1a025a771c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -21,6 +21,9 @@ struct btrfs_transaction; */ #define BTRFS_NAME_LEN 255 +/* 32 bytes in various csum fields */ +#define BTRFS_CSUM_SIZE 32 + /* * the key defines the order in the tree, and so it also defines (optimal) * block layout. objectid corresonds to the inode number. The flags @@ -37,21 +40,21 @@ struct btrfs_transaction; */ struct btrfs_disk_key { __le64 objectid; - __le32 flags; __le64 offset; + __le32 flags; } __attribute__ ((__packed__)); struct btrfs_key { u64 objectid; - u32 flags; u64 offset; + u32 flags; } __attribute__ ((__packed__)); /* * every tree block (leaf or node) starts with this header. */ struct btrfs_header { - __le32 csum[8]; + u8 csum[BTRFS_CSUM_SIZE]; u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; @@ -75,7 +78,7 @@ struct buffer_head; * it currently lacks any block count etc etc */ struct btrfs_super_block { - __le32 csum[8]; + u8 csum[BTRFS_CSUM_SIZE]; /* the first 3 fields must match struct btrfs_header */ u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* this block number */ @@ -147,7 +150,7 @@ struct btrfs_extent_item { } __attribute__ ((__packed__)); struct btrfs_inode_timespec { - __le32 sec; + __le64 sec; __le32 nsec; } __attribute__ ((__packed__)); @@ -214,6 +217,10 @@ struct btrfs_file_extent_item { __le64 num_blocks; } __attribute__ ((__packed__)); +struct btrfs_csum_item { + u8 csum[BTRFS_CSUM_SIZE]; +} __attribute__ ((__packed__)); + struct btrfs_inode_map_item { struct btrfs_disk_key key; } __attribute__ ((__packed__)); @@ -283,27 +290,32 @@ struct btrfs_root { * a (hopefully) huge chunk of disk */ #define BTRFS_EXTENT_DATA_KEY 4 +/* + * csum items have the checksums for data in the extents + */ +#define BTRFS_CSUM_ITEM_KEY 5 + /* * root items point to tree roots. There are typically in the root * tree used by the super block to find all the other trees */ -#define BTRFS_ROOT_ITEM_KEY 5 +#define BTRFS_ROOT_ITEM_KEY 6 /* * extent items are in the extent map tree. These record which blocks * are used, and how many references there are to each block */ -#define BTRFS_EXTENT_ITEM_KEY 6 +#define BTRFS_EXTENT_ITEM_KEY 7 /* * the inode map records which inode numbers are in use and where * they actually live on disk */ -#define BTRFS_INODE_MAP_ITEM_KEY 7 +#define BTRFS_INODE_MAP_ITEM_KEY 8 /* * string items are for debugging. They just store a short string of * data in the FS */ -#define BTRFS_STRING_ITEM_KEY 8 +#define BTRFS_STRING_ITEM_KEY 9 static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) { @@ -407,15 +419,15 @@ static inline void btrfs_set_inode_compat_flags(struct btrfs_inode_item *i, i->compat_flags = cpu_to_le16(val); } -static inline u32 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) +static inline u64 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) { - return le32_to_cpu(ts->sec); + return le64_to_cpu(ts->sec); } static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts, - u32 val) + u64 val) { - ts->sec = cpu_to_le32(val); + ts->sec = cpu_to_le64(val); } static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts) @@ -429,8 +441,6 @@ static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts, ts->nsec = cpu_to_le32(val); } - - static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) { return le64_to_cpu(ei->owner); @@ -905,4 +915,11 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, u64 blocknr, int mod); +int btrfs_csum_file_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 offset, + char *data, size_t len); +int btrfs_csum_verify_file_block(struct btrfs_root *root, + u64 objectid, u64 offset, + char *data, size_t len); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f2c1dd40c087..f5db2b936502 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -127,31 +127,46 @@ static int btree_get_block(struct inode *inode, sector_t iblock, return 0; } -static int csum_tree_block(struct btrfs_root * root, struct buffer_head *bh, - int verify) +int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, + char *result) { - struct btrfs_node *node = btrfs_buffer_node(bh); struct scatterlist sg; struct crypto_hash *tfm = root->fs_info->hash_tfm; struct hash_desc desc; int ret; - char result[32]; desc.tfm = tfm; desc.flags = 0; - sg_init_one(&sg, bh->b_data + 32, bh->b_size - 32); + sg_init_one(&sg, data, len); spin_lock(&root->fs_info->hash_lock); - ret = crypto_hash_digest(&desc, &sg, bh->b_size - 32, result); + ret = crypto_hash_digest(&desc, &sg, len, result); spin_unlock(&root->fs_info->hash_lock); if (ret) { printk("sha256 digest failed\n"); } + return ret; +} +static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, + int verify) +{ + char result[BTRFS_CSUM_SIZE]; + int ret; + struct btrfs_node *node; + + ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, + bh->b_size - BTRFS_CSUM_SIZE, result); + if (ret) + return ret; if (verify) { - if (memcmp(node->header.csum, result, sizeof(result))) - printk("csum verify failed on %Lu\n", bh->b_blocknr); - return -EINVAL; - } else - memcpy(node->header.csum, result, sizeof(node->header.csum)); + if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) { + printk("checksum verify failed on %lu\n", + bh->b_blocknr); + return 1; + } + } else { + node = btrfs_buffer_node(bh); + memcpy(&node->header.csum, result, BTRFS_CSUM_SIZE); + } return 0; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c2c38bda704d..f6998e2192ce 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -39,4 +39,6 @@ void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); +int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, + char *result); #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 5230a44cb19b..2d2c23ca7cbf 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -57,3 +57,63 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); return ret; } + +int btrfs_csum_file_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 offset, + char *data, size_t len) +{ + int ret; + struct btrfs_key file_key; + struct btrfs_path path; + struct btrfs_csum_item *item; + + btrfs_init_path(&path); + file_key.objectid = objectid; + file_key.offset = offset; + file_key.flags = 0; + btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + ret = btrfs_insert_empty_item(trans, root, &path, &file_key, + BTRFS_CSUM_SIZE); + if (ret != 0 && ret != -EEXIST) + goto fail; + item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_csum_item); + ret = 0; + ret = btrfs_csum_data(root, data, len, item->csum); + mark_buffer_dirty(path.nodes[0]); +fail: + btrfs_release_path(root, &path); + return ret; +} + +int btrfs_csum_verify_file_block(struct btrfs_root *root, + u64 objectid, u64 offset, + char *data, size_t len) +{ + int ret; + struct btrfs_key file_key; + struct btrfs_path path; + struct btrfs_csum_item *item; + char result[BTRFS_CSUM_SIZE]; + + btrfs_init_path(&path); + file_key.objectid = objectid; + file_key.offset = offset; + file_key.flags = 0; + btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + ret = btrfs_search_slot(NULL, root, &file_key, &path, 0, 0); + if (ret) + goto fail; + item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_csum_item); + ret = 0; + ret = btrfs_csum_data(root, data, len, result); + WARN_ON(ret); + if (memcmp(result, item->csum, BTRFS_CSUM_SIZE)) + ret = 1; +fail: + btrfs_release_path(root, &path); + return ret; +} + diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7914b31f5bcd..04428137d75f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -249,15 +249,16 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; - struct btrfs_file_extent_item *fi; - u64 extent_start; - u64 extent_num_blocks; + struct btrfs_file_extent_item *fi = NULL; + u64 extent_start = 0; + u64 extent_num_blocks = 0; + int found_extent; /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY); while(1) { btrfs_init_path(&path); ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); @@ -273,25 +274,32 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, found_key = &leaf->items[path.slots[0]].key; if (btrfs_disk_key_objectid(found_key) != inode->i_ino) break; - if (btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) + if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY && + btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) break; if (btrfs_disk_key_offset(found_key) < inode->i_size) break; - fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], - struct btrfs_file_extent_item); - extent_start = btrfs_file_extent_disk_blocknr(fi); - extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi); - key.offset = btrfs_disk_key_offset(found_key) - 1; + if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) { + fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], + struct btrfs_file_extent_item); + extent_start = btrfs_file_extent_disk_blocknr(fi); + extent_num_blocks = + btrfs_file_extent_disk_num_blocks(fi); + inode->i_blocks -= + btrfs_file_extent_num_blocks(fi) >> 9; + found_extent = 1; + } else { + found_extent = 0; + } ret = btrfs_del_item(trans, root, &path); BUG_ON(ret); - inode->i_blocks -= btrfs_file_extent_num_blocks(fi) >> 9; btrfs_release_path(root, &path); - ret = btrfs_free_extent(trans, root, extent_start, - extent_num_blocks, 0); - BUG_ON(ret); - if (key.offset + 1 == 0) - break; + if (found_extent) { + ret = btrfs_free_extent(trans, root, extent_start, + extent_num_blocks, 0); + BUG_ON(ret); + } } btrfs_release_path(root, &path); ret = 0; @@ -975,10 +983,24 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, int err = 0; int ret; int this_write; + struct inode *inode = file->f_path.dentry->d_inode; for (i = 0; i < num_pages; i++) { offset = pos & (PAGE_CACHE_SIZE -1); this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + /* FIXME, one block at a time */ + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_csum_file_block(trans, root, inode->i_ino, + pages[i]->index << PAGE_CACHE_SHIFT, + kmap(pages[i]), PAGE_CACHE_SIZE); + kunmap(pages[i]); + SetPageChecked(pages[i]); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + ret = nobh_commit_write(file, pages[i], offset, offset + this_write); pos += this_write; @@ -1022,7 +1044,7 @@ static int prepare_pages(struct btrfs_trans_handle *trans, this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); ret = nobh_prepare_write(pages[i], offset, offset + this_write, - btrfs_get_block_lock); + btrfs_get_block); pos += this_write; if (ret) { err = ret; @@ -1051,7 +1073,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, size_t num_written = 0; int err = 0; int ret = 0; - struct btrfs_trans_handle *trans; struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = btrfs_sb(inode->i_sb); struct page *pages[1]; @@ -1077,25 +1098,18 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - - ret = prepare_pages(trans, root, file, pages, num_pages, + ret = prepare_pages(NULL, root, file, pages, num_pages, pos, write_bytes); BUG_ON(ret); ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); - - ret = dirty_and_release_pages(trans, root, file, pages, + ret = dirty_and_release_pages(NULL, root, file, pages, num_pages, pos, write_bytes); BUG_ON(ret); btrfs_drop_pages(pages, num_pages); - ret = btrfs_end_transaction(trans, root); - buf += write_bytes; count -= write_bytes; pos += write_bytes; @@ -1111,6 +1125,118 @@ out: return num_written ? num_written : err; } +static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, + unsigned long offset, unsigned long size) +{ + char *kaddr; + unsigned long left, count = desc->count; + + if (size > count) + size = count; + + if (!PageChecked(page)) { + /* FIXME, do it per block */ + struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); + int ret = btrfs_csum_verify_file_block(root, + page->mapping->host->i_ino, + page->index << PAGE_CACHE_SHIFT, + kmap(page), PAGE_CACHE_SIZE); + if (ret) { + printk("failed to verify ino %lu page %lu\n", + page->mapping->host->i_ino, + page->index); + memset(page_address(page), 0, PAGE_CACHE_SIZE); + } + SetPageChecked(page); + kunmap(page); + } + /* + * Faults on the destination of a read are common, so do it before + * taking the kmap. + */ + if (!fault_in_pages_writeable(desc->arg.buf, size)) { + kaddr = kmap_atomic(page, KM_USER0); + left = __copy_to_user_inatomic(desc->arg.buf, + kaddr + offset, size); + kunmap_atomic(kaddr, KM_USER0); + if (left == 0) + goto success; + } + + /* Do it the slow way */ + kaddr = kmap(page); + left = __copy_to_user(desc->arg.buf, kaddr + offset, size); + kunmap(page); + + if (left) { + size -= left; + desc->error = -EFAULT; + } +success: + desc->count = count - size; + desc->written += size; + desc->arg.buf += size; + return size; +} + +/** + * btrfs_file_aio_read - filesystem read routine + * @iocb: kernel I/O control block + * @iov: io vector request + * @nr_segs: number of segments in the iovec + * @pos: current file position + */ +static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct file *filp = iocb->ki_filp; + ssize_t retval; + unsigned long seg; + size_t count; + loff_t *ppos = &iocb->ki_pos; + + count = 0; + for (seg = 0; seg < nr_segs; seg++) { + const struct iovec *iv = &iov[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. + */ + count += iv->iov_len; + if (unlikely((ssize_t)(count|iv->iov_len) < 0)) + return -EINVAL; + if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len)) + continue; + if (seg == 0) + return -EFAULT; + nr_segs = seg; + count -= iv->iov_len; /* This segment is no good */ + break; + } + retval = 0; + if (count) { + for (seg = 0; seg < nr_segs; seg++) { + read_descriptor_t desc; + + desc.written = 0; + desc.arg.buf = iov[seg].iov_base; + desc.count = iov[seg].iov_len; + if (desc.count == 0) + continue; + desc.error = 0; + do_generic_file_read(filp, ppos, &desc, + btrfs_read_actor); + retval += desc.written; + if (desc.error) { + retval = retval ?: desc.error; + break; + } + } + } + return retval; +} + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -1166,7 +1292,7 @@ static struct inode_operations btrfs_file_inode_operations = { static struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .aio_read = generic_file_aio_read, + .aio_read = btrfs_file_aio_read, .write = btrfs_file_write, .mmap = generic_file_mmap, .open = generic_file_open, From 22b0ebda6c63a1ad66b6a9e806bd226a4a03a049 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 30 Mar 2007 08:47:31 -0400 Subject: [PATCH 0086/2985] Btrfs: hunting slab corruption Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 ++ fs/btrfs/dir-item.c | 2 ++ fs/btrfs/disk-io.c | 16 +++++++++++++--- fs/btrfs/super.c | 7 ++++++- fs/btrfs/transaction.c | 6 ++++-- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1eb333e2e18b..b7f3fcb72c62 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -457,6 +457,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int ret; int level; + WARN_ON(p->nodes[0] != NULL); + WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); again: b = root->node; get_bh(b); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index c18f81797a88..18d3e168ad0a 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -35,6 +35,8 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); memcpy(name_ptr, name, name_len); + if (name_ptr + name_len > path.nodes[0]->b_data + 4096) + WARN_ON(1); mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f5db2b936502..6c010463b9db 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -50,6 +51,8 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) } while (bh != head); out_unlock: unlock_page(page); + if (ret) + touch_buffer(ret); page_cache_release(page); return ret; } @@ -65,6 +68,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, struct buffer_head *head; struct buffer_head *ret = NULL; u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); + page = grab_cache_page(mapping, index); if (!page) return NULL; @@ -89,6 +93,8 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, } while (bh != head); out_unlock: unlock_page(page); + if (ret) + touch_buffer(ret); page_cache_release(page); return ret; } @@ -139,7 +145,7 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, desc.flags = 0; sg_init_one(&sg, data, len); spin_lock(&root->fs_info->hash_lock); - ret = crypto_hash_digest(&desc, &sg, len, result); + ret = crypto_hash_digest(&desc, &sg, 1, result); spin_unlock(&root->fs_info->hash_lock); if (ret) { printk("sha256 digest failed\n"); @@ -153,6 +159,7 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, int ret; struct btrfs_node *node; + return 0; ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, bh->b_size - BTRFS_CSUM_SIZE, result); if (ret) @@ -165,17 +172,17 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, } } else { node = btrfs_buffer_node(bh); - memcpy(&node->header.csum, result, BTRFS_CSUM_SIZE); + memcpy(node->header.csum, result, BTRFS_CSUM_SIZE); } return 0; } static int btree_writepage(struct page *page, struct writeback_control *wbc) { +#if 0 struct buffer_head *bh; struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); struct buffer_head *head; - if (!page_has_buffers(page)) { create_empty_buffers(page, root->fs_info->sb->s_blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); @@ -187,6 +194,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) csum_tree_block(root, bh, 0); bh = bh->b_this_page; } while (bh != head); +#endif return block_write_full_page(page, btree_get_block, wbc); } @@ -312,6 +320,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + insert_inode_hash(fs_info->btree_inode); + mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 04428137d75f..ab5419eea93a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -361,13 +361,16 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode; + struct btrfs_root *root = btrfs_sb(dir->i_sb); ino_t ino; int ret; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); + mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &ino); + mutex_unlock(&root->fs_info->fs_mutex); if (ret < 0) return ERR_PTR(ret); inode = NULL; @@ -395,6 +398,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) unsigned char d_type = DT_UNKNOWN; int over = 0; + mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); @@ -446,6 +450,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) ret = 0; err: btrfs_release_path(root, &path); + mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -667,8 +672,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; } dir->i_sb->s_dirt = 1; - btrfs_end_transaction(trans, root); out_unlock: + btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { inode_dec_link_count(inode); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b20fb53a0d27..a146463c5049 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -197,8 +197,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); - BUG_ON(ret); root->fs_info->generation = root->root_key.offset + 1; ret = btrfs_end_transaction(trans, root); BUG_ON(ret); - printk("at free, total trans %d\n", total_trans); + BUG_ON(ret); + root->fs_info->generation = root->root_key.offset + 1; + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); } return ret; From d6025579531b7ea170ba283b171ff7a6bf7d0e12 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 30 Mar 2007 14:27:56 -0400 Subject: [PATCH 0087/2985] Btrfs: corruption hunt continues Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 9 +- fs/btrfs/ctree.c | 201 +++++++++++++++++++++++------------------ fs/btrfs/ctree.h | 32 +++++++ fs/btrfs/dir-item.c | 6 +- fs/btrfs/disk-io.c | 20 +++- fs/btrfs/extent-tree.c | 8 +- fs/btrfs/file-item.c | 4 +- fs/btrfs/inode-map.c | 2 +- fs/btrfs/root-tree.c | 7 +- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.c | 1 + 11 files changed, 187 insertions(+), 105 deletions(-) diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 43807cc3cce7..845422368f48 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -16,12 +16,14 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) bits = radix_tree_lookup(radix, slot); if (!bits) { - bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOIO); + bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOFS); if (!bits) return -ENOMEM; memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); bits[0] = slot; + radix_tree_preload(GFP_NOFS); ret = radix_tree_insert(radix, slot, bits); + radix_tree_preload_end(); if (ret) return ret; } @@ -59,7 +61,7 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) if (!bits) return 0; clear_bit(bit_slot, bits + 1); - +#if 0 for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) { if (bits[i]) { empty = 0; @@ -69,8 +71,11 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) if (empty) { bits = radix_tree_delete(radix, slot); + synchronize_rcu(); BUG_ON(!bits); + kfree(bits); } +#endif return 0; } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b7f3fcb72c62..a0dfa2d6cb9b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -51,7 +51,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); btrfs_set_header_generation(&cow_node->header, trans->transid); *cow_ret = cow; - mark_buffer_dirty(cow); + btrfs_mark_buffer_dirty(cow); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; @@ -62,7 +62,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root } else { btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, cow->b_blocknr); - mark_buffer_dirty(parent); + btrfs_mark_buffer_dirty(parent); btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); @@ -312,11 +312,12 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(!child); root->node = child; path->nodes[level] = NULL; + clean_tree_block(trans, root, mid_buf); + wait_on_buffer(mid_buf); /* once for the path */ btrfs_block_release(root, mid_buf); /* once for the root ptr */ btrfs_block_release(root, mid_buf); - clean_tree_block(trans, root, mid_buf); return btrfs_free_extent(trans, root, blocknr, 1, 1); } parent = btrfs_buffer_node(parent_buf); @@ -351,8 +352,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; if (btrfs_header_nritems(&right->header) == 0) { u64 blocknr = right_buf->b_blocknr; - btrfs_block_release(root, right_buf); clean_tree_block(trans, root, right_buf); + wait_on_buffer(right_buf); + btrfs_block_release(root, right_buf); right_buf = NULL; right = NULL; wret = del_ptr(trans, root, path, level + 1, pslot + @@ -363,10 +365,11 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; } else { - memcpy(&parent->ptrs[pslot + 1].key, - &right->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - mark_buffer_dirty(parent_buf); + btrfs_memcpy(root, parent, + &parent->ptrs[pslot + 1].key, + &right->ptrs[0].key, + sizeof(struct btrfs_disk_key)); + btrfs_mark_buffer_dirty(parent_buf); } } if (btrfs_header_nritems(&mid->header) == 1) { @@ -388,8 +391,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ u64 blocknr = mid_buf->b_blocknr; - btrfs_block_release(root, mid_buf); clean_tree_block(trans, root, mid_buf); + wait_on_buffer(mid_buf); + btrfs_block_release(root, mid_buf); mid_buf = NULL; mid = NULL; wret = del_ptr(trans, root, path, level + 1, pslot); @@ -400,9 +404,10 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; } else { /* update the parent key to reflect our changes */ - memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - mark_buffer_dirty(parent_buf); + btrfs_memcpy(root, parent, + &parent->ptrs[pslot].key, &mid->ptrs[0].key, + sizeof(struct btrfs_disk_key)); + btrfs_mark_buffer_dirty(parent_buf); } /* update the path */ @@ -544,8 +549,8 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root if (!path->nodes[i]) break; t = btrfs_buffer_node(path->nodes[i]); - memcpy(&t->ptrs[tslot].key, key, sizeof(*key)); - mark_buffer_dirty(path->nodes[i]); + btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key)); + btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; } @@ -580,17 +585,17 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root if (src_nritems < push_items) push_items = src_nritems; - memcpy(dst->ptrs + dst_nritems, src->ptrs, - push_items * sizeof(struct btrfs_key_ptr)); + btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs, + push_items * sizeof(struct btrfs_key_ptr)); if (push_items < src_nritems) { - memmove(src->ptrs, src->ptrs + push_items, + btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items, (src_nritems - push_items) * sizeof(struct btrfs_key_ptr)); } btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - mark_buffer_dirty(src_buf); - mark_buffer_dirty(dst_buf); + btrfs_mark_buffer_dirty(src_buf); + btrfs_mark_buffer_dirty(dst_buf); return ret; } @@ -629,16 +634,18 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct if (max_push < push_items) push_items = max_push; - memmove(dst->ptrs + push_items, dst->ptrs, - dst_nritems * sizeof(struct btrfs_key_ptr)); - memcpy(dst->ptrs, src->ptrs + src_nritems - push_items, - push_items * sizeof(struct btrfs_key_ptr)); + btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs, + dst_nritems * sizeof(struct btrfs_key_ptr)); + + btrfs_memcpy(root, dst, dst->ptrs, + src->ptrs + src_nritems - push_items, + push_items * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - mark_buffer_dirty(src_buf); - mark_buffer_dirty(dst_buf); + btrfs_mark_buffer_dirty(src_buf); + btrfs_mark_buffer_dirty(dst_buf); return ret; } @@ -674,10 +681,11 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root lower_key = &((struct btrfs_leaf *)lower)->items[0].key; else lower_key = &lower->ptrs[0].key; - memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); + btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key, + sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); - mark_buffer_dirty(t); + btrfs_mark_buffer_dirty(t); /* the super has an extra ref to root->node */ btrfs_block_release(root, root->node); @@ -712,13 +720,15 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) BUG(); if (slot != nritems) { - memmove(lower->ptrs + slot + 1, lower->ptrs + slot, - (nritems - slot) * sizeof(struct btrfs_key_ptr)); + btrfs_memmove(root, lower, lower->ptrs + slot + 1, + lower->ptrs + slot, + (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key)); + btrfs_memcpy(root, lower, &lower->ptrs[slot].key, + key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); - mark_buffer_dirty(path->nodes[level]); + btrfs_mark_buffer_dirty(path->nodes[level]); return 0; } @@ -761,14 +771,14 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_parentid(&split->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); mid = (c_nritems + 1) / 2; - memcpy(split->ptrs, c->ptrs + mid, - (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); + btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid, + (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(&split->header, c_nritems - mid); btrfs_set_header_nritems(&c->header, mid); ret = 0; - mark_buffer_dirty(t); - mark_buffer_dirty(split_buffer); + btrfs_mark_buffer_dirty(t); + btrfs_mark_buffer_dirty(split_buffer); wret = insert_ptr(trans, root, path, &split->ptrs[0].key, split_buffer->b_blocknr, path->slots[level + 1] + 1, level + 1); @@ -875,17 +885,22 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root push_space = btrfs_item_end(left->items + left_nritems - push_items); push_space -= leaf_data_end(root, left); /* make room in the right data area */ - memmove(btrfs_leaf_data(right) + leaf_data_end(root, right) - - push_space, btrfs_leaf_data(right) + leaf_data_end(root, right), - BTRFS_LEAF_DATA_SIZE(root) - leaf_data_end(root, right)); + btrfs_memmove(root, right, btrfs_leaf_data(right) + + leaf_data_end(root, right) - push_space, + btrfs_leaf_data(right) + + leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) - + leaf_data_end(root, right)); /* copy from the left data area */ - memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space, - btrfs_leaf_data(left) + leaf_data_end(root, left), push_space); - memmove(right->items + push_items, right->items, + btrfs_memcpy(root, right, btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(left) + leaf_data_end(root, left), + push_space); + btrfs_memmove(root, right, right->items + push_items, right->items, right_nritems * sizeof(struct btrfs_item)); /* copy the items from left to right */ - memcpy(right->items, left->items + left_nritems - push_items, - push_items * sizeof(struct btrfs_item)); + btrfs_memcpy(root, right, right->items, left->items + + left_nritems - push_items, + push_items * sizeof(struct btrfs_item)); /* update the item pointers */ right_nritems += push_items; @@ -899,11 +914,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root left_nritems -= push_items; btrfs_set_header_nritems(&left->header, left_nritems); - mark_buffer_dirty(left_buf); - mark_buffer_dirty(right_buf); - memcpy(&upper_node->ptrs[slot + 1].key, + btrfs_mark_buffer_dirty(left_buf); + btrfs_mark_buffer_dirty(right_buf); + btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); - mark_buffer_dirty(upper); + btrfs_mark_buffer_dirty(upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { @@ -977,14 +992,16 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } /* push data from right to left */ - memcpy(left->items + btrfs_header_nritems(&left->header), - right->items, push_items * sizeof(struct btrfs_item)); + btrfs_memcpy(root, left, left->items + + btrfs_header_nritems(&left->header), + right->items, push_items * sizeof(struct btrfs_item)); push_space = BTRFS_LEAF_DATA_SIZE(root) - btrfs_item_offset(right->items + push_items -1); - memcpy(btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space, - btrfs_leaf_data(right) + - btrfs_item_offset(right->items + push_items - 1), - push_space); + btrfs_memcpy(root, left, btrfs_leaf_data(left) + + leaf_data_end(root, left) - push_space, + btrfs_leaf_data(right) + + btrfs_item_offset(right->items + push_items - 1), + push_space); old_left_nritems = btrfs_header_nritems(&left->header); BUG_ON(old_left_nritems < 0); @@ -1000,10 +1017,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root /* fixup right node */ push_space = btrfs_item_offset(right->items + push_items - 1) - leaf_data_end(root, right); - memmove(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - - push_space, btrfs_leaf_data(right) + - leaf_data_end(root, right), push_space); - memmove(right->items, right->items + push_items, + btrfs_memmove(root, right, btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(right) + + leaf_data_end(root, right), push_space); + btrfs_memmove(root, right, right->items, right->items + push_items, (btrfs_header_nritems(&right->header) - push_items) * sizeof(struct btrfs_item)); btrfs_set_header_nritems(&right->header, @@ -1017,8 +1035,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root push_space = btrfs_item_offset(right->items + i); } - mark_buffer_dirty(t); - mark_buffer_dirty(right_buf); + btrfs_mark_buffer_dirty(t); + btrfs_mark_buffer_dirty(right_buf); wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) @@ -1110,11 +1128,12 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_header_parentid(btrfs_buffer_header(root->node))); data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(root, l); - memcpy(right->items, l->items + mid, - (nritems - mid) * sizeof(struct btrfs_item)); - memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - - data_copy_size, btrfs_leaf_data(l) + - leaf_data_end(root, l), data_copy_size); + btrfs_memcpy(root, right, right->items, l->items + mid, + (nritems - mid) * sizeof(struct btrfs_item)); + btrfs_memcpy(root, right, + btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - + data_copy_size, btrfs_leaf_data(l) + + leaf_data_end(root, l), data_copy_size); rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - btrfs_item_end(l->items + mid); @@ -1129,8 +1148,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root right_buffer->b_blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; - mark_buffer_dirty(right_buffer); - mark_buffer_dirty(l_buf); + btrfs_mark_buffer_dirty(right_buffer); + btrfs_mark_buffer_dirty(l_buf); BUG_ON(path->slots[0] != slot); if (mid <= slot) { btrfs_block_release(root, path->nodes[0]); @@ -1200,22 +1219,23 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root } /* shift the items */ - memmove(leaf->items + slot + 1, leaf->items + slot, - (nritems - slot) * sizeof(struct btrfs_item)); + btrfs_memmove(root, leaf, leaf->items + slot + 1, + leaf->items + slot, + (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ - memmove(btrfs_leaf_data(leaf) + data_end - data_size, - btrfs_leaf_data(leaf) + - data_end, old_data - data_end); + btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + data_end - data_size, btrfs_leaf_data(leaf) + + data_end, old_data - data_end); data_end = old_data; } /* setup the item for the new data */ - memcpy(&leaf->items[slot].key, &disk_key, - sizeof(struct btrfs_disk_key)); + btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key, + sizeof(struct btrfs_disk_key)); btrfs_set_item_offset(leaf->items + slot, data_end - data_size); btrfs_set_item_size(leaf->items + slot, data_size); btrfs_set_header_nritems(&leaf->header, nritems + 1); - mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf_buf); ret = 0; if (slot == 0) @@ -1245,8 +1265,9 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root if (!ret) { ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], u8); - memcpy(ptr, data, data_size); - mark_buffer_dirty(path.nodes[0]); + btrfs_memcpy(root, path.nodes[0]->b_data, + ptr, data, data_size); + btrfs_mark_buffer_dirty(path.nodes[0]); } btrfs_release_path(root, &path); return ret; @@ -1271,8 +1292,10 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, node = btrfs_buffer_node(parent); nritems = btrfs_header_nritems(&node->header); if (slot != nritems -1) { - memmove(node->ptrs + slot, node->ptrs + slot + 1, - sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); + btrfs_memmove(root, node, node->ptrs + slot, + node->ptrs + slot + 1, + sizeof(struct btrfs_key_ptr) * + (nritems - slot - 1)); } nritems--; btrfs_set_header_nritems(&node->header, nritems); @@ -1287,7 +1310,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } - mark_buffer_dirty(parent); + btrfs_mark_buffer_dirty(parent); return ret; } @@ -1317,16 +1340,18 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (slot != nritems - 1) { int i; int data_end = leaf_data_end(root, leaf); - memmove(btrfs_leaf_data(leaf) + data_end + dsize, - btrfs_leaf_data(leaf) + data_end, - doff - data_end); + btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + data_end + dsize, + btrfs_leaf_data(leaf) + data_end, + doff - data_end); for (i = slot + 1; i < nritems; i++) { u32 ioff = btrfs_item_offset(leaf->items + i); btrfs_set_item_offset(leaf->items + i, ioff + dsize); } - memmove(leaf->items + slot, leaf->items + slot + 1, - sizeof(struct btrfs_item) * - (nritems - slot - 1)); + btrfs_memmove(root, leaf, leaf->items + slot, + leaf->items + slot + 1, + sizeof(struct btrfs_item) * + (nritems - slot - 1)); } btrfs_set_header_nritems(&leaf->header, nritems - 1); nritems--; @@ -1336,6 +1361,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_set_header_level(&leaf->header, 0); } else { clean_tree_block(trans, root, leaf_buf); + wait_on_buffer(leaf_buf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; @@ -1373,6 +1399,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_header_nritems(&leaf->header) == 0) { u64 blocknr = leaf_buf->b_blocknr; clean_tree_block(trans, root, leaf_buf); + wait_on_buffer(leaf_buf); wret = del_ptr(trans, root, path, 1, slot); if (wret) ret = wret; @@ -1382,11 +1409,11 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } else { - mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf_buf); btrfs_block_release(root, leaf_buf); } } else { - mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf_buf); } } return ret; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index df1a025a771c..9ec0d65ebe9b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2,6 +2,7 @@ #define __BTRFS__ #include +#include #include "bit-radix.h" struct btrfs_trans_handle; @@ -834,6 +835,37 @@ static inline struct btrfs_root *btrfs_sb(struct super_block *sb) return sb->s_fs_info; } +static inline void btrfs_check_bounds(void *vptr, size_t len, + void *vcontainer, size_t container_len) +{ + char *ptr = vptr; + char *container = vcontainer; + WARN_ON(ptr < container); + WARN_ON(ptr + len > container + container_len); +} + +static inline void btrfs_memcpy(struct btrfs_root *root, + void *dst_block, + void *dst, const void *src, size_t nr) +{ + btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); + memcpy(dst, src, nr); +} + +static inline void btrfs_memmove(struct btrfs_root *root, + void *dst_block, + void *dst, void *src, size_t nr) +{ + btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); + memmove(dst, src, nr); +} + +static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) +{ + WARN_ON(!atomic_read(&bh->b_count)); + mark_buffer_dirty(bh); +} + /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 18d3e168ad0a..f55c89472ac0 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -34,10 +34,8 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_flags(dir_item, 0); btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); - memcpy(name_ptr, name, name_len); - if (name_ptr + name_len > path.nodes[0]->b_data + 4096) - WARN_ON(1); - mark_buffer_dirty(path.nodes[0]); + btrfs_memcpy(root, path.nodes[0]->b_data, name_ptr, name, name_len); + btrfs_mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c010463b9db..bb133104e2e9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -8,6 +8,17 @@ #include "disk-io.h" #include "transaction.h" +#define PATTERN 0xDEADBEEFUL +static inline void check_pattern(struct buffer_head *buf) +{ + if (buf->b_private != (void *)PATTERN) + WARN_ON(1); +} + +static inline void set_pattern(struct buffer_head *buf) +{ + buf->b_private = (void *)PATTERN; +} static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { @@ -51,8 +62,10 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) } while (bh != head); out_unlock: unlock_page(page); - if (ret) + if (ret) { touch_buffer(ret); + check_pattern(ret); + } page_cache_release(page); return ret; } @@ -82,6 +95,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh->b_bdev = root->fs_info->sb->s_bdev; bh->b_blocknr = first_block; set_buffer_mapped(bh); + set_pattern(bh); } if (bh->b_blocknr == blocknr) { ret = bh; @@ -225,6 +239,7 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) if (!buffer_uptodate(bh)) goto fail; csum_tree_block(root, bh, 1); + set_pattern(bh); } else { unlock_buffer(bh); } @@ -240,6 +255,7 @@ fail: int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { + WARN_ON(atomic_read(&buf->b_count) == 0); mark_buffer_dirty(buf); return 0; } @@ -247,6 +263,7 @@ int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { + WARN_ON(atomic_read(&buf->b_count) == 0); clear_buffer_dirty(buf); return 0; } @@ -431,6 +448,7 @@ int close_ctree(struct btrfs_root *root) void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { + check_pattern(buf); brelse(buf); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b2ae8e768b6c..c4194dab7a33 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -39,7 +39,7 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); btrfs_release_path(root->fs_info->extent_root, &path); finish_current_insert(trans, root->fs_info->extent_root); @@ -177,10 +177,10 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) header = btrfs_buffer_header(bh); if (btrfs_header_generation(header) == root->fs_info->running_transaction->transid) { - brelse(bh); + btrfs_block_release(root, bh); return 0; } - brelse(bh); + btrfs_block_release(root, bh); } err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); } else { @@ -224,7 +224,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); if (refs == 0) { u64 super_blocks_used; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 2d2c23ca7cbf..c3992b7b0c60 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -34,7 +34,7 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_offset(item, 0); btrfs_set_file_extent_num_blocks(item, ins.offset); btrfs_set_file_extent_generation(item, trans->transid); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); *result = ins.objectid; btrfs_release_path(root, &path); return 0; @@ -81,7 +81,7 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_csum_item); ret = 0; ret = btrfs_csum_data(root, data, len, item->csum); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); fail: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 1b2c5e043508..fa4adb81b149 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -109,7 +109,7 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(inode_root, &path); return ret; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a821b5d1e233..f37dab87de16 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -45,6 +45,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_leaf *l; int ret; int slot; + struct btrfs_root_item *update_item; btrfs_init_path(&path); ret = btrfs_search_slot(trans, root, key, &path, 0, 1); @@ -53,9 +54,9 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret != 0); l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; - memcpy(btrfs_item_ptr(l, slot, struct btrfs_root_item), item, - sizeof(*item)); - mark_buffer_dirty(path.nodes[0]); + update_item = btrfs_item_ptr(l, slot, struct btrfs_root_item); + btrfs_memcpy(root, l, update_item, item, sizeof(*item)); + btrfs_mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ab5419eea93a..894a70bf26d2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -557,7 +557,7 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_inode_item); fill_inode_item(inode_item, inode); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); failed: btrfs_release_path(root, &path); return 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a146463c5049..84c4e278ce2e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -66,6 +66,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, cur_trans->num_writers--; put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); + memset(trans, 0, sizeof(*trans)); kfree(trans); return 0; } From 2c90e5d658424bc71b111eb5a972240d5d06fe86 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 10:50:19 -0400 Subject: [PATCH 0088/2985] Btrfs: still corruption hunting Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 7 +- fs/btrfs/btrfs_inode.h | 14 ++++ fs/btrfs/ctree.c | 42 ++++++++---- fs/btrfs/ctree.h | 3 + fs/btrfs/disk-io.c | 84 ++++++++++++------------ fs/btrfs/disk-io.h | 4 +- fs/btrfs/extent-tree.c | 18 +++-- fs/btrfs/super.c | 146 ++++++++++++++++++++++++++++++++++++----- fs/btrfs/transaction.c | 39 +++++++---- fs/btrfs/transaction.h | 3 + 10 files changed, 265 insertions(+), 95 deletions(-) create mode 100644 fs/btrfs/btrfs_inode.h diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 845422368f48..a50a1c44eabd 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -4,6 +4,7 @@ #define BIT_ARRAY_BYTES 256 #define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8) +extern struct kmem_cache *btrfs_bit_radix_cachep; int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) { unsigned long *bits; @@ -16,7 +17,7 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) bits = radix_tree_lookup(radix, slot); if (!bits) { - bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOFS); + bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS); if (!bits) return -ENOMEM; memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); @@ -68,12 +69,10 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) break; } } - if (empty) { bits = radix_tree_delete(radix, slot); - synchronize_rcu(); BUG_ON(!bits); - kfree(bits); + kmem_cache_free(btrfs_bit_radix_cachep, bits); } #endif return 0; diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h new file mode 100644 index 000000000000..e159841650a4 --- /dev/null +++ b/fs/btrfs/btrfs_inode.h @@ -0,0 +1,14 @@ +#ifndef __BTRFS_I__ +#define __BTRFS_I__ + +struct btrfs_inode { + u32 magic; + struct inode vfs_inode; + u32 magic2; +}; +static inline struct btrfs_inode *BTRFS_I(struct inode *inode) +{ + return container_of(inode, struct btrfs_inode, vfs_inode); +} + +#endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a0dfa2d6cb9b..453ce835209a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,6 +16,16 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); +struct btrfs_path *btrfs_alloc_path(void) +{ + return kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); +} + +void btrfs_free_path(struct btrfs_path *p) +{ + kmem_cache_free(btrfs_path_cachep, p); +} + inline void btrfs_init_path(struct btrfs_path *p) { memset(p, 0, sizeof(*p)); @@ -47,17 +57,18 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root } cow = btrfs_alloc_free_block(trans, root); cow_node = btrfs_buffer_node(cow); + if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) + WARN_ON(1); memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); btrfs_set_header_generation(&cow_node->header, trans->transid); - *cow_ret = cow; - btrfs_mark_buffer_dirty(cow); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; get_bh(cow); - if (buf != root->commit_root) + if (buf != root->commit_root) { btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); + } btrfs_block_release(root, buf); } else { btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, @@ -66,6 +77,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); + *cow_ret = cow; return 0; } @@ -477,9 +489,12 @@ again: p->slots[level + 1], &cow_buf); b = cow_buf; + c = btrfs_buffer_node(b); } BUG_ON(!cow && ins_len); - c = btrfs_buffer_node(b); + if (level != btrfs_header_level(&c->header)) + WARN_ON(1); + level = btrfs_header_level(&c->header); p->nodes[level] = b; ret = check_block(root, p, level); if (ret) @@ -1257,19 +1272,22 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root data_size) { int ret = 0; - struct btrfs_path path; + struct btrfs_path *path; u8 *ptr; - btrfs_init_path(&path); - ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) { - ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], u8); - btrfs_memcpy(root, path.nodes[0]->b_data, + ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], u8); + btrfs_memcpy(root, path->nodes[0]->b_data, ptr, data, data_size); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); } - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9ec0d65ebe9b..d8e03bd797ff 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -7,6 +7,7 @@ struct btrfs_trans_handle; struct btrfs_transaction; +extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_BtRfS_M" @@ -888,6 +889,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); +struct btrfs_path *btrfs_alloc_path(void); +void btrfs_free_path(struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bb133104e2e9..2dbd55084a4e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -8,18 +8,6 @@ #include "disk-io.h" #include "transaction.h" -#define PATTERN 0xDEADBEEFUL -static inline void check_pattern(struct buffer_head *buf) -{ - if (buf->b_private != (void *)PATTERN) - WARN_ON(1); -} - -static inline void set_pattern(struct buffer_head *buf) -{ - buf->b_private = (void *)PATTERN; -} - static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); @@ -35,6 +23,8 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_find_get_block(root->fs_info->sb, blocknr); +#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -43,6 +33,7 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) struct buffer_head *head; struct buffer_head *ret = NULL; + page = find_lock_page(mapping, index); if (!page) return NULL; @@ -64,15 +55,17 @@ out_unlock: unlock_page(page); if (ret) { touch_buffer(ret); - check_pattern(ret); } page_cache_release(page); return ret; +#endif } struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_getblk(root->fs_info->sb, blocknr); +#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -95,7 +88,6 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh->b_bdev = root->fs_info->sb->s_bdev; bh->b_blocknr = first_block; set_buffer_mapped(bh); - set_pattern(bh); } if (bh->b_blocknr == blocknr) { ret = bh; @@ -111,6 +103,7 @@ out_unlock: touch_buffer(ret); page_cache_release(page); return ret; +#endif } static sector_t max_block(struct block_device *bdev) @@ -225,6 +218,8 @@ static struct address_space_operations btree_aops = { struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_bread(root->fs_info->sb, blocknr); +#if 0 struct buffer_head *bh = NULL; bh = btrfs_find_create_tree_block(root, blocknr); @@ -239,7 +234,6 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) if (!buffer_uptodate(bh)) goto fail; csum_tree_block(root, bh, 1); - set_pattern(bh); } else { unlock_buffer(bh); } @@ -250,6 +244,7 @@ fail: brelse(bh); return NULL; +#endif } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -268,14 +263,14 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } -static int __setup_root(struct btrfs_super_block *super, +static int __setup_root(int blocksize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { root->node = NULL; root->commit_root = NULL; - root->blocksize = btrfs_super_blocksize(super); + root->blocksize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; memset(&root->root_key, 0, sizeof(root->root_key)); @@ -283,7 +278,7 @@ static int __setup_root(struct btrfs_super_block *super, return 0; } -static int find_and_setup_root(struct btrfs_super_block *super, +static int find_and_setup_root(int blocksize, struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, @@ -291,7 +286,7 @@ static int find_and_setup_root(struct btrfs_super_block *super, { int ret; - __setup_root(super, root, fs_info, objectid); + __setup_root(blocksize, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -302,9 +297,7 @@ static int find_and_setup_root(struct btrfs_super_block *super, return 0; } -struct btrfs_root *open_ctree(struct super_block *sb, - struct buffer_head *sb_buffer, - struct btrfs_super_block *disk_super) +struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); @@ -317,13 +310,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; + struct btrfs_super_block *disk_super; - if (!btrfs_super_root(disk_super)) { - return NULL; - } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); - sb_set_blocksize(sb, sb_buffer->b_size); + sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -331,55 +322,59 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; - fs_info->disk_super = disk_super; fs_info->sb = sb; + fs_info->btree_inode = NULL; +#if 0 fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; + fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; insert_inode_hash(fs_info->btree_inode); - mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); +#endif fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); - if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { printk("failed to allocate sha256 hash\n"); return NULL; } - mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); - __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - - fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr); + __setup_root(sb->s_blocksize, tree_root, + fs_info, BTRFS_ROOT_TREE_OBJECTID); + fs_info->sb_buffer = read_tree_block(tree_root, + BTRFS_SUPER_INFO_OFFSET / + sb->s_blocksize); if (!fs_info->sb_buffer) { printk("failed2\n"); return NULL; } - brelse(sb_buffer); - sb_buffer = NULL; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + if (!btrfs_super_root(disk_super)) { + return NULL; + } fs_info->disk_super = disk_super; - tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + mutex_lock(&fs_info->fs_mutex); + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_INODE_MAP_OBJECTID, inode_root); BUG_ON(ret); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_FS_TREE_OBJECTID, root); + mutex_unlock(&fs_info->fs_mutex); BUG_ON(ret); root->commit_root = root->node; get_bh(root->node); @@ -392,9 +387,11 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct buffer_head *bh = root->fs_info->sb_buffer; + btrfs_set_super_root(root->fs_info->disk_super, root->fs_info->tree_root->node->b_blocknr); lock_buffer(bh); + WARN_ON(atomic_read(&bh->b_count) < 1); clear_buffer_dirty(bh); csum_tree_block(root, bh, 0); bh->b_end_io = end_buffer_write_sync; @@ -413,6 +410,7 @@ int close_ctree(struct btrfs_root *root) int ret; struct btrfs_trans_handle *trans; + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -421,6 +419,7 @@ int close_ctree(struct btrfs_root *root) ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); write_ctree_super(NULL, root); + mutex_unlock(&root->fs_info->fs_mutex); if (root->node) btrfs_block_release(root, root->node); @@ -436,8 +435,8 @@ int close_ctree(struct btrfs_root *root) btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); crypto_free_hash(root->fs_info->hash_tfm); - truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); - iput(root->fs_info->btree_inode); + // truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); + // iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); @@ -448,7 +447,6 @@ int close_ctree(struct btrfs_root *root) void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { - check_pattern(buf); - brelse(buf); + // brelse(buf); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index f6998e2192ce..ac6764ba8aa6 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -31,9 +31,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct btrfs_root *open_ctree(struct super_block *sb, - struct buffer_head *sb_buffer, - struct btrfs_super_block *disk_super); +struct btrfs_root *open_ctree(struct super_block *sb); int close_ctree(struct btrfs_root *root); void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c4194dab7a33..37b87e28a2f3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -173,12 +173,16 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) if (!pending) { bh = btrfs_find_tree_block(root, blocknr); - if (bh && buffer_uptodate(bh)) { - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - root->fs_info->running_transaction->transid) { - btrfs_block_release(root, bh); - return 0; + if (bh) { + if (buffer_uptodate(bh)) { + u64 transid = + root->fs_info->running_transaction->transid; + header = btrfs_buffer_header(bh); + if (btrfs_header_generation(header) == + transid) { + btrfs_block_release(root, bh); + return 0; + } } btrfs_block_release(root, bh); } @@ -539,6 +543,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root */ while(*level >= 0) { cur = path->nodes[*level]; + if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + WARN_ON(1); if (path->slots[*level] >= btrfs_header_nritems(btrfs_buffer_header(cur))) break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 894a70bf26d2..6969b672b570 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -14,6 +14,7 @@ #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "btrfs_inode.h" #define BTRFS_SUPER_MAGIC 0x9123682E @@ -24,6 +25,14 @@ static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct file_operations btrfs_file_operations; +static int check_inode(struct inode *inode) +{ + struct btrfs_inode *ei = BTRFS_I(inode); + WARN_ON(ei->magic != 0xDEADBEEF); + WARN_ON(ei->magic2 != 0xDEADBEAF); + return 0; +} + static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path path; @@ -34,6 +43,7 @@ static void btrfs_read_locked_inode(struct inode *inode) btrfs_init_path(&path); mutex_lock(&root->fs_info->fs_mutex); + check_inode(inode); ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); if (ret) { btrfs_release_path(root, &path); @@ -41,6 +51,7 @@ static void btrfs_read_locked_inode(struct inode *inode) make_bad_inode(inode); return; } + check_inode(inode); inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_inode_item); @@ -60,6 +71,7 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_generation = btrfs_inode_generation(inode_item); btrfs_release_path(root, &path); mutex_unlock(&root->fs_info->fs_mutex); + check_inode(inode); switch (inode->i_mode & S_IFMT) { #if 0 default: @@ -80,6 +92,7 @@ static void btrfs_read_locked_inode(struct inode *inode) // inode->i_op = &page_symlink_inode_operations; break; } + check_inode(inode); return; } @@ -347,6 +360,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, namelen, 0); if (ret || !btrfs_match_dir_item_name(root, &path, name, namelen)) { *ino = 0; + ret = 0; goto out; } di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], @@ -354,6 +368,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, *ino = btrfs_dir_objectid(di); out: btrfs_release_path(root, &path); + check_inode(dir); return ret; } @@ -367,7 +382,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &ino); mutex_unlock(&root->fs_info->fs_mutex); @@ -378,7 +392,9 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, inode = iget(dir->i_sb, ino); if (!inode) return ERR_PTR(-EACCES); + check_inode(inode); } + check_inode(dir); return d_splice_alias(inode, dentry); } @@ -471,23 +487,14 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) struct inode * inode; struct dentry * root_dentry; struct btrfs_super_block *disk_super; - struct buffer_head *bh; struct btrfs_root *root; sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = BTRFS_SUPER_MAGIC; sb->s_op = &btrfs_super_ops; sb->s_time_gran = 1; - bh = sb_bread(sb, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); - if (!bh) { - printk("btrfs: unable to read on disk super\n"); - return -EIO; - } - disk_super = (struct btrfs_super_block *)bh->b_data; - root = open_ctree(sb, bh, disk_super); + root = open_ctree(sb); if (!root) { printk("btrfs: open_ctree failed\n"); @@ -533,6 +540,7 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(item, inode->i_blocks); btrfs_set_inode_generation(item, inode->i_generation); + check_inode(inode); } static int btrfs_update_inode(struct btrfs_trans_handle *trans, @@ -560,6 +568,7 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path.nodes[0]); failed: btrfs_release_path(root, &path); + check_inode(inode); return 0; } @@ -577,6 +586,7 @@ static int btrfs_write_inode(struct inode *inode, int wait) else btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + check_inode(inode); return ret; } @@ -594,6 +604,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); + check_inode(inode); ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); BUG_ON(ret); @@ -616,6 +627,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BUG_ON(ret); insert_inode_hash(inode); + check_inode(inode); + check_inode(dir); return inode; } @@ -632,7 +645,8 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb), dentry->d_parent->d_inode); } - + check_inode(inode); + check_inode(dentry->d_parent->d_inode); return ret; } @@ -644,6 +658,9 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, d_instantiate(dentry, inode); return 0; } + if (err > 0) + err = -EEXIST; + check_inode(inode); return err; } @@ -675,6 +692,9 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, out_unlock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + check_inode(inode); + check_inode(dir); + if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -755,11 +775,11 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) sb->s_dirt = 0; if (!wait) { - filemap_flush(root->fs_info->btree_inode->i_mapping); + // filemap_flush(root->fs_info->btree_inode->i_mapping); + filemap_flush(root->fs_info->sb->s_bdev->bd_inode->i_mapping); return 0; } - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); - + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -1242,6 +1262,95 @@ static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, return retval; } +static struct kmem_cache *btrfs_inode_cachep; +struct kmem_cache *btrfs_trans_handle_cachep; +struct kmem_cache *btrfs_transaction_cachep; +struct kmem_cache *btrfs_bit_radix_cachep; +struct kmem_cache *btrfs_path_cachep; + +/* + * Called inside transaction, so use GFP_NOFS + */ +static struct inode *btrfs_alloc_inode(struct super_block *sb) +{ + struct btrfs_inode *ei; + + ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); + if (!ei) + return NULL; + ei->magic = 0xDEADBEEF; + ei->magic2 = 0xDEADBEAF; + return &ei->vfs_inode; +} + +static void btrfs_destroy_inode(struct inode *inode) +{ + struct btrfs_inode *ei = BTRFS_I(inode); + WARN_ON(ei->magic != 0xDEADBEEF); + WARN_ON(ei->magic2 != 0xDEADBEAF); + WARN_ON(!list_empty(&inode->i_dentry)); + WARN_ON(inode->i_ino == 1); + WARN_ON(inode->i_data.nrpages); + + ei->magic = 0; + ei->magic2 = 0; + kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); +} + +static void init_once(void * foo, struct kmem_cache * cachep, + unsigned long flags) +{ + struct btrfs_inode *ei = (struct btrfs_inode *) foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + inode_init_once(&ei->vfs_inode); + } +} + +static int init_inodecache(void) +{ + btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", + sizeof(struct btrfs_inode), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once, NULL); + btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", + sizeof(struct btrfs_trans_handle), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", + sizeof(struct btrfs_transaction), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", + sizeof(struct btrfs_transaction), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix", + 256, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD | + SLAB_DESTROY_BY_RCU), + NULL, NULL); + if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL || + btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void destroy_inodecache(void) +{ + kmem_cache_destroy(btrfs_inode_cachep); + kmem_cache_destroy(btrfs_trans_handle_cachep); + kmem_cache_destroy(btrfs_transaction_cachep); + kmem_cache_destroy(btrfs_bit_radix_cachep); + kmem_cache_destroy(btrfs_path_cachep); +} + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -1265,6 +1374,8 @@ static struct super_operations btrfs_super_ops = { .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, .write_inode = btrfs_write_inode, + .alloc_inode = btrfs_alloc_inode, + .destroy_inode = btrfs_destroy_inode, }; static struct inode_operations btrfs_dir_inode_operations = { @@ -1305,12 +1416,17 @@ static struct file_operations btrfs_file_operations = { static int __init init_btrfs_fs(void) { + int err; printk("btrfs loaded!\n"); + err = init_inodecache(); + if (err) + return err; return register_filesystem(&btrfs_fs_type); } static void __exit exit_btrfs_fs(void) { + destroy_inodecache(); unregister_filesystem(&btrfs_fs_type); printk("btrfs unloaded\n"); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 84c4e278ce2e..72b52e1e0b1b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -5,13 +5,20 @@ #include "transaction.h" static int total_trans = 0; +extern struct kmem_cache *btrfs_trans_handle_cachep; +extern struct kmem_cache *btrfs_transaction_cachep; + +#define TRANS_MAGIC 0xE1E10E static void put_transaction(struct btrfs_transaction *transaction) { + WARN_ON(transaction->use_count == 0); transaction->use_count--; + WARN_ON(transaction->magic != TRANS_MAGIC); if (transaction->use_count == 0) { WARN_ON(total_trans == 0); total_trans--; - kfree(transaction); + memset(transaction, 0, sizeof(*transaction)); + kmem_cache_free(btrfs_transaction_cachep, transaction); } } @@ -20,7 +27,8 @@ static int join_transaction(struct btrfs_root *root) struct btrfs_transaction *cur_trans; cur_trans = root->fs_info->running_transaction; if (!cur_trans) { - cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS); + cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, + GFP_NOFS); total_trans++; BUG_ON(!cur_trans); root->fs_info->running_transaction = cur_trans; @@ -28,6 +36,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->transid = root->root_key.offset + 1; init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); + cur_trans->magic = TRANS_MAGIC; cur_trans->in_commit = 0; cur_trans->use_count = 1; cur_trans->commit_done = 0; @@ -39,7 +48,8 @@ static int join_transaction(struct btrfs_root *root) struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { - struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); + struct btrfs_trans_handle *h = + kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; mutex_lock(&root->fs_info->trans_mutex); @@ -51,6 +61,7 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, h->blocks_used = 0; root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); + h->magic = h->magic2 = TRANS_MAGIC; return h; } @@ -58,6 +69,8 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_transaction *cur_trans; + WARN_ON(trans->magic != TRANS_MAGIC); + WARN_ON(trans->magic2 != TRANS_MAGIC); mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; WARN_ON(cur_trans->num_writers < 1); @@ -67,7 +80,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); memset(trans, 0, sizeof(*trans)); - kfree(trans); + kmem_cache_free(btrfs_trans_handle_cachep, trans); return 0; } @@ -75,7 +88,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); return 0; } @@ -137,6 +150,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { +printk("already in commit!, waiting\n"); cur_trans = trans->transaction; trans->transaction->use_count++; btrfs_end_transaction(trans, root); @@ -146,7 +160,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); return 0; } + cur_trans = trans->transaction; + trans->transaction->in_commit = 1; while (trans->transaction->num_writers > 1) { + WARN_ON(cur_trans != trans->transaction); prepare_to_wait(&trans->transaction->writer_wait, &wait, TASK_UNINTERRUPTIBLE); if (trans->transaction->num_writers <= 1) @@ -154,15 +171,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); schedule(); mutex_lock(&root->fs_info->trans_mutex); + finish_wait(&trans->transaction->writer_wait, &wait); } finish_wait(&trans->transaction->writer_wait, &wait); - + WARN_ON(cur_trans != trans->transaction); if (root->node != root->commit_root) { memcpy(&snap_key, &root->root_key, sizeof(snap_key)); root->root_key.offset++; } - if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); ret = btrfs_insert_root(trans, root->fs_info->tree_root, @@ -172,22 +189,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); - cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; mutex_unlock(&root->fs_info->trans_mutex); - ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root); btrfs_finish_extent_commit(trans, root); mutex_lock(&root->fs_info->trans_mutex); + cur_trans->commit_done = 1; + wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); - kfree(trans); - + kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->node != root->commit_root) { trans = btrfs_start_transaction(root, 1); snap = root->commit_root; @@ -203,7 +219,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_end_transaction(trans, root); BUG_ON(ret); } - return ret; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 7e6c08a0accd..4f1496ae6f24 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -7,15 +7,18 @@ struct btrfs_transaction { int in_commit; int use_count; int commit_done; + int magic; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; struct btrfs_trans_handle { + int magic; u64 transid; unsigned long blocks_reserved; unsigned long blocks_used; struct btrfs_transaction *transaction; + int magic2; }; From 5caf2a002901f0fde475371c4bf1c553b51884af Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 11:20:42 -0400 Subject: [PATCH 0089/2985] Btrfs: dynamic allocation of path struct Signed-off-by: Chris Mason --- fs/btrfs/dir-item.c | 18 ++-- fs/btrfs/extent-tree.c | 100 +++++++++++++-------- fs/btrfs/file-item.c | 43 +++++---- fs/btrfs/inode-item.c | 9 +- fs/btrfs/root-tree.c | 47 ++++++---- fs/btrfs/super.c | 198 +++++++++++++++++++++++------------------ 6 files changed, 243 insertions(+), 172 deletions(-) diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index f55c89472ac0..baceb1da609f 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -9,7 +9,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root objectid, u8 type) { int ret = 0; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_dir_item *dir_item; char *name_ptr; struct btrfs_key key; @@ -20,24 +20,26 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); - btrfs_init_path(&path); + path = btrfs_alloc_path(); + btrfs_init_path(path); data_size = sizeof(*dir_item) + name_len; - ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size); + ret = btrfs_insert_empty_item(trans, root, path, &key, data_size); if (ret) goto out; - dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], + dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_dir_item); btrfs_set_dir_objectid(dir_item, objectid); btrfs_set_dir_type(dir_item, type); btrfs_set_dir_flags(dir_item, 0); btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); - btrfs_memcpy(root, path.nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); out: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 37b87e28a2f3..d785b721b461 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -15,7 +15,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks) { - struct btrfs_path path; + struct btrfs_path *path; int ret; struct btrfs_key key; struct btrfs_leaf *l; @@ -25,23 +25,26 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, &ins); - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; - ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); if (ret != 0) BUG(); BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path.nodes[0]); - item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); + l = btrfs_buffer_leaf(path->nodes[0]); + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); - btrfs_release_path(root->fs_info->extent_root, &path); + btrfs_release_path(root->fs_info->extent_root, path); + btrfs_free_path(path); finish_current_insert(trans, root->fs_info->extent_root); del_pending_extents(trans, root->fs_info->extent_root); return 0; @@ -50,24 +53,27 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, u32 *refs) { - struct btrfs_path path; + struct btrfs_path *path; int ret; struct btrfs_key key; struct btrfs_leaf *l; struct btrfs_extent_item *item; - btrfs_init_path(&path); + + path = btrfs_alloc_path(); + btrfs_init_path(path); key.objectid = blocknr; key.offset = num_blocks; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); if (ret != 0) BUG(); - l = btrfs_buffer_leaf(path.nodes[0]); - item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); + l = btrfs_buffer_leaf(path->nodes[0]); + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); - btrfs_release_path(root->fs_info->extent_root, &path); + btrfs_release_path(root->fs_info->extent_root, path); + btrfs_free_path(path); return 0; } @@ -200,7 +206,7 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -215,20 +221,22 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root key.offset = num_blocks; find_free_extent(trans, root, 0, 0, (u64)-1, &ins); - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret) { printk("failed to find %Lu\n", key.objectid); btrfs_print_tree(extent_root, extent_root->node); printk("failed to find %Lu\n", key.objectid); BUG(); } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_extent_item); BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); if (refs == 0) { u64 super_blocks_used; @@ -240,13 +248,14 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); - ret = btrfs_del_item(trans, extent_root, &path); + ret = btrfs_del_item(trans, extent_root, path); if (extent_root->fs_info->last_insert.objectid > blocknr) extent_root->fs_info->last_insert.objectid = blocknr; if (ret) BUG(); } - btrfs_release_path(extent_root, &path); + btrfs_release_path(extent_root, path); + btrfs_free_path(path); finish_current_insert(trans, extent_root); return ret; } @@ -319,7 +328,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 search_end, struct btrfs_key *ins) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; int ret; u64 hole_size = 0; @@ -339,24 +348,25 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + path = btrfs_alloc_path(); check_failed: - btrfs_init_path(&path); + btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; start_found = 0; - ret = btrfs_search_slot(trans, root, ins, &path, 0, 0); + ret = btrfs_search_slot(trans, root, ins, path, 0, 0); if (ret < 0) goto error; - if (path.slots[0] > 0) - path.slots[0]--; + if (path->slots[0] > 0) + path->slots[0]--; while (1) { - l = btrfs_buffer_leaf(path.nodes[0]); - slot = path.slots[0]; + l = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - ret = btrfs_next_leaf(root, &path); + ret = btrfs_next_leaf(root, path); if (ret == 0) continue; if (ret < 0) @@ -387,14 +397,14 @@ check_failed: } start_found = 1; last_block = key.objectid + key.offset; - path.slots[0]++; + path->slots[0]++; } // FIXME -ENOSPC check_pending: /* we have to make sure we didn't find an extent that has already * been allocated by the map tree or the original allocation */ - btrfs_release_path(root, &path); + btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); for (test_block = ins->objectid; test_block < ins->objectid + total_needed; test_block++) { @@ -410,9 +420,11 @@ check_pending: root->fs_info->current_insert.flags = 0; root->fs_info->last_insert.objectid = ins->objectid; ins->offset = num_blocks; + btrfs_free_path(path); return 0; error: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -533,6 +545,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root int ret; u32 refs; + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr, 1, &refs); BUG_ON(ret); @@ -542,6 +556,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root * walk down to the last node level and free all the leaves */ while(*level >= 0) { + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) WARN_ON(1); @@ -564,6 +580,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root continue; } next = read_tree_block(root, blocknr); + WARN_ON(*level <= 0); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); path->nodes[*level-1] = next; @@ -571,6 +588,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[*level] = 0; } out: + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); @@ -622,33 +641,36 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root int ret = 0; int wret; int level; - struct btrfs_path path; + struct btrfs_path *path; int i; int orig_level; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); level = btrfs_header_level(btrfs_buffer_header(snap)); orig_level = level; - path.nodes[level] = snap; - path.slots[level] = 0; + path->nodes[level] = snap; + path->slots[level] = 0; while(1) { - wret = walk_down_tree(trans, root, &path, &level); + wret = walk_down_tree(trans, root, path, &level); if (wret > 0) break; if (wret < 0) ret = wret; - wret = walk_up_tree(trans, root, &path, &level); + wret = walk_up_tree(trans, root, path, &level); if (wret > 0) break; if (wret < 0) ret = wret; } for (i = 0; i <= orig_level; i++) { - if (path.nodes[i]) { - btrfs_block_release(root, path.nodes[i]); + if (path->nodes[i]) { + btrfs_block_release(root, path->nodes[i]); } } + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index c3992b7b0c60..e7510ac5559d 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -13,9 +13,11 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, int ret = 0; struct btrfs_file_extent_item *item; struct btrfs_key file_key; - struct btrfs_path path; + struct btrfs_path *path; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, (u64)-1, objectid, &ins); BUG_ON(ret); @@ -24,19 +26,20 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); - ret = btrfs_insert_empty_item(trans, root, &path, &file_key, + ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(*item)); BUG_ON(ret); - item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_disk_blocknr(item, ins.objectid); btrfs_set_file_extent_disk_num_blocks(item, ins.offset); btrfs_set_file_extent_offset(item, 0); btrfs_set_file_extent_num_blocks(item, ins.offset); btrfs_set_file_extent_generation(item, trans->transid); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); *result = ins.objectid; - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return 0; } @@ -65,25 +68,28 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, { int ret; struct btrfs_key file_key; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_csum_item *item; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); - ret = btrfs_insert_empty_item(trans, root, &path, &file_key, + ret = btrfs_insert_empty_item(trans, root, path, &file_key, BTRFS_CSUM_SIZE); if (ret != 0 && ret != -EEXIST) goto fail; - item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_csum_item); ret = 0; ret = btrfs_csum_data(root, data, len, item->csum); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); fail: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -93,19 +99,21 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, { int ret; struct btrfs_key file_key; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_csum_item *item; char result[BTRFS_CSUM_SIZE]; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); - ret = btrfs_search_slot(NULL, root, &file_key, &path, 0, 0); + ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0); if (ret) goto fail; - item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_csum_item); ret = 0; ret = btrfs_csum_data(root, data, len, result); @@ -113,7 +121,8 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, if (memcmp(result, item->csum, BTRFS_CSUM_SIZE)) ret = 1; fail: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 8d8c26a6c1a0..6bfa980790c2 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -7,7 +7,7 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, struct btrfs_inode_item *inode_item) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; int ret; key.objectid = objectid; @@ -15,10 +15,13 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); ret = btrfs_insert_item(trans, root, &key, inode_item, sizeof(*inode_item)); - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index f37dab87de16..ddc1c13a5352 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -6,7 +6,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key search_key; struct btrfs_leaf *l; int ret; @@ -16,14 +16,16 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, search_key.flags = (u32)-1; search_key.offset = (u32)-1; - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, &search_key, &path, 0, 0); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) goto out; BUG_ON(ret == 0); - l = btrfs_buffer_leaf(path.nodes[0]); - BUG_ON(path.slots[0] == 0); - slot = path.slots[0] - 1; + l = btrfs_buffer_leaf(path->nodes[0]); + BUG_ON(path->slots[0] == 0); + slot = path->slots[0] - 1; if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { ret = 1; goto out; @@ -31,9 +33,10 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), sizeof(*item)); btrfs_disk_key_to_cpu(key, &l->items[slot].key); - btrfs_release_path(root, &path); ret = 0; out: + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -41,24 +44,27 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_leaf *l; int ret; int slot; struct btrfs_root_item *update_item; - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, key, &path, 0, 1); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_search_slot(trans, root, key, path, 0, 1); if (ret < 0) goto out; BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path.nodes[0]); - slot = path.slots[0]; + l = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; update_item = btrfs_item_ptr(l, slot, struct btrfs_root_item); btrfs_memcpy(root, l, update_item, item, sizeof(*item)); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); out: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -75,16 +81,19 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key) { - struct btrfs_path path; + struct btrfs_path *path; int ret; - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, key, &path, -1, 1); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) goto out; BUG_ON(ret != 0); - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); out: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6969b672b570..c260fcad17b3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -35,25 +35,27 @@ static int check_inode(struct inode *inode) static void btrfs_read_locked_inode(struct inode *inode) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_inode_item *inode_item; struct btrfs_root *root = btrfs_sb(inode->i_sb); int ret; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); check_inode(inode); - ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); + ret = btrfs_lookup_inode(NULL, root, path, inode->i_ino, 0); if (ret) { - btrfs_release_path(root, &path); + btrfs_release_path(root, path); mutex_unlock(&root->fs_info->fs_mutex); make_bad_inode(inode); return; } check_inode(inode); - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_inode_item); inode->i_mode = btrfs_inode_mode(inode_item); @@ -69,7 +71,11 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); - btrfs_release_path(root, &path); + + btrfs_release_path(root, path); + btrfs_free_path(path); + inode_item = NULL; + mutex_unlock(&root->fs_info->fs_mutex); check_inode(inode); switch (inode->i_mode & S_IFMT) { @@ -101,15 +107,17 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct inode *dir, struct dentry *dentry) { - struct btrfs_path path; + struct btrfs_path *path; const char *name = dentry->d_name.name; int name_len = dentry->d_name.len; int ret; u64 objectid; struct btrfs_dir_item *di; - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(trans, root, &path, dir->i_ino, + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, name, name_len, -1); if (ret < 0) goto err; @@ -117,15 +125,16 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, ret = -ENOENT; goto err; } - di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); objectid = btrfs_dir_objectid(di); - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); BUG_ON(ret); dentry->d_inode->i_ctime = dir->i_ctime; err: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); if (ret == 0) inode_dec_link_count(dentry->d_inode); return ret; @@ -152,30 +161,32 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) int err; int ret; struct btrfs_root *root = btrfs_sb(dir->i_sb); - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; struct btrfs_trans_handle *trans; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { err = ret; goto out; } BUG_ON(ret == 0); - BUG_ON(path.slots[0] == 0); - path.slots[0]--; - leaf = btrfs_buffer_leaf(path.nodes[0]); - found_key = &leaf->items[path.slots[0]].key; + BUG_ON(path->slots[0] == 0); + path->slots[0]--; + leaf = btrfs_buffer_leaf(path->nodes[0]); + found_key = &leaf->items[path->slots[0]].key; if (btrfs_disk_key_objectid(found_key) != inode->i_ino) { err = -ENOENT; goto out; @@ -185,11 +196,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) err = -ENOTEMPTY; goto out; } - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); BUG_ON(ret); - btrfs_release_path(root, &path); + btrfs_release_path(root, path); key.offset = 1; - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { err = ret; goto out; @@ -198,12 +209,13 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) err = -ENOTEMPTY; goto out; } - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); if (ret) { err = ret; goto out; } - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); /* now the directory is empty */ err = btrfs_unlink_trans(trans, root, dir, dentry); @@ -223,33 +235,36 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, struct inode *inode) { u64 objectid = inode->i_ino; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_inode_map_item *map; struct btrfs_key stat_data_key; int ret; + clear_inode(inode); - btrfs_init_path(&path); - ret = btrfs_lookup_inode_map(trans, root, &path, objectid, -1); + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_inode_map(trans, root, path, objectid, -1); if (ret) { if (ret > 0) ret = -ENOENT; - btrfs_release_path(root, &path); goto error; } - map = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + map = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_inode_map_item); btrfs_disk_key_to_cpu(&stat_data_key, &map->key); - ret = btrfs_del_item(trans, root->fs_info->inode_root, &path); + ret = btrfs_del_item(trans, root->fs_info->inode_root, path); BUG_ON(ret); - btrfs_release_path(root, &path); - btrfs_init_path(&path); + btrfs_release_path(root, path); - ret = btrfs_lookup_inode(trans, root, &path, objectid, -1); + ret = btrfs_lookup_inode(trans, root, path, objectid, -1); BUG_ON(ret); - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); BUG_ON(ret); - btrfs_release_path(root, &path); error: + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -258,7 +273,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct inode *inode) { int ret; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; @@ -267,24 +282,25 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, u64 extent_num_blocks = 0; int found_extent; + path = btrfs_alloc_path(); + BUG_ON(!path); /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY); while(1) { - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + btrfs_init_path(path); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { - btrfs_release_path(root, &path); goto error; } if (ret > 0) { - BUG_ON(path.slots[0] == 0); - path.slots[0]--; + BUG_ON(path->slots[0] == 0); + path->slots[0]--; } - leaf = btrfs_buffer_leaf(path.nodes[0]); - found_key = &leaf->items[path.slots[0]].key; + leaf = btrfs_buffer_leaf(path->nodes[0]); + found_key = &leaf->items[path->slots[0]].key; if (btrfs_disk_key_objectid(found_key) != inode->i_ino) break; if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY && @@ -293,8 +309,8 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (btrfs_disk_key_offset(found_key) < inode->i_size) break; if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) { - fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], + fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); extent_start = btrfs_file_extent_disk_blocknr(fi); extent_num_blocks = @@ -305,18 +321,19 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, } else { found_extent = 0; } - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); BUG_ON(ret); - btrfs_release_path(root, &path); + btrfs_release_path(root, path); if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, extent_num_blocks, 0); BUG_ON(ret); } } - btrfs_release_path(root, &path); ret = 0; error: + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -351,23 +368,26 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; struct btrfs_dir_item *di; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_root *root = btrfs_sb(dir->i_sb); int ret; - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(NULL, root, &path, dir->i_ino, name, + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, namelen, 0); - if (ret || !btrfs_match_dir_item_name(root, &path, name, namelen)) { + if (ret || !btrfs_match_dir_item_name(root, path, name, namelen)) { *ino = 0; ret = 0; goto out; } - di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); *ino = btrfs_dir_objectid(di); out: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); check_inode(dir); return ret; } @@ -405,7 +425,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct btrfs_item *item; struct btrfs_dir_item *di; struct btrfs_key key; - struct btrfs_path path; + struct btrfs_path *path; int ret; u32 nritems; struct btrfs_leaf *leaf; @@ -419,27 +439,28 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); key.offset = filp->f_pos; - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + path = btrfs_alloc_path(); + btrfs_init_path(path); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) { goto err; } advance = 0; while(1) { - leaf = btrfs_buffer_leaf(path.nodes[0]); + leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); - slot = path.slots[0]; + slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { - ret = btrfs_next_leaf(root, &path); + ret = btrfs_next_leaf(root, path); if (ret) break; - leaf = btrfs_buffer_leaf(path.nodes[0]); + leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); - slot = path.slots[0]; + slot = path->slots[0]; } else { slot++; - path.slots[0]++; + path->slots[0]++; } } advance = 1; @@ -465,7 +486,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } ret = 0; err: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -548,26 +570,29 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, struct inode *inode) { struct btrfs_inode_item *inode_item; - struct btrfs_path path; + struct btrfs_path *path; int ret; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, &path, inode->i_ino, 1); + ret = btrfs_lookup_inode(trans, root, path, inode->i_ino, 1); if (ret) { if (ret > 0) ret = -ENOENT; goto failed; } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_inode_item); fill_inode_item(inode_item, inode); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); failed: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); check_inode(inode); return 0; } @@ -799,38 +824,39 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, u64 extent_start = 0; u64 extent_end = 0; u64 objectid = inode->i_ino; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_root *root = btrfs_sb(inode->i_sb); struct btrfs_trans_handle *trans = NULL; struct btrfs_file_extent_item *item; struct btrfs_leaf *leaf; struct btrfs_disk_key *found_key; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); if (create) trans = btrfs_start_transaction(root, 1); - ret = btrfs_lookup_file_extent(trans, root, &path, + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, iblock << inode->i_blkbits, 0); if (ret < 0) { - btrfs_release_path(root, &path); err = ret; goto out; } if (ret != 0) { - if (path.slots[0] == 0) { - btrfs_release_path(root, &path); + if (path->slots[0] == 0) { + btrfs_release_path(root, path); goto allocate; } - path.slots[0]--; + path->slots[0]--; } - item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); - leaf = btrfs_buffer_leaf(path.nodes[0]); + leaf = btrfs_buffer_leaf(path->nodes[0]); blocknr = btrfs_file_extent_disk_blocknr(item); blocknr += btrfs_file_extent_offset(item); @@ -838,25 +864,23 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (ret == 0) { err = 0; map_bh(result, inode->i_sb, blocknr); - btrfs_release_path(root, &path); goto out; } /* are we inside the extent that was found? */ - found_key = &leaf->items[path.slots[0]].key; + found_key = &leaf->items[path->slots[0]].key; if (btrfs_disk_key_objectid(found_key) != objectid || btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) { extent_end = 0; extent_start = 0; - btrfs_release_path(root, &path); + btrfs_release_path(root, path); goto allocate; } - extent_start = btrfs_disk_key_offset(&leaf->items[path.slots[0]].key); + extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); extent_start = extent_start >> inode->i_blkbits; extent_start += btrfs_file_extent_offset(item); extent_end = extent_start + btrfs_file_extent_num_blocks(item); - btrfs_release_path(root, &path); if (iblock >= extent_start && iblock < extent_end) { err = 0; map_bh(result, inode->i_sb, blocknr + iblock - extent_start); @@ -880,6 +904,8 @@ allocate: map_bh(result, inode->i_sb, blocknr); out: + btrfs_release_path(root, path); + btrfs_free_path(path); if (trans) btrfs_end_transaction(trans, root); return err; From 7cfcc17e7f2334a0503331264c1340de4e564c85 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 14:53:59 -0400 Subject: [PATCH 0090/2985] Btrfs: corruptions fixed Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 2 -- fs/btrfs/disk-io.c | 21 +++------------------ fs/btrfs/inode-map.c | 43 ++++++++++++++++++++++++------------------ fs/btrfs/super.c | 9 +++++---- fs/btrfs/transaction.c | 2 +- 5 files changed, 34 insertions(+), 43 deletions(-) diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index a50a1c44eabd..783f54c2a16c 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -62,7 +62,6 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) if (!bits) return 0; clear_bit(bit_slot, bits + 1); -#if 0 for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) { if (bits[i]) { empty = 0; @@ -74,7 +73,6 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) BUG_ON(!bits); kmem_cache_free(btrfs_bit_radix_cachep, bits); } -#endif return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2dbd55084a4e..a227e94b9e88 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -23,8 +23,6 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) { - return sb_find_get_block(root->fs_info->sb, blocknr); -#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -58,14 +56,11 @@ out_unlock: } page_cache_release(page); return ret; -#endif } struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { - return sb_getblk(root->fs_info->sb, blocknr); -#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -103,7 +98,6 @@ out_unlock: touch_buffer(ret); page_cache_release(page); return ret; -#endif } static sector_t max_block(struct block_device *bdev) @@ -186,7 +180,6 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, static int btree_writepage(struct page *page, struct writeback_control *wbc) { -#if 0 struct buffer_head *bh; struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); struct buffer_head *head; @@ -201,7 +194,6 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) csum_tree_block(root, bh, 0); bh = bh->b_this_page; } while (bh != head); -#endif return block_write_full_page(page, btree_get_block, wbc); } @@ -218,8 +210,6 @@ static struct address_space_operations btree_aops = { struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { - return sb_bread(root->fs_info->sb, blocknr); -#if 0 struct buffer_head *bh = NULL; bh = btrfs_find_create_tree_block(root, blocknr); @@ -243,8 +233,6 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) fail: brelse(bh); return NULL; - -#endif } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -323,8 +311,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; fs_info->sb = sb; - fs_info->btree_inode = NULL; -#if 0 fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; @@ -332,7 +318,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_mapping->a_ops = &btree_aops; insert_inode_hash(fs_info->btree_inode); mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); -#endif fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { @@ -435,8 +420,8 @@ int close_ctree(struct btrfs_root *root) btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); crypto_free_hash(root->fs_info->hash_tfm); - // truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); - // iput(root->fs_info->btree_inode); + truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); + iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); @@ -447,6 +432,6 @@ int close_ctree(struct btrfs_root *root) void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { - // brelse(buf); + brelse(buf); } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index fa4adb81b149..b3de823eb628 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -10,7 +10,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, u64 dirid, u64 *objectid) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; int ret; u64 hole_size = 0; @@ -31,20 +31,22 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); search_key.offset = 0; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); start_found = 0; - ret = btrfs_search_slot(trans, root, &search_key, &path, 0, 0); + ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); if (ret < 0) goto error; - if (path.slots[0] > 0) - path.slots[0]--; + if (path->slots[0] > 0) + path->slots[0]--; while (1) { - l = btrfs_buffer_leaf(path.nodes[0]); - slot = path.slots[0]; + l = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - ret = btrfs_next_leaf(root, &path); + ret = btrfs_next_leaf(root, path); if (ret == 0) continue; if (ret < 0) @@ -72,17 +74,19 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, } start_found = 1; last_ino = key.objectid + 1; - path.slots[0]++; + path->slots[0]++; } // FIXME -ENOSPC found: root->fs_info->last_inode_alloc = *objectid; root->fs_info->last_inode_alloc_dirid = dirid; - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); BUG_ON(*objectid < search_start); return 0; error: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -91,7 +95,7 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, u64 objectid, struct btrfs_key *location) { int ret = 0; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_inode_map_item *inode_item; struct btrfs_key key; struct btrfs_root *inode_root = fs_root->fs_info->inode_root; @@ -100,18 +104,21 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_MAP_ITEM_KEY); key.offset = 0; - btrfs_init_path(&path); - ret = btrfs_insert_empty_item(trans, inode_root, &path, &key, + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_insert_empty_item(trans, inode_root, path, &key, sizeof(struct btrfs_inode_map_item)); if (ret) goto out; - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], struct btrfs_inode_map_item); + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); out: - btrfs_release_path(inode_root, &path); + btrfs_release_path(inode_root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c260fcad17b3..9fd87f5edbc9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -49,6 +49,7 @@ static void btrfs_read_locked_inode(struct inode *inode) ret = btrfs_lookup_inode(NULL, root, path, inode->i_ino, 0); if (ret) { btrfs_release_path(root, path); + btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); make_bad_inode(inode); return; @@ -215,7 +216,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out; } btrfs_release_path(root, path); - btrfs_free_path(path); /* now the directory is empty */ err = btrfs_unlink_trans(trans, root, dir, dentry); @@ -223,6 +223,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) inode->i_size = 0; } out: + btrfs_release_path(root, path); + btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_end_transaction(trans, root); if (ret && !err) @@ -800,11 +802,10 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) sb->s_dirt = 0; if (!wait) { - // filemap_flush(root->fs_info->btree_inode->i_mapping); - filemap_flush(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } - filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 72b52e1e0b1b..2b0f98c92123 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -88,7 +88,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); return 0; } From 2da566edd8ce32ae2952b863ee634bcc2e7d63c1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 15:43:21 -0400 Subject: [PATCH 0091/2985] Btrfs: csum_verify_file_block locking fix Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 - fs/btrfs/extent-tree.c | 2 -- fs/btrfs/file-item.c | 2 ++ fs/btrfs/super.c | 1 - 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a227e94b9e88..39b117a57a54 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -160,7 +160,6 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, int ret; struct btrfs_node *node; - return 0; ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, bh->b_size - BTRFS_CSUM_SIZE, result); if (ret) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d785b721b461..cd40a08dd8a6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -249,8 +249,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); - if (extent_root->fs_info->last_insert.objectid > blocknr) - extent_root->fs_info->last_insert.objectid = blocknr; if (ret) BUG(); } diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index e7510ac5559d..2c98b620c594 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -110,6 +110,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0); if (ret) goto fail; @@ -123,6 +124,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, fail: btrfs_release_path(root, path); btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9fd87f5edbc9..fbc26eafc58c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1316,7 +1316,6 @@ static void btrfs_destroy_inode(struct inode *inode) WARN_ON(ei->magic != 0xDEADBEEF); WARN_ON(ei->magic2 != 0xDEADBEAF); WARN_ON(!list_empty(&inode->i_dentry)); - WARN_ON(inode->i_ino == 1); WARN_ON(inode->i_data.nrpages); ei->magic = 0; From 9d64272cd3be2aae71f89f7ababbee90a7587b33 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 3 Apr 2007 11:43:19 -0400 Subject: [PATCH 0092/2985] Btrfs: don't wait on uptodate buffers on read Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 39b117a57a54..055cd45e802e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -214,6 +214,8 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) bh = btrfs_find_create_tree_block(root, blocknr); if (!bh) return bh; + if (buffer_uptodate(bh)) + goto uptodate; lock_buffer(bh); if (!buffer_uptodate(bh)) { get_bh(bh); @@ -226,6 +228,7 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) } else { unlock_buffer(bh); } +uptodate: if (check_tree_block(root, bh)) BUG(); return bh; From df24a2b9c9bcef3348e4b1a8f206cd484a248d36 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Apr 2007 09:36:31 -0400 Subject: [PATCH 0093/2985] Btrfs: early inline file data code Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 18 ++-- fs/btrfs/super.c | 240 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 248 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 453ce835209a..9ef65e2ecf51 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,21 +16,26 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); +inline void btrfs_init_path(struct btrfs_path *p) +{ + memset(p, 0, sizeof(*p)); +} + struct btrfs_path *btrfs_alloc_path(void) { - return kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); + struct btrfs_path *path; + path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); + if (path) + btrfs_init_path(path); + return path; } void btrfs_free_path(struct btrfs_path *p) { + btrfs_release_path(NULL, p); kmem_cache_free(btrfs_path_cachep, p); } -inline void btrfs_init_path(struct btrfs_path *p) -{ - memset(p, 0, sizeof(*p)); -} - void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; @@ -77,6 +82,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); + mark_buffer_dirty(cow); *cow_ret = cow; return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index fbc26eafc58c..68127b8462ab 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -816,6 +816,73 @@ printk("btrfs sync_fs\n"); return 0; } +static int btrfs_get_block_inline(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_leaf *leaf; + int num_bytes = result->b_size; + int item_size; + int ret; + u64 pos; + char *ptr; + int copy_size; + int err = 0; + char *safe_ptr; + char *data_ptr; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + WARN_ON(create); + if (create) { + return 0; + } + pos = iblock << inode->i_blkbits; + key.objectid = inode->i_ino; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INLINE_DATA_KEY); + ptr = kmap(result->b_page); + safe_ptr = ptr; + ptr += (pos & (PAGE_CACHE_SIZE -1)); +again: + key.offset = pos; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret) { + if (ret < 0) + err = ret; + else + err = 0; + goto out; + } + leaf = btrfs_buffer_leaf(path->nodes[0]); + item_size = btrfs_item_size(leaf->items + path->slots[0]); + copy_size = min(num_bytes, item_size); + data_ptr = btrfs_item_ptr(leaf, path->slots[0], char); + WARN_ON(safe_ptr + PAGE_CACHE_SIZE < ptr + copy_size); + memcpy(ptr, data_ptr, copy_size); + pos += copy_size; + num_bytes -= copy_size; + WARN_ON(num_bytes < 0); + ptr += copy_size; + btrfs_release_path(root, path); + if (num_bytes != 0) { + if (pos >= i_size_read(inode)) + memset(ptr, 0, num_bytes); + else + goto again; + } + set_buffer_uptodate(result); + map_bh(result, inode->i_sb, 0); + err = 0; +out: + btrfs_free_path(path); + kunmap(result->b_page); + return err; +} + static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { @@ -918,7 +985,8 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, int err; struct btrfs_root *root = btrfs_sb(inode->i_sb); mutex_lock(&root->fs_info->fs_mutex); - err = btrfs_get_block_lock(inode, iblock, result, create); + // err = btrfs_get_block_lock(inode, iblock, result, create); + err = btrfs_get_block_inline(inode, iblock, result, create); mutex_unlock(&root->fs_info->fs_mutex); return err; } @@ -1177,6 +1245,170 @@ out: return num_written ? num_written : err; } +static ssize_t inline_one_page(struct btrfs_root *root, struct inode *inode, + struct page *page, loff_t pos, + size_t offset, size_t write_bytes) +{ + struct btrfs_path *path; + struct btrfs_trans_handle *trans; + struct btrfs_key key; + struct btrfs_leaf *leaf; + struct btrfs_key found_key; + int ret; + size_t copy_size = 0; + char *dst = NULL; + int err = 0; + size_t num_written = 0; + + path = btrfs_alloc_path(); + BUG_ON(!path); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + key.objectid = inode->i_ino; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INLINE_DATA_KEY); + +again: + key.offset = pos; + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) { + err = ret; + goto out; + } + if (ret == 0) { + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[path->slots[0]].key); + copy_size = btrfs_item_size(leaf->items + path->slots[0]); + dst = btrfs_item_ptr(leaf, path->slots[0], char); + copy_size = min(write_bytes, copy_size); + goto copyit; + } else { + int slot = path->slots[0]; + if (slot > 0) { + slot--; + } + // FIXME find max key + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[slot].key); + if (found_key.objectid != inode->i_ino) + goto insert; + if (btrfs_key_type(&found_key) != BTRFS_INLINE_DATA_KEY) + goto insert; + copy_size = btrfs_item_size(leaf->items + slot); + if (found_key.offset + copy_size <= pos) + goto insert; + dst = btrfs_item_ptr(leaf, path->slots[0], char); + dst += pos - found_key.offset; + copy_size = copy_size - (pos - found_key.offset); + BUG_ON(copy_size < 0); + copy_size = min(write_bytes, copy_size); + WARN_ON(copy_size == 0); + goto copyit; + } +insert: + btrfs_release_path(root, path); + copy_size = min(write_bytes, (size_t)512); + ret = btrfs_insert_empty_item(trans, root, path, &key, copy_size); + BUG_ON(ret); + dst = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], char); +copyit: + WARN_ON(copy_size == 0); + WARN_ON(dst + copy_size > + btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], char) + + btrfs_item_size(btrfs_buffer_leaf(path->nodes[0])->items + + path->slots[0])); + btrfs_memcpy(root, path->nodes[0]->b_data, dst, + page_address(page) + offset, copy_size); + mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(root, path); + pos += copy_size; + offset += copy_size; + num_written += copy_size; + write_bytes -= copy_size; + if (write_bytes) + goto again; +out: + btrfs_free_path(path); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + return num_written ? num_written : err; +} + +static ssize_t btrfs_file_inline_write(struct file *file, + const char __user *buf, + size_t count, loff_t *ppos) +{ + loff_t pos; + size_t num_written = 0; + int err = 0; + int ret = 0; + struct inode *inode = file->f_path.dentry->d_inode; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + unsigned long page_index; + + if (file->f_flags & O_DIRECT) + return -EINVAL; + pos = *ppos; + + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + current->backing_dev_info = inode->i_mapping->backing_dev_info; + err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + if (err) + goto out; + if (count == 0) + goto out; + err = remove_suid(file->f_path.dentry); + if (err) + goto out; + file_update_time(file); + mutex_lock(&inode->i_mutex); + while(count > 0) { + size_t offset = pos & (PAGE_CACHE_SIZE - 1); + size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); + struct page *page; + + page_index = pos >> PAGE_CACHE_SHIFT; + page = grab_cache_page(inode->i_mapping, page_index); + if (!PageUptodate(page)) { + ret = mpage_readpage(page, btrfs_get_block); + BUG_ON(ret); + lock_page(page); + } + ret = btrfs_copy_from_user(pos, 1, + write_bytes, &page, buf); + BUG_ON(ret); + write_bytes = inline_one_page(root, inode, page, pos, + offset, write_bytes); + SetPageUptodate(page); + if (write_bytes > 0 && pos + write_bytes > inode->i_size) { + i_size_write(inode, pos + write_bytes); + mark_inode_dirty(inode); + } + page_cache_release(page); + unlock_page(page); + if (write_bytes < 0) + goto out_unlock; + buf += write_bytes; + count -= write_bytes; + pos += write_bytes; + num_written += write_bytes; + + balance_dirty_pages_ratelimited(inode->i_mapping); + cond_resched(); + } +out_unlock: + mutex_unlock(&inode->i_mutex); +out: + *ppos = pos; + current->backing_dev_info = NULL; + return num_written ? num_written : err; +} + static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset, unsigned long size) { @@ -1420,7 +1652,7 @@ static struct file_operations btrfs_dir_file_operations = { static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, - .readpages = btrfs_readpages, + // .readpages = btrfs_readpages, .writepage = btrfs_writepage, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, @@ -1434,8 +1666,8 @@ static struct inode_operations btrfs_file_inode_operations = { static struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .aio_read = btrfs_file_aio_read, - .write = btrfs_file_write, + .aio_read = generic_file_aio_read, + .write = btrfs_file_inline_write, .mmap = generic_file_mmap, .open = generic_file_open, }; From d4dbff953e1f6f4079126c0404cc24f2ef14e925 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Apr 2007 14:08:15 -0400 Subject: [PATCH 0094/2985] Btrfs: support for items bigger than 1/2 the blocksize Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 132 ++++++++++++++++++++++++++++++++++------------- fs/btrfs/super.c | 16 ++++-- 2 files changed, 110 insertions(+), 38 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9ef65e2ecf51..864ee423b300 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -6,7 +6,8 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size); + *root, struct btrfs_key *ins_key, + struct btrfs_path *path, int data_size); static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *dst, struct buffer_head *src); @@ -101,19 +102,6 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root, return btrfs_item_offset(leaf->items + nr - 1); } -/* - * The space between the end of the leaf items and - * the start of the leaf data. IOW, how much room - * the leaf has left for both items and data - */ -int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) -{ - int data_end = leaf_data_end(root, leaf); - int nritems = btrfs_header_nritems(&leaf->header); - char *items_end = (char *)(leaf->items + nritems + 1); - return (char *)(btrfs_leaf_data(leaf) + data_end) - (char *)items_end; -} - /* * compare two keys in a memcmp fashion */ @@ -510,8 +498,8 @@ again: if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len > 0 && btrfs_header_nritems(&c->header) == - BTRFS_NODEPTRS_PER_BLOCK(root)) { + if (ins_len > 0 && btrfs_header_nritems(&c->header) >= + BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); if (sret) @@ -537,7 +525,8 @@ again: p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, l) < sizeof(struct btrfs_item) + ins_len) { - int sret = split_leaf(trans, root, p, ins_len); + int sret = split_leaf(trans, root, key, + p, ins_len); BUG_ON(sret > 0); if (sret) return sret; @@ -825,16 +814,29 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) { int data_len; - int end = start + nr - 1; + int nritems = btrfs_header_nritems(&l->header); + int end = min(nritems, start + nr) - 1; if (!nr) return 0; data_len = btrfs_item_end(l->items + start); data_len = data_len - btrfs_item_offset(l->items + end); data_len += sizeof(struct btrfs_item) * nr; + WARN_ON(data_len < 0); return data_len; } +/* + * The space between the end of the leaf items and + * the start of the leaf data. IOW, how much room + * the leaf has left for both items and data + */ +int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) +{ + int nritems = btrfs_header_nritems(&leaf->header); + return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); +} + /* * push some data in the path leaf to the right, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise @@ -1084,7 +1086,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root * returns 0 if all went well and < 0 on failure. */ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size) + *root, struct btrfs_key *ins_key, + struct btrfs_path *path, int data_size) { struct buffer_head *l_buf; struct btrfs_leaf *l; @@ -1097,8 +1100,10 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root int data_copy_size; int rt_data_off; int i; - int ret; + int ret = 0; int wret; + int double_split = 0; + struct btrfs_disk_key disk_key; /* first try to make some room by pushing left and right */ wret = push_leaf_left(trans, root, path, data_size); @@ -1127,26 +1132,58 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root mid = (nritems + 1)/ 2; right_buffer = btrfs_alloc_free_block(trans, root); BUG_ON(!right_buffer); - BUG_ON(mid == nritems); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); - if (mid <= slot) { - /* FIXME, just alloc a new leaf here */ - if (leaf_space_used(l, mid, nritems - mid) + space_needed > - BTRFS_LEAF_DATA_SIZE(root)) - BUG(); - } else { - /* FIXME, just alloc a new leaf here */ - if (leaf_space_used(l, 0, mid + 1) + space_needed > - BTRFS_LEAF_DATA_SIZE(root)) - BUG(); - } - btrfs_set_header_nritems(&right->header, nritems - mid); btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); + if (mid <= slot) { + if (nritems == 1 || + leaf_space_used(l, mid, nritems - mid) + space_needed > + BTRFS_LEAF_DATA_SIZE(root)) { + if (slot >= nritems) { + btrfs_cpu_key_to_disk(&disk_key, ins_key); + btrfs_set_header_nritems(&right->header, 0); + wret = insert_ptr(trans, root, path, + &disk_key, + right_buffer->b_blocknr, + path->slots[1] + 1, 1); + if (wret) + ret = wret; + btrfs_block_release(root, path->nodes[0]); + path->nodes[0] = right_buffer; + path->slots[0] = 0; + path->slots[1] += 1; + return ret; + } + mid = slot; + double_split = 1; + } + } else { + if (leaf_space_used(l, 0, mid + 1) + space_needed > + BTRFS_LEAF_DATA_SIZE(root)) { + if (slot == 0) { + btrfs_cpu_key_to_disk(&disk_key, ins_key); + btrfs_set_header_nritems(&right->header, 0); + wret = insert_ptr(trans, root, path, + &disk_key, + right_buffer->b_blocknr, + path->slots[1] - 1, 1); + if (wret) + ret = wret; + btrfs_block_release(root, path->nodes[0]); + path->nodes[0] = right_buffer; + path->slots[0] = 0; + path->slots[1] -= 1; + return ret; + } + mid = slot; + double_split = 1; + } + } + btrfs_set_header_nritems(&right->header, nritems - mid); data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(root, l); btrfs_memcpy(root, right, right->items, l->items + mid, @@ -1180,6 +1217,31 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root } else btrfs_block_release(root, right_buffer); BUG_ON(path->slots[0] < 0); + + if (!double_split) + return ret; + right_buffer = btrfs_alloc_free_block(trans, root); + BUG_ON(!right_buffer); + right = btrfs_buffer_leaf(right_buffer); + memset(&right->header, 0, sizeof(right->header)); + btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); + btrfs_set_header_generation(&right->header, trans->transid); + btrfs_set_header_level(&right->header, 0); + btrfs_set_header_parentid(&right->header, + btrfs_header_parentid(btrfs_buffer_header(root->node))); + btrfs_cpu_key_to_disk(&disk_key, ins_key); + btrfs_set_header_nritems(&right->header, 0); + wret = insert_ptr(trans, root, path, + &disk_key, + right_buffer->b_blocknr, + path->slots[1], 1); + if (wret) + ret = wret; + btrfs_block_release(root, path->nodes[0]); + path->nodes[0] = right_buffer; + path->slots[0] = 0; + check_node(root, path, 1); + check_leaf(root, path, 0); return ret; } @@ -1220,9 +1282,9 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root data_end = leaf_data_end(root, leaf); if (btrfs_leaf_free_space(root, leaf) < - sizeof(struct btrfs_item) + data_size) + sizeof(struct btrfs_item) + data_size) { BUG(); - + } slot = path->slots[0]; BUG_ON(slot < 0); if (slot != nritems) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 68127b8462ab..0afb6cece82c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -136,8 +136,11 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, err: btrfs_release_path(root, path); btrfs_free_path(path); - if (ret == 0) + if (ret == 0) { inode_dec_link_count(dentry->d_inode); + dir->i_size -= name_len; + mark_inode_dirty(dir); + } return ret; } @@ -290,6 +293,10 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = 0; + /* + * use BTRFS_CSUM_ITEM_KEY because it is larger than inline keys + * or extent data + */ btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY); while(1) { btrfs_init_path(path); @@ -306,6 +313,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (btrfs_disk_key_objectid(found_key) != inode->i_ino) break; if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY && + btrfs_disk_key_type(found_key) != BTRFS_INLINE_DATA_KEY && btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) break; if (btrfs_disk_key_offset(found_key) < inode->i_size) @@ -1036,7 +1044,7 @@ static void btrfs_truncate(struct inode *inode) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; - nobh_truncate_page(inode->i_mapping, inode->i_size); + // nobh_truncate_page(inode->i_mapping, inode->i_size); /* FIXME, add redo link to tree so we don't leak on crash */ mutex_lock(&root->fs_info->fs_mutex); @@ -1309,7 +1317,9 @@ again: } insert: btrfs_release_path(root, path); - copy_size = min(write_bytes, (size_t)512); + copy_size = min(write_bytes, + (size_t)BTRFS_LEAF_DATA_SIZE(root) - + sizeof(struct btrfs_item) * 4); ret = btrfs_insert_empty_item(trans, root, path, &key, copy_size); BUG_ON(ret); dst = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), From e8f05c4508aed906146919f2efa89242a7f90a8b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Apr 2007 14:30:09 -0400 Subject: [PATCH 0095/2985] Btrfs: disable inline data code for now Signed-off-by: Chris Mason --- fs/btrfs/super.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0afb6cece82c..822243650ae9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -824,6 +824,7 @@ printk("btrfs sync_fs\n"); return 0; } +#if 0 static int btrfs_get_block_inline(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { @@ -890,6 +891,7 @@ out: kunmap(result->b_page); return err; } +#endif static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) @@ -993,8 +995,8 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, int err; struct btrfs_root *root = btrfs_sb(inode->i_sb); mutex_lock(&root->fs_info->fs_mutex); - // err = btrfs_get_block_lock(inode, iblock, result, create); - err = btrfs_get_block_inline(inode, iblock, result, create); + err = btrfs_get_block_lock(inode, iblock, result, create); + // err = btrfs_get_block_inline(inode, iblock, result, create); mutex_unlock(&root->fs_info->fs_mutex); return err; } @@ -1002,13 +1004,11 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { - WARN_ON(1); return nobh_prepare_write(page, from, to, btrfs_get_block); } static int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { - WARN_ON(1); return nobh_commit_write(file, page, from, to); } @@ -1044,7 +1044,7 @@ static void btrfs_truncate(struct inode *inode) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; - // nobh_truncate_page(inode->i_mapping, inode->i_size); + nobh_truncate_page(inode->i_mapping, inode->i_size); /* FIXME, add redo link to tree so we don't leak on crash */ mutex_lock(&root->fs_info->fs_mutex); @@ -1253,6 +1253,7 @@ out: return num_written ? num_written : err; } +#if 0 static ssize_t inline_one_page(struct btrfs_root *root, struct inode *inode, struct page *page, loff_t pos, size_t offset, size_t write_bytes) @@ -1418,6 +1419,7 @@ out: current->backing_dev_info = NULL; return num_written ? num_written : err; } +#endif static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset, unsigned long size) @@ -1662,7 +1664,7 @@ static struct file_operations btrfs_dir_file_operations = { static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, - // .readpages = btrfs_readpages, + .readpages = btrfs_readpages, .writepage = btrfs_writepage, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, @@ -1676,8 +1678,8 @@ static struct inode_operations btrfs_file_inode_operations = { static struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = btrfs_file_inline_write, + .aio_read = btrfs_file_aio_read, + .write = btrfs_file_write, .mmap = generic_file_mmap, .open = generic_file_open, }; From b1a4d96509a78ad234d94e0b914b289c60d2969d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Apr 2007 15:27:52 -0400 Subject: [PATCH 0096/2985] Btrfs: tweak the inode-map and free extent search starts on cold mount Signed-off-by: Chris Mason --- fs/btrfs/TODO | 2 -- fs/btrfs/ctree.h | 1 - fs/btrfs/disk-io.c | 1 - fs/btrfs/extent-tree.c | 21 +++++++++++++++++++-- fs/btrfs/inode-map.c | 33 +++++++++++++++++++++++---------- 5 files changed, 42 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index e9371385ccc0..db986c46a554 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,9 +1,7 @@ * cleanup, add more error checking, get rid of BUG_ONs * Fix ENOSPC handling -* Do checksumming * Add block mapping tree (simple dm layer) * Make allocator smarter -* make level a field in header * add a block group to struct inode * Make directory hashing work on 32 bit * Make sure nobh stuff is working properly for cows diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d8e03bd797ff..41cc013ef08d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -238,7 +238,6 @@ struct btrfs_fs_info { struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; u64 last_inode_alloc; - u64 last_inode_alloc_dirid; u64 generation; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 055cd45e802e..de9ee3aa0aad 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -311,7 +311,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->extent_root = extent_root; fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; - fs_info->last_inode_alloc_dirid = 0; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cd40a08dd8a6..688aa861a928 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -339,13 +339,30 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int total_needed = num_blocks; int level; + path = btrfs_alloc_path(); + ins->flags = 0; + btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + level = btrfs_header_level(btrfs_buffer_header(root->node)); total_needed += (level + 1) * 3; + if (root->fs_info->last_insert.objectid == 0 && search_end == (u64)-1) { + struct btrfs_disk_key *last_key; + btrfs_init_path(path); + ins->objectid = (u64)-1; + ins->offset = (u64)-1; + ret = btrfs_search_slot(trans, root, ins, path, 0, 0); + if (ret < 0) + goto error; + BUG_ON(ret == 0); + if (path->slots[0] > 0) + path->slots[0]--; + l = btrfs_buffer_leaf(path->nodes[0]); + last_key = &l->items[path->slots[0]].key; + search_start = btrfs_disk_key_objectid(last_key); + } if (root->fs_info->last_insert.objectid > search_start) search_start = root->fs_info->last_insert.objectid; - ins->flags = 0; - btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); path = btrfs_alloc_path(); check_failed: diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index b3de823eb628..329edb42897e 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -22,17 +22,31 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_key search_key; u64 search_start = dirid; - if (fs_root->fs_info->last_inode_alloc_dirid == dirid) - search_start = fs_root->fs_info->last_inode_alloc; - - search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); - search_key.objectid = search_start; - search_key.flags = 0; - btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); - search_key.offset = 0; - path = btrfs_alloc_path(); BUG_ON(!path); + search_key.flags = 0; + btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); + + search_start = fs_root->fs_info->last_inode_alloc; + if (search_start == 0) { + struct btrfs_disk_key *last_key; + btrfs_init_path(path); + search_key.objectid = (u64)-1; + search_key.offset = (u64)-1; + ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); + if (ret < 0) + goto error; + BUG_ON(ret == 0); + if (path->slots[0] > 0) + path->slots[0]--; + l = btrfs_buffer_leaf(path->nodes[0]); + last_key = &l->items[path->slots[0]].key; + search_start = btrfs_disk_key_objectid(last_key); + } + search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); + search_key.objectid = search_start; + search_key.offset = 0; + btrfs_init_path(path); start_found = 0; ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); @@ -79,7 +93,6 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, // FIXME -ENOSPC found: root->fs_info->last_inode_alloc = *objectid; - root->fs_info->last_inode_alloc_dirid = dirid; btrfs_release_path(root, path); btrfs_free_path(path); BUG_ON(*objectid < search_start); From bae45de03c4d54a9893dedf8a015beb2608b896a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Apr 2007 21:22:22 -0400 Subject: [PATCH 0097/2985] Btrfs: add dir inode index Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 15 ++++++++------- fs/btrfs/dir-item.c | 19 +++++++++++++++++++ fs/btrfs/super.c | 4 ++-- fs/btrfs/sysfs.c | 4 ++++ 4 files changed, 33 insertions(+), 9 deletions(-) create mode 100644 fs/btrfs/sysfs.c diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 41cc013ef08d..87c56222a620 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -282,41 +282,42 @@ struct btrfs_root { * for every name in a directory. */ #define BTRFS_DIR_ITEM_KEY 2 +#define BTRFS_DIR_INDEX_KEY 3 /* * inline data is file data that fits in the btree. */ -#define BTRFS_INLINE_DATA_KEY 3 +#define BTRFS_INLINE_DATA_KEY 4 /* * extent data is for data that can't fit in the btree. It points to * a (hopefully) huge chunk of disk */ -#define BTRFS_EXTENT_DATA_KEY 4 +#define BTRFS_EXTENT_DATA_KEY 5 /* * csum items have the checksums for data in the extents */ -#define BTRFS_CSUM_ITEM_KEY 5 +#define BTRFS_CSUM_ITEM_KEY 6 /* * root items point to tree roots. There are typically in the root * tree used by the super block to find all the other trees */ -#define BTRFS_ROOT_ITEM_KEY 6 +#define BTRFS_ROOT_ITEM_KEY 7 /* * extent items are in the extent map tree. These record which blocks * are used, and how many references there are to each block */ -#define BTRFS_EXTENT_ITEM_KEY 7 +#define BTRFS_EXTENT_ITEM_KEY 8 /* * the inode map records which inode numbers are in use and where * they actually live on disk */ -#define BTRFS_INODE_MAP_ITEM_KEY 8 +#define BTRFS_INODE_MAP_ITEM_KEY 9 /* * string items are for debugging. They just store a short string of * data in the FS */ -#define BTRFS_STRING_ITEM_KEY 9 +#define BTRFS_STRING_ITEM_KEY 10 static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) { diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index baceb1da609f..2a87e23ac9c9 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -37,6 +37,25 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root name_ptr = (char *)(dir_item + 1); btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(root, path); + + btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.offset = objectid; + ret = btrfs_insert_empty_item(trans, root, path, &key, data_size); + // FIXME clear the dirindex bit + if (ret) + goto out; + + dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_dir_item); + btrfs_set_dir_objectid(dir_item, objectid); + btrfs_set_dir_type(dir_item, type); + btrfs_set_dir_flags(dir_item, 0); + btrfs_set_dir_name_len(dir_item, name_len); + name_ptr = (char *)(dir_item + 1); + btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_release_path(root, path); btrfs_free_path(path); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 822243650ae9..4b042460e875 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -447,7 +447,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); key.offset = filp->f_pos; path = btrfs_alloc_path(); btrfs_init_path(path); @@ -477,7 +477,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) item = leaf->items + slot; if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; - if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY) + if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_INDEX_KEY) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) continue; diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c new file mode 100644 index 000000000000..ed8b7e20cfbc --- /dev/null +++ b/fs/btrfs/sysfs.c @@ -0,0 +1,4 @@ +#include +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" From 5f26f772e5c4e833ffcb0599f54deda466d2a3e5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 5 Apr 2007 10:38:44 -0400 Subject: [PATCH 0098/2985] Btrfs: more inode indexed directory work Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++ fs/btrfs/dir-item.c | 18 +++++++++ fs/btrfs/extent-tree.c | 3 +- fs/btrfs/super.c | 90 ++++++++++++++++++++++-------------------- 4 files changed, 70 insertions(+), 45 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 87c56222a620..61d7b4738af6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -921,6 +921,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, const char *name, int name_len, int mod); +int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + u64 objectid, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len); /* inode-map.c */ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 2a87e23ac9c9..62d0c0916a73 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -80,6 +80,24 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + u64 objectid, int mod) +{ + int ret; + struct btrfs_key key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + + key.objectid = dir; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.offset = objectid; + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + return ret; +} + int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 688aa861a928..7c21f63f1b93 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -224,6 +224,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); + ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret) { printk("failed to find %Lu\n", key.objectid); @@ -363,8 +364,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (root->fs_info->last_insert.objectid > search_start) search_start = root->fs_info->last_insert.objectid; - path = btrfs_alloc_path(); - check_failed: btrfs_init_path(path); ins->objectid = search_start; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4b042460e875..d776b29a1676 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -130,6 +130,13 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct btrfs_dir_item); objectid = btrfs_dir_objectid(di); + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + + btrfs_release_path(root, path); + ret = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + objectid, -1); + BUG_ON(ret); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); dentry->d_inode->i_ctime = dir->i_ctime; @@ -138,7 +145,7 @@ err: btrfs_free_path(path); if (ret == 0) { inode_dec_link_count(dentry->d_inode); - dir->i_size -= name_len; + dir->i_size -= name_len * 2; mark_inode_dirty(dir); } return ret; @@ -168,8 +175,10 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_path *path; struct btrfs_key key; struct btrfs_trans_handle *trans; - struct btrfs_disk_key *found_key; + struct btrfs_key found_key; + int found_type; struct btrfs_leaf *leaf; + char *goodnames = ".."; path = btrfs_alloc_path(); BUG_ON(!path); @@ -178,46 +187,42 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) trans = btrfs_start_transaction(root, 1); key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { - err = ret; - goto out; - } + key.flags = (u32)-1; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) { + err = ret; + goto out; + } + BUG_ON(ret == 0); + if (path->slots[0] == 0) { + err = -ENOENT; + goto out; + } + path->slots[0]--; + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[path->slots[0]].key); + found_type = btrfs_key_type(&found_key); + if (found_key.objectid != inode->i_ino) { + err = -ENOENT; + goto out; + } + if ((found_type != BTRFS_DIR_ITEM_KEY && + found_type != BTRFS_DIR_INDEX_KEY) || + (!btrfs_match_dir_item_name(root, path, goodnames, 2) && + !btrfs_match_dir_item_name(root, path, goodnames, 1))) { + err = -ENOTEMPTY; + goto out; + } + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); - BUG_ON(ret == 0); - BUG_ON(path->slots[0] == 0); - path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - if (btrfs_disk_key_objectid(found_key) != inode->i_ino) { - err = -ENOENT; - goto out; - } - if (btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || - btrfs_disk_key_offset(found_key) != 2) { - err = -ENOTEMPTY; - goto out; - } - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); - btrfs_release_path(root, path); - key.offset = 1; - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { - err = ret; - goto out; - } - if (ret > 0) { - err = -ENOTEMPTY; - goto out; - } - ret = btrfs_del_item(trans, root, path); - if (ret) { - err = ret; - goto out; + if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1) + break; + btrfs_release_path(root, path); } + ret = 0; btrfs_release_path(root, path); /* now the directory is empty */ @@ -676,7 +681,7 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_parent->d_inode->i_ino, inode->i_ino, 0); if (ret == 0) { - dentry->d_parent->d_inode->i_size += dentry->d_name.len; + dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb), dentry->d_parent->d_inode); } @@ -754,7 +759,7 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, dir->i_ino, 1); if (ret) goto error; - inode->i_size = 3; + inode->i_size = 6; ret = btrfs_update_inode(trans, root, inode); error: return ret; @@ -915,7 +920,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (create) trans = btrfs_start_transaction(root, 1); - ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, iblock << inode->i_blkbits, 0); From 7fcde0e3298c00ee18e2ae7f01c67f99133ef7be Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 5 Apr 2007 12:13:21 -0400 Subject: [PATCH 0099/2985] Btrfs: finish off inode indexing in dirs, add overflows Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 33 ++++++++++++++++++++ fs/btrfs/dir-item.c | 74 ++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/super.c | 11 +++---- 3 files changed, 107 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 61d7b4738af6..794f7e4f1c07 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -270,6 +270,10 @@ struct btrfs_root { #define BTRFS_KEY_TYPE_MAX 256 #define BTRFS_KEY_TYPE_MASK (BTRFS_KEY_TYPE_MAX - 1) +#define BTRFS_KEY_OVERFLOW_MAX 128 +#define BTRFS_KEY_OVERFLOW_SHIFT 8 +#define BTRFS_KEY_OVERFLOW_MASK (0x7FULL << BTRFS_KEY_OVERFLOW_SHIFT) + /* * inode items have the data typically returned from stat and store other * info about object characteristics. There is one for every file and dir in @@ -588,6 +592,19 @@ static inline void btrfs_set_disk_key_flags(struct btrfs_disk_key *disk, disk->flags = cpu_to_le32(val); } +static inline u32 btrfs_key_overflow(struct btrfs_key *key) +{ + u32 over = key->flags & BTRFS_KEY_OVERFLOW_MASK; + return over >> BTRFS_KEY_OVERFLOW_SHIFT; +} + +static inline void btrfs_set_key_overflow(struct btrfs_key *key, u32 over) +{ + BUG_ON(over > BTRFS_KEY_OVERFLOW_MAX); + over = over << BTRFS_KEY_OVERFLOW_SHIFT; + key->flags = (key->flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; +} + static inline u32 btrfs_key_type(struct btrfs_key *key) { return key->flags & BTRFS_KEY_TYPE_MASK; @@ -612,6 +629,22 @@ static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, u32 type) btrfs_set_disk_key_flags(key, flags); } +static inline u32 btrfs_disk_key_overflow(struct btrfs_disk_key *key) +{ + u32 over = le32_to_cpu(key->flags) & BTRFS_KEY_OVERFLOW_MASK; + return over >> BTRFS_KEY_OVERFLOW_SHIFT; +} + +static inline void btrfs_set_disK_key_overflow(struct btrfs_disk_key *key, + u32 over) +{ + u32 flags = btrfs_disk_key_flags(key); + BUG_ON(over > BTRFS_KEY_OVERFLOW_MAX); + over = over << BTRFS_KEY_OVERFLOW_SHIFT; + flags = (flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; + btrfs_set_disk_key_flags(key, flags); +} + static inline u64 btrfs_header_blocknr(struct btrfs_header *h) { return le64_to_cpu(h->blocknr); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 62d0c0916a73..b1629a5d73c8 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -4,6 +4,26 @@ #include "hash.h" #include "transaction.h" +int insert_with_overflow(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, struct btrfs_key + *cpu_key, u32 data_size) +{ + int overflow; + int ret; + + ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); + overflow = btrfs_key_overflow(cpu_key); + + while(ret == -EEXIST && overflow < BTRFS_KEY_OVERFLOW_MAX) { + overflow++; + btrfs_set_key_overflow(cpu_key, overflow); + btrfs_release_path(root, path); + ret = btrfs_insert_empty_item(trans, root, path, cpu_key, + data_size); + } + return ret; +} + int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, u64 objectid, u8 type) @@ -23,7 +43,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); btrfs_init_path(path); data_size = sizeof(*dir_item) + name_len; - ret = btrfs_insert_empty_item(trans, root, path, &key, data_size); + ret = insert_with_overflow(trans, root, path, &key, data_size); if (ret) goto out; @@ -41,7 +61,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); key.offset = objectid; - ret = btrfs_insert_empty_item(trans, root, path, &key, data_size); + ret = insert_with_overflow(trans, root, path, &key, data_size); // FIXME clear the dirindex bit if (ret) goto out; @@ -70,14 +90,40 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; + struct btrfs_disk_key *found_key; + struct btrfs_leaf *leaf; key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + btrfs_set_key_overflow(&key, BTRFS_KEY_OVERFLOW_MAX - 1); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); - ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); - return ret; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + if (ret < 0) + return ret; + if (ret > 0) { + if (path->slots[0] == 0) + return 1; + path->slots[0]--; + } + leaf = btrfs_buffer_leaf(path->nodes[0]); + found_key = &leaf->items[path->slots[0]].key; + + if (btrfs_disk_key_objectid(found_key) != dir || + btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || + btrfs_disk_key_offset(found_key) != key.offset) + return 1; + + if (btrfs_match_dir_item_name(root, path, name, name_len)) + return 0; + + if (btrfs_disk_key_overflow(found_key) == 0) + return 1; + btrfs_release_path(root, path); + } + return 1; } int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, @@ -89,13 +135,31 @@ int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; + struct btrfs_disk_key *found_key; + struct btrfs_leaf *leaf; key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + btrfs_set_key_overflow(&key, BTRFS_KEY_OVERFLOW_MAX - 1); key.offset = objectid; ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); - return ret; + if (ret < 0) + return ret; + if (ret > 0) { + if (path->slots[0] == 0) + return 1; + path->slots[0]--; + } + leaf = btrfs_buffer_leaf(path->nodes[0]); + found_key = &leaf->items[path->slots[0]].key; + + if (btrfs_disk_key_objectid(found_key) != dir || + btrfs_disk_key_type(found_key) != BTRFS_DIR_INDEX_KEY) + return 1; + if (btrfs_disk_key_offset(found_key) == objectid) + return 0; + return 1; } int btrfs_match_dir_item_name(struct btrfs_root *root, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d776b29a1676..4fd2b168b2c4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -486,19 +486,18 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) continue; - + filp->f_pos = btrfs_disk_key_offset(&item->key); advance = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), btrfs_disk_key_offset(&item->key), btrfs_dir_objectid(di), d_type); - if (over) { - filp->f_pos = btrfs_disk_key_offset(&item->key); - break; - } - filp->f_pos = btrfs_disk_key_offset(&item->key) + 1; + if (over) + goto nopos; } + filp->f_pos++; +nopos: ret = 0; err: btrfs_release_path(root, path); From 5be6f7f174146d91039a27ebb2f1b4ac599172b3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 5 Apr 2007 13:35:25 -0400 Subject: [PATCH 0100/2985] Btrfs: dirindex optimizations Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +++ fs/btrfs/dir-item.c | 5 ++++- fs/btrfs/disk-io.c | 6 +++++- fs/btrfs/inode-map.c | 48 ++++++++++++++++++++++++++++++-------------- fs/btrfs/super.c | 5 +++++ 5 files changed, 50 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 794f7e4f1c07..6d40758b893f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -239,6 +239,7 @@ struct btrfs_fs_info { struct radix_tree_root pinned_radix; u64 last_inode_alloc; u64 generation; + u64 highest_inode; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; struct buffer_head *sb_buffer; @@ -970,6 +971,8 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, int mod); +int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid); + /* inode-item.c */ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, struct btrfs_inode_item diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index b1629a5d73c8..0ee9945fb1b0 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -92,6 +92,7 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int cow = mod != 0; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; + u32 overflow; key.objectid = dir; key.flags = 0; @@ -119,8 +120,10 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root if (btrfs_match_dir_item_name(root, path, name, name_len)) return 0; - if (btrfs_disk_key_overflow(found_key) == 0) + overflow = btrfs_disk_key_overflow(found_key); + if (overflow == 0) return 1; + btrfs_set_key_overflow(&key, overflow - 1); btrfs_release_path(root, path); } return 1; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index de9ee3aa0aad..5230554380d1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -311,6 +311,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->extent_root = extent_root; fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; + fs_info->highest_inode = 0; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -360,12 +361,15 @@ printk("failed2\n"); ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_FS_TREE_OBJECTID, root); - mutex_unlock(&fs_info->fs_mutex); BUG_ON(ret); root->commit_root = root->node; get_bh(root->node); root->ref_cows = 1; root->fs_info->generation = root->root_key.offset + 1; + ret = btrfs_find_highest_inode(root, &root->fs_info->last_inode_alloc); + if (ret == 0) + fs_info->highest_inode = fs_info->last_inode_alloc; + mutex_unlock(&fs_info->fs_mutex); return root; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 329edb42897e..f665221409ac 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -3,6 +3,37 @@ #include "disk-io.h" #include "transaction.h" +int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid) +{ + struct btrfs_path *path; + int ret; + struct btrfs_leaf *l; + struct btrfs_root *root = fs_root->fs_info->inode_root; + struct btrfs_key search_key; + int slot; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + search_key.objectid = (u64)-1; + search_key.offset = (u64)-1; + ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); + if (ret < 0) + goto error; + BUG_ON(ret == 0); + if (path->slots[0] > 0) { + slot = path->slots[0] - 1; + l = btrfs_buffer_leaf(path->nodes[0]); + *objectid = btrfs_disk_key_objectid(&l->items[slot].key); + } else { + *objectid = BTRFS_FIRST_FREE_OBJECTID; + } + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + /* * walks the btree of allocated inodes and find a hole. */ @@ -28,21 +59,6 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); search_start = fs_root->fs_info->last_inode_alloc; - if (search_start == 0) { - struct btrfs_disk_key *last_key; - btrfs_init_path(path); - search_key.objectid = (u64)-1; - search_key.offset = (u64)-1; - ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); - if (ret < 0) - goto error; - BUG_ON(ret == 0); - if (path->slots[0] > 0) - path->slots[0]--; - l = btrfs_buffer_leaf(path->nodes[0]); - last_key = &l->items[path->slots[0]].key; - search_start = btrfs_disk_key_objectid(last_key); - } search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; search_key.offset = 0; @@ -129,6 +145,8 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, path->slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); btrfs_mark_buffer_dirty(path->nodes[0]); + if (objectid > fs_root->fs_info->highest_inode) + fs_root->fs_info->highest_inode = objectid; out: btrfs_release_path(inode_root, path); btrfs_free_path(path); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4fd2b168b2c4..d4ee78046b86 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -482,6 +482,11 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) item = leaf->items + slot; if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; + if (btrfs_disk_key_offset(&item->key) > + root->fs_info->highest_inode) { +printk("stopping at highest inode %Lu\n", root->fs_info->highest_inode); + break; + } if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_INDEX_KEY) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) From 3eb0314dc1053b1ae617dcc8d6d93f776c5baa31 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 5 Apr 2007 14:28:50 -0400 Subject: [PATCH 0101/2985] Btrfs: uuids Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 - fs/btrfs/ctree.c | 12 ++++++++++++ fs/btrfs/super.c | 4 +--- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index db986c46a554..ea4c38c790b4 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -3,7 +3,6 @@ * Add block mapping tree (simple dm layer) * Make allocator smarter * add a block group to struct inode -* Make directory hashing work on 32 bit * Make sure nobh stuff is working properly for cows * Do actual block accounting * Check compat and incompat flags on the inode diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 864ee423b300..39b551564fcf 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -200,6 +200,10 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { + struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); + if (memcmp(node->header.fsid, root->fs_info->disk_super->fsid, + sizeof(node->header.fsid))) + BUG(); if (level == 0) return check_leaf(root, path, level); return check_node(root, path, level); @@ -687,6 +691,8 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_parentid(&c->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); lower = btrfs_buffer_node(path->nodes[level-1]); + memcpy(c->header.fsid, root->fs_info->disk_super->fsid, + sizeof(c->header.fsid)); if (btrfs_is_leaf(lower)) lower_key = &((struct btrfs_leaf *)lower)->items[0].key; else @@ -780,6 +786,8 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_generation(&split->header, trans->transid); btrfs_set_header_parentid(&split->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); + memcpy(split->header.fsid, root->fs_info->disk_super->fsid, + sizeof(split->header.fsid)); mid = (c_nritems + 1) / 2; btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid, (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); @@ -1139,6 +1147,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); + memcpy(right->header.fsid, root->fs_info->disk_super->fsid, + sizeof(right->header.fsid)); if (mid <= slot) { if (nritems == 1 || leaf_space_used(l, mid, nritems - mid) + space_needed > @@ -1229,6 +1239,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); + memcpy(right->header.fsid, root->fs_info->disk_super->fsid, + sizeof(right->header.fsid)); btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(&right->header, 0); wret = insert_ptr(trans, root, path, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d4ee78046b86..32224ffdcecd 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -483,10 +483,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; if (btrfs_disk_key_offset(&item->key) > - root->fs_info->highest_inode) { -printk("stopping at highest inode %Lu\n", root->fs_info->highest_inode); + root->fs_info->highest_inode) break; - } if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_INDEX_KEY) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) From d6e4a428eb8f92bbb3537ccabadfb1195efb432b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 6 Apr 2007 15:37:36 -0400 Subject: [PATCH 0102/2985] Btrfs: start of support for many FS volumes Signed-off-by: Chris Mason --- fs/btrfs/TODO | 7 ++ fs/btrfs/btrfs_inode.h | 2 + fs/btrfs/ctree.h | 36 ++++--- fs/btrfs/dir-item.c | 14 ++- fs/btrfs/disk-io.c | 7 +- fs/btrfs/inode-item.c | 26 +++-- fs/btrfs/print-tree.c | 2 +- fs/btrfs/super.c | 218 +++++++++++++++++++++++++++++++---------- fs/btrfs/transaction.c | 9 ++ 9 files changed, 242 insertions(+), 79 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index ea4c38c790b4..73d4b842a316 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -19,3 +19,10 @@ * fsck * Scrub & defrag +--- +metata FS ideas: +mount metadata FS on /sys/fs/btrfs/dev/ +subvolumes in /sys/fs/btrfs/dev/fs/ +snapshots in /sys/fs/btrfs/dev/fs/snapshots +mount -o bind +ioctl to create a snapshot diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index e159841650a4..3439ffb467b5 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -3,6 +3,8 @@ struct btrfs_inode { u32 magic; + struct btrfs_root *root; + struct btrfs_key location; struct inode vfs_inode; u32 magic2; }; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6d40758b893f..1ff5b99af682 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3,6 +3,7 @@ #include #include +#include #include "bit-radix.h" struct btrfs_trans_handle; @@ -183,13 +184,15 @@ struct btrfs_inline_data_item { } __attribute__ ((__packed__)); struct btrfs_dir_item { - __le64 objectid; + struct btrfs_disk_key location; __le16 flags; __le16 name_len; u8 type; } __attribute__ ((__packed__)); struct btrfs_root_item { + struct btrfs_inode_item inode; + __le64 root_dirid; __le64 blocknr; __le32 flags; __le64 block_limit; @@ -249,6 +252,7 @@ struct btrfs_fs_info { struct mutex fs_mutex; struct crypto_hash *hash_tfm; spinlock_t hash_lock; + struct kobject kobj; }; /* @@ -504,16 +508,6 @@ static inline void btrfs_set_item_size(struct btrfs_item *item, u16 val) item->size = cpu_to_le16(val); } -static inline u64 btrfs_dir_objectid(struct btrfs_dir_item *d) -{ - return le64_to_cpu(d->objectid); -} - -static inline void btrfs_set_dir_objectid(struct btrfs_dir_item *d, u64 val) -{ - d->objectid = cpu_to_le64(val); -} - static inline u16 btrfs_dir_flags(struct btrfs_dir_item *d) { return le16_to_cpu(d->flags); @@ -724,6 +718,16 @@ static inline void btrfs_set_root_blocknr(struct btrfs_root_item *item, u64 val) item->blocknr = cpu_to_le64(val); } +static inline u64 btrfs_root_dirid(struct btrfs_root_item *item) +{ + return le64_to_cpu(item->root_dirid); +} + +static inline void btrfs_set_root_dirid(struct btrfs_root_item *item, u64 val) +{ + item->root_dirid = cpu_to_le64(val); +} + static inline u32 btrfs_root_refs(struct btrfs_root_item *item) { return le32_to_cpu(item->refs); @@ -950,8 +954,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); /* dir-item.c */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, const char *name, int name_len, u64 dir, u64 - objectid, u8 type); + *root, const char *name, int name_len, u64 dir, + struct btrfs_key *location, u8 type); int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, const char *name, int name_len, int mod); @@ -978,7 +982,8 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, struct btrfs_inode_item *inode_item); int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 objectid, int mod); + *root, struct btrfs_path *path, + struct btrfs_key *location, int mod); /* file-item.c */ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, @@ -997,4 +1002,7 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, int btrfs_csum_verify_file_block(struct btrfs_root *root, u64 objectid, u64 offset, char *data, size_t len); +/* super.c */ +extern struct subsystem btrfs_subsys; + #endif diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 0ee9945fb1b0..7aed9f015b55 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -25,8 +25,8 @@ int insert_with_overflow(struct btrfs_trans_handle *trans, struct btrfs_root } int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, const char *name, int name_len, u64 dir, u64 - objectid, u8 type) + *root, const char *name, int name_len, u64 dir, + struct btrfs_key *location, u8 type) { int ret = 0; struct btrfs_path *path; @@ -50,17 +50,21 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); - btrfs_set_dir_objectid(dir_item, objectid); + btrfs_cpu_key_to_disk(&dir_item->location, location); btrfs_set_dir_type(dir_item, type); btrfs_set_dir_flags(dir_item, 0); btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); + /* FIXME, use some real flag for selecting the extra index */ + if (root == root->fs_info->tree_root) + goto out; + btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root, path); btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); - key.offset = objectid; + key.offset = location->objectid; ret = insert_with_overflow(trans, root, path, &key, data_size); // FIXME clear the dirindex bit if (ret) @@ -69,7 +73,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); - btrfs_set_dir_objectid(dir_item, objectid); + btrfs_cpu_key_to_disk(&dir_item->location, location); btrfs_set_dir_type(dir_item, type); btrfs_set_dir_flags(dir_item, 0); btrfs_set_dir_name_len(dir_item, name_len); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5230554380d1..b9301a5e4608 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -338,7 +338,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) sb->s_blocksize); if (!fs_info->sb_buffer) { -printk("failed2\n"); return NULL; } disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; @@ -369,6 +368,10 @@ printk("failed2\n"); ret = btrfs_find_highest_inode(root, &root->fs_info->last_inode_alloc); if (ret == 0) fs_info->highest_inode = fs_info->last_inode_alloc; + memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); + kobj_set_kset_s(fs_info, btrfs_subsys); + kobject_set_name(&fs_info->kobj, "%s", sb->s_id); + kobject_register(&fs_info->kobj); mutex_unlock(&fs_info->fs_mutex); return root; } @@ -430,7 +433,7 @@ int close_ctree(struct btrfs_root *root) kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); - kfree(root->fs_info); + kobject_unregister(&root->fs_info->kobj); kfree(root); return 0; } diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 6bfa980790c2..b276a3b40a66 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -26,15 +26,27 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root } int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 objectid, int mod) + *root, struct btrfs_path *path, + struct btrfs_key *location, int mod) { - struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; + int ret; + int slot; + struct btrfs_leaf *leaf; + struct btrfs_key found_key; - key.objectid = objectid; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - key.offset = 0; - return btrfs_search_slot(trans, root, &key, path, ins_len, cow); + ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); + if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY && + location->offset == (u64)-1 && path->slots[0] != 0) { + slot = path->slots[0] - 1; + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, &leaf->items[slot].key); + if (found_key.objectid == location->objectid && + btrfs_key_type(&found_key) == btrfs_key_type(location)) { + path->slots[0]--; + return 0; + } + } + return ret; } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index c8ee938c1251..f0da65c4f96b 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -38,7 +38,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); printk("\t\tdir oid %Lu flags %u type %u\n", - btrfs_dir_objectid(di), + btrfs_disk_key_objectid(&di->location), btrfs_dir_flags(di), btrfs_dir_type(di)); printk("\t\tname %.*s\n", diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 32224ffdcecd..66d9fb2288c3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -16,9 +16,23 @@ #include "transaction.h" #include "btrfs_inode.h" +void btrfs_fsinfo_release(struct kobject *obj) +{ + struct btrfs_fs_info *fsinfo = container_of(obj, + struct btrfs_fs_info, kobj); + kfree(fsinfo); +} + +struct kobj_type btrfs_fsinfo_ktype = { + .release = btrfs_fsinfo_release, +}; + +decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL); + #define BTRFS_SUPER_MAGIC 0x9123682E static struct inode_operations btrfs_dir_inode_operations; +static struct inode_operations btrfs_dir_ro_inode_operations; static struct super_operations btrfs_super_ops; static struct file_operations btrfs_dir_file_operations; static struct inode_operations btrfs_file_inode_operations; @@ -37,7 +51,8 @@ static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; struct btrfs_inode_item *inode_item; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_key location; int ret; path = btrfs_alloc_path(); @@ -46,13 +61,12 @@ static void btrfs_read_locked_inode(struct inode *inode) mutex_lock(&root->fs_info->fs_mutex); check_inode(inode); - ret = btrfs_lookup_inode(NULL, root, path, inode->i_ino, 0); + + memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); + ret = btrfs_lookup_inode(NULL, root, path, &location, 0); if (ret) { - btrfs_release_path(root, path); btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - make_bad_inode(inode); - return; + goto make_bad; } check_inode(inode); inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), @@ -73,7 +87,6 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); - btrfs_release_path(root, path); btrfs_free_path(path); inode_item = NULL; @@ -92,8 +105,11 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_op = &btrfs_file_inode_operations; break; case S_IFDIR: - inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; + if (root == root->fs_info->tree_root) + inode->i_op = &btrfs_dir_ro_inode_operations; + else + inode->i_op = &btrfs_dir_inode_operations; break; case S_IFLNK: // inode->i_op = &page_symlink_inode_operations; @@ -101,6 +117,12 @@ static void btrfs_read_locked_inode(struct inode *inode) } check_inode(inode); return; + +make_bad: + btrfs_release_path(root, path); + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + make_bad_inode(inode); } static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, @@ -128,7 +150,7 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, } di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); - objectid = btrfs_dir_objectid(di); + objectid = btrfs_disk_key_objectid(&di->location); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -157,7 +179,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; int ret; - root = btrfs_sb(dir->i_sb); + root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_unlink_trans(trans, root, dir, dentry); @@ -171,7 +193,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int err; int ret; - struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_path *path; struct btrfs_key key; struct btrfs_trans_handle *trans; @@ -268,7 +290,8 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, BUG_ON(ret); btrfs_release_path(root, path); - ret = btrfs_lookup_inode(trans, root, path, objectid, -1); + ret = btrfs_lookup_inode(trans, root, path, + &BTRFS_I(inode)->location, -1); BUG_ON(ret); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -355,7 +378,7 @@ error: static void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; int ret; truncate_inode_pages(&inode->i_data, 0); @@ -378,13 +401,13 @@ no_delete: } static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, - ino_t *ino) + struct btrfs_key *location) { const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; struct btrfs_dir_item *di; struct btrfs_path *path; - struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct btrfs_root *root = BTRFS_I(dir)->root; int ret; path = btrfs_alloc_path(); @@ -393,13 +416,13 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ret = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, namelen, 0); if (ret || !btrfs_match_dir_item_name(root, path, name, namelen)) { - *ino = 0; + location->objectid = 0; ret = 0; goto out; } di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); - *ino = btrfs_dir_objectid(di); + btrfs_disk_key_to_cpu(location, &di->location); out: btrfs_release_path(root, path); btrfs_free_path(path); @@ -407,26 +430,76 @@ out: return ret; } +int fixup_tree_root_location(struct btrfs_root *root, + struct btrfs_key *location, + struct btrfs_root **sub_root) +{ + struct btrfs_path *path; + struct btrfs_root_item *ri; + int ret; + + if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) + return 0; + if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) + return 0; + + path = btrfs_alloc_path(); + BUG_ON(!path); + mutex_lock(&root->fs_info->fs_mutex); + + ret = btrfs_lookup_inode(NULL, root, path, location, 0); + if (ret) + goto out; + ri = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_root_item); + location->objectid = btrfs_root_dirid(ri); + location->flags = 0; + btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); + location->offset = 0; + /* FIXME properly select the root */ + *sub_root = root->fs_info->fs_root; +out: + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + + static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode; - struct btrfs_root *root = btrfs_sb(dir->i_sb); - ino_t ino; + struct btrfs_inode *bi = BTRFS_I(dir); + struct btrfs_root *root = bi->root; + struct btrfs_root *sub_root = root; + struct btrfs_key location; int ret; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_inode_by_name(dir, dentry, &ino); + ret = btrfs_inode_by_name(dir, dentry, &location); mutex_unlock(&root->fs_info->fs_mutex); if (ret < 0) return ERR_PTR(ret); inode = NULL; - if (ino) { - inode = iget(dir->i_sb, ino); + if (location.objectid) { + ret = fixup_tree_root_location(root, &location, &sub_root); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) + return ERR_PTR(-ENOENT); + inode = iget_locked(dir->i_sb, location.objectid); if (!inode) return ERR_PTR(-EACCES); + if (inode->i_state & I_NEW) { + BTRFS_I(inode)->root = sub_root; + memcpy(&BTRFS_I(inode)->location, &location, + sizeof(location)); + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + } check_inode(inode); } check_inode(dir); @@ -436,7 +509,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_item *item; struct btrfs_dir_item *di; struct btrfs_key key; @@ -448,11 +521,16 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int advance; unsigned char d_type = DT_UNKNOWN; int over = 0; + int key_type = BTRFS_DIR_INDEX_KEY; + + /* FIXME, use a real flag for deciding about the key type */ + if (root->fs_info->tree_root == root) + key_type = BTRFS_DIR_ITEM_KEY; mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; path = btrfs_alloc_path(); btrfs_init_path(path); @@ -482,10 +560,11 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) item = leaf->items + slot; if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; - if (btrfs_disk_key_offset(&item->key) > + if (key_type == BTRFS_DIR_INDEX_KEY && + btrfs_disk_key_offset(&item->key) > root->fs_info->highest_inode) break; - if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_INDEX_KEY) + if (btrfs_disk_key_type(&item->key) != key_type) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) continue; @@ -495,7 +574,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), btrfs_disk_key_offset(&item->key), - btrfs_dir_objectid(di), d_type); + btrfs_disk_key_objectid(&di->location), d_type); if (over) goto nopos; } @@ -527,6 +606,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) struct dentry * root_dentry; struct btrfs_super_block *disk_super; struct btrfs_root *root; + struct btrfs_inode *bi; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = BTRFS_SUPER_MAGIC; @@ -546,6 +626,13 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) btrfs_super_root_dir(disk_super)); inode = iget_locked(sb, btrfs_super_root_dir(disk_super)); + bi = BTRFS_I(inode); + bi->location.objectid = inode->i_ino; + bi->location.offset = 0; + bi->location.flags = 0; + bi->root = root->fs_info->tree_root; + btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); + if (!inode) return -ENOMEM; if (inode->i_state & I_NEW) { @@ -594,7 +681,8 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, BUG_ON(!path); btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, path, inode->i_ino, 1); + ret = btrfs_lookup_inode(trans, root, path, + &BTRFS_I(inode)->location, 1); if (ret) { if (ret > 0) ret = -ENOENT; @@ -616,7 +704,7 @@ failed: static int btrfs_write_inode(struct inode *inode, int wait) { - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; int ret; @@ -637,8 +725,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, { struct inode *inode; struct btrfs_inode_item inode_item; - struct btrfs_root *root = btrfs_sb(dir->i_sb); - struct btrfs_key key; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_key *key; int ret; u64 objectid; @@ -646,6 +734,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); + BTRFS_I(inode)->root = BTRFS_I(dir)->root; + key = &BTRFS_I(inode)->location; check_inode(inode); ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); BUG_ON(ret); @@ -658,11 +748,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; fill_inode_item(&inode_item, inode); - key.objectid = objectid; - key.flags = 0; - key.offset = 0; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - ret = btrfs_insert_inode_map(trans, root, objectid, &key); + key->objectid = objectid; + key->flags = 0; + key->offset = 0; + btrfs_set_key_type(key, BTRFS_INODE_ITEM_KEY); + ret = btrfs_insert_inode_map(trans, root, objectid, key); BUG_ON(ret); ret = btrfs_insert_inode(trans, root, objectid, &inode_item); @@ -678,13 +768,20 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, struct dentry *dentry, struct inode *inode) { int ret; - ret = btrfs_insert_dir_item(trans, btrfs_sb(inode->i_sb), + struct btrfs_key key; + struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; + key.objectid = inode->i_ino; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + + ret = btrfs_insert_dir_item(trans, root, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, - inode->i_ino, 0); + &key, 0); if (ret == 0) { dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; - ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb), + ret = btrfs_update_inode(trans, root, dentry->d_parent->d_inode); } check_inode(inode); @@ -710,7 +807,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { struct btrfs_trans_handle *trans; - struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode; int err; int drop_inode = 0; @@ -747,18 +844,26 @@ out_unlock: static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir) { - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(dir)->root; int ret; char buf[2]; + struct btrfs_key key; + buf[0] = '.'; buf[1] = '.'; + key.objectid = inode->i_ino; + key.offset = 0; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + ret = btrfs_insert_dir_item(trans, root, buf, 1, inode->i_ino, - inode->i_ino, 1); + &key, 1); if (ret) goto error; + key.objectid = dir->i_ino; ret = btrfs_insert_dir_item(trans, root, buf, 2, inode->i_ino, - dir->i_ino, 1); + &key, 1); if (ret) goto error; inode->i_size = 6; @@ -771,7 +876,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { struct inode *inode; struct btrfs_trans_handle *trans; - struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct btrfs_root *root = BTRFS_I(dir)->root; int err = 0; int drop_on_err = 0; @@ -910,7 +1015,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, u64 extent_end = 0; u64 objectid = inode->i_ino; struct btrfs_path *path; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans = NULL; struct btrfs_file_extent_item *item; struct btrfs_leaf *leaf; @@ -999,7 +1104,7 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { int err; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; mutex_lock(&root->fs_info->fs_mutex); err = btrfs_get_block_lock(inode, iblock, result, create); // err = btrfs_get_block_inline(inode, iblock, result, create); @@ -1041,7 +1146,7 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) static void btrfs_truncate(struct inode *inode) { - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; int ret; struct btrfs_trans_handle *trans; @@ -1208,7 +1313,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, int err = 0; int ret = 0; struct inode *inode = file->f_path.dentry->d_inode; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; struct page *pages[1]; if (file->f_flags & O_DIRECT) @@ -1365,7 +1470,7 @@ static ssize_t btrfs_file_inline_write(struct file *file, int err = 0; int ret = 0; struct inode *inode = file->f_path.dentry->d_inode; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; unsigned long page_index; if (file->f_flags & O_DIRECT) @@ -1432,13 +1537,14 @@ static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, { char *kaddr; unsigned long left, count = desc->count; + struct inode *inode = page->mapping->host; if (size > count) size = count; if (!PageChecked(page)) { /* FIXME, do it per block */ - struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; int ret = btrfs_csum_verify_file_block(root, page->mapping->host->i_ino, page->index << PAGE_CACHE_SHIFT, @@ -1662,6 +1768,10 @@ static struct inode_operations btrfs_dir_inode_operations = { .rmdir = btrfs_rmdir, }; +static struct inode_operations btrfs_dir_ro_inode_operations = { + .lookup = btrfs_lookup, +}; + static struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, @@ -1697,13 +1807,21 @@ static int __init init_btrfs_fs(void) err = init_inodecache(); if (err) return err; + kset_set_kset_s(&btrfs_subsys, fs_subsys); + err = subsystem_register(&btrfs_subsys); + if (err) + goto out; return register_filesystem(&btrfs_fs_type); +out: + destroy_inodecache(); + return err; } static void __exit exit_btrfs_fs(void) { destroy_inodecache(); unregister_filesystem(&btrfs_fs_type); + subsystem_unregister(&btrfs_subsys); printk("btrfs unloaded\n"); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2b0f98c92123..20d84bd03223 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -52,6 +52,8 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; + /* FIXME, use the right root */ + root = root->fs_info->fs_root; mutex_lock(&root->fs_info->trans_mutex); ret = join_transaction(root); BUG_ON(ret); @@ -69,6 +71,10 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_transaction *cur_trans; + + /* FIXME, use the right root */ + root = root->fs_info->fs_root; + WARN_ON(trans->magic != TRANS_MAGIC); WARN_ON(trans->magic2 != TRANS_MAGIC); mutex_lock(&root->fs_info->trans_mutex); @@ -148,6 +154,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; DEFINE_WAIT(wait); + /* FIXME, use the right root */ + root = root->fs_info->fs_root; + mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { printk("already in commit!, waiting\n"); From 0f7d52f4431c530b4f39c524448c688bb7754de5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 9 Apr 2007 10:42:37 -0400 Subject: [PATCH 0103/2985] Btrfs: groundwork for subvolume and snapshot roots Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 2 +- fs/btrfs/ctree.h | 20 ++++- fs/btrfs/disk-io.c | 176 +++++++++++++++++++++++++++++++---------- fs/btrfs/disk-io.h | 2 + fs/btrfs/super.c | 39 +++++---- fs/btrfs/transaction.c | 140 +++++++++++++++++++++++--------- 6 files changed, 281 insertions(+), 98 deletions(-) diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 783f54c2a16c..9fc42e99c7df 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -86,7 +86,7 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, int i; int total_found = 0; - ret = radix_tree_gang_lookup(radix, (void *)&gang, 0, ARRAY_SIZE(gang)); + ret = radix_tree_gang_lookup(radix, (void **)gang, 0, ARRAY_SIZE(gang)); for (i = 0; i < ret && nr > 0; i++) { found = 0; bits = gang[i]; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1ff5b99af682..5460030c9e6a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -232,12 +232,12 @@ struct btrfs_inode_map_item { struct crypto_hash; struct btrfs_fs_info { - struct btrfs_root *fs_root; struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *inode_root; struct btrfs_key current_insert; struct btrfs_key last_insert; + struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; u64 last_inode_alloc; @@ -266,6 +266,9 @@ struct btrfs_root { struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; + struct inode *inode; + u64 objectid; + u64 last_trans; u32 blocksize; int ref_cows; u32 type; @@ -595,7 +598,7 @@ static inline u32 btrfs_key_overflow(struct btrfs_key *key) static inline void btrfs_set_key_overflow(struct btrfs_key *key, u32 over) { - BUG_ON(over > BTRFS_KEY_OVERFLOW_MAX); + BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); over = over << BTRFS_KEY_OVERFLOW_SHIFT; key->flags = (key->flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; } @@ -634,7 +637,7 @@ static inline void btrfs_set_disK_key_overflow(struct btrfs_disk_key *key, u32 over) { u32 flags = btrfs_disk_key_flags(key); - BUG_ON(over > BTRFS_KEY_OVERFLOW_MAX); + BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); over = over << BTRFS_KEY_OVERFLOW_SHIFT; flags = (flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; btrfs_set_disk_key_flags(key, flags); @@ -748,6 +751,17 @@ static inline void btrfs_set_super_blocknr(struct btrfs_super_block *s, u64 val) s->blocknr = cpu_to_le64(val); } +static inline u64 btrfs_super_generation(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->generation); +} + +static inline void btrfs_set_super_generation(struct btrfs_super_block *s, + u64 val) +{ + s->generation = cpu_to_le64(val); +} + static inline u64 btrfs_super_root(struct btrfs_super_block *s) { return le64_to_cpu(s->root); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b9301a5e4608..b557bdd1e26a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4,9 +4,11 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "btrfs_inode.h" static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { @@ -180,7 +182,7 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct buffer_head *bh; - struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; struct buffer_head *head; if (!page_has_buffers(page)) { create_empty_buffers(page, root->fs_info->sb->s_blocksize, @@ -259,10 +261,13 @@ static int __setup_root(int blocksize, u64 objectid) { root->node = NULL; + root->inode = NULL; root->commit_root = NULL; root->blocksize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; + root->objectid = objectid; + root->last_trans = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); return 0; @@ -287,10 +292,78 @@ static int find_and_setup_root(int blocksize, return 0; } +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) +{ + struct btrfs_root *root; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_path *path; + struct btrfs_leaf *l; + int ret = 0; + +printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags); + root = kmalloc(sizeof(*root), GFP_NOFS); + if (!root) { +printk("failed1\n"); + return ERR_PTR(-ENOMEM); + } + if (location->offset == (u64)-1) { + ret = find_and_setup_root(fs_info->sb->s_blocksize, + fs_info->tree_root, fs_info, + location->objectid, root); + if (ret) { +printk("failed2\n"); + kfree(root); + return ERR_PTR(ret); + } + goto insert; + } + + __setup_root(fs_info->sb->s_blocksize, root, fs_info, + location->objectid); + + path = btrfs_alloc_path(); + BUG_ON(!path); + ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); + if (ret != 0) { +printk("internal search_slot gives us %d\n", ret); + if (ret > 0) + ret = -ENOENT; + goto out; + } + l = btrfs_buffer_leaf(path->nodes[0]); + memcpy(&root->root_item, + btrfs_item_ptr(l, path->slots[0], struct btrfs_root_item), + sizeof(root->root_item)); + memcpy(&root->root_key, location, sizeof(*location)); + ret = 0; +out: + btrfs_release_path(root, path); + btrfs_free_path(path); + if (ret) { + kfree(root); + return ERR_PTR(ret); + } + root->node = read_tree_block(root, + btrfs_root_blocknr(&root->root_item)); + BUG_ON(!root->node); +insert: +printk("inserting %p\n", root); + root->ref_cows = 1; + ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root, + root); + if (ret) { +printk("radix_tree_insert gives us %d\n", ret); + brelse(root->node); + kfree(root); + return ERR_PTR(ret); + } +printk("all worked\n"); + return root; +} + struct btrfs_root *open_ctree(struct super_block *sb) { - struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root), - GFP_NOFS); struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -304,9 +377,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); + INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; - fs_info->fs_root = root; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->inode_root = inode_root; @@ -318,6 +391,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + BTRFS_I(fs_info->btree_inode)->root = tree_root; + memset(&BTRFS_I(fs_info->btree_inode)->location, 0, + sizeof(struct btrfs_key)); insert_inode_hash(fs_info->btree_inode); mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); @@ -337,13 +413,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); - if (!fs_info->sb_buffer) { + if (!fs_info->sb_buffer) return NULL; - } disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; - if (!btrfs_super_root(disk_super)) { + if (!btrfs_super_root(disk_super)) return NULL; - } + fs_info->disk_super = disk_super; tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); @@ -358,14 +433,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) BTRFS_INODE_MAP_OBJECTID, inode_root); BUG_ON(ret); - ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, - BTRFS_FS_TREE_OBJECTID, root); - BUG_ON(ret); - root->commit_root = root->node; - get_bh(root->node); - root->ref_cows = 1; - root->fs_info->generation = root->root_key.offset + 1; - ret = btrfs_find_highest_inode(root, &root->fs_info->last_inode_alloc); + fs_info->generation = btrfs_super_generation(disk_super) + 1; + ret = btrfs_find_highest_inode(tree_root, &fs_info->last_inode_alloc); if (ret == 0) fs_info->highest_inode = fs_info->last_inode_alloc; memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); @@ -373,7 +442,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) kobject_set_name(&fs_info->kobj, "%s", sb->s_id); kobject_register(&fs_info->kobj); mutex_unlock(&fs_info->fs_mutex); - return root; + return tree_root; } int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root @@ -398,12 +467,42 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +int del_fs_roots(struct btrfs_fs_info *fs_info) +{ + int ret; + struct btrfs_root *gang[8]; + int i; + + while(1) { + ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, + (void **)gang, 0, + ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + radix_tree_delete(&fs_info->fs_roots_radix, + (unsigned long)gang[i]); + if (gang[i]->inode) + iput(gang[i]->inode); + else + printk("no inode for root %p\n", gang[i]); + if (gang[i]->node) + brelse(gang[i]->node); + if (gang[i]->commit_root) + brelse(gang[i]->commit_root); + kfree(gang[i]); + } + } + return 0; +} + int close_ctree(struct btrfs_root *root) { int ret; struct btrfs_trans_handle *trans; + struct btrfs_fs_info *fs_info = root->fs_info; - mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -412,29 +511,26 @@ int close_ctree(struct btrfs_root *root) ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); write_ctree_super(NULL, root); - mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&fs_info->fs_mutex); - if (root->node) - btrfs_block_release(root, root->node); - if (root->fs_info->extent_root->node) - btrfs_block_release(root->fs_info->extent_root, - root->fs_info->extent_root->node); - if (root->fs_info->inode_root->node) - btrfs_block_release(root->fs_info->inode_root, - root->fs_info->inode_root->node); - if (root->fs_info->tree_root->node) - btrfs_block_release(root->fs_info->tree_root, - root->fs_info->tree_root->node); - btrfs_block_release(root, root->commit_root); - btrfs_block_release(root, root->fs_info->sb_buffer); - crypto_free_hash(root->fs_info->hash_tfm); - truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); - iput(root->fs_info->btree_inode); - kfree(root->fs_info->extent_root); - kfree(root->fs_info->inode_root); - kfree(root->fs_info->tree_root); - kobject_unregister(&root->fs_info->kobj); - kfree(root); + if (fs_info->extent_root->node) + btrfs_block_release(fs_info->extent_root, + fs_info->extent_root->node); + if (fs_info->inode_root->node) + btrfs_block_release(fs_info->inode_root, + fs_info->inode_root->node); + if (fs_info->tree_root->node) + btrfs_block_release(fs_info->tree_root, + fs_info->tree_root->node); + btrfs_block_release(root, fs_info->sb_buffer); + crypto_free_hash(fs_info->hash_tfm); + truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + iput(fs_info->btree_inode); + del_fs_roots(fs_info); + kfree(fs_info->extent_root); + kfree(fs_info->inode_root); + kfree(fs_info->tree_root); + kobject_unregister(&fs_info->kobj); return 0; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index ac6764ba8aa6..0ef6e6f714af 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -39,4 +39,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result); +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location); #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 66d9fb2288c3..3c9236ca889c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -436,7 +436,6 @@ int fixup_tree_root_location(struct btrfs_root *root, { struct btrfs_path *path; struct btrfs_root_item *ri; - int ret; if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) return 0; @@ -447,22 +446,19 @@ int fixup_tree_root_location(struct btrfs_root *root, BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_lookup_inode(NULL, root, path, location, 0); - if (ret) - goto out; - ri = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_root_item); + *sub_root = btrfs_read_fs_root(root->fs_info, location); + if (IS_ERR(*sub_root)) + return PTR_ERR(*sub_root); + + ri = &(*sub_root)->root_item; location->objectid = btrfs_root_dirid(ri); location->flags = 0; btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); location->offset = 0; - /* FIXME properly select the root */ - *sub_root = root->fs_info->fs_root; -out: + btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); - return ret; + return 0; } @@ -494,6 +490,15 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (!inode) return ERR_PTR(-EACCES); if (inode->i_state & I_NEW) { + if (sub_root != root) { + ret = radix_tree_insert( + &root->fs_info->fs_roots_radix, + (unsigned long)sub_root, + sub_root); +printk("adding new root for inode %lu\n", inode->i_ino); + igrab(inode); + sub_root->inode = inode; + } BTRFS_I(inode)->root = sub_root; memcpy(&BTRFS_I(inode)->location, &location, sizeof(location)); @@ -605,7 +610,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) struct inode * inode; struct dentry * root_dentry; struct btrfs_super_block *disk_super; - struct btrfs_root *root; + struct btrfs_root *tree_root; struct btrfs_inode *bi; sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -613,14 +618,14 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) sb->s_op = &btrfs_super_ops; sb->s_time_gran = 1; - root = open_ctree(sb); + tree_root = open_ctree(sb); - if (!root) { + if (!tree_root) { printk("btrfs: open_ctree failed\n"); return -EIO; } - sb->s_fs_info = root; - disk_super = root->fs_info->disk_super; + sb->s_fs_info = tree_root; + disk_super = tree_root->fs_info->disk_super; printk("read in super total blocks %Lu root %Lu\n", btrfs_super_total_blocks(disk_super), btrfs_super_root_dir(disk_super)); @@ -630,7 +635,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) bi->location.objectid = inode->i_ino; bi->location.offset = 0; bi->location.flags = 0; - bi->root = root->fs_info->tree_root; + bi->root = tree_root; btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); if (!inode) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 20d84bd03223..83a0194ab163 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -8,6 +8,8 @@ static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; +#define BTRFS_ROOT_TRANS_TAG 0 + #define TRANS_MAGIC 0xE1E10E static void put_transaction(struct btrfs_transaction *transaction) { @@ -31,9 +33,10 @@ static int join_transaction(struct btrfs_root *root) GFP_NOFS); total_trans++; BUG_ON(!cur_trans); + root->fs_info->generation++; root->fs_info->running_transaction = cur_trans; cur_trans->num_writers = 0; - cur_trans->transid = root->root_key.offset + 1; + cur_trans->transid = root->fs_info->generation; init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); cur_trans->magic = TRANS_MAGIC; @@ -51,13 +54,22 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, struct btrfs_trans_handle *h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; + u64 running_trans_id; - /* FIXME, use the right root */ - root = root->fs_info->fs_root; mutex_lock(&root->fs_info->trans_mutex); ret = join_transaction(root); BUG_ON(ret); - h->transid = root->fs_info->running_transaction->transid; + running_trans_id = root->fs_info->running_transaction->transid; + + if (root != root->fs_info->tree_root && root->last_trans < + running_trans_id) { + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root, BTRFS_ROOT_TRANS_TAG); + root->commit_root = root->node; + get_bh(root->node); + } + root->last_trans = running_trans_id; + h->transid = running_trans_id; h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; h->blocks_used = 0; @@ -72,9 +84,6 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans; - /* FIXME, use the right root */ - root = root->fs_info->fs_root; - WARN_ON(trans->magic != TRANS_MAGIC); WARN_ON(trans->magic2 != TRANS_MAGIC); mutex_lock(&root->fs_info->trans_mutex); @@ -145,17 +154,96 @@ static int wait_for_commit(struct btrfs_root *root, return 0; } +struct dirty_root { + struct list_head list; + struct btrfs_key snap_key; + struct buffer_head *commit_root; + struct btrfs_root *root; +}; + +int add_dirty_roots(struct btrfs_trans_handle *trans, + struct radix_tree_root *radix, struct list_head *list) +{ + struct dirty_root *dirty; + struct btrfs_root *gang[8]; + struct btrfs_root *root; + int i; + int ret; + int err; +printk("add dirty\n"); + while(1) { + ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, + ARRAY_SIZE(gang), + BTRFS_ROOT_TRANS_TAG); + if (ret == 0) + break; + for (i = 0; i < ret; i++) { + root = gang[i]; + radix_tree_tag_clear(radix, (unsigned long)root, + BTRFS_ROOT_TRANS_TAG); + if (root->commit_root == root->node) { + WARN_ON(root->node->b_blocknr != + btrfs_root_blocknr(&root->root_item)); + brelse(root->commit_root); + root->commit_root = NULL; + continue; + } + dirty = kmalloc(sizeof(*dirty), GFP_NOFS); + BUG_ON(!dirty); + memcpy(&dirty->snap_key, &root->root_key, + sizeof(root->root_key)); + dirty->commit_root = root->commit_root; + root->commit_root = NULL; + dirty->root = root; +printk("adding dirty root %Lu gen %Lu blocknr %Lu\n", root->root_key.objectid, root->root_key.offset, dirty->commit_root->b_blocknr); + root->root_key.offset = root->fs_info->generation; + btrfs_set_root_blocknr(&root->root_item, + root->node->b_blocknr); + err = btrfs_insert_root(trans, root->fs_info->tree_root, + &root->root_key, + &root->root_item); + BUG_ON(err); + list_add(&dirty->list, list); + } + } +printk("add dirty done\n"); + return 0; +} + +int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) +{ + struct dirty_root *dirty; + struct btrfs_trans_handle *trans; + int ret; + + while(!list_empty(list)) { + dirty = list_entry(list->next, struct dirty_root, list); + list_del_init(&dirty->list); + trans = btrfs_start_transaction(tree_root, 1); +printk("drop snapshot root %p, commit_root blocknr %Lu generation %Lu\n", dirty->root, dirty->commit_root->b_blocknr, dirty->snap_key.offset); + ret = btrfs_drop_snapshot(trans, dirty->root, + dirty->commit_root); + BUG_ON(ret); + +printk("del root objectid %Lu, offset %Lu\n", dirty->snap_key.objectid, dirty->snap_key.offset); + ret = btrfs_del_root(trans, tree_root, &dirty->snap_key); + BUG_ON(ret); + ret = btrfs_end_transaction(trans, tree_root); + BUG_ON(ret); + kfree(dirty); + } + return 0; +} + int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret = 0; - struct buffer_head *snap; - struct btrfs_key snap_key; struct btrfs_transaction *cur_trans; + struct list_head dirty_fs_roots; DEFINE_WAIT(wait); - /* FIXME, use the right root */ - root = root->fs_info->fs_root; + INIT_LIST_HEAD(&dirty_fs_roots); mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { @@ -184,22 +272,13 @@ printk("already in commit!, waiting\n"); } finish_wait(&trans->transaction->writer_wait, &wait); WARN_ON(cur_trans != trans->transaction); - if (root->node != root->commit_root) { - memcpy(&snap_key, &root->root_key, sizeof(snap_key)); - root->root_key.offset++; - } - - if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { - btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, - &root->root_key, &root->root_item); - BUG_ON(ret); - } - + add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; + btrfs_set_super_generation(root->fs_info->disk_super, + root->fs_info->generation + 1); mutex_unlock(&root->fs_info->trans_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); @@ -213,21 +292,8 @@ printk("already in commit!, waiting\n"); put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); kmem_cache_free(btrfs_trans_handle_cachep, trans); - if (root->node != root->commit_root) { - trans = btrfs_start_transaction(root, 1); - snap = root->commit_root; - root->commit_root = root->node; - get_bh(root->node); - ret = btrfs_drop_snapshot(trans, root, snap); - BUG_ON(ret); - ret = btrfs_del_root(trans, root->fs_info->tree_root, - &snap_key); - BUG_ON(ret); - root->fs_info->generation = root->root_key.offset + 1; - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - } + drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); return ret; } From c5739bba5260a59cebd20a51a55080592c8d3b07 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 09:27:04 -0400 Subject: [PATCH 0104/2985] Btrfs: snapshot progress Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 15 ++++- fs/btrfs/dir-item.c | 6 +- fs/btrfs/disk-io.c | 4 -- fs/btrfs/extent-tree.c | 6 ++ fs/btrfs/ioctl.h | 13 ++++ fs/btrfs/root-tree.c | 16 ++++- fs/btrfs/super.c | 135 +++++++++++++++++++++++++++++++++++++++-- 7 files changed, 183 insertions(+), 12 deletions(-) create mode 100644 fs/btrfs/ioctl.h diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5460030c9e6a..2cbcaaeac9e5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -227,7 +227,7 @@ struct btrfs_csum_item { } __attribute__ ((__packed__)); struct btrfs_inode_map_item { - struct btrfs_disk_key key; + u32 refs; } __attribute__ ((__packed__)); struct crypto_hash; @@ -883,6 +883,17 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } +static inline u32 btrfs_inode_map_refs(struct btrfs_inode_map_item *m) +{ + return le32_to_cpu(m->refs); +} + +static inline void btrfs_set_inode_map_refs(struct btrfs_inode_map_item *m, + u32 val) +{ + m->refs = cpu_to_le32(val); +} + static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; @@ -925,6 +936,8 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) btrfs_item_offset((leaf)->items + (slot)))) /* extent-item.c */ +int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 7aed9f015b55..0ba46bc0da9a 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -55,12 +55,14 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_flags(dir_item, 0); btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); + + btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); + /* FIXME, use some real flag for selecting the extra index */ if (root == root->fs_info->tree_root) goto out; - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root, path); btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b557bdd1e26a..6b097ede80b1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,10 +16,6 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) { BUG(); } - if (root->node && btrfs_header_parentid(&node->header) != - btrfs_header_parentid(btrfs_buffer_header(root->node))) { - BUG(); - } return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7c21f63f1b93..efc604eea0b0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -77,6 +77,12 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + return inc_block_ref(trans, root, root->node->b_blocknr, 1); +} + int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h new file mode 100644 index 000000000000..201fb3270723 --- /dev/null +++ b/fs/btrfs/ioctl.h @@ -0,0 +1,13 @@ +#ifndef __IOCTL_ +#define __IOCTL_ +#include + +#define BTRFS_IOCTL_MAGIC 0x94 +#define BTRFS_VOL_NAME_MAX 255 +struct btrfs_ioctl_vol_args { + char name[BTRFS_VOL_NAME_MAX + 1]; +}; + +#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ + struct btrfs_ioctl_vol_args) +#endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index ddc1c13a5352..72be9836932f 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -83,6 +83,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, { struct btrfs_path *path; int ret; + u32 refs; + struct btrfs_root_item *ri; path = btrfs_alloc_path(); BUG_ON(!path); @@ -91,7 +93,19 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (ret < 0) goto out; BUG_ON(ret != 0); - ret = btrfs_del_item(trans, root, path); + ri = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_root_item); + + refs = btrfs_root_refs(ri); + BUG_ON(refs == 0); + if (refs == 1) { + ret = btrfs_del_item(trans, root, path); +printk("deleting root %Lu %Lu %u\n", key->objectid, key->offset, key->flags); + } else { + btrfs_set_root_refs(ri, refs - 1); +printk("ref now %u root %Lu %Lu %u\n", refs -1, key->objectid, key->offset, key->flags); + mark_buffer_dirty(path->nodes[0]); + } out: btrfs_release_path(root, path); btrfs_free_path(path); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3c9236ca889c..bbe5cabcb42e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -15,6 +15,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "ioctl.h" void btrfs_fsinfo_release(struct kobject *obj) { @@ -27,6 +28,11 @@ struct kobj_type btrfs_fsinfo_ktype = { .release = btrfs_fsinfo_release, }; +struct btrfs_iget_args { + u64 ino; + struct btrfs_root *root; +}; + decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL); #define BTRFS_SUPER_MAGIC 0x9123682E @@ -461,6 +467,34 @@ int fixup_tree_root_location(struct btrfs_root *root, return 0; } +int btrfs_init_locked_inode(struct inode *inode, void *p) +{ + struct btrfs_iget_args *args = p; + inode->i_ino = args->ino; + BTRFS_I(inode)->root = args->root; + return 0; +} + +int btrfs_find_actor(struct inode *inode, void *opaque) +{ + struct btrfs_iget_args *args = opaque; + return (args->ino == inode->i_ino && + args->root == BTRFS_I(inode)->root); +} + +struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, + struct btrfs_root *root) +{ + struct inode *inode; + struct btrfs_iget_args args; + args.ino = objectid; + args.root = root; + + inode = iget5_locked(s, objectid, btrfs_find_actor, + btrfs_init_locked_inode, + (void *)&args); + return inode; +} static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) @@ -486,7 +520,8 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(ret); if (ret > 0) return ERR_PTR(-ENOENT); - inode = iget_locked(dir->i_sb, location.objectid); + inode = btrfs_iget_locked(dir->i_sb, location.objectid, + sub_root); if (!inode) return ERR_PTR(-EACCES); if (inode->i_state & I_NEW) { @@ -495,7 +530,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, &root->fs_info->fs_roots_radix, (unsigned long)sub_root, sub_root); -printk("adding new root for inode %lu\n", inode->i_ino); +printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root); igrab(inode); sub_root->inode = inode; } @@ -630,7 +665,8 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) btrfs_super_total_blocks(disk_super), btrfs_super_root_dir(disk_super)); - inode = iget_locked(sb, btrfs_super_root_dir(disk_super)); + inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), + tree_root); bi = BTRFS_I(inode); bi->location.objectid = inode->i_ino; bi->location.offset = 0; @@ -750,7 +786,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mode = mode; inode->i_ino = objectid; inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; fill_inode_item(&inode_item, inode); key->objectid = objectid; @@ -1650,6 +1686,95 @@ static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, return retval; } +static int create_snapshot(struct btrfs_root *root, char *name, int namelen) +{ + struct btrfs_trans_handle *trans; + struct btrfs_key key; + struct btrfs_root_item new_root_item; + int ret; + u64 objectid; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + ret = btrfs_update_inode(trans, root, root->inode); + BUG_ON(ret); + + ret = btrfs_find_free_objectid(trans, root, 0, &objectid); + BUG_ON(ret); + + memset(&new_root_item, 0, sizeof(new_root_item)); + memcpy(&new_root_item, &root->root_item, + sizeof(new_root_item)); + + key.objectid = objectid; + key.flags = 0; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + ret = btrfs_insert_inode_map(trans, root, objectid, &key); + BUG_ON(ret); + + key.objectid = objectid; + key.offset = 1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + btrfs_set_root_blocknr(&new_root_item, root->node->b_blocknr); + + ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, + &new_root_item); + BUG_ON(ret); + +printk("adding snapshot name %.*s root %Lu %Lu %u\n", namelen, name, key.objectid, key.offset, key.flags); + + /* + * insert the directory item + */ + key.offset = (u64)-1; + ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, + name, namelen, + root->fs_info->sb->s_root->d_inode->i_ino, + &key, 0); + + BUG_ON(ret); + + ret = btrfs_inc_root_ref(trans, root); + BUG_ON(ret); + + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + +static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int + cmd, unsigned long arg) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_ioctl_vol_args vol_args; + int ret; + int namelen; + + if (!root->ref_cows) + return -EINVAL; + switch (cmd) { + case BTRFS_IOC_SNAP_CREATE: + if (copy_from_user(&vol_args, + (struct btrfs_ioctl_vol_args __user *)arg, + sizeof(vol_args))) + return -EFAULT; + namelen = strlen(vol_args.name); + if (namelen > BTRFS_VOL_NAME_MAX) + return -EINVAL; + ret = create_snapshot(root, vol_args.name, namelen); + WARN_ON(ret); + break; + default: + return -ENOTTY; + } + return 0; +} + static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; struct kmem_cache *btrfs_transaction_cachep; @@ -1781,6 +1906,7 @@ static struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = btrfs_readdir, + .ioctl = btrfs_ioctl, }; static struct address_space_operations btrfs_aops = { @@ -1803,6 +1929,7 @@ static struct file_operations btrfs_file_operations = { .write = btrfs_file_write, .mmap = generic_file_mmap, .open = generic_file_open, + .ioctl = btrfs_ioctl, }; static int __init init_btrfs_fs(void) From 1b05da2ee6217e7d55460d04335813fec25be4ca Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 12:13:09 -0400 Subject: [PATCH 0105/2985] Btrfs: drop the inode map tree Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 2 - fs/btrfs/ctree.h | 39 +++--------------- fs/btrfs/disk-io.c | 25 +++++------- fs/btrfs/inode-item.c | 2 + fs/btrfs/inode-map.c | 65 ++---------------------------- fs/btrfs/print-tree.c | 8 ---- fs/btrfs/super.c | 89 ++++++------------------------------------ fs/btrfs/transaction.c | 7 ---- 8 files changed, 33 insertions(+), 204 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 3439ffb467b5..43a4f1ddb375 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -2,11 +2,9 @@ #define __BTRFS_I__ struct btrfs_inode { - u32 magic; struct btrfs_root *root; struct btrfs_key location; struct inode vfs_inode; - u32 magic2; }; static inline struct btrfs_inode *BTRFS_I(struct inode *inode) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2cbcaaeac9e5..5103709bb2b9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -14,9 +14,8 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_ROOT_TREE_OBJECTID 1ULL #define BTRFS_EXTENT_TREE_OBJECTID 2ULL -#define BTRFS_INODE_MAP_OBJECTID 3ULL -#define BTRFS_FS_TREE_OBJECTID 4ULL -#define BTRFS_FIRST_FREE_OBJECTID 5ULL +#define BTRFS_FS_TREE_OBJECTID 3ULL +#define BTRFS_FIRST_FREE_OBJECTID 4ULL /* * we can actually store much bigger names, but lets not confuse the rest @@ -62,7 +61,6 @@ struct btrfs_header { __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; __le64 parentid; /* objectid of the tree root */ - __le32 ham; __le16 nritems; __le16 flags; u8 level; @@ -226,23 +224,16 @@ struct btrfs_csum_item { u8 csum[BTRFS_CSUM_SIZE]; } __attribute__ ((__packed__)); -struct btrfs_inode_map_item { - u32 refs; -} __attribute__ ((__packed__)); - struct crypto_hash; struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; - struct btrfs_root *inode_root; struct btrfs_key current_insert; struct btrfs_key last_insert; struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; - u64 last_inode_alloc; u64 generation; - u64 highest_inode; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; struct buffer_head *sb_buffer; @@ -272,6 +263,8 @@ struct btrfs_root { u32 blocksize; int ref_cows; u32 type; + u64 highest_inode; + u64 last_inode_alloc; }; /* the lower bits in the key flags defines the item type */ @@ -320,16 +313,11 @@ struct btrfs_root { */ #define BTRFS_EXTENT_ITEM_KEY 8 -/* - * the inode map records which inode numbers are in use and where - * they actually live on disk - */ -#define BTRFS_INODE_MAP_ITEM_KEY 9 /* * string items are for debugging. They just store a short string of * data in the FS */ -#define BTRFS_STRING_ITEM_KEY 10 +#define BTRFS_STRING_ITEM_KEY 9 static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) { @@ -883,17 +871,6 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } -static inline u32 btrfs_inode_map_refs(struct btrfs_inode_map_item *m) -{ - return le32_to_cpu(m->refs); -} - -static inline void btrfs_set_inode_map_refs(struct btrfs_inode_map_item *m, - u32 val) -{ - m->refs = cpu_to_le32(val); -} - static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; @@ -996,12 +973,6 @@ int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, u64 dirid, u64 *objectid); -int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, struct btrfs_key *location); -int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - u64 objectid, int mod); int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid); /* inode-item.c */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6b097ede80b1..760fdc9a7664 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -264,6 +264,8 @@ static int __setup_root(int blocksize, root->fs_info = fs_info; root->objectid = objectid; root->last_trans = 0; + root->highest_inode = 0; + root->last_inode_alloc = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); return 0; @@ -295,6 +297,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_path *path; struct btrfs_leaf *l; + u64 highest_inode; int ret = 0; printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags); @@ -354,6 +357,12 @@ printk("radix_tree_insert gives us %d\n", ret); kfree(root); return ERR_PTR(ret); } + ret = btrfs_find_highest_inode(root, &highest_inode); + if (ret == 0) { + root->highest_inode = highest_inode; + root->last_inode_alloc = highest_inode; +printk("highest inode is %Lu\n", highest_inode); + } printk("all worked\n"); return root; } @@ -364,8 +373,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *inode_root = kmalloc(sizeof(struct btrfs_root), - GFP_NOFS); struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; @@ -378,9 +385,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; - fs_info->inode_root = inode_root; - fs_info->last_inode_alloc = 0; - fs_info->highest_inode = 0; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -425,14 +429,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); - ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, - BTRFS_INODE_MAP_OBJECTID, inode_root); - BUG_ON(ret); - fs_info->generation = btrfs_super_generation(disk_super) + 1; - ret = btrfs_find_highest_inode(tree_root, &fs_info->last_inode_alloc); - if (ret == 0) - fs_info->highest_inode = fs_info->last_inode_alloc; memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); kobj_set_kset_s(fs_info, btrfs_subsys); kobject_set_name(&fs_info->kobj, "%s", sb->s_id); @@ -512,9 +509,6 @@ int close_ctree(struct btrfs_root *root) if (fs_info->extent_root->node) btrfs_block_release(fs_info->extent_root, fs_info->extent_root->node); - if (fs_info->inode_root->node) - btrfs_block_release(fs_info->inode_root, - fs_info->inode_root->node); if (fs_info->tree_root->node) btrfs_block_release(fs_info->tree_root, fs_info->tree_root->node); @@ -524,7 +518,6 @@ int close_ctree(struct btrfs_root *root) iput(fs_info->btree_inode); del_fs_roots(fs_info); kfree(fs_info->extent_root); - kfree(fs_info->inode_root); kfree(fs_info->tree_root); kobject_unregister(&fs_info->kobj); return 0; diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index b276a3b40a66..c3b990b661c2 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -22,6 +22,8 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root sizeof(*inode_item)); btrfs_release_path(root, path); btrfs_free_path(path); + if (ret == 0 && objectid > root->highest_inode) + root->highest_inode = objectid; return ret; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index f665221409ac..318e27a6378b 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -3,12 +3,11 @@ #include "disk-io.h" #include "transaction.h" -int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid) +int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) { struct btrfs_path *path; int ret; struct btrfs_leaf *l; - struct btrfs_root *root = fs_root->fs_info->inode_root; struct btrfs_key search_key; int slot; @@ -38,7 +37,7 @@ error: * walks the btree of allocated inodes and find a hole. */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, - struct btrfs_root *fs_root, + struct btrfs_root *root, u64 dirid, u64 *objectid) { struct btrfs_path *path; @@ -49,16 +48,13 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, u64 last_ino = 0; int start_found; struct btrfs_leaf *l; - struct btrfs_root *root = fs_root->fs_info->inode_root; struct btrfs_key search_key; u64 search_start = dirid; path = btrfs_alloc_path(); BUG_ON(!path); search_key.flags = 0; - btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); - - search_start = fs_root->fs_info->last_inode_alloc; + search_start = root->last_inode_alloc; search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; search_key.offset = 0; @@ -108,7 +104,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, } // FIXME -ENOSPC found: - root->fs_info->last_inode_alloc = *objectid; + root->last_inode_alloc = *objectid; btrfs_release_path(root, path); btrfs_free_path(path); BUG_ON(*objectid < search_start); @@ -118,56 +114,3 @@ error: btrfs_free_path(path); return ret; } - -int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, - struct btrfs_root *fs_root, - u64 objectid, struct btrfs_key *location) -{ - int ret = 0; - struct btrfs_path *path; - struct btrfs_inode_map_item *inode_item; - struct btrfs_key key; - struct btrfs_root *inode_root = fs_root->fs_info->inode_root; - - key.objectid = objectid; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_INODE_MAP_ITEM_KEY); - key.offset = 0; - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - ret = btrfs_insert_empty_item(trans, inode_root, path, &key, - sizeof(struct btrfs_inode_map_item)); - if (ret) - goto out; - - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_inode_map_item); - btrfs_cpu_key_to_disk(&inode_item->key, location); - btrfs_mark_buffer_dirty(path->nodes[0]); - if (objectid > fs_root->fs_info->highest_inode) - fs_root->fs_info->highest_inode = objectid; -out: - btrfs_release_path(inode_root, path); - btrfs_free_path(path); - return ret; -} - -int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, - struct btrfs_root *fs_root, struct btrfs_path *path, - u64 objectid, int mod) -{ - int ret; - struct btrfs_key key; - int ins_len = mod < 0 ? -1 : 0; - int cow = mod != 0; - struct btrfs_root *inode_root = fs_root->fs_info->inode_root; - - key.objectid = objectid; - key.flags = 0; - key.offset = 0; - btrfs_set_key_type(&key, BTRFS_INODE_MAP_ITEM_KEY); - ret = btrfs_search_slot(trans, inode_root, &key, path, ins_len, cow); - return ret; -} - diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index f0da65c4f96b..854d47d9bdc9 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -10,7 +10,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_extent_item *ei; struct btrfs_root_item *ri; struct btrfs_dir_item *di; - struct btrfs_inode_map_item *mi; struct btrfs_inode_item *ii; u32 type; @@ -54,13 +53,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) printk("\t\textent data refs %u owner %Lu\n", btrfs_extent_refs(ei), btrfs_extent_owner(ei)); break; - case BTRFS_INODE_MAP_ITEM_KEY: - mi = btrfs_item_ptr(l, i, struct btrfs_inode_map_item); - printk("\t\tinode map key %Lu %u %Lu\n", - btrfs_disk_key_objectid(&mi->key), - btrfs_disk_key_flags(&mi->key), - btrfs_disk_key_offset(&mi->key)); - break; case BTRFS_STRING_ITEM_KEY: printk("\t\titem data %.*s\n", btrfs_item_size(item), btrfs_leaf_data(l) + btrfs_item_offset(item)); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index bbe5cabcb42e..3e8bfb0e5d7e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -45,14 +45,6 @@ static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct file_operations btrfs_file_operations; -static int check_inode(struct inode *inode) -{ - struct btrfs_inode *ei = BTRFS_I(inode); - WARN_ON(ei->magic != 0xDEADBEEF); - WARN_ON(ei->magic2 != 0xDEADBEAF); - return 0; -} - static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -66,15 +58,12 @@ static void btrfs_read_locked_inode(struct inode *inode) btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); - check_inode(inode); - memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); if (ret) { btrfs_free_path(path); goto make_bad; } - check_inode(inode); inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_inode_item); @@ -97,7 +86,7 @@ static void btrfs_read_locked_inode(struct inode *inode) inode_item = NULL; mutex_unlock(&root->fs_info->fs_mutex); - check_inode(inode); + switch (inode->i_mode & S_IFMT) { #if 0 default: @@ -121,7 +110,6 @@ static void btrfs_read_locked_inode(struct inode *inode) // inode->i_op = &page_symlink_inode_operations; break; } - check_inode(inode); return; make_bad: @@ -272,10 +260,7 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { - u64 objectid = inode->i_ino; struct btrfs_path *path; - struct btrfs_inode_map_item *map; - struct btrfs_key stat_data_key; int ret; clear_inode(inode); @@ -283,26 +268,11 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - ret = btrfs_lookup_inode_map(trans, root, path, objectid, -1); - if (ret) { - if (ret > 0) - ret = -ENOENT; - goto error; - } - map = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_inode_map_item); - btrfs_disk_key_to_cpu(&stat_data_key, &map->key); - ret = btrfs_del_item(trans, root->fs_info->inode_root, path); - BUG_ON(ret); - btrfs_release_path(root, path); - ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, -1); BUG_ON(ret); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); -error: - btrfs_release_path(root, path); btrfs_free_path(path); return ret; } @@ -432,7 +402,6 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, out: btrfs_release_path(root, path); btrfs_free_path(path); - check_inode(dir); return ret; } @@ -540,9 +509,7 @@ printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_r btrfs_read_locked_inode(inode); unlock_new_inode(inode); } - check_inode(inode); } - check_inode(dir); return d_splice_alias(inode, dentry); } @@ -566,7 +533,6 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) key_type = BTRFS_DIR_ITEM_KEY; - mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; key.flags = 0; @@ -575,9 +541,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) path = btrfs_alloc_path(); btrfs_init_path(path); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { + if (ret < 0) goto err; - } advance = 0; while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); @@ -601,8 +566,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; if (key_type == BTRFS_DIR_INDEX_KEY && - btrfs_disk_key_offset(&item->key) > - root->fs_info->highest_inode) + btrfs_disk_key_offset(&item->key) > root->highest_inode) break; if (btrfs_disk_key_type(&item->key) != key_type) continue; @@ -707,7 +671,6 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(item, inode->i_blocks); btrfs_set_inode_generation(item, inode->i_generation); - check_inode(inode); } static int btrfs_update_inode(struct btrfs_trans_handle *trans, @@ -721,7 +684,6 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, 1); if (ret) { @@ -736,11 +698,11 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, fill_inode_item(inode_item, inode); btrfs_mark_buffer_dirty(path->nodes[0]); + ret = 0; failed: btrfs_release_path(root, path); btrfs_free_path(path); - check_inode(inode); - return 0; + return ret; } static int btrfs_write_inode(struct inode *inode, int wait) @@ -757,7 +719,6 @@ static int btrfs_write_inode(struct inode *inode, int wait) else btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - check_inode(inode); return ret; } @@ -767,7 +728,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct inode *inode; struct btrfs_inode_item inode_item; struct btrfs_root *root = BTRFS_I(dir)->root; - struct btrfs_key *key; + struct btrfs_key *location; int ret; u64 objectid; @@ -776,8 +737,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); BTRFS_I(inode)->root = BTRFS_I(dir)->root; - key = &BTRFS_I(inode)->location; - check_inode(inode); ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); BUG_ON(ret); @@ -788,20 +747,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; fill_inode_item(&inode_item, inode); - - key->objectid = objectid; - key->flags = 0; - key->offset = 0; - btrfs_set_key_type(key, BTRFS_INODE_ITEM_KEY); - ret = btrfs_insert_inode_map(trans, root, objectid, key); - BUG_ON(ret); + location = &BTRFS_I(inode)->location; + location->objectid = objectid; + location->flags = 0; + location->offset = 0; + btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_inode(trans, root, objectid, &inode_item); BUG_ON(ret); insert_inode_hash(inode); - check_inode(inode); - check_inode(dir); return inode; } @@ -825,8 +780,6 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, root, dentry->d_parent->d_inode); } - check_inode(inode); - check_inode(dentry->d_parent->d_inode); return ret; } @@ -840,7 +793,6 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, } if (err > 0) err = -EEXIST; - check_inode(inode); return err; } @@ -872,8 +824,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, out_unlock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - check_inode(inode); - check_inode(dir); if (drop_inode) { inode_dec_link_count(inode); @@ -1701,20 +1651,14 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_update_inode(trans, root, root->inode); BUG_ON(ret); - ret = btrfs_find_free_objectid(trans, root, 0, &objectid); + ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, + 0, &objectid); BUG_ON(ret); memset(&new_root_item, 0, sizeof(new_root_item)); memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); - key.objectid = objectid; - key.flags = 0; - key.offset = 0; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - ret = btrfs_insert_inode_map(trans, root, objectid, &key); - BUG_ON(ret); - key.objectid = objectid; key.offset = 1; key.flags = 0; @@ -1791,21 +1735,14 @@ static struct inode *btrfs_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); if (!ei) return NULL; - ei->magic = 0xDEADBEEF; - ei->magic2 = 0xDEADBEAF; return &ei->vfs_inode; } static void btrfs_destroy_inode(struct inode *inode) { - struct btrfs_inode *ei = BTRFS_I(inode); - WARN_ON(ei->magic != 0xDEADBEEF); - WARN_ON(ei->magic2 != 0xDEADBEAF); WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); - ei->magic = 0; - ei->magic2 = 0; kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 83a0194ab163..54978d1bd958 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -115,14 +115,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_root *inode_root = fs_info->inode_root; - btrfs_set_root_blocknr(&inode_root->root_item, - inode_root->node->b_blocknr); - ret = btrfs_update_root(trans, tree_root, - &inode_root->root_key, - &inode_root->root_item); - BUG_ON(ret); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); if (old_extent_block == extent_root->node->b_blocknr) From d0dbc6245cefa36e19dff49c557ccf05e3063e9c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 12:36:36 -0400 Subject: [PATCH 0106/2985] Btrfs: drop owner and parentid Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 8 -------- fs/btrfs/ctree.h | 25 +------------------------ fs/btrfs/extent-tree.c | 8 ++------ fs/btrfs/file-item.c | 2 +- fs/btrfs/print-tree.c | 4 ++-- fs/btrfs/transaction.c | 6 ------ 6 files changed, 6 insertions(+), 47 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 39b551564fcf..f31ac843bf2e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -688,8 +688,6 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&c->header, level); btrfs_set_header_blocknr(&c->header, t->b_blocknr); btrfs_set_header_generation(&c->header, trans->transid); - btrfs_set_header_parentid(&c->header, - btrfs_header_parentid(btrfs_buffer_header(root->node))); lower = btrfs_buffer_node(path->nodes[level-1]); memcpy(c->header.fsid, root->fs_info->disk_super->fsid, sizeof(c->header.fsid)); @@ -784,8 +782,6 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr); btrfs_set_header_generation(&split->header, trans->transid); - btrfs_set_header_parentid(&split->header, - btrfs_header_parentid(btrfs_buffer_header(root->node))); memcpy(split->header.fsid, root->fs_info->disk_super->fsid, sizeof(split->header.fsid)); mid = (c_nritems + 1) / 2; @@ -1145,8 +1141,6 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); - btrfs_set_header_parentid(&right->header, - btrfs_header_parentid(btrfs_buffer_header(root->node))); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, sizeof(right->header.fsid)); if (mid <= slot) { @@ -1237,8 +1231,6 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); - btrfs_set_header_parentid(&right->header, - btrfs_header_parentid(btrfs_buffer_header(root->node))); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, sizeof(right->header.fsid)); btrfs_cpu_key_to_disk(&disk_key, ins_key); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5103709bb2b9..0ba560f0d2a6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -60,7 +60,6 @@ struct btrfs_header { u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; - __le64 parentid; /* objectid of the tree root */ __le16 nritems; __le16 flags; u8 level; @@ -147,7 +146,6 @@ struct btrfs_path { */ struct btrfs_extent_item { __le32 refs; - __le64 owner; } __attribute__ ((__packed__)); struct btrfs_inode_timespec { @@ -443,16 +441,6 @@ static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts, ts->nsec = cpu_to_le32(val); } -static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) -{ - return le64_to_cpu(ei->owner); -} - -static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) -{ - ei->owner = cpu_to_le64(val); -} - static inline u32 btrfs_extent_refs(struct btrfs_extent_item *ei) { return le32_to_cpu(ei->refs); @@ -652,17 +640,6 @@ static inline void btrfs_set_header_generation(struct btrfs_header *h, h->generation = cpu_to_le64(val); } -static inline u64 btrfs_header_parentid(struct btrfs_header *h) -{ - return le64_to_cpu(h->parentid); -} - -static inline void btrfs_set_header_parentid(struct btrfs_header *h, - u64 parentid) -{ - h->parentid = cpu_to_le64(parentid); -} - static inline u16 btrfs_header_nritems(struct btrfs_header *h) { return le16_to_cpu(h->nritems); @@ -919,7 +896,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_blocks, u64 search_start, u64 - search_end, u64 owner, struct btrfs_key *ins); + search_end, struct btrfs_key *ins); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index efc604eea0b0..be9630df506c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -157,8 +157,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct struct btrfs_fs_info *info = extent_root->fs_info; btrfs_set_extent_refs(&extent_item, 1); - btrfs_set_extent_owner(&extent_item, - btrfs_header_parentid(btrfs_buffer_header(extent_root->node))); ins.offset = 1; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); @@ -457,7 +455,7 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_blocks, u64 search_start, u64 - search_end, u64 owner, struct btrfs_key *ins) + search_end, struct btrfs_key *ins) { int ret; int pending_ret; @@ -467,7 +465,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item extent_item; btrfs_set_extent_refs(&extent_item, 1); - btrfs_set_extent_owner(&extent_item, owner); if (root == extent_root) { BUG_ON(extent_root->fs_info->current_insert.offset == 0); @@ -510,8 +507,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int ret; struct buffer_head *buf; - ret = btrfs_alloc_extent(trans, root, 1, 0, (unsigned long)-1, - btrfs_header_parentid(btrfs_buffer_header(root->node)), &ins); + ret = btrfs_alloc_extent(trans, root, 1, 0, (unsigned long)-1, &ins); if (ret) { BUG(); return NULL; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 2c98b620c594..d9fd7f4d3794 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -19,7 +19,7 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, BUG_ON(!path); btrfs_init_path(path); ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, - (u64)-1, objectid, &ins); + (u64)-1, &ins); BUG_ON(ret); file_key.objectid = objectid; file_key.offset = offset; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 854d47d9bdc9..0732a2fbb23a 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -50,8 +50,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) break; case BTRFS_EXTENT_ITEM_KEY: ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); - printk("\t\textent data refs %u owner %Lu\n", - btrfs_extent_refs(ei), btrfs_extent_owner(ei)); + printk("\t\textent data refs %u\n", + btrfs_extent_refs(ei)); break; case BTRFS_STRING_ITEM_KEY: printk("\t\titem data %.*s\n", btrfs_item_size(item), diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 54978d1bd958..f64c1729b0e1 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -163,7 +163,6 @@ int add_dirty_roots(struct btrfs_trans_handle *trans, int i; int ret; int err; -printk("add dirty\n"); while(1) { ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, ARRAY_SIZE(gang), @@ -188,7 +187,6 @@ printk("add dirty\n"); dirty->commit_root = root->commit_root; root->commit_root = NULL; dirty->root = root; -printk("adding dirty root %Lu gen %Lu blocknr %Lu\n", root->root_key.objectid, root->root_key.offset, dirty->commit_root->b_blocknr); root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); @@ -199,7 +197,6 @@ printk("adding dirty root %Lu gen %Lu blocknr %Lu\n", root->root_key.objectid, r list_add(&dirty->list, list); } } -printk("add dirty done\n"); return 0; } @@ -213,12 +210,10 @@ int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); trans = btrfs_start_transaction(tree_root, 1); -printk("drop snapshot root %p, commit_root blocknr %Lu generation %Lu\n", dirty->root, dirty->commit_root->b_blocknr, dirty->snap_key.offset); ret = btrfs_drop_snapshot(trans, dirty->root, dirty->commit_root); BUG_ON(ret); -printk("del root objectid %Lu, offset %Lu\n", dirty->snap_key.objectid, dirty->snap_key.offset); ret = btrfs_del_root(trans, tree_root, &dirty->snap_key); BUG_ON(ret); ret = btrfs_end_transaction(trans, tree_root); @@ -240,7 +235,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { -printk("already in commit!, waiting\n"); cur_trans = trans->transaction; trans->transaction->use_count++; btrfs_end_transaction(trans, root); From 48ddc6f44b5a167c00573b6012727d674e9c667a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 13:20:21 -0400 Subject: [PATCH 0107/2985] Btrfs: cow file extents before writing Signed-off-by: Chris Mason --- fs/btrfs/super.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3e8bfb0e5d7e..91aa1b4c50af 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1020,7 +1020,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, - iblock << inode->i_blkbits, 0); + iblock << inode->i_blkbits, 1); if (ret < 0) { err = ret; goto out; @@ -1040,9 +1040,25 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, blocknr = btrfs_file_extent_disk_blocknr(item); blocknr += btrfs_file_extent_offset(item); - /* exact match found, use it */ + /* exact match found, use it, FIXME, deal with extents + * other than the page size + */ if (ret == 0) { err = 0; + BUG_ON(btrfs_file_extent_disk_num_blocks(item) != 1); + if (btrfs_file_extent_generation(item) != trans->transid) { + struct btrfs_key ins; + ret = btrfs_alloc_extent(trans, root, 1, + blocknr, (u64)-1, &ins); + BUG_ON(ret); + btrfs_set_file_extent_disk_blocknr(item, ins.objectid); + mark_buffer_dirty(path->nodes[0]); + ret = btrfs_free_extent(trans, root, + blocknr, 1, 0); + BUG_ON(ret); + blocknr = ins.objectid; + + } map_bh(result, inode->i_sb, blocknr); goto out; } From 79b2cb1fa6430501907cb1384abbddf074833e45 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 13:51:41 -0400 Subject: [PATCH 0108/2985] Btrfs: only cow in get_block when create==1 Signed-off-by: Chris Mason --- fs/btrfs/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 91aa1b4c50af..a3183972f310 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1020,7 +1020,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, - iblock << inode->i_blkbits, 1); + iblock << inode->i_blkbits, create); if (ret < 0) { err = ret; goto out; @@ -1046,7 +1046,8 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (ret == 0) { err = 0; BUG_ON(btrfs_file_extent_disk_num_blocks(item) != 1); - if (btrfs_file_extent_generation(item) != trans->transid) { + if (create && + btrfs_file_extent_generation(item) != trans->transid) { struct btrfs_key ins; ret = btrfs_alloc_extent(trans, root, 1, blocknr, (u64)-1, &ins); From 2932f3ec94ff0212313b8187064f93bcb76ce411 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 14:22:02 -0400 Subject: [PATCH 0109/2985] Btrfs: when forced to cow for file_write, get the page uptodate first Signed-off-by: Chris Mason --- fs/btrfs/super.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a3183972f310..b93d790e7484 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1268,6 +1268,8 @@ static int prepare_pages(struct btrfs_trans_handle *trans, struct page **pages, size_t num_pages, loff_t pos, + unsigned long first_index, + unsigned long last_index, size_t write_bytes) { int i; @@ -1289,6 +1291,13 @@ static int prepare_pages(struct btrfs_trans_handle *trans, } offset = pos & (PAGE_CACHE_SIZE -1); this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + if (!PageUptodate(pages[i]) && + (pages[i]->index == first_index || + pages[i]->index == last_index) && pos < isize) { + ret = mpage_readpage(pages[i], btrfs_get_block); + BUG_ON(ret); + lock_page(pages[i]); + } ret = nobh_prepare_write(pages[i], offset, offset + this_write, btrfs_get_block); @@ -1323,6 +1332,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct page *pages[1]; + unsigned long first_index; + unsigned long last_index; if (file->f_flags & O_DIRECT) return -EINVAL; @@ -1340,13 +1351,15 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out; file_update_time(file); mutex_lock(&inode->i_mutex); + first_index = pos >> PAGE_CACHE_SHIFT; + last_index = (pos + count) >> PAGE_CACHE_SHIFT; while(count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; ret = prepare_pages(NULL, root, file, pages, num_pages, - pos, write_bytes); + pos, first_index, last_index, write_bytes); BUG_ON(ret); ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); From 2619ba1f0ff9540a9d84683310a1e350b5efde3d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 16:58:11 -0400 Subject: [PATCH 0110/2985] Btrfs: subvolumes Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/disk-io.c | 38 +++++---- fs/btrfs/super.c | 178 +++++++++++++++++++++++++++++++++++------ fs/btrfs/transaction.c | 8 +- 4 files changed, 182 insertions(+), 43 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 73d4b842a316..4cd11a1b4b32 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,6 +8,7 @@ * Check compat and incompat flags on the inode * Add virtual filesystems, mountable snapshots * Get rid of struct ctree_path, limiting tree levels held at one time +* EEXIST for dirs instead of hash overflow * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 760fdc9a7664..ff69162829b3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -301,6 +301,12 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, int ret = 0; printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags); + root = radix_tree_lookup(&fs_info->fs_roots_radix, + (unsigned long)location->objectid); + if (root) { +printk("found %p in cache\n", root); + return root; + } root = kmalloc(sizeof(*root), GFP_NOFS); if (!root) { printk("failed1\n"); @@ -349,7 +355,8 @@ out: insert: printk("inserting %p\n", root); root->ref_cows = 1; - ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root, + ret = radix_tree_insert(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, root); if (ret) { printk("radix_tree_insert gives us %d\n", ret); @@ -460,6 +467,20 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +static int free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) +{ + radix_tree_delete(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid); + if (root->inode) + iput(root->inode); + if (root->node) + brelse(root->node); + if (root->commit_root) + brelse(root->commit_root); + kfree(root); + return 0; +} + int del_fs_roots(struct btrfs_fs_info *fs_info) { int ret; @@ -472,19 +493,8 @@ int del_fs_roots(struct btrfs_fs_info *fs_info) ARRAY_SIZE(gang)); if (!ret) break; - for (i = 0; i < ret; i++) { - radix_tree_delete(&fs_info->fs_roots_radix, - (unsigned long)gang[i]); - if (gang[i]->inode) - iput(gang[i]->inode); - else - printk("no inode for root %p\n", gang[i]); - if (gang[i]->node) - brelse(gang[i]->node); - if (gang[i]->commit_root) - brelse(gang[i]->commit_root); - kfree(gang[i]); - } + for (i = 0; i < ret; i++) + free_fs_root(fs_info, gang[i]); } return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b93d790e7484..84abdde4e301 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -495,10 +495,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-EACCES); if (inode->i_state & I_NEW) { if (sub_root != root) { - ret = radix_tree_insert( - &root->fs_info->fs_roots_radix, - (unsigned long)sub_root, - sub_root); printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root); igrab(inode); sub_root->inode = inode; @@ -723,22 +719,19 @@ static int btrfs_write_inode(struct inode *inode, int wait) } static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, - struct inode *dir, int mode) + struct btrfs_root *root, + u64 objectid, int mode) { struct inode *inode; struct btrfs_inode_item inode_item; - struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_key *location; int ret; - u64 objectid; - inode = new_inode(dir->i_sb); + inode = new_inode(root->fs_info->sb); if (!inode) return ERR_PTR(-ENOMEM); - BTRFS_I(inode)->root = BTRFS_I(dir)->root; - ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); - BUG_ON(ret); + BTRFS_I(inode)->root = root; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; @@ -804,10 +797,18 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, struct inode *inode; int err; int drop_inode = 0; + u64 objectid; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - inode = btrfs_new_inode(trans, dir, mode); + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, mode); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; @@ -833,9 +834,9 @@ out_unlock: } static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *dir) + struct btrfs_root *root, + u64 objectid, u64 dirid) { - struct btrfs_root *root = BTRFS_I(dir)->root; int ret; char buf[2]; struct btrfs_key key; @@ -843,22 +844,20 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, buf[0] = '.'; buf[1] = '.'; - key.objectid = inode->i_ino; + key.objectid = objectid; key.offset = 0; key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - ret = btrfs_insert_dir_item(trans, root, buf, 1, inode->i_ino, + ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, &key, 1); if (ret) goto error; - key.objectid = dir->i_ino; - ret = btrfs_insert_dir_item(trans, root, buf, 2, inode->i_ino, + key.objectid = dirid; + ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, &key, 1); if (ret) goto error; - inode->i_size = 6; - ret = btrfs_update_inode(trans, root, inode); error: return ret; } @@ -870,6 +869,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct btrfs_root *root = BTRFS_I(dir)->root; int err = 0; int drop_on_err = 0; + u64 objectid; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -877,7 +877,14 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = PTR_ERR(trans); goto out_unlock; } - inode = btrfs_new_inode(trans, dir, S_IFDIR | mode); + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, S_IFDIR | mode); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fail; @@ -886,7 +893,12 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; - err = btrfs_make_empty_dir(trans, inode, dir); + err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino); + if (err) + goto out_fail; + + inode->i_size = 6; + err = btrfs_update_inode(trans, root, inode); if (err) goto out_fail; err = btrfs_add_link(trans, dentry, inode); @@ -1666,6 +1678,102 @@ static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, return retval; } +static int create_subvol(struct btrfs_root *root, char *name, int namelen) +{ + struct btrfs_trans_handle *trans; + struct btrfs_key key; + struct btrfs_root_item root_item; + struct btrfs_inode_item *inode_item; + struct buffer_head *subvol; + struct btrfs_leaf *leaf; + struct btrfs_root *new_root; + struct inode *inode; + int ret; + u64 objectid; + u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + subvol = btrfs_alloc_free_block(trans, root); + leaf = btrfs_buffer_leaf(subvol); + btrfs_set_header_nritems(&leaf->header, 0); + btrfs_set_header_level(&leaf->header, 0); + btrfs_set_header_blocknr(&leaf->header, subvol->b_blocknr); + btrfs_set_header_generation(&leaf->header, trans->transid); + memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, + sizeof(leaf->header.fsid)); + + inode_item = &root_item.inode; + memset(inode_item, 0, sizeof(*inode_item)); + btrfs_set_inode_generation(inode_item, 1); + btrfs_set_inode_size(inode_item, 3); + btrfs_set_inode_nlink(inode_item, 1); + btrfs_set_inode_nblocks(inode_item, 1); + btrfs_set_inode_mode(inode_item, S_IFDIR | 0755); + + btrfs_set_root_blocknr(&root_item, subvol->b_blocknr); + btrfs_set_root_refs(&root_item, 1); + + mark_buffer_dirty(subvol); + brelse(subvol); + subvol = NULL; + + ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, + 0, &objectid); + BUG_ON(ret); + + btrfs_set_root_dirid(&root_item, new_dirid); + + key.objectid = objectid; + key.offset = 1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, + &root_item); + BUG_ON(ret); + + /* + * insert the directory item + */ + key.offset = (u64)-1; + ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, + name, namelen, + root->fs_info->sb->s_root->d_inode->i_ino, + &key, 0); + BUG_ON(ret); + + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + + new_root = btrfs_read_fs_root(root->fs_info, &key); + BUG_ON(!new_root); + + trans = btrfs_start_transaction(new_root, 1); + BUG_ON(!trans); + + inode = btrfs_new_inode(trans, new_root, new_dirid, S_IFDIR | 0700); + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; + + ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid); + BUG_ON(ret); + + inode->i_nlink = 1; + inode->i_size = 6; + ret = btrfs_update_inode(trans, new_root, inode); + BUG_ON(ret); + + ret = btrfs_commit_transaction(trans, new_root); + BUG_ON(ret); + + iput(inode); + + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + static int create_snapshot(struct btrfs_root *root, char *name, int namelen) { struct btrfs_trans_handle *trans; @@ -1674,6 +1782,9 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) int ret; u64 objectid; + if (!root->ref_cows) + return -EINVAL; + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); @@ -1685,7 +1796,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) 0, &objectid); BUG_ON(ret); - memset(&new_root_item, 0, sizeof(new_root_item)); memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); @@ -1728,9 +1838,9 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int struct btrfs_ioctl_vol_args vol_args; int ret; int namelen; + struct btrfs_path *path; + u64 root_dirid; - if (!root->ref_cows) - return -EINVAL; switch (cmd) { case BTRFS_IOC_SNAP_CREATE: if (copy_from_user(&vol_args, @@ -1740,7 +1850,23 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int namelen = strlen(vol_args.name); if (namelen > BTRFS_VOL_NAME_MAX) return -EINVAL; - ret = create_snapshot(root, vol_args.name, namelen); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + root_dirid = btrfs_root_dirid( + &root->fs_info->tree_root->root_item); + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, + path, root_dirid, + vol_args.name, namelen, 0); + mutex_unlock(&root->fs_info->fs_mutex); + if (ret == 0) + return -EEXIST; + + if (root == root->fs_info->tree_root) + ret = create_subvol(root, vol_args.name, namelen); + else + ret = create_snapshot(root, vol_args.name, namelen); WARN_ON(ret); break; default: diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f64c1729b0e1..e15a072407bf 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -64,7 +64,8 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, if (root != root->fs_info->tree_root && root->last_trans < running_trans_id) { radix_tree_tag_set(&root->fs_info->fs_roots_radix, - (unsigned long)root, BTRFS_ROOT_TRANS_TAG); + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); root->commit_root = root->node; get_bh(root->node); } @@ -171,8 +172,9 @@ int add_dirty_roots(struct btrfs_trans_handle *trans, break; for (i = 0; i < ret; i++) { root = gang[i]; - radix_tree_tag_clear(radix, (unsigned long)root, - BTRFS_ROOT_TRANS_TAG); + radix_tree_tag_clear(radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { WARN_ON(root->node->b_blocknr != btrfs_root_blocknr(&root->root_item)); From 2d13d8d065cc7bbee6b1ad6ee3fe965d66badfe6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 20:07:20 -0400 Subject: [PATCH 0111/2985] Btrfs: detect duplicate subvol names Signed-off-by: Chris Mason --- fs/btrfs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 84abdde4e301..2972aaa9eb7e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1853,13 +1853,13 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int path = btrfs_alloc_path(); if (!path) return -ENOMEM; - root_dirid = btrfs_root_dirid( - &root->fs_info->tree_root->root_item); + root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, root_dirid, vol_args.name, namelen, 0); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_free_path(path); if (ret == 0) return -EEXIST; From cac87faa09f56776602d4b6f15c1bd44f6da300e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Apr 2007 08:59:20 -0400 Subject: [PATCH 0112/2985] Btrfs: use a dedicated inode num for root root dir Signed-off-by: Chris Mason --- fs/btrfs/TODO | 9 --------- fs/btrfs/ctree.h | 3 ++- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 4cd11a1b4b32..1c3acfcf146e 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -6,9 +6,7 @@ * Make sure nobh stuff is working properly for cows * Do actual block accounting * Check compat and incompat flags on the inode -* Add virtual filesystems, mountable snapshots * Get rid of struct ctree_path, limiting tree levels held at one time -* EEXIST for dirs instead of hash overflow * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) @@ -20,10 +18,3 @@ * fsck * Scrub & defrag ---- -metata FS ideas: -mount metadata FS on /sys/fs/btrfs/dev/ -subvolumes in /sys/fs/btrfs/dev/fs/ -snapshots in /sys/fs/btrfs/dev/fs/snapshots -mount -o bind -ioctl to create a snapshot diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0ba560f0d2a6..3f3a23f52a3b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -15,7 +15,8 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_ROOT_TREE_OBJECTID 1ULL #define BTRFS_EXTENT_TREE_OBJECTID 2ULL #define BTRFS_FS_TREE_OBJECTID 3ULL -#define BTRFS_FIRST_FREE_OBJECTID 4ULL +#define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL +#define BTRFS_FIRST_FREE_OBJECTID 5ULL /* * we can actually store much bigger names, but lets not confuse the rest From 0bd93ba022f5c1ddb4e5b722b02f3c4a4a8865ad Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Apr 2007 13:57:44 -0400 Subject: [PATCH 0113/2985] Btrfs: early support for multiple devices Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 70 ++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/disk-io.c | 11 ++++++++ 2 files changed, 76 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3f3a23f52a3b..01310de2bf4a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -13,10 +13,11 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_BtRfS_M" #define BTRFS_ROOT_TREE_OBJECTID 1ULL -#define BTRFS_EXTENT_TREE_OBJECTID 2ULL -#define BTRFS_FS_TREE_OBJECTID 3ULL -#define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL -#define BTRFS_FIRST_FREE_OBJECTID 5ULL +#define BTRFS_DEV_TREE_OBJECTID 2ULL +#define BTRFS_EXTENT_TREE_OBJECTID 3ULL +#define BTRFS_FS_TREE_OBJECTID 4ULL +#define BTRFS_ROOT_TREE_DIR_OBJECTID 5ULL +#define BTRFS_FIRST_FREE_OBJECTID 6ULL /* * we can actually store much bigger names, but lets not confuse the rest @@ -90,6 +91,10 @@ struct btrfs_super_block { __le64 total_blocks; __le64 blocks_used; __le64 root_dir_objectid; + /* fields below here vary with the underlying disk */ + __le64 device_block_start; + __le64 device_num_blocks; + __le64 device_root; } __attribute__ ((__packed__)); /* @@ -223,10 +228,15 @@ struct btrfs_csum_item { u8 csum[BTRFS_CSUM_SIZE]; } __attribute__ ((__packed__)); +struct btrfs_device_item { + __le16 pathlen; +} __attribute__ ((__packed__)); + struct crypto_hash; struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; + struct btrfs_root *dev_root; struct btrfs_key current_insert; struct btrfs_key last_insert; struct radix_tree_root fs_roots_radix; @@ -312,11 +322,16 @@ struct btrfs_root { */ #define BTRFS_EXTENT_ITEM_KEY 8 +/* + * dev items list the devices that make up the FS + */ +#define BTRFS_DEV_ITEM_KEY 9 + /* * string items are for debugging. They just store a short string of * data in the FS */ -#define BTRFS_STRING_ITEM_KEY 9 +#define BTRFS_STRING_ITEM_KEY 10 static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) { @@ -782,6 +797,40 @@ static inline void btrfs_set_super_root_dir(struct btrfs_super_block *s, u64 s->root_dir_objectid = cpu_to_le64(val); } +static inline u64 btrfs_super_device_block_start(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->device_block_start); +} + +static inline void btrfs_set_super_device_block_start(struct btrfs_super_block + *s, u64 val) +{ + s->device_block_start = cpu_to_le64(val); +} + +static inline u64 btrfs_super_device_num_blocks(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->device_num_blocks); +} + +static inline void btrfs_set_super_device_num_blocks(struct btrfs_super_block + *s, u64 val) +{ + s->device_num_blocks = cpu_to_le64(val); +} + +static inline u64 btrfs_super_device_root(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->device_root); +} + +static inline void btrfs_set_super_device_root(struct btrfs_super_block + *s, u64 val) +{ + s->device_root = cpu_to_le64(val); +} + + static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) { return (u8 *)l->items; @@ -849,6 +898,17 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } +static inline u16 btrfs_device_pathlen(struct btrfs_device_item *d) +{ + return le16_to_cpu(d->pathlen); +} + +static inline void btrfs_set_device_pathlen(struct btrfs_device_item *d, + u16 val) +{ + d->pathlen = cpu_to_le16(val); +} + static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ff69162829b3..e09233262af6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -378,6 +378,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); + struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), @@ -392,6 +394,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; + fs_info->dev_root = dev_root; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -414,6 +417,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); + __setup_root(sb->s_blocksize, dev_root, + fs_info, BTRFS_DEV_TREE_OBJECTID); + __setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); fs_info->sb_buffer = read_tree_block(tree_root, @@ -427,6 +433,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) return NULL; fs_info->disk_super = disk_super; + dev_root->node = read_tree_block(tree_root, + btrfs_super_device_root(disk_super)); tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); @@ -519,6 +527,9 @@ int close_ctree(struct btrfs_root *root) if (fs_info->extent_root->node) btrfs_block_release(fs_info->extent_root, fs_info->extent_root->node); + if (fs_info->dev_root->node) + btrfs_block_release(fs_info->dev_root, + fs_info->dev_root->node); if (fs_info->tree_root->node) btrfs_block_release(fs_info->tree_root, fs_info->tree_root->node); From 7eccb903a817e890c947ba4bc90c6a9af9b4219a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Apr 2007 15:53:25 -0400 Subject: [PATCH 0114/2985] Btrfs: create a logical->phsyical block number mapping scheme Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 38 +++++------ fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 140 ++++++++++++++++++++++++++++++----------- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent-tree.c | 8 +-- fs/btrfs/super.c | 6 +- fs/btrfs/transaction.c | 8 +-- 7 files changed, 134 insertions(+), 68 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f31ac843bf2e..e2fc6f0d0243 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -66,21 +66,21 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); - btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); + btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); btrfs_set_header_generation(&cow_node->header, trans->transid); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; get_bh(cow); if (buf != root->commit_root) { - btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); + btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); } btrfs_block_release(root, buf); } else { btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, - cow->b_blocknr); + bh_blocknr(cow)); btrfs_mark_buffer_dirty(parent); - btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); + btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); } btrfs_block_release(root, buf); mark_buffer_dirty(cow); @@ -312,7 +312,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root */ if (!parent_buf) { struct buffer_head *child; - u64 blocknr = mid_buf->b_blocknr; + u64 blocknr = bh_blocknr(mid_buf); if (btrfs_header_nritems(&mid->header) != 1) return 0; @@ -361,7 +361,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; if (btrfs_header_nritems(&right->header) == 0) { - u64 blocknr = right_buf->b_blocknr; + u64 blocknr = bh_blocknr(right_buf); clean_tree_block(trans, root, right_buf); wait_on_buffer(right_buf); btrfs_block_release(root, right_buf); @@ -400,7 +400,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = mid_buf->b_blocknr; + u64 blocknr = bh_blocknr(mid_buf); clean_tree_block(trans, root, mid_buf); wait_on_buffer(mid_buf); btrfs_block_release(root, mid_buf); @@ -686,7 +686,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); btrfs_set_header_level(&c->header, level); - btrfs_set_header_blocknr(&c->header, t->b_blocknr); + btrfs_set_header_blocknr(&c->header, bh_blocknr(t)); btrfs_set_header_generation(&c->header, trans->transid); lower = btrfs_buffer_node(path->nodes[level-1]); memcpy(c->header.fsid, root->fs_info->disk_super->fsid, @@ -697,7 +697,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root lower_key = &lower->ptrs[0].key; btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); - btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); + btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1])); btrfs_mark_buffer_dirty(t); @@ -780,7 +780,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); - btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr); + btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer)); btrfs_set_header_generation(&split->header, trans->transid); memcpy(split->header.fsid, root->fs_info->disk_super->fsid, sizeof(split->header.fsid)); @@ -794,7 +794,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(t); btrfs_mark_buffer_dirty(split_buffer); wret = insert_ptr(trans, root, path, &split->ptrs[0].key, - split_buffer->b_blocknr, path->slots[level + 1] + 1, + bh_blocknr(split_buffer), path->slots[level + 1] + 1, level + 1); if (wret) ret = wret; @@ -1138,7 +1138,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); + btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, @@ -1152,7 +1152,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&right->header, 0); wret = insert_ptr(trans, root, path, &disk_key, - right_buffer->b_blocknr, + bh_blocknr(right_buffer), path->slots[1] + 1, 1); if (wret) ret = wret; @@ -1173,7 +1173,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&right->header, 0); wret = insert_ptr(trans, root, path, &disk_key, - right_buffer->b_blocknr, + bh_blocknr(right_buffer), path->slots[1] - 1, 1); if (wret) ret = wret; @@ -1207,7 +1207,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&l->header, mid); ret = 0; wret = insert_ptr(trans, root, path, &right->items[0].key, - right_buffer->b_blocknr, path->slots[1] + 1, 1); + bh_blocknr(right_buffer), path->slots[1] + 1, 1); if (wret) ret = wret; btrfs_mark_buffer_dirty(right_buffer); @@ -1228,7 +1228,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); + btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, @@ -1237,7 +1237,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&right->header, 0); wret = insert_ptr(trans, root, path, &disk_key, - right_buffer->b_blocknr, + bh_blocknr(right_buffer), path->slots[1], 1); if (wret) ret = wret; @@ -1456,7 +1456,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - leaf_buf->b_blocknr, 1, 1); + bh_blocknr(leaf_buf), 1, 1); if (wret) ret = wret; } @@ -1487,7 +1487,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = wret; } if (btrfs_header_nritems(&leaf->header) == 0) { - u64 blocknr = leaf_buf->b_blocknr; + u64 blocknr = bh_blocknr(leaf_buf); clean_tree_block(trans, root, leaf_buf); wait_on_buffer(leaf_buf); wret = del_ptr(trans, root, path, 1, slot); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 01310de2bf4a..454eb88611bb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -242,6 +242,7 @@ struct btrfs_fs_info { struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; + struct radix_tree_root dev_radix; u64 generation; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e09233262af6..c872a7e67abd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -10,10 +10,30 @@ #include "transaction.h" #include "btrfs_inode.h" +struct dev_lookup { + u64 block_start; + u64 num_blocks; + struct block_device *bdev; +}; + +u64 bh_blocknr(struct buffer_head *bh) +{ + int blkbits = bh->b_page->mapping->host->i_blkbits; + u64 blocknr = bh->b_page->index << (PAGE_CACHE_SHIFT - blkbits); + unsigned long offset; + + if (PageHighMem(bh->b_page)) + offset = (unsigned long)bh->b_data; + else + offset = bh->b_data - (char *)page_address(bh->b_page); + blocknr += offset >> (PAGE_CACHE_SHIFT - blkbits); + return blocknr; +} + static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); - if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) { + if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { BUG(); } return 0; @@ -40,7 +60,7 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) head = page_buffers(page); bh = head; do { - if (buffer_mapped(bh) && bh->b_blocknr == blocknr) { + if (buffer_mapped(bh) && bh_blocknr(bh) == blocknr) { ret = bh; get_bh(bh); goto out_unlock; @@ -56,6 +76,33 @@ out_unlock: return ret; } +static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, + u64 logical) +{ + struct dev_lookup *lookup[2]; + char b[BDEVNAME_SIZE]; + + int ret; + + root = root->fs_info->dev_root; + ret = radix_tree_gang_lookup(&root->fs_info->dev_radix, + (void **)lookup, + (unsigned long)logical, + ARRAY_SIZE(lookup)); + if (ret == 0 || lookup[0]->block_start > logical || + lookup[0]->block_start + lookup[0]->num_blocks <= logical) { + ret = -ENOENT; + goto out; + } + bh->b_bdev = lookup[0]->bdev; + bh->b_blocknr = logical - lookup[0]->block_start; +printk("logical mapping %Lu to %lu bdev %s\n", logical, bh->b_blocknr, bdevname(bh->b_bdev, b)); + set_buffer_mapped(bh); + ret = 0; +out: + return ret; +} + struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { @@ -66,6 +113,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, struct buffer_head *bh; struct buffer_head *head; struct buffer_head *ret = NULL; + int err; u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); page = grab_cache_page(mapping, index); @@ -78,11 +126,10 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh = head; do { if (!buffer_mapped(bh)) { - bh->b_bdev = root->fs_info->sb->s_bdev; - bh->b_blocknr = first_block; - set_buffer_mapped(bh); + err = map_bh_to_logical(root, bh, first_block); + BUG_ON(err); } - if (bh->b_blocknr == blocknr) { + if (bh_blocknr(bh) == blocknr) { ret = bh; get_bh(bh); goto out_unlock; @@ -98,38 +145,13 @@ out_unlock: return ret; } -static sector_t max_block(struct block_device *bdev) -{ - sector_t retval = ~((sector_t)0); - loff_t sz = i_size_read(bdev->bd_inode); - - if (sz) { - unsigned int size = block_size(bdev); - unsigned int sizebits = blksize_bits(size); - retval = (sz >> sizebits); - } - return retval; -} - static int btree_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create) { - if (iblock >= max_block(inode->i_sb->s_bdev)) { - if (create) - return -EIO; - - /* - * for reads, we're just trying to fill a partial page. - * return a hole, they will have to call get_block again - * before they can fill it, and they will get -EIO at that - * time - */ - return 0; - } - bh->b_bdev = inode->i_sb->s_bdev; - bh->b_blocknr = iblock; - set_buffer_mapped(bh); - return 0; + int err; + struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; + err = map_bh_to_logical(root, bh, iblock); + return err; } int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, @@ -164,8 +186,8 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, return ret; if (verify) { if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) { - printk("checksum verify failed on %lu\n", - bh->b_blocknr); + printk("checksum verify failed on %Lu\n", + bh_blocknr(bh)); return 1; } } else { @@ -386,10 +408,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); int ret; struct btrfs_super_block *disk_super; + struct dev_lookup *dev_lookup; init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); + INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; @@ -422,6 +446,13 @@ struct btrfs_root *open_ctree(struct super_block *sb) __setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + + dev_lookup = kmalloc(sizeof(*dev_lookup), GFP_NOFS); + dev_lookup->block_start = 0; + dev_lookup->num_blocks = (u32)-2; + dev_lookup->bdev = sb->s_bdev; + ret = radix_tree_insert(&fs_info->dev_radix, (u32)-2, dev_lookup); + BUG_ON(ret); fs_info->sb_buffer = read_tree_block(tree_root, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); @@ -432,6 +463,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) return NULL; + radix_tree_delete(&fs_info->dev_radix, (u32)-2); + dev_lookup->block_start = btrfs_super_device_block_start(disk_super); + dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); + ret = radix_tree_insert(&fs_info->dev_radix, + dev_lookup->block_start + + dev_lookup->num_blocks, dev_lookup); + BUG_ON(ret); + fs_info->disk_super = disk_super; dev_root->node = read_tree_block(tree_root, btrfs_super_device_root(disk_super)); @@ -459,7 +498,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *bh = root->fs_info->sb_buffer; btrfs_set_super_root(root->fs_info->disk_super, - root->fs_info->tree_root->node->b_blocknr); + bh_blocknr(root->fs_info->tree_root->node)); lock_buffer(bh); WARN_ON(atomic_read(&bh->b_count) < 1); clear_buffer_dirty(bh); @@ -506,6 +545,29 @@ int del_fs_roots(struct btrfs_fs_info *fs_info) } return 0; } +static int free_dev_radix(struct btrfs_fs_info *fs_info) +{ + struct dev_lookup *lookup[8]; + struct block_device *super_bdev = fs_info->sb->s_bdev; + int ret; + int i; + while(1) { + ret = radix_tree_gang_lookup(&fs_info->dev_radix, + (void **)lookup, 0, + ARRAY_SIZE(lookup)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + if (lookup[i]->bdev != super_bdev) + close_bdev_excl(lookup[i]->bdev); + radix_tree_delete(&fs_info->dev_radix, + lookup[i]->block_start + + lookup[i]->num_blocks); + kfree(lookup[i]); + } + } + return 0; +} int close_ctree(struct btrfs_root *root) { @@ -537,6 +599,8 @@ int close_ctree(struct btrfs_root *root) crypto_free_hash(fs_info->hash_tfm); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); + + free_dev_radix(fs_info); del_fs_roots(fs_info); kfree(fs_info->extent_root); kfree(fs_info->tree_root); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 0ef6e6f714af..46cc5dc9d572 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -41,4 +41,5 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location); +u64 bh_blocknr(struct buffer_head *bh); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index be9630df506c..d560831c10a7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -80,7 +80,7 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return inc_block_ref(trans, root, root->node->b_blocknr, 1); + return inc_block_ref(trans, root, bh_blocknr(root->node), 1); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -563,7 +563,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr, + ret = lookup_block_ref(trans, root, bh_blocknr(path->nodes[*level]), 1, &refs); BUG_ON(ret); if (refs > 1) @@ -607,7 +607,7 @@ out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = btrfs_free_extent(trans, root, - path->nodes[*level]->b_blocknr, 1, 1); + bh_blocknr(path->nodes[*level]), 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -635,7 +635,7 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } else { ret = btrfs_free_extent(trans, root, - path->nodes[*level]->b_blocknr, + bh_blocknr(path->nodes[*level]), 1, 1); BUG_ON(ret); btrfs_block_release(root, path->nodes[*level]); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2972aaa9eb7e..4ff0cc1efb13 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1700,7 +1700,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) leaf = btrfs_buffer_leaf(subvol); btrfs_set_header_nritems(&leaf->header, 0); btrfs_set_header_level(&leaf->header, 0); - btrfs_set_header_blocknr(&leaf->header, subvol->b_blocknr); + btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); btrfs_set_header_generation(&leaf->header, trans->transid); memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, sizeof(leaf->header.fsid)); @@ -1713,7 +1713,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_inode_nblocks(inode_item, 1); btrfs_set_inode_mode(inode_item, S_IFDIR | 0755); - btrfs_set_root_blocknr(&root_item, subvol->b_blocknr); + btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); mark_buffer_dirty(subvol); @@ -1803,7 +1803,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) key.offset = 1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - btrfs_set_root_blocknr(&new_root_item, root->node->b_blocknr); + btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e15a072407bf..3a15943ea8ed 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -119,10 +119,10 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == extent_root->node->b_blocknr) + if (old_extent_block == bh_blocknr(extent_root->node)) break; btrfs_set_root_blocknr(&extent_root->root_item, - extent_root->node->b_blocknr); + bh_blocknr(extent_root->node)); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -176,7 +176,7 @@ int add_dirty_roots(struct btrfs_trans_handle *trans, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { - WARN_ON(root->node->b_blocknr != + WARN_ON(bh_blocknr(root->node) != btrfs_root_blocknr(&root->root_item)); brelse(root->commit_root); root->commit_root = NULL; @@ -191,7 +191,7 @@ int add_dirty_roots(struct btrfs_trans_handle *trans, dirty->root = root; root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, - root->node->b_blocknr); + bh_blocknr(root->node)); err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); From 8352d8a473ac84bf7a1c69690b626946d744ca58 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Apr 2007 10:43:05 -0400 Subject: [PATCH 0115/2985] Btrfs: add disk ioctl, mostly working Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 128 ++++++++++++++++++++++++++++++++++++++--- fs/btrfs/disk-io.h | 6 ++ fs/btrfs/ioctl.h | 2 + fs/btrfs/super.c | 96 +++++++++++++++++++++++++++---- fs/btrfs/transaction.c | 6 ++ 5 files changed, 221 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c872a7e67abd..3ba4df20c913 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,6 +16,28 @@ struct dev_lookup { struct block_device *bdev; }; +int btrfs_insert_dev_radix(struct btrfs_root *root, + struct block_device *bdev, + u64 block_start, + u64 num_blocks) +{ + struct dev_lookup *lookup; + char b[BDEVNAME_SIZE]; + int ret; + + lookup = kmalloc(sizeof(*lookup), GFP_NOFS); + if (!lookup) + return -ENOMEM; + lookup->block_start = block_start; + lookup->num_blocks = num_blocks; + lookup->bdev = bdev; +printk("inserting %s into dev radix %Lu %Lu\n", bdevname(bdev, b), block_start, num_blocks); + + ret = radix_tree_insert(&root->fs_info->dev_radix, block_start + + num_blocks - 1, lookup); + return ret; +} + u64 bh_blocknr(struct buffer_head *bh) { int blkbits = bh->b_page->mapping->host->i_blkbits; @@ -34,6 +56,8 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { + printk(KERN_CRIT "bh_blocknr(buf) is %Lu, header is %Lu\n", + bh_blocknr(buf), btrfs_header_blocknr(&node->header)); BUG(); } return 0; @@ -76,11 +100,10 @@ out_unlock: return ret; } -static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, +int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical) { struct dev_lookup *lookup[2]; - char b[BDEVNAME_SIZE]; int ret; @@ -96,7 +119,6 @@ static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, } bh->b_bdev = lookup[0]->bdev; bh->b_blocknr = logical - lookup[0]->block_start; -printk("logical mapping %Lu to %lu bdev %s\n", logical, bh->b_blocknr, bdevname(bh->b_bdev, b)); set_buffer_mapped(bh); ret = 0; out: @@ -126,7 +148,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh = head; do { if (!buffer_mapped(bh)) { - err = map_bh_to_logical(root, bh, first_block); + err = btrfs_map_bh_to_logical(root, bh, first_block); BUG_ON(err); } if (bh_blocknr(bh) == blocknr) { @@ -150,7 +172,7 @@ static int btree_get_block(struct inode *inode, sector_t iblock, { int err; struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - err = map_bh_to_logical(root, bh, iblock); + err = btrfs_map_bh_to_logical(root, bh, iblock); return err; } @@ -396,6 +418,89 @@ printk("all worked\n"); return root; } +int btrfs_open_disk(struct btrfs_root *root, u64 block_start, u64 num_blocks, + char *filename, int name_len) +{ + char *null_filename; + struct block_device *bdev; + int ret; + + if (block_start == 0) { +printk("skipping disk with block_start == 0\n"); +return 0; + } + null_filename = kmalloc(name_len + 1, GFP_NOFS); + if (!null_filename) + return -ENOMEM; + memcpy(null_filename, filename, name_len); + null_filename[name_len] = '\0'; + + bdev = open_bdev_excl(null_filename, O_RDWR, root->fs_info->sb); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); + goto out; + } + set_blocksize(bdev, root->fs_info->sb->s_blocksize); + ret = btrfs_insert_dev_radix(root, bdev, block_start, num_blocks); + BUG_ON(ret); + ret = 0; +out: + kfree(null_filename); + return ret; +} + +static int read_device_info(struct btrfs_root *root) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct btrfs_leaf *leaf; + struct btrfs_device_item *dev_item; + int nritems; + int slot; + + root = root->fs_info->dev_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + key.objectid = 0; + key.offset = 0; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY); + + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + while(1) { + slot = path->slots[0]; + if (slot >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + break; + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path->slots[0]; + } + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (btrfs_key_type(&key) != BTRFS_DEV_ITEM_KEY) { + path->slots[0]++; + continue; + } + dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_device_item); +printk("found key %Lu %Lu\n", key.objectid, key.offset); + ret = btrfs_open_disk(root, key.objectid, key.offset, + (char *)(dev_item + 1), + btrfs_device_pathlen(dev_item)); + BUG_ON(ret); + path->slots[0]++; + } + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), @@ -463,17 +568,26 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) return NULL; + i_size_write(fs_info->btree_inode, + btrfs_super_total_blocks(disk_super) << + fs_info->btree_inode->i_blkbits); + radix_tree_delete(&fs_info->dev_radix, (u32)-2); dev_lookup->block_start = btrfs_super_device_block_start(disk_super); dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); ret = radix_tree_insert(&fs_info->dev_radix, dev_lookup->block_start + - dev_lookup->num_blocks, dev_lookup); + dev_lookup->num_blocks - 1, dev_lookup); BUG_ON(ret); fs_info->disk_super = disk_super; + dev_root->node = read_tree_block(tree_root, btrfs_super_device_root(disk_super)); + + ret = read_device_info(dev_root); + BUG_ON(ret); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); @@ -562,7 +676,7 @@ static int free_dev_radix(struct btrfs_fs_info *fs_info) close_bdev_excl(lookup[i]->bdev); radix_tree_delete(&fs_info->dev_radix, lookup[i]->block_start + - lookup[i]->num_blocks); + lookup[i]->num_blocks - 1); kfree(lookup[i]); } } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 46cc5dc9d572..aa9f56c38419 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -42,4 +42,10 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location); u64 bh_blocknr(struct buffer_head *bh); +int btrfs_insert_dev_radix(struct btrfs_root *root, + struct block_device *bdev, + u64 block_start, + u64 num_blocks); +int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, + u64 logical); #endif diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 201fb3270723..497fadd98c3a 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -10,4 +10,6 @@ struct btrfs_ioctl_vol_args { #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_ADD_DISK _IOW(BTRFS_IOCTL_MAGIC, 2, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4ff0cc1efb13..c46d7eafcf62 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1073,6 +1073,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, } map_bh(result, inode->i_sb, blocknr); + btrfs_map_bh_to_logical(root, result, blocknr); goto out; } @@ -1092,7 +1093,8 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, extent_end = extent_start + btrfs_file_extent_num_blocks(item); if (iblock >= extent_start && iblock < extent_end) { err = 0; - map_bh(result, inode->i_sb, blocknr + iblock - extent_start); + btrfs_map_bh_to_logical(root, result, blocknr + iblock - + extent_start); goto out; } allocate: @@ -1112,6 +1114,7 @@ allocate: set_buffer_new(result); map_bh(result, inode->i_sb, blocknr); + btrfs_map_bh_to_logical(root, result, blocknr); out: btrfs_release_path(root, path); btrfs_free_path(path); @@ -1153,12 +1156,6 @@ static int btrfs_readpage(struct file *file, struct page *page) return mpage_readpage(page, btrfs_get_block); } -static int btrfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) -{ - return mpage_readpages(mapping, pages, nr_pages, btrfs_get_block); -} - static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { return nobh_writepage(page, btrfs_get_block, wbc); @@ -1831,12 +1828,81 @@ printk("adding snapshot name %.*s root %Lu %Lu %u\n", namelen, name, key.objecti return 0; } +static int add_disk(struct btrfs_root *root, char *name, int namelen) +{ + struct block_device *bdev; + struct btrfs_path *path; + struct super_block *sb = root->fs_info->sb; + struct btrfs_root *dev_root = root->fs_info->dev_root; + struct btrfs_trans_handle *trans; + struct btrfs_device_item *dev_item; + struct btrfs_key key; + u16 item_size; + u64 num_blocks; + u64 new_blocks; + int ret; +printk("adding disk %s\n", name); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super); + bdev = open_bdev_excl(name, O_RDWR, sb); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); +printk("open bdev excl failed ret %d\n", ret); + goto out_nolock; + } + set_blocksize(bdev, sb->s_blocksize); + new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits; + key.objectid = num_blocks; + key.offset = new_blocks; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY); + + mutex_lock(&dev_root->fs_info->fs_mutex); + trans = btrfs_start_transaction(dev_root, 1); + item_size = sizeof(*dev_item) + namelen; +printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size); + ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size); + if (ret) { +printk("insert failed %d\n", ret); + close_bdev_excl(bdev); + if (ret > 0) + ret = -EEXIST; + goto out; + } + dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_device_item); + btrfs_set_device_pathlen(dev_item, namelen); + memcpy(dev_item + 1, name, namelen); + mark_buffer_dirty(path->nodes[0]); + + ret = btrfs_insert_dev_radix(root, bdev, num_blocks, new_blocks); + + if (!ret) { + btrfs_set_super_total_blocks(root->fs_info->disk_super, + num_blocks + new_blocks); + i_size_write(root->fs_info->btree_inode, + (num_blocks + new_blocks) << + root->fs_info->btree_inode->i_blkbits); + } + +out: + ret = btrfs_commit_transaction(trans, dev_root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); +out_nolock: + btrfs_free_path(path); + + return ret; +} + static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; - int ret; + int ret = 0; int namelen; struct btrfs_path *path; u64 root_dirid; @@ -1869,10 +1935,21 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int ret = create_snapshot(root, vol_args.name, namelen); WARN_ON(ret); break; + case BTRFS_IOC_ADD_DISK: + if (copy_from_user(&vol_args, + (struct btrfs_ioctl_vol_args __user *)arg, + sizeof(vol_args))) + return -EFAULT; + namelen = strlen(vol_args.name); + if (namelen > BTRFS_VOL_NAME_MAX) + return -EINVAL; + vol_args.name[namelen] = '\0'; + ret = add_disk(root, vol_args.name, namelen); + break; default: return -ENOTTY; } - return 0; + return ret; } static struct kmem_cache *btrfs_inode_cachep; @@ -2004,7 +2081,6 @@ static struct file_operations btrfs_dir_file_operations = { static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, - .readpages = btrfs_readpages, .writepage = btrfs_writepage, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3a15943ea8ed..8740752f3845 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -116,7 +116,13 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *dev_root = fs_info->dev_root; + if (btrfs_super_device_root(fs_info->disk_super) != + bh_blocknr(dev_root->node)) { + btrfs_set_super_device_root(fs_info->disk_super, + bh_blocknr(dev_root->node)); + } while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); if (old_extent_block == bh_blocknr(extent_root->node)) From b4100d64241fed93a3f821ddf59d11ab4443a3ba Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Apr 2007 12:14:00 -0400 Subject: [PATCH 0116/2985] Btrfs: add a device id to device items Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 36 ++++++++++++++++++++++++++++++++++++ fs/btrfs/disk-io.c | 31 ++++++++++++++++++++----------- fs/btrfs/disk-io.h | 1 + fs/btrfs/super.c | 9 ++++++++- 4 files changed, 65 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 454eb88611bb..ef3583cf74d6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -91,10 +91,12 @@ struct btrfs_super_block { __le64 total_blocks; __le64 blocks_used; __le64 root_dir_objectid; + __le64 last_device_id; /* fields below here vary with the underlying disk */ __le64 device_block_start; __le64 device_num_blocks; __le64 device_root; + __le64 device_id; } __attribute__ ((__packed__)); /* @@ -230,6 +232,7 @@ struct btrfs_csum_item { struct btrfs_device_item { __le16 pathlen; + __le64 device_id; } __attribute__ ((__packed__)); struct crypto_hash; @@ -798,6 +801,28 @@ static inline void btrfs_set_super_root_dir(struct btrfs_super_block *s, u64 s->root_dir_objectid = cpu_to_le64(val); } +static inline u64 btrfs_super_last_device_id(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->last_device_id); +} + +static inline void btrfs_set_super_last_device_id(struct btrfs_super_block *s, + u64 val) +{ + s->last_device_id = cpu_to_le64(val); +} + +static inline u64 btrfs_super_device_id(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->device_id); +} + +static inline void btrfs_set_super_device_id(struct btrfs_super_block *s, + u64 val) +{ + s->device_id = cpu_to_le64(val); +} + static inline u64 btrfs_super_device_block_start(struct btrfs_super_block *s) { return le64_to_cpu(s->device_block_start); @@ -910,6 +935,17 @@ static inline void btrfs_set_device_pathlen(struct btrfs_device_item *d, d->pathlen = cpu_to_le16(val); } +static inline u64 btrfs_device_id(struct btrfs_device_item *d) +{ + return le64_to_cpu(d->device_id); +} + +static inline void btrfs_set_device_id(struct btrfs_device_item *d, + u64 val) +{ + d->device_id = cpu_to_le64(val); +} + static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3ba4df20c913..06b969c14625 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -13,11 +13,13 @@ struct dev_lookup { u64 block_start; u64 num_blocks; + u64 device_id; struct block_device *bdev; }; int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, + u64 device_id, u64 block_start, u64 num_blocks) { @@ -31,6 +33,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, lookup->block_start = block_start; lookup->num_blocks = num_blocks; lookup->bdev = bdev; + lookup->device_id = device_id; printk("inserting %s into dev radix %Lu %Lu\n", bdevname(bdev, b), block_start, num_blocks); ret = radix_tree_insert(&root->fs_info->dev_radix, block_start + @@ -418,17 +421,14 @@ printk("all worked\n"); return root; } -int btrfs_open_disk(struct btrfs_root *root, u64 block_start, u64 num_blocks, - char *filename, int name_len) +static int btrfs_open_disk(struct btrfs_root *root, u64 device_id, + u64 block_start, u64 num_blocks, + char *filename, int name_len) { char *null_filename; struct block_device *bdev; int ret; - if (block_start == 0) { -printk("skipping disk with block_start == 0\n"); -return 0; - } null_filename = kmalloc(name_len + 1, GFP_NOFS); if (!null_filename) return -ENOMEM; @@ -441,7 +441,8 @@ return 0; goto out; } set_blocksize(bdev, root->fs_info->sb->s_blocksize); - ret = btrfs_insert_dev_radix(root, bdev, block_start, num_blocks); + ret = btrfs_insert_dev_radix(root, bdev, device_id, + block_start, num_blocks); BUG_ON(ret); ret = 0; out: @@ -490,10 +491,14 @@ static int read_device_info(struct btrfs_root *root) } dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_device_item); printk("found key %Lu %Lu\n", key.objectid, key.offset); - ret = btrfs_open_disk(root, key.objectid, key.offset, - (char *)(dev_item + 1), - btrfs_device_pathlen(dev_item)); - BUG_ON(ret); + if (btrfs_device_id(dev_item) != + btrfs_super_device_id(root->fs_info->disk_super)) { + ret = btrfs_open_disk(root, btrfs_device_id(dev_item), + key.objectid, key.offset, + (char *)(dev_item + 1), + btrfs_device_pathlen(dev_item)); + BUG_ON(ret); + } path->slots[0]++; } btrfs_free_path(path); @@ -556,6 +561,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) dev_lookup->block_start = 0; dev_lookup->num_blocks = (u32)-2; dev_lookup->bdev = sb->s_bdev; + dev_lookup->device_id = 0; ret = radix_tree_insert(&fs_info->dev_radix, (u32)-2, dev_lookup); BUG_ON(ret); fs_info->sb_buffer = read_tree_block(tree_root, @@ -575,6 +581,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) radix_tree_delete(&fs_info->dev_radix, (u32)-2); dev_lookup->block_start = btrfs_super_device_block_start(disk_super); dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); + dev_lookup->device_id = btrfs_super_device_id(disk_super); + ret = radix_tree_insert(&fs_info->dev_radix, dev_lookup->block_start + dev_lookup->num_blocks - 1, dev_lookup); @@ -659,6 +667,7 @@ int del_fs_roots(struct btrfs_fs_info *fs_info) } return 0; } + static int free_dev_radix(struct btrfs_fs_info *fs_info) { struct dev_lookup *lookup[8]; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index aa9f56c38419..444ebb0141ae 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -44,6 +44,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, u64 bh_blocknr(struct buffer_head *bh); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, + u64 device_id, u64 block_start, u64 num_blocks); int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c46d7eafcf62..8dcf600e39ef 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1840,7 +1840,9 @@ static int add_disk(struct btrfs_root *root, char *name, int namelen) u16 item_size; u64 num_blocks; u64 new_blocks; + u64 device_id; int ret; + printk("adding disk %s\n", name); path = btrfs_alloc_path(); if (!path) @@ -1875,9 +1877,14 @@ printk("insert failed %d\n", ret); path->slots[0], struct btrfs_device_item); btrfs_set_device_pathlen(dev_item, namelen); memcpy(dev_item + 1, name, namelen); + + device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1; + btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id); + btrfs_set_device_id(dev_item, device_id); mark_buffer_dirty(path->nodes[0]); - ret = btrfs_insert_dev_radix(root, bdev, num_blocks, new_blocks); + ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks, + new_blocks); if (!ret) { btrfs_set_super_total_blocks(root->fs_info->disk_super, From 6567e837df07e43bffc08ac40858af8133a007bf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 16 Apr 2007 09:22:45 -0400 Subject: [PATCH 0117/2985] Btrfs: early work to file_write in big extents Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 54 +++++++++++++++++++++ fs/btrfs/ctree.h | 15 ++++++ fs/btrfs/file-item.c | 111 ++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/super.c | 75 ++++++++++++++++++++++------- 4 files changed, 232 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e2fc6f0d0243..6b76fd967153 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1249,6 +1249,60 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, u32 data_size) +{ + int ret = 0; + int slot; + int slot_orig; + struct btrfs_leaf *leaf; + struct buffer_head *leaf_buf; + u32 nritems; + unsigned int data_end; + unsigned int old_data; + unsigned int old_size; + int i; + + slot_orig = path->slots[0]; + leaf_buf = path->nodes[0]; + leaf = btrfs_buffer_leaf(leaf_buf); + + nritems = btrfs_header_nritems(&leaf->header); + data_end = leaf_data_end(root, leaf); + + if (btrfs_leaf_free_space(root, leaf) < data_size) + BUG(); + slot = path->slots[0]; + old_data = btrfs_item_end(leaf->items + slot); + + BUG_ON(slot < 0); + BUG_ON(slot >= nritems); + + /* + * item0..itemN ... dataN.offset..dataN.size .. data0.size + */ + /* first correct the data pointers */ + for (i = slot; i < nritems; i++) { + u32 ioff = btrfs_item_offset(leaf->items + i); + btrfs_set_item_offset(leaf->items + i, + ioff - data_size); + } + /* shift the data */ + btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + data_end - data_size, btrfs_leaf_data(leaf) + + data_end, old_data - data_end); + data_end = old_data; + old_size = btrfs_item_size(leaf->items + slot); + btrfs_set_item_size(leaf->items + slot, old_size + data_size); + btrfs_mark_buffer_dirty(leaf_buf); + + ret = 0; + if (btrfs_leaf_free_space(root, leaf) < 0) + BUG(); + check_leaf(root, path, 0); + return ret; +} + /* * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ef3583cf74d6..796f19d03ab5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -227,6 +227,7 @@ struct btrfs_file_extent_item { } __attribute__ ((__packed__)); struct btrfs_csum_item { + __le64 extent_offset; u8 csum[BTRFS_CSUM_SIZE]; } __attribute__ ((__packed__)); @@ -924,6 +925,17 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } +static inline u64 btrfs_csum_extent_offset(struct btrfs_csum_item *c) +{ + return le64_to_cpu(c->extent_offset); +} + +static inline void btrfs_set_csum_extent_offset(struct btrfs_csum_item *c, + u64 val) +{ + c->extent_offset = cpu_to_le64(val); +} + static inline u16 btrfs_device_pathlen(struct btrfs_device_item *d) { return le16_to_cpu(d->pathlen); @@ -1002,6 +1014,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); /* ctree.c */ +int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, u32 data_size); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); @@ -1071,6 +1085,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, + u64 extent_offset, char *data, size_t len); int btrfs_csum_verify_file_block(struct btrfs_root *root, u64 objectid, u64 offset, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index d9fd7f4d3794..93d42d650820 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -3,6 +3,9 @@ #include "disk-io.h" #include "transaction.h" +#define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \ + sizeof(struct btrfs_item)) / \ + sizeof(struct btrfs_csum_item)) - 1)) int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, @@ -43,6 +46,54 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, return 0; } +static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, + struct btrfs_path *path, + u64 objectid, u64 offset) +{ + int ret; + struct btrfs_key file_key; + struct btrfs_key found_key; + struct btrfs_csum_item *item; + struct btrfs_leaf *leaf; + u64 csum_offset = 0; + + file_key.objectid = objectid; + file_key.offset = offset; + file_key.flags = 0; + btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0); + if (ret < 0) + goto fail; + leaf = btrfs_buffer_leaf(path->nodes[0]); + if (ret > 0) { + ret = 1; + if (path->slots[0] == 0) + goto fail; + path->slots[0]--; + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[path->slots[0]].key); + if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || + found_key.objectid != objectid) { + goto fail; + } + csum_offset = (offset - found_key.offset) >> + root->fs_info->sb->s_blocksize_bits; + if (csum_offset >= + btrfs_item_size(leaf->items + path->slots[0]) / + sizeof(struct btrfs_csum_item)) { + goto fail; + } + } + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); + item += csum_offset; + return item; +fail: + if (ret > 0) + ret = -EIO; + return ERR_PTR(ret); +} + + int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, @@ -52,11 +103,16 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_key file_key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; + struct btrfs_csum_item *csum_item; + csum_item = __lookup_csum_item(root, path, objectid, offset); + if (IS_ERR(csum_item)) + return PTR_ERR(csum_item); file_key.objectid = objectid; - file_key.offset = offset; + file_key.offset = btrfs_csum_extent_offset(csum_item); file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); + btrfs_release_path(root, path); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); return ret; } @@ -64,12 +120,16 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, + u64 extent_offset, char *data, size_t len) { int ret; struct btrfs_key file_key; + struct btrfs_key found_key; struct btrfs_path *path; struct btrfs_csum_item *item; + struct btrfs_leaf *leaf; + u64 csum_offset; path = btrfs_alloc_path(); BUG_ON(!path); @@ -78,14 +138,50 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + ret = btrfs_search_slot(trans, root, &file_key, path, + sizeof(struct btrfs_csum_item), 1); + if (ret < 0) + goto fail; + if (ret == 0) { + csum_offset = 0; + goto csum; + } + if (path->slots[0] == 0) { + btrfs_release_path(root, path); + goto insert; + } + path->slots[0]--; + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); + csum_offset = (offset - found_key.offset) >> + root->fs_info->sb->s_blocksize_bits; + if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || + found_key.objectid != objectid || + csum_offset >= MAX_CSUM_ITEMS(root)) { + btrfs_release_path(root, path); + goto insert; + } + if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / + sizeof(struct btrfs_csum_item)) { + ret = btrfs_extend_item(trans, root, path, + sizeof(struct btrfs_csum_item)); + BUG_ON(ret); + goto csum; + } + +insert: + csum_offset = 0; ret = btrfs_insert_empty_item(trans, root, path, &file_key, - BTRFS_CSUM_SIZE); + sizeof(struct btrfs_csum_item)); if (ret != 0 && ret != -EEXIST) goto fail; +csum: item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_csum_item); ret = 0; + item += csum_offset; ret = btrfs_csum_data(root, data, len, item->csum); + btrfs_set_csum_extent_offset(item, extent_offset); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); @@ -111,12 +207,13 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0); - if (ret) + + item = __lookup_csum_item(root, path, objectid, offset); + if (IS_ERR(item)) { + ret = PTR_ERR(item); goto fail; - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_csum_item); - ret = 0; + } + ret = btrfs_csum_data(root, data, len, result); WARN_ON(ret); if (memcmp(result, item->csum, BTRFS_CSUM_SIZE)) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8dcf600e39ef..ec689992fdf4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1027,8 +1027,10 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - if (create) + if (create) { trans = btrfs_start_transaction(root, 1); + WARN_ON(1); + } ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, @@ -1055,9 +1057,8 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, /* exact match found, use it, FIXME, deal with extents * other than the page size */ - if (ret == 0) { + if (0 && ret == 0) { err = 0; - BUG_ON(btrfs_file_extent_disk_num_blocks(item) != 1); if (create && btrfs_file_extent_generation(item) != trans->transid) { struct btrfs_key ins; @@ -1072,7 +1073,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, blocknr = ins.objectid; } - map_bh(result, inode->i_sb, blocknr); btrfs_map_bh_to_logical(root, result, blocknr); goto out; } @@ -1231,6 +1231,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct file *file, struct page **pages, size_t num_pages, + u64 extent_offset, loff_t pos, size_t write_bytes) { @@ -1250,6 +1251,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, trans = btrfs_start_transaction(root, 1); btrfs_csum_file_block(trans, root, inode->i_ino, pages[i]->index << PAGE_CACHE_SHIFT, + extent_offset, kmap(pages[i]), PAGE_CACHE_SIZE); kunmap(pages[i]); SetPageChecked(pages[i]); @@ -1279,7 +1281,8 @@ static int prepare_pages(struct btrfs_trans_handle *trans, loff_t pos, unsigned long first_index, unsigned long last_index, - size_t write_bytes) + size_t write_bytes, + u64 alloc_extent_start) { int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; @@ -1288,6 +1291,8 @@ static int prepare_pages(struct btrfs_trans_handle *trans, int err = 0; int ret; int this_write; + struct buffer_head *bh; + struct buffer_head *head; loff_t isize = i_size_read(inode); memset(pages, 0, num_pages * sizeof(struct page *)); @@ -1307,14 +1312,20 @@ static int prepare_pages(struct btrfs_trans_handle *trans, BUG_ON(ret); lock_page(pages[i]); } - ret = nobh_prepare_write(pages[i], offset, - offset + this_write, - btrfs_get_block); + create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, + (1 << BH_Uptodate)); + head = page_buffers(pages[i]); + bh = head; + do { + err = btrfs_map_bh_to_logical(root, bh, + alloc_extent_start); + BUG_ON(err); + if (err) + goto failed_truncate; + bh = bh->b_this_page; + alloc_extent_start++; + } while (bh != head); pos += this_write; - if (ret) { - err = ret; - goto failed_truncate; - } WARN_ON(this_write > write_bytes); write_bytes -= this_write; } @@ -1343,11 +1354,23 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct page *pages[1]; unsigned long first_index; unsigned long last_index; + u64 start_pos; + u64 num_blocks; + u64 alloc_extent_start; + u64 orig_extent_start; + struct btrfs_trans_handle *trans; if (file->f_flags & O_DIRECT) return -EINVAL; pos = *ppos; + start_pos = pos & ~(root->blocksize - 1); + /* FIXME */ + if (start_pos != pos) + return -EINVAL; + num_blocks = (count + pos - start_pos + root->blocksize - 1) >> + inode->i_blkbits; + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); @@ -1362,20 +1385,41 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + goto out_unlock; + } + ret = btrfs_alloc_file_extent(trans, root, inode->i_ino, + start_pos, num_blocks, 1, + &alloc_extent_start); + BUG_ON(ret); + + orig_extent_start = start_pos; + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + while(count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; ret = prepare_pages(NULL, root, file, pages, num_pages, - pos, first_index, last_index, write_bytes); + pos, first_index, last_index, + write_bytes, alloc_extent_start); BUG_ON(ret); + /* FIXME blocks != pagesize */ + alloc_extent_start += num_pages; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); BUG_ON(ret); ret = dirty_and_release_pages(NULL, root, file, pages, - num_pages, pos, write_bytes); + num_pages, orig_extent_start, + pos, write_bytes); BUG_ON(ret); btrfs_drop_pages(pages, num_pages); @@ -1387,6 +1431,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, balance_dirty_pages_ratelimited(inode->i_mapping); cond_resched(); } +out_unlock: mutex_unlock(&inode->i_mutex); out: *ppos = pos; @@ -1806,8 +1851,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) &new_root_item); BUG_ON(ret); -printk("adding snapshot name %.*s root %Lu %Lu %u\n", namelen, name, key.objectid, key.offset, key.flags); - /* * insert the directory item */ From b18c6685810af8e6763760711aece31ccc7a8ea8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 17 Apr 2007 13:26:50 -0400 Subject: [PATCH 0118/2985] Btrfs: progress on file_write Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 57 +++++++++ fs/btrfs/ctree.h | 21 ++- fs/btrfs/extent-tree.c | 20 +-- fs/btrfs/file-item.c | 61 ++++++--- fs/btrfs/super.c | 284 ++++++++++++++++++++++++++++++++++++++--- 5 files changed, 394 insertions(+), 49 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6b76fd967153..43d4779abdd1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1249,6 +1249,63 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +int btrfs_truncate_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u32 new_size) +{ + int ret = 0; + int slot; + int slot_orig; + struct btrfs_leaf *leaf; + struct buffer_head *leaf_buf; + u32 nritems; + unsigned int data_end; + unsigned int old_data_start; + unsigned int old_size; + unsigned int size_diff; + int i; + + slot_orig = path->slots[0]; + leaf_buf = path->nodes[0]; + leaf = btrfs_buffer_leaf(leaf_buf); + + nritems = btrfs_header_nritems(&leaf->header); + data_end = leaf_data_end(root, leaf); + + slot = path->slots[0]; + old_data_start = btrfs_item_offset(leaf->items + slot); + old_size = btrfs_item_size(leaf->items + slot); + BUG_ON(old_size <= new_size); + size_diff = old_size - new_size; + + BUG_ON(slot < 0); + BUG_ON(slot >= nritems); + + /* + * item0..itemN ... dataN.offset..dataN.size .. data0.size + */ + /* first correct the data pointers */ + for (i = slot; i < nritems; i++) { + u32 ioff = btrfs_item_offset(leaf->items + i); + btrfs_set_item_offset(leaf->items + i, + ioff + size_diff); + } + /* shift the data */ +printk("truncate item, new_size %u old_size %u, diff %u, bufp %p, dst, %p, num %u, old_data_start %u, data_end %u\n", new_size, old_size, size_diff, leaf, btrfs_leaf_data(leaf) + data_end + size_diff, old_data_start-data_end, old_data_start, data_end); + btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + data_end + size_diff, btrfs_leaf_data(leaf) + + data_end, old_data_start + new_size - data_end); + btrfs_set_item_size(leaf->items + slot, new_size); + btrfs_mark_buffer_dirty(leaf_buf); + + ret = 0; + if (btrfs_leaf_free_space(root, leaf) < 0) + BUG(); + check_leaf(root, path, 0); + return ret; +} + int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 796f19d03ab5..ca3ab160f460 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -999,7 +999,7 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) -/* extent-item.c */ +/* extent-tree.c */ int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, @@ -1013,9 +1013,16 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 blocknr, u64 num_blocks); /* ctree.c */ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); +int btrfs_truncate_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u32 new_size); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); @@ -1073,11 +1080,10 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key *location, int mod); /* file-item.c */ -int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, +int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 objectid, u64 offset, - u64 num_blocks, u64 hint_block, - u64 *result); + u64 objectid, u64 pos, u64 offset, + u64 num_blocks); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, @@ -1090,6 +1096,11 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, int btrfs_csum_verify_file_block(struct btrfs_root *root, u64 objectid, u64 offset, char *data, size_t len); +struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 objectid, u64 offset, + int cow); /* super.c */ extern struct subsystem btrfs_subsys; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d560831c10a7..2cee9df001f6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,8 +12,9 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks) +int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 blocknr, u64 num_blocks) { struct btrfs_path *path; int ret; @@ -50,8 +51,9 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } -static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, u32 *refs) +static int lookup_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 blocknr, + u64 num_blocks, u32 *refs) { struct btrfs_path *path; int ret; @@ -80,7 +82,7 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return inc_block_ref(trans, root, bh_blocknr(root->node), 1); + return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -107,13 +109,13 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, continue; fi = btrfs_item_ptr(buf_leaf, i, struct btrfs_file_extent_item); - ret = inc_block_ref(trans, root, + ret = btrfs_inc_extent_ref(trans, root, btrfs_file_extent_disk_blocknr(fi), btrfs_file_extent_disk_num_blocks(fi)); BUG_ON(ret); } else { blocknr = btrfs_node_blockptr(buf_node, i); - ret = inc_block_ref(trans, root, blocknr, 1); + ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); BUG_ON(ret); } } @@ -563,7 +565,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_block_ref(trans, root, bh_blocknr(path->nodes[*level]), + ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]), 1, &refs); BUG_ON(ret); if (refs > 1) @@ -587,7 +589,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root } blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), path->slots[*level]); - ret = lookup_block_ref(trans, root, blocknr, 1, &refs); + ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); BUG_ON(ret); if (refs != 1) { path->slots[*level]++; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 93d42d650820..f49968ad0a07 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -6,13 +6,11 @@ #define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item)) / \ sizeof(struct btrfs_csum_item)) - 1)) -int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, +int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 objectid, u64 offset, - u64 num_blocks, u64 hint_block, - u64 *result) + u64 objectid, u64 pos, + u64 offset, u64 num_blocks) { - struct btrfs_key ins; int ret = 0; struct btrfs_file_extent_item *item; struct btrfs_key file_key; @@ -21,11 +19,13 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); + /* ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, (u64)-1, &ins); + */ BUG_ON(ret); file_key.objectid = objectid; - file_key.offset = offset; + file_key.offset = pos; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); @@ -34,21 +34,22 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(item, ins.objectid); - btrfs_set_file_extent_disk_num_blocks(item, ins.offset); + btrfs_set_file_extent_disk_blocknr(item, offset); + btrfs_set_file_extent_disk_num_blocks(item, num_blocks); btrfs_set_file_extent_offset(item, 0); - btrfs_set_file_extent_num_blocks(item, ins.offset); + btrfs_set_file_extent_num_blocks(item, num_blocks); btrfs_set_file_extent_generation(item, trans->transid); btrfs_mark_buffer_dirty(path->nodes[0]); - *result = ins.objectid; btrfs_release_path(root, path); btrfs_free_path(path); return 0; } -static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, - struct btrfs_path *path, - u64 objectid, u64 offset) +struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 objectid, u64 offset, + int cow) { int ret; struct btrfs_key file_key; @@ -61,19 +62,23 @@ static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); - ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0); +printk("__lookup for %Lu\n", offset); + ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); if (ret < 0) goto fail; leaf = btrfs_buffer_leaf(path->nodes[0]); if (ret > 0) { ret = 1; - if (path->slots[0] == 0) + if (path->slots[0] == 0) { +printk("fail1\n"); goto fail; + } path->slots[0]--; btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid) { +printk("fail2 type %u %Lu %Lu\n", btrfs_key_type(&found_key), found_key.objectid, objectid); goto fail; } csum_offset = (offset - found_key.offset) >> @@ -81,6 +86,7 @@ static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / sizeof(struct btrfs_csum_item)) { +printk("fail3, csum offset %lu size %u\n", csum_offset, btrfs_item_size(leaf->items + path->slots[0]) / sizeof(struct btrfs_csum_item)); goto fail; } } @@ -89,7 +95,7 @@ static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, return item; fail: if (ret > 0) - ret = -EIO; + ret = -ENOENT; return ERR_PTR(ret); } @@ -105,7 +111,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int cow = mod != 0; struct btrfs_csum_item *csum_item; - csum_item = __lookup_csum_item(root, path, objectid, offset); + csum_item = btrfs_lookup_csum(trans, root, path, objectid, offset, 0); if (IS_ERR(csum_item)) return PTR_ERR(csum_item); file_key.objectid = objectid; @@ -113,7 +119,9 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); btrfs_release_path(root, path); +printk("lookup file extent searches for %Lu\n", file_key.offset); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); +printk("ret is %d\n", ret); return ret; } @@ -134,17 +142,23 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); + + item = btrfs_lookup_csum(trans, root, path, objectid, offset, 0); + if (!IS_ERR(item)) + goto found; + btrfs_release_path(root, path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); +printk("searching for csum %Lu %Lu\n", objectid, offset); ret = btrfs_search_slot(trans, root, &file_key, path, sizeof(struct btrfs_csum_item), 1); +printk("ret %d\n", ret); if (ret < 0) goto fail; if (ret == 0) { - csum_offset = 0; - goto csum; + BUG(); } if (path->slots[0] == 0) { btrfs_release_path(root, path); @@ -153,12 +167,15 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, path->slots[0]--; leaf = btrfs_buffer_leaf(path->nodes[0]); btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); +printk("found key %Lu %Lu %u\n", found_key.objectid, found_key.offset, found_key.flags); csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; +printk("csum_offset %Lu\n", csum_offset); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid || csum_offset >= MAX_CSUM_ITEMS(root)) { btrfs_release_path(root, path); +printk("insert1\n"); goto insert; } if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / @@ -166,11 +183,13 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, ret = btrfs_extend_item(trans, root, path, sizeof(struct btrfs_csum_item)); BUG_ON(ret); +printk("item extended\n"); goto csum; } insert: csum_offset = 0; +printk("inserting item %Lu %Lu %u\n", file_key.objectid, file_key.offset, file_key.flags); ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(struct btrfs_csum_item)); if (ret != 0 && ret != -EEXIST) @@ -180,12 +199,14 @@ csum: struct btrfs_csum_item); ret = 0; item += csum_offset; +found: ret = btrfs_csum_data(root, data, len, item->csum); btrfs_set_csum_extent_offset(item, extent_offset); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); btrfs_free_path(path); +printk("return ret %d\n", ret); return ret; } @@ -208,7 +229,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); mutex_lock(&root->fs_info->fs_mutex); - item = __lookup_csum_item(root, path, objectid, offset); + item = btrfs_lookup_csum(NULL, root, path, objectid, offset, 0); if (IS_ERR(item)) { ret = PTR_ERR(item); goto fail; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ec689992fdf4..6a56416147e6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1089,7 +1089,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); extent_start = extent_start >> inode->i_blkbits; - extent_start += btrfs_file_extent_offset(item); extent_end = extent_start + btrfs_file_extent_num_blocks(item); if (iblock >= extent_start && iblock < extent_end) { err = 0; @@ -1103,6 +1102,7 @@ allocate: err = 0; goto out; } +#if 0 ret = btrfs_alloc_file_extent(trans, root, objectid, iblock << inode->i_blkbits, 1, extent_end, &blocknr); @@ -1115,9 +1115,11 @@ allocate: map_bh(result, inode->i_sb, blocknr); btrfs_map_bh_to_logical(root, result, blocknr); +#endif out: btrfs_release_path(root, path); btrfs_free_path(path); +printk("mapping iblock %lu to %lu\n", iblock, result->b_blocknr); if (trans) btrfs_end_transaction(trans, root); return err; @@ -1273,8 +1275,244 @@ failed: return err; } -static int prepare_pages(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int drop_csums(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 start, u64 end) +{ + struct btrfs_path *path; + struct btrfs_leaf *leaf; + struct btrfs_key key; + int slot; + struct btrfs_csum_item *item; + char *old_block = NULL; + u64 cur = start; + u64 found_end; + u64 num_csums; + u64 item_size; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + while(cur < end) { + item = btrfs_lookup_csum(trans, root, path, + inode->i_ino, cur, 1); + if (IS_ERR(item)) { + cur += root->blocksize; + continue; + } + leaf = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + item_size = btrfs_item_size(leaf->items + slot); + num_csums = item_size / sizeof(struct btrfs_csum_item); + found_end = key.offset + (num_csums << inode->i_blkbits); + cur = found_end; + + if (found_end > end) { + char *src; + old_block = kmalloc(root->blocksize, GFP_NOFS); + src = btrfs_item_ptr(leaf, slot, char); + memcpy(old_block, src, item_size); + } + if (key.offset < start) { + u64 new_size = (start - key.offset) >> + inode->i_blkbits; + new_size *= sizeof(struct btrfs_csum_item); + ret = btrfs_truncate_item(trans, root, path, new_size); + BUG_ON(ret); + } else { + btrfs_del_item(trans, root, path); + } + btrfs_release_path(root, path); + if (found_end > end) { + char *dst; + int i; + int new_size; + + num_csums = (found_end - end) >> inode->i_blkbits; + new_size = num_csums * sizeof(struct btrfs_csum_item); + key.offset = end; + ret = btrfs_insert_empty_item(trans, root, path, + &key, new_size); + BUG_ON(ret); + dst = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], char); + memcpy(dst, old_block + item_size - new_size, + new_size); + item = (struct btrfs_csum_item *)dst; + for (i = 0; i < num_csums; i++) { + btrfs_set_csum_extent_offset(item, end); + item++; + } + mark_buffer_dirty(path->nodes[0]); + kfree(old_block); + break; + } + } + btrfs_free_path(path); + return 0; +} + +static int drop_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 start, u64 end) +{ + int ret; + struct btrfs_key key; + struct btrfs_leaf *leaf; + int slot; + struct btrfs_file_extent_item *extent; + u64 extent_end; + int keep; + struct btrfs_file_extent_item old; + struct btrfs_path *path; + u64 search_start = start; + int bookend; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; +search_again: +printk("drop extent inode %lu start %Lu end %Lu\n", inode->i_ino, start, end); + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + search_start, -1); + if (ret != 0) { +printk("lookup failed\n"); + goto out; + } + while(1) { + keep = 0; + bookend = 0; + leaf = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + +printk("found key %Lu %Lu %u\n", key.objectid, key.offset, key.flags); + + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + extent_end = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); +printk("extent end is %Lu\n", extent_end); + if (key.offset >= end || key.objectid != inode->i_ino) { + ret = 0; + goto out; + } + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + goto next_leaf; + + if (end < extent_end && end >= key.offset) { + memcpy(&old, extent, sizeof(old)); + ret = btrfs_inc_extent_ref(trans, root, + btrfs_file_extent_disk_blocknr(&old), + btrfs_file_extent_disk_num_blocks(&old)); + BUG_ON(ret); + bookend = 1; + } + + if (start > key.offset) { + u64 new_num; + /* truncate existing extent */ + keep = 1; + WARN_ON(start & (root->blocksize - 1)); + new_num = (start - key.offset) >> inode->i_blkbits; +printk("truncating existing extent, was %Lu ", btrfs_file_extent_num_blocks(extent)); + btrfs_set_file_extent_num_blocks(extent, new_num); +printk("now %Lu\n", btrfs_file_extent_num_blocks(extent)); + + mark_buffer_dirty(path->nodes[0]); + } + if (!keep) { + u64 disk_blocknr; + u64 disk_num_blocks; +printk("del old\n"); + disk_blocknr = btrfs_file_extent_disk_blocknr(extent); + disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); + search_start = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + btrfs_release_path(root, path); + + ret = btrfs_free_extent(trans, root, disk_blocknr, + disk_num_blocks, 0); + + BUG_ON(ret); + if (!bookend && search_start >= end) { + ret = 0; + goto out; + } + if (!bookend) + goto search_again; + } + if (bookend) { + /* create bookend */ + struct btrfs_key ins; +printk("bookend! extent end %Lu\n", extent_end); + ins.objectid = inode->i_ino; + ins.offset = end; + ins.flags = 0; + btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); + + btrfs_release_path(root, path); + ret = drop_csums(trans, root, inode, start, end); + BUG_ON(ret); + ret = btrfs_insert_empty_item(trans, root, path, &ins, + sizeof(*extent)); + BUG_ON(ret); + extent = btrfs_item_ptr( + btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_disk_blocknr(extent, + btrfs_file_extent_disk_blocknr(&old)); + btrfs_set_file_extent_disk_num_blocks(extent, + btrfs_file_extent_disk_num_blocks(&old)); + + btrfs_set_file_extent_offset(extent, + btrfs_file_extent_offset(&old) + + ((end - key.offset) >> inode->i_blkbits)); + WARN_ON(btrfs_file_extent_num_blocks(&old) < + (end - key.offset) >> inode->i_blkbits); + btrfs_set_file_extent_num_blocks(extent, + btrfs_file_extent_num_blocks(&old) - + ((end - key.offset) >> inode->i_blkbits)); + + btrfs_set_file_extent_generation(extent, + btrfs_file_extent_generation(&old)); +printk("new bookend at offset %Lu, file_extent_offset %Lu, file_extent_num_blocks %Lu\n", end, btrfs_file_extent_offset(extent), btrfs_file_extent_num_blocks(extent)); + btrfs_mark_buffer_dirty(path->nodes[0]); + ret = 0; + goto out_nocsum; + } +next_leaf: + if (slot >= btrfs_header_nritems(&leaf->header) - 1) { + ret = btrfs_next_leaf(root, path); + if (ret) { + ret = 0; + goto out; + } + } else { + path->slots[0]++; + } + } + +out: + ret = drop_csums(trans, root, inode, start, end); + BUG_ON(ret); + +out_nocsum: + btrfs_free_path(path); + return ret; +} + +static int prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, @@ -1289,7 +1527,6 @@ static int prepare_pages(struct btrfs_trans_handle *trans, struct inode *inode = file->f_path.dentry->d_inode; int offset; int err = 0; - int ret; int this_write; struct buffer_head *bh; struct buffer_head *head; @@ -1305,18 +1542,21 @@ static int prepare_pages(struct btrfs_trans_handle *trans, } offset = pos & (PAGE_CACHE_SIZE -1); this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); - if (!PageUptodate(pages[i]) && - (pages[i]->index == first_index || - pages[i]->index == last_index) && pos < isize) { +#if 0 + if ((pages[i]->index == first_index || + pages[i]->index == last_index) && pos < isize && + !PageUptodate(pages[i])) { ret = mpage_readpage(pages[i], btrfs_get_block); BUG_ON(ret); lock_page(pages[i]); } +#endif create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, (1 << BH_Uptodate)); head = page_buffers(pages[i]); bh = head; do { +printk("mapping page %lu to block %Lu\n", pages[i]->index, alloc_extent_start); err = btrfs_map_bh_to_logical(root, bh, alloc_extent_start); BUG_ON(err); @@ -1351,7 +1591,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, int ret = 0; struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct page *pages[1]; + struct page *pages[8]; unsigned long first_index; unsigned long last_index; u64 start_pos; @@ -1359,6 +1599,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, u64 alloc_extent_start; u64 orig_extent_start; struct btrfs_trans_handle *trans; + struct btrfs_key ins; if (file->f_flags & O_DIRECT) return -EINVAL; @@ -1390,16 +1631,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, trans = btrfs_start_transaction(root, 1); if (!trans) { err = -ENOMEM; + mutex_unlock(&root->fs_info->fs_mutex); goto out_unlock; } - ret = btrfs_alloc_file_extent(trans, root, inode->i_ino, - start_pos, num_blocks, 1, - &alloc_extent_start); - BUG_ON(ret); - + if (start_pos < inode->i_size) { + ret = drop_extents(trans, root, inode, + start_pos, + (pos + count + root->blocksize -1) & + ~(root->blocksize - 1)); + } orig_extent_start = start_pos; - ret = btrfs_end_transaction(trans, root); + ret = btrfs_alloc_extent(trans, root, num_blocks, 1, + (u64)-1, &ins); BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + start_pos, ins.objectid, ins.offset); + BUG_ON(ret); + alloc_extent_start = ins.objectid; + ret = btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); while(count > 0) { @@ -1407,16 +1656,21 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - ret = prepare_pages(NULL, root, file, pages, num_pages, +printk("num_pages is %lu\n", num_pages); + + memset(pages, 0, sizeof(pages)); + ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, write_bytes, alloc_extent_start); BUG_ON(ret); + /* FIXME blocks != pagesize */ alloc_extent_start += num_pages; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); BUG_ON(ret); +printk("2num_pages is %lu\n", num_pages); ret = dirty_and_release_pages(NULL, root, file, pages, num_pages, orig_extent_start, pos, write_bytes); From 70b2befd0c8a4064715d8b340270650cc9d15af8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 17 Apr 2007 15:39:32 -0400 Subject: [PATCH 0119/2985] Btrfs: rework csums and extent item ordering Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 9 ++-- fs/btrfs/ctree.h | 17 +----- fs/btrfs/dir-item.c | 28 ++++++---- fs/btrfs/disk-io.c | 2 - fs/btrfs/file-item.c | 26 +-------- fs/btrfs/super.c | 123 ++++--------------------------------------- 6 files changed, 37 insertions(+), 168 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 43d4779abdd1..5c160c236773 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -115,14 +115,14 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 1; if (k1.objectid < k2->objectid) return -1; - if (k1.offset > k2->offset) - return 1; - if (k1.offset < k2->offset) - return -1; if (k1.flags > k2->flags) return 1; if (k1.flags < k2->flags) return -1; + if (k1.offset > k2->offset) + return 1; + if (k1.offset < k2->offset) + return -1; return 0; } @@ -1292,7 +1292,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, ioff + size_diff); } /* shift the data */ -printk("truncate item, new_size %u old_size %u, diff %u, bufp %p, dst, %p, num %u, old_data_start %u, data_end %u\n", new_size, old_size, size_diff, leaf, btrfs_leaf_data(leaf) + data_end + size_diff, old_data_start-data_end, old_data_start, data_end); btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + data_end + size_diff, btrfs_leaf_data(leaf) + data_end, old_data_start + new_size - data_end); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ca3ab160f460..d75a4d5bc010 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -44,14 +44,14 @@ extern struct kmem_cache *btrfs_path_cachep; */ struct btrfs_disk_key { __le64 objectid; - __le64 offset; __le32 flags; + __le64 offset; } __attribute__ ((__packed__)); struct btrfs_key { u64 objectid; - u64 offset; u32 flags; + u64 offset; } __attribute__ ((__packed__)); /* @@ -227,7 +227,6 @@ struct btrfs_file_extent_item { } __attribute__ ((__packed__)); struct btrfs_csum_item { - __le64 extent_offset; u8 csum[BTRFS_CSUM_SIZE]; } __attribute__ ((__packed__)); @@ -925,17 +924,6 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } -static inline u64 btrfs_csum_extent_offset(struct btrfs_csum_item *c) -{ - return le64_to_cpu(c->extent_offset); -} - -static inline void btrfs_set_csum_extent_offset(struct btrfs_csum_item *c, - u64 val) -{ - c->extent_offset = cpu_to_le64(val); -} - static inline u16 btrfs_device_pathlen(struct btrfs_device_item *d) { return le16_to_cpu(d->pathlen); @@ -1091,7 +1079,6 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, - u64 extent_offset, char *data, size_t len); int btrfs_csum_verify_file_block(struct btrfs_root *root, u64 objectid, u64 offset, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 0ba46bc0da9a..cd4137a8b87e 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -103,7 +103,7 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - btrfs_set_key_overflow(&key, BTRFS_KEY_OVERFLOW_MAX - 1); + // btrfs_set_key_overflow(&key, BTRFS_KEY_OVERFLOW_MAX - 1); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); while(1) { @@ -146,19 +146,29 @@ int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, int cow = mod != 0; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; + int overflow = 0; key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); - btrfs_set_key_overflow(&key, BTRFS_KEY_OVERFLOW_MAX - 1); key.offset = objectid; - ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); - if (ret < 0) - return ret; - if (ret > 0) { - if (path->slots[0] == 0) - return 1; - path->slots[0]--; + + while(1) { + btrfs_set_key_overflow(&key, overflow); + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + if (ret < 0) + return ret; + if (ret > 0) { + if (overflow >= BTRFS_KEY_OVERFLOW_MAX) + return 1; + overflow++; + btrfs_set_key_overflow(&key, overflow); + btrfs_release_path(root, path); + continue; + } else { + /* found */ + break; + } } leaf = btrfs_buffer_leaf(path->nodes[0]); found_key = &leaf->items[path->slots[0]].key; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 06b969c14625..a2a3f529cada 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -24,7 +24,6 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, u64 num_blocks) { struct dev_lookup *lookup; - char b[BDEVNAME_SIZE]; int ret; lookup = kmalloc(sizeof(*lookup), GFP_NOFS); @@ -34,7 +33,6 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, lookup->num_blocks = num_blocks; lookup->bdev = bdev; lookup->device_id = device_id; -printk("inserting %s into dev radix %Lu %Lu\n", bdevname(bdev, b), block_start, num_blocks); ret = radix_tree_insert(&root->fs_info->dev_radix, block_start + num_blocks - 1, lookup); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f49968ad0a07..ff8f3339c684 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -62,23 +62,19 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); -printk("__lookup for %Lu\n", offset); ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); if (ret < 0) goto fail; leaf = btrfs_buffer_leaf(path->nodes[0]); if (ret > 0) { ret = 1; - if (path->slots[0] == 0) { -printk("fail1\n"); + if (path->slots[0] == 0) goto fail; - } path->slots[0]--; btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid) { -printk("fail2 type %u %Lu %Lu\n", btrfs_key_type(&found_key), found_key.objectid, objectid); goto fail; } csum_offset = (offset - found_key.offset) >> @@ -86,7 +82,6 @@ printk("fail2 type %u %Lu %Lu\n", btrfs_key_type(&found_key), found_key.objectid if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / sizeof(struct btrfs_csum_item)) { -printk("fail3, csum offset %lu size %u\n", csum_offset, btrfs_item_size(leaf->items + path->slots[0]) / sizeof(struct btrfs_csum_item)); goto fail; } } @@ -109,26 +104,18 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_key file_key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; - struct btrfs_csum_item *csum_item; - csum_item = btrfs_lookup_csum(trans, root, path, objectid, offset, 0); - if (IS_ERR(csum_item)) - return PTR_ERR(csum_item); file_key.objectid = objectid; - file_key.offset = btrfs_csum_extent_offset(csum_item); + file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); - btrfs_release_path(root, path); -printk("lookup file extent searches for %Lu\n", file_key.offset); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); -printk("ret is %d\n", ret); return ret; } int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, - u64 extent_offset, char *data, size_t len) { int ret; @@ -151,10 +138,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); -printk("searching for csum %Lu %Lu\n", objectid, offset); ret = btrfs_search_slot(trans, root, &file_key, path, sizeof(struct btrfs_csum_item), 1); -printk("ret %d\n", ret); if (ret < 0) goto fail; if (ret == 0) { @@ -167,15 +152,12 @@ printk("ret %d\n", ret); path->slots[0]--; leaf = btrfs_buffer_leaf(path->nodes[0]); btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); -printk("found key %Lu %Lu %u\n", found_key.objectid, found_key.offset, found_key.flags); csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; -printk("csum_offset %Lu\n", csum_offset); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid || csum_offset >= MAX_CSUM_ITEMS(root)) { btrfs_release_path(root, path); -printk("insert1\n"); goto insert; } if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / @@ -183,13 +165,11 @@ printk("insert1\n"); ret = btrfs_extend_item(trans, root, path, sizeof(struct btrfs_csum_item)); BUG_ON(ret); -printk("item extended\n"); goto csum; } insert: csum_offset = 0; -printk("inserting item %Lu %Lu %u\n", file_key.objectid, file_key.offset, file_key.flags); ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(struct btrfs_csum_item)); if (ret != 0 && ret != -EEXIST) @@ -201,12 +181,10 @@ csum: item += csum_offset; found: ret = btrfs_csum_data(root, data, len, item->csum); - btrfs_set_csum_extent_offset(item, extent_offset); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); btrfs_free_path(path); -printk("return ret %d\n", ret); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6a56416147e6..b2a2220d1352 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -524,7 +524,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int advance; unsigned char d_type = DT_UNKNOWN; int over = 0; - int key_type = BTRFS_DIR_INDEX_KEY; + int key_type = BTRFS_DIR_ITEM_KEY; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) @@ -561,9 +561,6 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) item = leaf->items + slot; if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; - if (key_type == BTRFS_DIR_INDEX_KEY && - btrfs_disk_key_offset(&item->key) > root->highest_inode) - break; if (btrfs_disk_key_type(&item->key) != key_type) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) @@ -1119,7 +1116,6 @@ allocate: out: btrfs_release_path(root, path); btrfs_free_path(path); -printk("mapping iblock %lu to %lu\n", iblock, result->b_blocknr); if (trans) btrfs_end_transaction(trans, root); return err; @@ -1233,7 +1229,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct file *file, struct page **pages, size_t num_pages, - u64 extent_offset, loff_t pos, size_t write_bytes) { @@ -1253,7 +1248,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, trans = btrfs_start_transaction(root, 1); btrfs_csum_file_block(trans, root, inode->i_ino, pages[i]->index << PAGE_CACHE_SHIFT, - extent_offset, kmap(pages[i]), PAGE_CACHE_SIZE); kunmap(pages[i]); SetPageChecked(pages[i]); @@ -1275,86 +1269,6 @@ failed: return err; } -static int drop_csums(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u64 start, u64 end) -{ - struct btrfs_path *path; - struct btrfs_leaf *leaf; - struct btrfs_key key; - int slot; - struct btrfs_csum_item *item; - char *old_block = NULL; - u64 cur = start; - u64 found_end; - u64 num_csums; - u64 item_size; - int ret; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - while(cur < end) { - item = btrfs_lookup_csum(trans, root, path, - inode->i_ino, cur, 1); - if (IS_ERR(item)) { - cur += root->blocksize; - continue; - } - leaf = btrfs_buffer_leaf(path->nodes[0]); - slot = path->slots[0]; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); - item_size = btrfs_item_size(leaf->items + slot); - num_csums = item_size / sizeof(struct btrfs_csum_item); - found_end = key.offset + (num_csums << inode->i_blkbits); - cur = found_end; - - if (found_end > end) { - char *src; - old_block = kmalloc(root->blocksize, GFP_NOFS); - src = btrfs_item_ptr(leaf, slot, char); - memcpy(old_block, src, item_size); - } - if (key.offset < start) { - u64 new_size = (start - key.offset) >> - inode->i_blkbits; - new_size *= sizeof(struct btrfs_csum_item); - ret = btrfs_truncate_item(trans, root, path, new_size); - BUG_ON(ret); - } else { - btrfs_del_item(trans, root, path); - } - btrfs_release_path(root, path); - if (found_end > end) { - char *dst; - int i; - int new_size; - - num_csums = (found_end - end) >> inode->i_blkbits; - new_size = num_csums * sizeof(struct btrfs_csum_item); - key.offset = end; - ret = btrfs_insert_empty_item(trans, root, path, - &key, new_size); - BUG_ON(ret); - dst = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], char); - memcpy(dst, old_block + item_size - new_size, - new_size); - item = (struct btrfs_csum_item *)dst; - for (i = 0; i < num_csums; i++) { - btrfs_set_csum_extent_offset(item, end); - item++; - } - mark_buffer_dirty(path->nodes[0]); - kfree(old_block); - break; - } - } - btrfs_free_path(path); - return 0; -} - static int drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, @@ -1376,12 +1290,16 @@ static int drop_extents(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; search_again: -printk("drop extent inode %lu start %Lu end %Lu\n", inode->i_ino, start, end); ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, search_start, -1); - if (ret != 0) { -printk("lookup failed\n"); + if (ret < 0) goto out; + if (ret > 0) { + if (path->slots[0] == 0) { + ret = -ENOENT; + goto out; + } + path->slots[0]--; } while(1) { keep = 0; @@ -1390,14 +1308,11 @@ printk("lookup failed\n"); slot = path->slots[0]; btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); -printk("found key %Lu %Lu %u\n", key.objectid, key.offset, key.flags); - extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); extent_end = key.offset + (btrfs_file_extent_num_blocks(extent) << inode->i_blkbits); -printk("extent end is %Lu\n", extent_end); if (key.offset >= end || key.objectid != inode->i_ino) { ret = 0; goto out; @@ -1420,16 +1335,12 @@ printk("extent end is %Lu\n", extent_end); keep = 1; WARN_ON(start & (root->blocksize - 1)); new_num = (start - key.offset) >> inode->i_blkbits; -printk("truncating existing extent, was %Lu ", btrfs_file_extent_num_blocks(extent)); btrfs_set_file_extent_num_blocks(extent, new_num); -printk("now %Lu\n", btrfs_file_extent_num_blocks(extent)); - mark_buffer_dirty(path->nodes[0]); } if (!keep) { u64 disk_blocknr; u64 disk_num_blocks; -printk("del old\n"); disk_blocknr = btrfs_file_extent_disk_blocknr(extent); disk_num_blocks = btrfs_file_extent_disk_num_blocks(extent); @@ -1454,15 +1365,12 @@ printk("del old\n"); if (bookend) { /* create bookend */ struct btrfs_key ins; -printk("bookend! extent end %Lu\n", extent_end); ins.objectid = inode->i_ino; ins.offset = end; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); btrfs_release_path(root, path); - ret = drop_csums(trans, root, inode, start, end); - BUG_ON(ret); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); BUG_ON(ret); @@ -1486,10 +1394,9 @@ printk("bookend! extent end %Lu\n", extent_end); btrfs_set_file_extent_generation(extent, btrfs_file_extent_generation(&old)); -printk("new bookend at offset %Lu, file_extent_offset %Lu, file_extent_num_blocks %Lu\n", end, btrfs_file_extent_offset(extent), btrfs_file_extent_num_blocks(extent)); btrfs_mark_buffer_dirty(path->nodes[0]); ret = 0; - goto out_nocsum; + goto out; } next_leaf: if (slot >= btrfs_header_nritems(&leaf->header) - 1) { @@ -1504,10 +1411,6 @@ next_leaf: } out: - ret = drop_csums(trans, root, inode, start, end); - BUG_ON(ret); - -out_nocsum: btrfs_free_path(path); return ret; } @@ -1556,7 +1459,6 @@ static int prepare_pages(struct btrfs_root *root, head = page_buffers(pages[i]); bh = head; do { -printk("mapping page %lu to block %Lu\n", pages[i]->index, alloc_extent_start); err = btrfs_map_bh_to_logical(root, bh, alloc_extent_start); BUG_ON(err); @@ -1597,7 +1499,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, u64 start_pos; u64 num_blocks; u64 alloc_extent_start; - u64 orig_extent_start; struct btrfs_trans_handle *trans; struct btrfs_key ins; @@ -1640,7 +1541,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, (pos + count + root->blocksize -1) & ~(root->blocksize - 1)); } - orig_extent_start = start_pos; ret = btrfs_alloc_extent(trans, root, num_blocks, 1, (u64)-1, &ins); BUG_ON(ret); @@ -1656,7 +1556,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -printk("num_pages is %lu\n", num_pages); memset(pages, 0, sizeof(pages)); ret = prepare_pages(root, file, pages, num_pages, @@ -1670,10 +1569,8 @@ printk("num_pages is %lu\n", num_pages); write_bytes, pages, buf); BUG_ON(ret); -printk("2num_pages is %lu\n", num_pages); ret = dirty_and_release_pages(NULL, root, file, pages, - num_pages, orig_extent_start, - pos, write_bytes); + num_pages, pos, write_bytes); BUG_ON(ret); btrfs_drop_pages(pages, num_pages); From a429e51371eee3c989160c003ee40bc3947c6a76 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 18 Apr 2007 16:15:28 -0400 Subject: [PATCH 0120/2985] Btrfs: working file_write, reorganized key flags Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 30 +++++++++-- fs/btrfs/ctree.h | 72 ++++++++++++------------- fs/btrfs/dir-item.c | 70 ------------------------ fs/btrfs/extent-tree.c | 4 +- fs/btrfs/file-item.c | 66 ++++++++++++++++------- fs/btrfs/print-tree.c | 2 +- fs/btrfs/super.c | 120 ++++++++++++++++++++++------------------- 7 files changed, 178 insertions(+), 186 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5c160c236773..4efcd1bd63e5 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -893,7 +893,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root } left_nritems = btrfs_header_nritems(&left->header); - for (i = left_nritems - 1; i >= 0; i--) { + if (left_nritems == 0) { + btrfs_block_release(root, right_buf); + return 1; + } + for (i = left_nritems - 1; i >= 1; i--) { item = left->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -907,6 +911,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_block_release(root, right_buf); return 1; } + if (push_items == left_nritems) + WARN_ON(1); right_nritems = btrfs_header_nritems(&right->header); /* push left to right */ push_space = btrfs_item_end(left->items + left_nritems - push_items); @@ -943,6 +949,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(left_buf); btrfs_mark_buffer_dirty(right_buf); + btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); btrfs_mark_buffer_dirty(upper); @@ -1004,7 +1011,12 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } - for (i = 0; i < btrfs_header_nritems(&right->header); i++) { + if (btrfs_header_nritems(&right->header) == 0) { + btrfs_block_release(root, t); + return 1; + } + + for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) { item = right->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -1018,6 +1030,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_block_release(root, t); return 1; } + if (push_items == btrfs_header_nritems(&right->header)) + WARN_ON(1); /* push data from right to left */ btrfs_memcpy(root, left, left->items + btrfs_header_nritems(&left->header), @@ -1064,7 +1078,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(t); btrfs_mark_buffer_dirty(right_buf); - wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) ret = wret; @@ -1181,6 +1194,12 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root path->nodes[0] = right_buffer; path->slots[0] = 0; path->slots[1] -= 1; + if (path->slots[1] == 0) { + wret = fixup_low_keys(trans, root, + path, &disk_key, 1); + if (wret) + ret = wret; + } return ret; } mid = slot; @@ -1241,6 +1260,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[1], 1); if (wret) ret = wret; + if (path->slots[1] == 0) { + wret = fixup_low_keys(trans, root, path, &disk_key, 1); + if (wret) + ret = wret; + } btrfs_block_release(root, path->nodes[0]); path->nodes[0] = right_buffer; path->slots[0] = 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d75a4d5bc010..8a329d3901a2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -282,11 +282,12 @@ struct btrfs_root { /* the lower bits in the key flags defines the item type */ #define BTRFS_KEY_TYPE_MAX 256 -#define BTRFS_KEY_TYPE_MASK (BTRFS_KEY_TYPE_MAX - 1) +#define BTRFS_KEY_TYPE_SHIFT 24 +#define BTRFS_KEY_TYPE_MASK (((u32)BTRFS_KEY_TYPE_MAX - 1) << \ + BTRFS_KEY_TYPE_SHIFT) #define BTRFS_KEY_OVERFLOW_MAX 128 -#define BTRFS_KEY_OVERFLOW_SHIFT 8 -#define BTRFS_KEY_OVERFLOW_MASK (0x7FULL << BTRFS_KEY_OVERFLOW_SHIFT) +#define BTRFS_KEY_OVERFLOW_MASK ((u32)BTRFS_KEY_OVERFLOW_MAX - 1) /* * inode items have the data typically returned from stat and store other @@ -586,56 +587,55 @@ static inline void btrfs_set_disk_key_flags(struct btrfs_disk_key *disk, disk->flags = cpu_to_le32(val); } +static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key) +{ + return le32_to_cpu(key->flags) >> BTRFS_KEY_TYPE_SHIFT; +} + +static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, + u32 val) +{ + u32 flags = btrfs_disk_key_flags(key); + BUG_ON(val >= BTRFS_KEY_TYPE_MAX); + val = val << BTRFS_KEY_TYPE_SHIFT; + flags = (flags & ~BTRFS_KEY_TYPE_MASK) | val; + btrfs_set_disk_key_flags(key, flags); +} + +static inline u32 btrfs_key_type(struct btrfs_key *key) +{ + return key->flags >> BTRFS_KEY_TYPE_SHIFT; +} + +static inline void btrfs_set_key_type(struct btrfs_key *key, u32 val) +{ + BUG_ON(val >= BTRFS_KEY_TYPE_MAX); + val = val << BTRFS_KEY_TYPE_SHIFT; + key->flags = (key->flags & ~(BTRFS_KEY_TYPE_MASK)) | val; +} + static inline u32 btrfs_key_overflow(struct btrfs_key *key) { - u32 over = key->flags & BTRFS_KEY_OVERFLOW_MASK; - return over >> BTRFS_KEY_OVERFLOW_SHIFT; + return key->flags & BTRFS_KEY_OVERFLOW_MASK; } static inline void btrfs_set_key_overflow(struct btrfs_key *key, u32 over) { BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); - over = over << BTRFS_KEY_OVERFLOW_SHIFT; - key->flags = (key->flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; -} - -static inline u32 btrfs_key_type(struct btrfs_key *key) -{ - return key->flags & BTRFS_KEY_TYPE_MASK; -} - -static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key) -{ - return le32_to_cpu(key->flags) & BTRFS_KEY_TYPE_MASK; -} - -static inline void btrfs_set_key_type(struct btrfs_key *key, u32 type) -{ - BUG_ON(type >= BTRFS_KEY_TYPE_MAX); - key->flags = (key->flags & ~((u64)BTRFS_KEY_TYPE_MASK)) | type; -} - -static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, u32 type) -{ - u32 flags = btrfs_disk_key_flags(key); - BUG_ON(type >= BTRFS_KEY_TYPE_MAX); - flags = (flags & ~((u64)BTRFS_KEY_TYPE_MASK)) | type; - btrfs_set_disk_key_flags(key, flags); + key->flags = (key->flags & ~BTRFS_KEY_OVERFLOW_MASK) | over; } static inline u32 btrfs_disk_key_overflow(struct btrfs_disk_key *key) { - u32 over = le32_to_cpu(key->flags) & BTRFS_KEY_OVERFLOW_MASK; - return over >> BTRFS_KEY_OVERFLOW_SHIFT; + return le32_to_cpu(key->flags) & BTRFS_KEY_OVERFLOW_MASK; } -static inline void btrfs_set_disK_key_overflow(struct btrfs_disk_key *key, +static inline void btrfs_set_disk_key_overflow(struct btrfs_disk_key *key, u32 over) { u32 flags = btrfs_disk_key_flags(key); BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); - over = over << BTRFS_KEY_OVERFLOW_SHIFT; - flags = (flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; + flags = (flags & ~BTRFS_KEY_OVERFLOW_MASK) | over; btrfs_set_disk_key_flags(key, flags); } diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index cd4137a8b87e..a43deb726486 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -58,30 +58,6 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); btrfs_mark_buffer_dirty(path->nodes[0]); - - /* FIXME, use some real flag for selecting the extra index */ - if (root == root->fs_info->tree_root) - goto out; - - btrfs_release_path(root, path); - - btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); - key.offset = location->objectid; - ret = insert_with_overflow(trans, root, path, &key, data_size); - // FIXME clear the dirindex bit - if (ret) - goto out; - - dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_dir_item); - btrfs_cpu_key_to_disk(&dir_item->location, location); - btrfs_set_dir_type(dir_item, type); - btrfs_set_dir_flags(dir_item, 0); - btrfs_set_dir_name_len(dir_item, name_len); - name_ptr = (char *)(dir_item + 1); - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_release_path(root, path); btrfs_free_path(path); @@ -135,52 +111,6 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } -int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 dir, - u64 objectid, int mod) -{ - int ret; - struct btrfs_key key; - int ins_len = mod < 0 ? -1 : 0; - int cow = mod != 0; - struct btrfs_disk_key *found_key; - struct btrfs_leaf *leaf; - int overflow = 0; - - key.objectid = dir; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); - key.offset = objectid; - - while(1) { - btrfs_set_key_overflow(&key, overflow); - ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); - if (ret < 0) - return ret; - if (ret > 0) { - if (overflow >= BTRFS_KEY_OVERFLOW_MAX) - return 1; - overflow++; - btrfs_set_key_overflow(&key, overflow); - btrfs_release_path(root, path); - continue; - } else { - /* found */ - break; - } - } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - - if (btrfs_disk_key_objectid(found_key) != dir || - btrfs_disk_key_type(found_key) != BTRFS_DIR_INDEX_KEY) - return 1; - if (btrfs_disk_key_offset(found_key) == objectid) - return 0; - return 1; -} - int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2cee9df001f6..cb04a70eb7e8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -35,8 +35,10 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); - if (ret != 0) + if (ret != 0) { +printk("can't find block %Lu %Lu\n", blocknr, num_blocks); BUG(); + } BUG_ON(ret != 0); l = btrfs_buffer_leaf(path->nodes[0]); item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ff8f3339c684..8cc3c1d15412 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -4,7 +4,7 @@ #include "transaction.h" #define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \ - sizeof(struct btrfs_item)) / \ + sizeof(struct btrfs_item) * 2) / \ sizeof(struct btrfs_csum_item)) - 1)) int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -19,11 +19,6 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - /* - ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, - (u64)-1, &ins); - */ - BUG_ON(ret); file_key.objectid = objectid; file_key.offset = pos; file_key.flags = 0; @@ -40,6 +35,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_num_blocks(item, num_blocks); btrfs_set_file_extent_generation(item, trans->transid); btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(root, path); btrfs_free_path(path); return 0; @@ -57,6 +53,7 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_csum_item *item; struct btrfs_leaf *leaf; u64 csum_offset = 0; + int csums_in_item; file_key.objectid = objectid; file_key.offset = offset; @@ -79,9 +76,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, } csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; - if (csum_offset >= - btrfs_item_size(leaf->items + path->slots[0]) / - sizeof(struct btrfs_csum_item)) { + csums_in_item = btrfs_item_size(leaf->items + path->slots[0]); + csums_in_item /= sizeof(struct btrfs_csum_item); + + if (csum_offset >= csums_in_item) { + ret = -EFBIG; goto fail; } } @@ -128,16 +127,36 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); - item = btrfs_lookup_csum(trans, root, path, objectid, offset, 0); - if (!IS_ERR(item)) - goto found; - btrfs_release_path(root, path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + + item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1); + if (!IS_ERR(item)) + goto found; + ret = PTR_ERR(item); + if (ret == -EFBIG) { + u32 item_size; + /* we found one, but it isn't big enough yet */ + leaf = btrfs_buffer_leaf(path->nodes[0]); + item_size = btrfs_item_size(leaf->items + path->slots[0]); + if ((item_size / sizeof(struct btrfs_csum_item)) >= + MAX_CSUM_ITEMS(root)) { + /* already at max size, make a new one */ + goto insert; + } + } else { + /* we didn't find a csum item, insert one */ + goto insert; + } + + /* + * at this point, we know the tree has an item, but it isn't big + * enough yet to put our csum in. Grow it + */ + btrfs_release_path(root, path); ret = btrfs_search_slot(trans, root, &file_key, path, sizeof(struct btrfs_csum_item), 1); if (ret < 0) @@ -146,7 +165,6 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, BUG(); } if (path->slots[0] == 0) { - btrfs_release_path(root, path); goto insert; } path->slots[0]--; @@ -157,29 +175,36 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid || csum_offset >= MAX_CSUM_ITEMS(root)) { - btrfs_release_path(root, path); + WARN_ON(1); goto insert; } if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / sizeof(struct btrfs_csum_item)) { - ret = btrfs_extend_item(trans, root, path, - sizeof(struct btrfs_csum_item)); + u32 diff = (csum_offset + 1) * sizeof(struct btrfs_csum_item); + diff = diff - btrfs_item_size(leaf->items + path->slots[0]); + WARN_ON(diff != sizeof(struct btrfs_csum_item)); + ret = btrfs_extend_item(trans, root, path, diff); BUG_ON(ret); goto csum; } insert: + btrfs_release_path(root, path); csum_offset = 0; ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(struct btrfs_csum_item)); - if (ret != 0 && ret != -EEXIST) + if (ret != 0) { + printk("at insert for %Lu %u %Lu ret is %d\n", file_key.objectid, file_key.flags, file_key.offset, ret); + WARN_ON(1); goto fail; + } csum: item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_csum_item); ret = 0; item += csum_offset; found: + btrfs_check_bounds(item->csum, BTRFS_CSUM_SIZE, path->nodes[0]->b_data, root->fs_info->sb->s_blocksize); ret = btrfs_csum_data(root, data, len, item->csum); btrfs_mark_buffer_dirty(path->nodes[0]); fail: @@ -210,6 +235,9 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, item = btrfs_lookup_csum(NULL, root, path, objectid, offset, 0); if (IS_ERR(item)) { ret = PTR_ERR(item); + /* a csum that isn't present is a preallocated region. */ + if (ret == -ENOENT || ret == -EFBIG) + ret = 1; goto fail; } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 0732a2fbb23a..1e7038b070ae 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -19,7 +19,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) for (i = 0 ; i < nr ; i++) { item = l->items + i; type = btrfs_disk_key_type(&item->key); - printk("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n", + printk("\titem %d key (%Lu %x %Lu) itemoff %d itemsize %d\n", i, btrfs_disk_key_objectid(&item->key), btrfs_disk_key_flags(&item->key), diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b2a2220d1352..583cd87a62e2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -150,11 +150,6 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, BUG_ON(ret); btrfs_release_path(root, path); - ret = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, - objectid, -1); - BUG_ON(ret); - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); dentry->d_inode->i_ctime = dir->i_ctime; err: btrfs_release_path(root, path); @@ -329,8 +324,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, extent_start = btrfs_file_extent_disk_blocknr(fi); extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi); + /* FIXME blocksize != 4096 */ inode->i_blocks -= - btrfs_file_extent_num_blocks(fi) >> 9; + btrfs_file_extent_num_blocks(fi) << 3; found_extent = 1; } else { found_extent = 0; @@ -562,7 +558,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; if (btrfs_disk_key_type(&item->key) != key_type) - continue; + break; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) continue; filp->f_pos = btrfs_disk_key_offset(&item->key); @@ -1285,29 +1281,27 @@ static int drop_extents(struct btrfs_trans_handle *trans, struct btrfs_path *path; u64 search_start = start; int bookend; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; -search_again: - ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, - search_start, -1); - if (ret < 0) - goto out; - if (ret > 0) { - if (path->slots[0] == 0) { - ret = -ENOENT; - goto out; - } - path->slots[0]--; - } while(1) { + btrfs_release_path(root, path); + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + search_start, -1); + if (ret < 0) + goto out; + if (ret > 0) { + if (path->slots[0] == 0) { + ret = -ENOENT; + goto out; + } + path->slots[0]--; + } keep = 0; bookend = 0; leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); - extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); extent_end = key.offset + @@ -1318,7 +1312,10 @@ search_again: goto out; } if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) - goto next_leaf; + goto out; + if (search_start >= extent_end) + goto out; + search_start = extent_end; if (end < extent_end && end >= key.offset) { memcpy(&old, extent, sizeof(old)); @@ -1331,10 +1328,13 @@ search_again: if (start > key.offset) { u64 new_num; + u64 old_num; /* truncate existing extent */ keep = 1; WARN_ON(start & (root->blocksize - 1)); new_num = (start - key.offset) >> inode->i_blkbits; + old_num = btrfs_file_extent_num_blocks(extent); + inode->i_blocks -= (old_num - new_num) << 3; btrfs_set_file_extent_num_blocks(extent, new_num); mark_buffer_dirty(path->nodes[0]); } @@ -1344,13 +1344,11 @@ search_again: disk_blocknr = btrfs_file_extent_disk_blocknr(extent); disk_num_blocks = btrfs_file_extent_disk_num_blocks(extent); - search_start = key.offset + - (btrfs_file_extent_num_blocks(extent) << - inode->i_blkbits); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); + inode->i_blocks -= + btrfs_file_extent_num_blocks(extent) << 3; btrfs_release_path(root, path); - ret = btrfs_free_extent(trans, root, disk_blocknr, disk_num_blocks, 0); @@ -1360,7 +1358,7 @@ search_again: goto out; } if (!bookend) - goto search_again; + continue; } if (bookend) { /* create bookend */ @@ -1395,21 +1393,12 @@ search_again: btrfs_set_file_extent_generation(extent, btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); + inode->i_blocks += + btrfs_file_extent_num_blocks(extent) << 3; ret = 0; goto out; } -next_leaf: - if (slot >= btrfs_header_nritems(&leaf->header) - 1) { - ret = btrfs_next_leaf(root, path); - if (ret) { - ret = 0; - goto out; - } - } else { - path->slots[0]++; - } } - out: btrfs_free_path(path); return ret; @@ -1445,15 +1434,6 @@ static int prepare_pages(struct btrfs_root *root, } offset = pos & (PAGE_CACHE_SIZE -1); this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); -#if 0 - if ((pages[i]->index == first_index || - pages[i]->index == last_index) && pos < isize && - !PageUptodate(pages[i])) { - ret = mpage_readpage(pages[i], btrfs_get_block); - BUG_ON(ret); - lock_page(pages[i]); - } -#endif create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, (1 << BH_Uptodate)); head = page_buffers(pages[i]); @@ -1494,6 +1474,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct page *pages[8]; + struct page *pinned[2] = { NULL, NULL }; unsigned long first_index; unsigned long last_index; u64 start_pos; @@ -1505,14 +1486,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (file->f_flags & O_DIRECT) return -EINVAL; pos = *ppos; - - start_pos = pos & ~(root->blocksize - 1); - /* FIXME */ - if (start_pos != pos) - return -EINVAL; - num_blocks = (count + pos - start_pos + root->blocksize - 1) >> - inode->i_blkbits; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); @@ -1524,10 +1497,37 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (err) goto out; file_update_time(file); + + start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); + num_blocks = (count + pos - start_pos + root->blocksize - 1) >> + inode->i_blkbits; + mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; + if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size && + (pos & (PAGE_CACHE_SIZE - 1))) { + pinned[0] = grab_cache_page(inode->i_mapping, first_index); + if (!PageUptodate(pinned[0])) { + ret = mpage_readpage(pinned[0], btrfs_get_block); + BUG_ON(ret); + } else { + unlock_page(pinned[0]); + } + } + if (first_index != last_index && + (last_index << PAGE_CACHE_SHIFT) < inode->i_size && + (count & (PAGE_CACHE_SIZE - 1))) { + pinned[1] = grab_cache_page(inode->i_mapping, last_index); + if (!PageUptodate(pinned[1])) { + ret = mpage_readpage(pinned[1], btrfs_get_block); + BUG_ON(ret); + } else { + unlock_page(pinned[1]); + } + } + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -1535,11 +1535,14 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, mutex_unlock(&root->fs_info->fs_mutex); goto out_unlock; } + /* FIXME blocksize != 4096 */ + inode->i_blocks += num_blocks << 3; if (start_pos < inode->i_size) { + /* FIXME blocksize != pagesize */ ret = drop_extents(trans, root, inode, start_pos, (pos + count + root->blocksize -1) & - ~(root->blocksize - 1)); + ~((u64)root->blocksize - 1)); } ret = btrfs_alloc_extent(trans, root, num_blocks, 1, (u64)-1, &ins); @@ -1585,8 +1588,13 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, out_unlock: mutex_unlock(&inode->i_mutex); out: + if (pinned[0]) + page_cache_release(pinned[0]); + if (pinned[1]) + page_cache_release(pinned[1]); *ppos = pos; current->backing_dev_info = NULL; + mark_inode_dirty(inode); return num_written ? num_written : err; } From 236454dfffb64a95ee01c50a215153f5de61c475 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 19 Apr 2007 13:37:44 -0400 Subject: [PATCH 0121/2985] Btrfs: many file_write fixes, inline data Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 35 +++ fs/btrfs/disk-io.c | 6 + fs/btrfs/extent-tree.c | 5 + fs/btrfs/file-item.c | 1 + fs/btrfs/super.c | 553 ++++++++++++++++++++++++++++++++--------- 5 files changed, 484 insertions(+), 116 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8a329d3901a2..f1800008be34 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -73,6 +73,9 @@ struct btrfs_header { (sizeof(struct btrfs_disk_key) + sizeof(u64))) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) +#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ + sizeof(struct btrfs_item) - \ + sizeof(struct btrfs_file_extent_item)) struct buffer_head; /* @@ -204,8 +207,12 @@ struct btrfs_root_item { __le32 refs; } __attribute__ ((__packed__)); +#define BTRFS_FILE_EXTENT_REG 0 +#define BTRFS_FILE_EXTENT_INLINE 1 + struct btrfs_file_extent_item { __le64 generation; + u8 type; /* * disk space consumed by the extent, checksum blocks are included * in these numbers @@ -862,6 +869,34 @@ static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) return (u8 *)l->items; } +static inline int btrfs_file_extent_type(struct btrfs_file_extent_item *e) +{ + return e->type; +} +static inline void btrfs_set_file_extent_type(struct btrfs_file_extent_item *e, + u8 val) +{ + e->type = val; +} + +static inline char *btrfs_file_extent_inline_start(struct + btrfs_file_extent_item *e) +{ + return (char *)(&e->disk_blocknr); +} + +static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) +{ + return (unsigned long)(&((struct + btrfs_file_extent_item *)NULL)->disk_blocknr) + datasize; +} + +static inline u32 btrfs_file_extent_inline_len(struct btrfs_item *e) +{ + struct btrfs_file_extent_item *fe = NULL; + return btrfs_item_size(e) - (unsigned long)(&fe->disk_blocknr); +} + static inline u64 btrfs_file_extent_disk_blocknr(struct btrfs_file_extent_item *e) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a2a3f529cada..11e17a2f736b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -108,6 +108,12 @@ int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, int ret; + if (logical == 0) { + bh->b_bdev = NULL; + bh->b_blocknr = 0; + set_buffer_mapped(bh); + return 0; + } root = root->fs_info->dev_root; ret = radix_tree_gang_lookup(&root->fs_info->dev_radix, (void **)lookup, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cb04a70eb7e8..b2faad3e8791 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -111,6 +111,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, continue; fi = btrfs_item_ptr(buf_leaf, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; ret = btrfs_inc_extent_ref(trans, root, btrfs_file_extent_disk_blocknr(fi), btrfs_file_extent_disk_num_blocks(fi)); @@ -539,6 +542,8 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE) + continue; /* * FIXME make sure to insert a trans record that * repeats the snapshot del on crash diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 8cc3c1d15412..10e4cf08e9ed 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -34,6 +34,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_offset(item, 0); btrfs_set_file_extent_num_blocks(item, num_blocks); btrfs_set_file_extent_generation(item, trans->transid); + btrfs_set_file_extent_type(item, BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root, path); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 583cd87a62e2..1b286bb26f12 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -317,19 +317,22 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, break; if (btrfs_disk_key_offset(found_key) < inode->i_size) break; + found_extent = 0; if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) { fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); - extent_start = btrfs_file_extent_disk_blocknr(fi); - extent_num_blocks = - btrfs_file_extent_disk_num_blocks(fi); - /* FIXME blocksize != 4096 */ - inode->i_blocks -= - btrfs_file_extent_num_blocks(fi) << 3; - found_extent = 1; - } else { - found_extent = 0; + if (btrfs_file_extent_type(fi) != + BTRFS_FILE_EXTENT_INLINE) { + extent_start = + btrfs_file_extent_disk_blocknr(fi); + extent_num_blocks = + btrfs_file_extent_disk_num_blocks(fi); + /* FIXME blocksize != 4096 */ + inode->i_blocks -= + btrfs_file_extent_num_blocks(fi) << 3; + found_extent = 1; + } } ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -1010,9 +1013,9 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, u64 extent_start = 0; u64 extent_end = 0; u64 objectid = inode->i_ino; + u32 found_type; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans = NULL; struct btrfs_file_extent_item *item; struct btrfs_leaf *leaf; struct btrfs_disk_key *found_key; @@ -1021,13 +1024,12 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, BUG_ON(!path); btrfs_init_path(path); if (create) { - trans = btrfs_start_transaction(root, 1); WARN_ON(1); } - ret = btrfs_lookup_file_extent(trans, root, path, + ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, - iblock << inode->i_blkbits, create); + iblock << inode->i_blkbits, 0); if (ret < 0) { err = ret; goto out; @@ -1036,7 +1038,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (ret != 0) { if (path->slots[0] == 0) { btrfs_release_path(root, path); - goto allocate; + goto out; } path->slots[0]--; } @@ -1047,73 +1049,51 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, blocknr = btrfs_file_extent_disk_blocknr(item); blocknr += btrfs_file_extent_offset(item); - /* exact match found, use it, FIXME, deal with extents - * other than the page size - */ - if (0 && ret == 0) { - err = 0; - if (create && - btrfs_file_extent_generation(item) != trans->transid) { - struct btrfs_key ins; - ret = btrfs_alloc_extent(trans, root, 1, - blocknr, (u64)-1, &ins); - BUG_ON(ret); - btrfs_set_file_extent_disk_blocknr(item, ins.objectid); - mark_buffer_dirty(path->nodes[0]); - ret = btrfs_free_extent(trans, root, - blocknr, 1, 0); - BUG_ON(ret); - blocknr = ins.objectid; - - } - btrfs_map_bh_to_logical(root, result, blocknr); - goto out; - } - /* are we inside the extent that was found? */ found_key = &leaf->items[path->slots[0]].key; + found_type = btrfs_disk_key_type(found_key); if (btrfs_disk_key_objectid(found_key) != objectid || - btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) { + found_type != BTRFS_EXTENT_DATA_KEY) { extent_end = 0; extent_start = 0; btrfs_release_path(root, path); - goto allocate; + goto out; } - + found_type = btrfs_file_extent_type(item); extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); - extent_start = extent_start >> inode->i_blkbits; - extent_end = extent_start + btrfs_file_extent_num_blocks(item); - if (iblock >= extent_start && iblock < extent_end) { - err = 0; - btrfs_map_bh_to_logical(root, result, blocknr + iblock - - extent_start); - goto out; + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_start = extent_start >> inode->i_blkbits; + extent_end = extent_start + btrfs_file_extent_num_blocks(item); + if (iblock >= extent_start && iblock < extent_end) { + err = 0; + btrfs_map_bh_to_logical(root, result, blocknr + + iblock - extent_start); + goto out; + } + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + char *ptr; + char *map; + u32 size; + size = btrfs_file_extent_inline_len(leaf->items + + path->slots[0]); + extent_end = (extent_start + size) >> inode->i_blkbits; + extent_start >>= inode->i_blkbits; + if (iblock < extent_start || iblock > extent_end) { + goto out; + } + ptr = btrfs_file_extent_inline_start(item); + map = kmap(result->b_page); + memcpy(map, ptr, size); + memset(map + size, 0, PAGE_CACHE_SIZE - size); + flush_dcache_page(result->b_page); + kunmap(result->b_page); + set_buffer_uptodate(result); + SetPageChecked(result->b_page); + btrfs_map_bh_to_logical(root, result, 0); } -allocate: - /* ok, create a new extent */ - if (!create) { - err = 0; - goto out; - } -#if 0 - ret = btrfs_alloc_file_extent(trans, root, objectid, - iblock << inode->i_blkbits, - 1, extent_end, &blocknr); - if (ret) { - err = ret; - goto out; - } - inode->i_blocks += inode->i_sb->s_blocksize >> 9; - set_buffer_new(result); - map_bh(result, inode->i_sb, blocknr); - - btrfs_map_bh_to_logical(root, result, blocknr); -#endif out: btrfs_release_path(root, path); btrfs_free_path(path); - if (trans) - btrfs_end_transaction(trans, root); return err; } @@ -1124,7 +1104,6 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, struct btrfs_root *root = BTRFS_I(inode)->root; mutex_lock(&root->fs_info->fs_mutex); err = btrfs_get_block_lock(inode, iblock, result, create); - // err = btrfs_get_block_inline(inode, iblock, result, create); mutex_unlock(&root->fs_info->fs_mutex); return err; } @@ -1134,11 +1113,6 @@ static int btrfs_prepare_write(struct file *file, struct page *page, { return nobh_prepare_write(page, from, to, btrfs_get_block); } -static int btrfs_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - return nobh_commit_write(file, page, from, to); -} static void btrfs_write_super(struct super_block *sb) { @@ -1150,9 +1124,227 @@ static int btrfs_readpage(struct file *file, struct page *page) return mpage_readpage(page, btrfs_get_block); } +/* + * While block_write_full_page is writing back the dirty buffers under + * the page lock, whoever dirtied the buffers may decide to clean them + * again at any time. We handle that by only looking at the buffer + * state inside lock_buffer(). + * + * If block_write_full_page() is called for regular writeback + * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a + * locked buffer. This only can happen if someone has written the buffer + * directly, with submit_bh(). At the address_space level PageWriteback + * prevents this contention from occurring. + */ +static int __btrfs_write_full_page(struct inode *inode, struct page *page, + struct writeback_control *wbc) +{ + int err; + sector_t block; + sector_t last_block; + struct buffer_head *bh, *head; + const unsigned blocksize = 1 << inode->i_blkbits; + int nr_underway = 0; + + BUG_ON(!PageLocked(page)); + + last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; + + if (!page_has_buffers(page)) { + create_empty_buffers(page, blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + + /* + * Be very careful. We have no exclusion from __set_page_dirty_buffers + * here, and the (potentially unmapped) buffers may become dirty at + * any time. If a buffer becomes dirty here after we've inspected it + * then we just miss that fact, and the page stays dirty. + * + * Buffers outside i_size may be dirtied by __set_page_dirty_buffers; + * handle that here by just cleaning them. + */ + + block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + head = page_buffers(page); + bh = head; + + /* + * Get all the dirty buffers mapped to disk addresses and + * handle any aliases from the underlying blockdev's mapping. + */ + do { + if (block > last_block) { + /* + * mapped buffers outside i_size will occur, because + * this page can be outside i_size when there is a + * truncate in progress. + */ + /* + * The buffer was zeroed by block_write_full_page() + */ + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); + } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { + WARN_ON(bh->b_size != blocksize); + err = btrfs_get_block(inode, block, bh, 0); + if (err) + goto recover; + if (buffer_new(bh)) { + /* blockdev mappings never come here */ + clear_buffer_new(bh); + unmap_underlying_metadata(bh->b_bdev, + bh->b_blocknr); + } + } + bh = bh->b_this_page; + block++; + } while (bh != head); + + do { + if (!buffer_mapped(bh)) + continue; + /* + * If it's a fully non-blocking write attempt and we cannot + * lock the buffer then redirty the page. Note that this can + * potentially cause a busy-wait loop from pdflush and kswapd + * activity, but those code paths have their own higher-level + * throttling. + */ + if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { + lock_buffer(bh); + } else if (test_set_buffer_locked(bh)) { + redirty_page_for_writepage(wbc, page); + continue; + } + if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) { + mark_buffer_async_write(bh); + } else { + unlock_buffer(bh); + } + } while ((bh = bh->b_this_page) != head); + + /* + * The page and its buffers are protected by PageWriteback(), so we can + * drop the bh refcounts early. + */ + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + submit_bh(WRITE, bh); + nr_underway++; + } + bh = next; + } while (bh != head); + unlock_page(page); + + err = 0; +done: + if (nr_underway == 0) { + /* + * The page was marked dirty, but the buffers were + * clean. Someone wrote them back by hand with + * ll_rw_block/submit_bh. A rare case. + */ + int uptodate = 1; + do { + if (!buffer_uptodate(bh)) { + uptodate = 0; + break; + } + bh = bh->b_this_page; + } while (bh != head); + if (uptodate) + SetPageUptodate(page); + end_page_writeback(page); + /* + * The page and buffer_heads can be released at any time from + * here on. + */ + wbc->pages_skipped++; /* We didn't write this page */ + } + return err; + +recover: + /* + * ENOSPC, or some other error. We may already have added some + * blocks to the file, so we need to write these out to avoid + * exposing stale data. + * The page is currently locked and not marked for writeback + */ + bh = head; + /* Recovery: lock and submit the mapped buffers */ + do { + if (buffer_mapped(bh) && buffer_dirty(bh)) { + lock_buffer(bh); + mark_buffer_async_write(bh); + } else { + /* + * The buffer may have been set dirty during + * attachment to a dirty page. + */ + clear_buffer_dirty(bh); + } + } while ((bh = bh->b_this_page) != head); + SetPageError(page); + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + clear_buffer_dirty(bh); + submit_bh(WRITE, bh); + nr_underway++; + } + bh = next; + } while (bh != head); + unlock_page(page); + goto done; +} + +/* + * The generic ->writepage function for buffer-backed address_spaces + */ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { - return nobh_writepage(page, btrfs_get_block, wbc); + struct inode * const inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned offset; + void *kaddr; + + /* Is the page fully inside i_size? */ + if (page->index < end_index) + return __btrfs_write_full_page(inode, page, wbc); + + /* Is the page fully outside i_size? (truncate in progress) */ + offset = i_size & (PAGE_CACHE_SIZE-1); + if (page->index >= end_index+1 || !offset) { + /* + * The page may have dirty, unmapped buffers. For example, + * they may have been added in ext3_writepage(). Make them + * freeable here, so the page does not leak. + */ + block_invalidatepage(page, 0); + unlock_page(page); + return 0; /* don't care */ + } + + /* + * The page straddles i_size. It must be zeroed out on each and every + * writepage invokation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." + */ + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + return __btrfs_write_full_page(inode, page, wbc); } static void btrfs_truncate(struct inode *inode) @@ -1179,6 +1371,29 @@ static void btrfs_truncate(struct inode *inode) mark_inode_dirty(inode); } +/* + * Make sure any changes to nobh_commit_write() are reflected in + * nobh_truncate_page(), since it doesn't call commit_write(). + */ +static int btrfs_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + struct buffer_head *bh; + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + SetPageUptodate(page); + bh = page_buffers(page); + if (buffer_mapped(bh) && bh->b_blocknr != 0) { + set_page_dirty(page); + } + if (pos > inode->i_size) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } + return 0; +} + static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, struct page **prepared_pages, const char __user * buf) @@ -1234,6 +1449,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, int ret; int this_write; struct inode *inode = file->f_path.dentry->d_inode; + struct buffer_head *bh; + struct btrfs_file_extent_item *ei; for (i = 0; i < num_pages; i++) { offset = pos & (PAGE_CACHE_SIZE -1); @@ -1242,16 +1459,47 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - btrfs_csum_file_block(trans, root, inode->i_ino, + + bh = page_buffers(pages[i]); + if (buffer_mapped(bh) && bh->b_blocknr == 0) { + struct btrfs_key key; + struct btrfs_path *path; + char *ptr; + u32 datasize; + + path = btrfs_alloc_path(); + BUG_ON(!path); + key.objectid = inode->i_ino; + key.offset = pages[i]->index << PAGE_CACHE_SHIFT; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + BUG_ON(write_bytes >= PAGE_CACHE_SIZE); + datasize = offset + + btrfs_file_extent_calc_inline_size(write_bytes); + ret = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + BUG_ON(ret); + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = btrfs_file_extent_inline_start(ei); + memcpy(ptr, bh->b_data, offset + write_bytes); + mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + } else { + btrfs_csum_file_block(trans, root, inode->i_ino, pages[i]->index << PAGE_CACHE_SHIFT, kmap(pages[i]), PAGE_CACHE_SIZE); - kunmap(pages[i]); + kunmap(pages[i]); + } SetPageChecked(pages[i]); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - ret = nobh_commit_write(file, pages[i], offset, + ret = btrfs_commit_write(file, pages[i], offset, offset + this_write); pos += this_write; if (ret) { @@ -1275,12 +1523,16 @@ static int drop_extents(struct btrfs_trans_handle *trans, struct btrfs_leaf *leaf; int slot; struct btrfs_file_extent_item *extent; - u64 extent_end; + u64 extent_end = 0; int keep; struct btrfs_file_extent_item old; struct btrfs_path *path; u64 search_start = start; int bookend; + int found_type; + int found_extent; + int found_inline; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1292,37 +1544,62 @@ static int drop_extents(struct btrfs_trans_handle *trans, goto out; if (ret > 0) { if (path->slots[0] == 0) { - ret = -ENOENT; + ret = 0; goto out; } path->slots[0]--; } keep = 0; bookend = 0; + found_extent = 0; + found_inline = 0; + extent = NULL; leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << - inode->i_blkbits); if (key.offset >= end || key.objectid != inode->i_ino) { ret = 0; goto out; } - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) { + ret = 0; goto out; - if (search_start >= extent_end) + } + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); + found_extent = 1; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_inline = 1; + extent_end = key.offset + + btrfs_file_extent_inline_len(leaf->items + slot); + } + + if (!found_extent && !found_inline) { + ret = 0; goto out; + } + + if (search_start >= extent_end) { + ret = 0; + goto out; + } + search_start = extent_end; if (end < extent_end && end >= key.offset) { - memcpy(&old, extent, sizeof(old)); - ret = btrfs_inc_extent_ref(trans, root, - btrfs_file_extent_disk_blocknr(&old), - btrfs_file_extent_disk_num_blocks(&old)); - BUG_ON(ret); + if (found_extent) { + memcpy(&old, extent, sizeof(old)); + ret = btrfs_inc_extent_ref(trans, root, + btrfs_file_extent_disk_blocknr(&old), + btrfs_file_extent_disk_num_blocks(&old)); + BUG_ON(ret); + } + WARN_ON(found_inline); bookend = 1; } @@ -1332,25 +1609,45 @@ static int drop_extents(struct btrfs_trans_handle *trans, /* truncate existing extent */ keep = 1; WARN_ON(start & (root->blocksize - 1)); - new_num = (start - key.offset) >> inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(extent); - inode->i_blocks -= (old_num - new_num) << 3; - btrfs_set_file_extent_num_blocks(extent, new_num); - mark_buffer_dirty(path->nodes[0]); + if (found_extent) { + new_num = (start - key.offset) >> + inode->i_blkbits; + old_num = btrfs_file_extent_num_blocks(extent); + inode->i_blocks -= (old_num - new_num) << 3; + btrfs_set_file_extent_num_blocks(extent, + new_num); + mark_buffer_dirty(path->nodes[0]); + } else { + WARN_ON(1); + /* + ret = btrfs_truncate_item(trans, root, path, + start - key.offset); + BUG_ON(ret); + */ + } } if (!keep) { - u64 disk_blocknr; - u64 disk_num_blocks; - disk_blocknr = btrfs_file_extent_disk_blocknr(extent); - disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); + u64 disk_blocknr = 0; + u64 disk_num_blocks = 0; + u64 extent_num_blocks = 0; + if (found_extent) { + disk_blocknr = + btrfs_file_extent_disk_blocknr(extent); + disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); + extent_num_blocks = + btrfs_file_extent_num_blocks(extent); + } ret = btrfs_del_item(trans, root, path); BUG_ON(ret); - inode->i_blocks -= - btrfs_file_extent_num_blocks(extent) << 3; btrfs_release_path(root, path); - ret = btrfs_free_extent(trans, root, disk_blocknr, - disk_num_blocks, 0); + if (found_extent) { + inode->i_blocks -= + btrfs_file_extent_num_blocks(extent) << 3; + ret = btrfs_free_extent(trans, root, + disk_blocknr, + disk_num_blocks, 0); + } BUG_ON(ret); if (!bookend && search_start >= end) { @@ -1360,7 +1657,7 @@ static int drop_extents(struct btrfs_trans_handle *trans, if (!bookend) continue; } - if (bookend) { + if (bookend && found_extent) { /* create bookend */ struct btrfs_key ins; ins.objectid = inode->i_ino; @@ -1390,6 +1687,8 @@ static int drop_extents(struct btrfs_trans_handle *trans, btrfs_file_extent_num_blocks(&old) - ((end - key.offset) >> inode->i_blkbits)); + btrfs_set_file_extent_type(extent, + BTRFS_FILE_EXTENT_REG); btrfs_set_file_extent_generation(extent, btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); @@ -1445,7 +1744,8 @@ static int prepare_pages(struct btrfs_root *root, if (err) goto failed_truncate; bh = bh->b_this_page; - alloc_extent_start++; + if (alloc_extent_start) + alloc_extent_start++; } while (bh != head); pos += this_write; WARN_ON(this_write > write_bytes); @@ -1543,12 +1843,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, start_pos, (pos + count + root->blocksize -1) & ~((u64)root->blocksize - 1)); + BUG_ON(ret); } - ret = btrfs_alloc_extent(trans, root, num_blocks, 1, + if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || + pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + ret = btrfs_alloc_extent(trans, root, num_blocks, 1, (u64)-1, &ins); - BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); + BUG_ON(ret); + } else { + ins.offset = 0; + ins.objectid = 0; + } BUG_ON(ret); alloc_extent_start = ins.objectid; ret = btrfs_end_transaction(trans, root); @@ -1567,7 +1875,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, BUG_ON(ret); /* FIXME blocks != pagesize */ - alloc_extent_start += num_pages; + if (alloc_extent_start) + alloc_extent_start += num_pages; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); BUG_ON(ret); @@ -1779,10 +2088,11 @@ static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, if (!PageChecked(page)) { /* FIXME, do it per block */ struct btrfs_root *root = BTRFS_I(inode)->root; + int ret = btrfs_csum_verify_file_block(root, - page->mapping->host->i_ino, - page->index << PAGE_CACHE_SHIFT, - kmap(page), PAGE_CACHE_SIZE); + page->mapping->host->i_ino, + page->index << PAGE_CACHE_SHIFT, + kmap(page), PAGE_CACHE_SIZE); if (ret) { printk("failed to verify ino %lu page %lu\n", page->mapping->host->i_ino, @@ -2249,6 +2559,16 @@ static int btrfs_get_sb(struct file_system_type *fs_type, btrfs_fill_super, mnt); } + +static int btrfs_getattr(struct vfsmount *mnt, + struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + generic_fillattr(inode, stat); + stat->blksize = 256 * 1024; + return 0; +} + static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2298,6 +2618,7 @@ static struct address_space_operations btrfs_aops = { static struct inode_operations btrfs_file_inode_operations = { .truncate = btrfs_truncate, + .getattr = btrfs_getattr, }; static struct file_operations btrfs_file_operations = { From 7e38180e207c8707691c12395e990a5d583c5e71 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 19 Apr 2007 15:36:27 -0400 Subject: [PATCH 0122/2985] Btrfs: directory inode index is back Signed-off-by: Chris Mason --- fs/btrfs/TODO | 4 +- fs/btrfs/ctree.h | 58 +++------ fs/btrfs/dir-item.c | 204 +++++++++++++++++++++--------- fs/btrfs/super.c | 302 +++++++------------------------------------- 4 files changed, 206 insertions(+), 362 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 1c3acfcf146e..8d7e5a3ed629 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,12 +1,12 @@ * cleanup, add more error checking, get rid of BUG_ONs * Fix ENOSPC handling -* Add block mapping tree (simple dm layer) * Make allocator smarter * add a block group to struct inode -* Make sure nobh stuff is working properly for cows * Do actual block accounting * Check compat and incompat flags on the inode * Get rid of struct ctree_path, limiting tree levels held at one time +* Add generation number to key pointer in nodes +* Add parent pointer back to extent map. * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f1800008be34..d628a3b9441c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -185,11 +185,6 @@ struct btrfs_inode_item { struct btrfs_inode_timespec otime; } __attribute__ ((__packed__)); -/* inline data is just a blob of bytes */ -struct btrfs_inline_data_item { - u8 data; -} __attribute__ ((__packed__)); - struct btrfs_dir_item { struct btrfs_disk_key location; __le16 flags; @@ -293,9 +288,6 @@ struct btrfs_root { #define BTRFS_KEY_TYPE_MASK (((u32)BTRFS_KEY_TYPE_MAX - 1) << \ BTRFS_KEY_TYPE_SHIFT) -#define BTRFS_KEY_OVERFLOW_MAX 128 -#define BTRFS_KEY_OVERFLOW_MASK ((u32)BTRFS_KEY_OVERFLOW_MAX - 1) - /* * inode items have the data typically returned from stat and store other * info about object characteristics. There is one for every file and dir in @@ -621,31 +613,6 @@ static inline void btrfs_set_key_type(struct btrfs_key *key, u32 val) key->flags = (key->flags & ~(BTRFS_KEY_TYPE_MASK)) | val; } -static inline u32 btrfs_key_overflow(struct btrfs_key *key) -{ - return key->flags & BTRFS_KEY_OVERFLOW_MASK; -} - -static inline void btrfs_set_key_overflow(struct btrfs_key *key, u32 over) -{ - BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); - key->flags = (key->flags & ~BTRFS_KEY_OVERFLOW_MASK) | over; -} - -static inline u32 btrfs_disk_key_overflow(struct btrfs_disk_key *key) -{ - return le32_to_cpu(key->flags) & BTRFS_KEY_OVERFLOW_MASK; -} - -static inline void btrfs_set_disk_key_overflow(struct btrfs_disk_key *key, - u32 over) -{ - u32 flags = btrfs_disk_key_flags(key); - BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); - flags = (flags & ~BTRFS_KEY_OVERFLOW_MASK) | over; - btrfs_set_disk_key_flags(key, flags); -} - static inline u64 btrfs_header_blocknr(struct btrfs_header *h) { return le64_to_cpu(h->blocknr); @@ -1079,15 +1046,24 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, struct btrfs_key *location, u8 type); -int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 dir, - const char *name, int name_len, int mod); -int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 dir, - u64 objectid, int mod); -int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, +struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + const char *name, int name_len, + int mod); +struct btrfs_dir_item * +btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + u64 objectid, const char *name, int name_len, + int mod); +struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, + struct btrfs_path *path, const char *name, int name_len); +int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_dir_item *di); /* inode-map.c */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index a43deb726486..7a7e9846860a 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -4,24 +4,31 @@ #include "hash.h" #include "transaction.h" -int insert_with_overflow(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, struct btrfs_key - *cpu_key, u32 data_size) +struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, + u32 data_size) { - int overflow; int ret; + char *ptr; + struct btrfs_item *item; + struct btrfs_leaf *leaf; ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); - overflow = btrfs_key_overflow(cpu_key); - - while(ret == -EEXIST && overflow < BTRFS_KEY_OVERFLOW_MAX) { - overflow++; - btrfs_set_key_overflow(cpu_key, overflow); - btrfs_release_path(root, path); - ret = btrfs_insert_empty_item(trans, root, path, cpu_key, - data_size); + if (ret == -EEXIST) { + ret = btrfs_extend_item(trans, root, path, data_size); + WARN_ON(ret > 0); + if (ret) + return ERR_PTR(ret); } - return ret; + WARN_ON(ret > 0); + leaf = btrfs_buffer_leaf(path->nodes[0]); + item = leaf->items + path->slots[0]; + ptr = btrfs_item_ptr(leaf, path->slots[0], char); + BUG_ON(data_size > btrfs_item_size(item)); + ptr += btrfs_item_size(item) - data_size; + return (struct btrfs_dir_item *)ptr; } int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root @@ -43,13 +50,12 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); btrfs_init_path(path); data_size = sizeof(*dir_item) + name_len; - ret = insert_with_overflow(trans, root, path, &key, data_size); - if (ret) + dir_item = insert_with_overflow(trans, root, path, &key, data_size); + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); goto out; + } - dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_dir_item); btrfs_cpu_key_to_disk(&dir_item->location, location); btrfs_set_dir_type(dir_item, type); btrfs_set_dir_flags(dir_item, 0); @@ -58,15 +64,39 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); btrfs_mark_buffer_dirty(path->nodes[0]); -out: + + /* FIXME, use some real flag for selecting the extra index */ + if (root == root->fs_info->tree_root) { + ret = 0; + goto out; + } + btrfs_release_path(root, path); + + btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.offset = location->objectid; + dir_item = insert_with_overflow(trans, root, path, &key, data_size); + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); + goto out; + } + btrfs_cpu_key_to_disk(&dir_item->location, location); + btrfs_set_dir_type(dir_item, type); + btrfs_set_dir_flags(dir_item, 0); + btrfs_set_dir_name_len(dir_item, name_len); + name_ptr = (char *)(dir_item + 1); + btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); +out: btrfs_free_path(path); return ret; } -int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 dir, - const char *name, int name_len, int mod) +struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + const char *name, int name_len, + int mod) { int ret; struct btrfs_key key; @@ -74,57 +104,111 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int cow = mod != 0; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; - u32 overflow; key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - // btrfs_set_key_overflow(&key, BTRFS_KEY_OVERFLOW_MAX - 1); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); - while(1) { - ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); - if (ret < 0) - return ret; - if (ret > 0) { - if (path->slots[0] == 0) - return 1; - path->slots[0]--; - } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - - if (btrfs_disk_key_objectid(found_key) != dir || - btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || - btrfs_disk_key_offset(found_key) != key.offset) - return 1; - - if (btrfs_match_dir_item_name(root, path, name, name_len)) - return 0; - - overflow = btrfs_disk_key_overflow(found_key); - if (overflow == 0) - return 1; - btrfs_set_key_overflow(&key, overflow - 1); - btrfs_release_path(root, path); + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) { + if (path->slots[0] == 0) + return NULL; + path->slots[0]--; } - return 1; + leaf = btrfs_buffer_leaf(path->nodes[0]); + found_key = &leaf->items[path->slots[0]].key; + + if (btrfs_disk_key_objectid(found_key) != dir || + btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || + btrfs_disk_key_offset(found_key) != key.offset) + return NULL; + + return btrfs_match_dir_item_name(root, path, name, name_len); } -int btrfs_match_dir_item_name(struct btrfs_root *root, +struct btrfs_dir_item * +btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + u64 objectid, const char *name, int name_len, + int mod) +{ + int ret; + struct btrfs_key key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + + key.objectid = dir; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.offset = objectid; + + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) + return ERR_PTR(-ENOENT); + return btrfs_match_dir_item_name(root, path, name, name_len); +} + +struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) { struct btrfs_dir_item *dir_item; char *name_ptr; + u32 total_len; + u32 cur = 0; + u32 this_len; + struct btrfs_leaf *leaf; - dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_dir_item); - if (btrfs_dir_name_len(dir_item) != name_len) - return 0; - name_ptr = (char *)(dir_item + 1); - if (memcmp(name_ptr, name, name_len)) - return 0; - return 1; + leaf = btrfs_buffer_leaf(path->nodes[0]); + dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); + total_len = btrfs_item_size(leaf->items + path->slots[0]); + while(cur < total_len) { + this_len = sizeof(*dir_item) + btrfs_dir_name_len(dir_item); + name_ptr = (char *)(dir_item + 1); + + if (btrfs_dir_name_len(dir_item) == name_len && + memcmp(name_ptr, name, name_len) == 0) + return dir_item; + + cur += this_len; + dir_item = (struct btrfs_dir_item *)((char *)dir_item + + this_len); + } + return NULL; } + +int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_dir_item *di) +{ + + struct btrfs_leaf *leaf; + u32 sub_item_len; + u32 item_len; + int ret; + + leaf = btrfs_buffer_leaf(path->nodes[0]); + sub_item_len = sizeof(*di) + btrfs_dir_name_len(di); + item_len = btrfs_item_size(leaf->items + path->slots[0]); + if (sub_item_len == btrfs_item_size(leaf->items + path->slots[0])) { + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + } else { + char *ptr = (char *)di; + char *start = btrfs_item_ptr(leaf, path->slots[0], char); + btrfs_memmove(root, leaf, ptr, ptr + sub_item_len, + item_len - (ptr + sub_item_len - start)); + ret = btrfs_truncate_item(trans, root, path, + item_len - sub_item_len); + BUG_ON(ret); + } + return 0; +} + diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1b286bb26f12..dff2db0d1dc2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -127,32 +127,43 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct btrfs_path *path; const char *name = dentry->d_name.name; int name_len = dentry->d_name.len; - int ret; + int ret = 0; u64 objectid; struct btrfs_dir_item *di; path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - ret = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, name, name_len, -1); - if (ret < 0) + if (IS_ERR(di)) { + ret = PTR_ERR(di); goto err; - if (ret > 0) { + } + if (!di) { ret = -ENOENT; goto err; } - di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_dir_item); objectid = btrfs_disk_key_objectid(&di->location); + ret = btrfs_delete_one_dir_name(trans, root, path, di); + BUG_ON(ret); + btrfs_release_path(root, path); - ret = btrfs_del_item(trans, root, path); + di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + objectid, name, name_len, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto err; + } + if (!di) { + ret = -ENOENT; + goto err; + } + ret = btrfs_delete_one_dir_name(trans, root, path, di); BUG_ON(ret); - btrfs_release_path(root, path); dentry->d_inode->i_ctime = dir->i_ctime; err: - btrfs_release_path(root, path); btrfs_free_path(path); if (ret == 0) { inode_dec_link_count(dentry->d_inode); @@ -388,15 +399,13 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - ret = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, + di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, namelen, 0); - if (ret || !btrfs_match_dir_item_name(root, path, name, namelen)) { + if (!di || IS_ERR(di)) { location->objectid = 0; ret = 0; goto out; } - di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_dir_item); btrfs_disk_key_to_cpu(location, &di->location); out: btrfs_release_path(root, path); @@ -523,7 +532,10 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int advance; unsigned char d_type = DT_UNKNOWN; int over = 0; - int key_type = BTRFS_DIR_ITEM_KEY; + u32 di_cur; + u32 di_total; + u32 di_len; + int key_type = BTRFS_DIR_INDEX_KEY; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) @@ -567,12 +579,20 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) filp->f_pos = btrfs_disk_key_offset(&item->key); advance = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); - over = filldir(dirent, (const char *)(di + 1), - btrfs_dir_name_len(di), - btrfs_disk_key_offset(&item->key), - btrfs_disk_key_objectid(&di->location), d_type); - if (over) - goto nopos; + di_cur = 0; + di_total = btrfs_item_size(leaf->items + slot); + while(di_cur < di_total) { + over = filldir(dirent, (const char *)(di + 1), + btrfs_dir_name_len(di), + btrfs_disk_key_offset(&item->key), + btrfs_disk_key_objectid(&di->location), + d_type); + if (over) + goto nopos; + di_len = btrfs_dir_name_len(di) + sizeof(*di); + di_cur += di_len; + di = (struct btrfs_dir_item *)((char *)di + di_len); + } } filp->f_pos++; nopos: @@ -935,75 +955,6 @@ printk("btrfs sync_fs\n"); return 0; } -#if 0 -static int btrfs_get_block_inline(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - struct btrfs_root *root = btrfs_sb(inode->i_sb); - struct btrfs_path *path; - struct btrfs_key key; - struct btrfs_leaf *leaf; - int num_bytes = result->b_size; - int item_size; - int ret; - u64 pos; - char *ptr; - int copy_size; - int err = 0; - char *safe_ptr; - char *data_ptr; - - path = btrfs_alloc_path(); - BUG_ON(!path); - - WARN_ON(create); - if (create) { - return 0; - } - pos = iblock << inode->i_blkbits; - key.objectid = inode->i_ino; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_INLINE_DATA_KEY); - ptr = kmap(result->b_page); - safe_ptr = ptr; - ptr += (pos & (PAGE_CACHE_SIZE -1)); -again: - key.offset = pos; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret) { - if (ret < 0) - err = ret; - else - err = 0; - goto out; - } - leaf = btrfs_buffer_leaf(path->nodes[0]); - item_size = btrfs_item_size(leaf->items + path->slots[0]); - copy_size = min(num_bytes, item_size); - data_ptr = btrfs_item_ptr(leaf, path->slots[0], char); - WARN_ON(safe_ptr + PAGE_CACHE_SIZE < ptr + copy_size); - memcpy(ptr, data_ptr, copy_size); - pos += copy_size; - num_bytes -= copy_size; - WARN_ON(num_bytes < 0); - ptr += copy_size; - btrfs_release_path(root, path); - if (num_bytes != 0) { - if (pos >= i_size_read(inode)) - memset(ptr, 0, num_bytes); - else - goto again; - } - set_buffer_uptodate(result); - map_bh(result, inode->i_sb, 0); - err = 0; -out: - btrfs_free_path(path); - kunmap(result->b_page); - return err; -} -#endif - static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { @@ -1907,174 +1858,6 @@ out: return num_written ? num_written : err; } -#if 0 -static ssize_t inline_one_page(struct btrfs_root *root, struct inode *inode, - struct page *page, loff_t pos, - size_t offset, size_t write_bytes) -{ - struct btrfs_path *path; - struct btrfs_trans_handle *trans; - struct btrfs_key key; - struct btrfs_leaf *leaf; - struct btrfs_key found_key; - int ret; - size_t copy_size = 0; - char *dst = NULL; - int err = 0; - size_t num_written = 0; - - path = btrfs_alloc_path(); - BUG_ON(!path); - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - key.objectid = inode->i_ino; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_INLINE_DATA_KEY); - -again: - key.offset = pos; - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret < 0) { - err = ret; - goto out; - } - if (ret == 0) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); - copy_size = btrfs_item_size(leaf->items + path->slots[0]); - dst = btrfs_item_ptr(leaf, path->slots[0], char); - copy_size = min(write_bytes, copy_size); - goto copyit; - } else { - int slot = path->slots[0]; - if (slot > 0) { - slot--; - } - // FIXME find max key - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[slot].key); - if (found_key.objectid != inode->i_ino) - goto insert; - if (btrfs_key_type(&found_key) != BTRFS_INLINE_DATA_KEY) - goto insert; - copy_size = btrfs_item_size(leaf->items + slot); - if (found_key.offset + copy_size <= pos) - goto insert; - dst = btrfs_item_ptr(leaf, path->slots[0], char); - dst += pos - found_key.offset; - copy_size = copy_size - (pos - found_key.offset); - BUG_ON(copy_size < 0); - copy_size = min(write_bytes, copy_size); - WARN_ON(copy_size == 0); - goto copyit; - } -insert: - btrfs_release_path(root, path); - copy_size = min(write_bytes, - (size_t)BTRFS_LEAF_DATA_SIZE(root) - - sizeof(struct btrfs_item) * 4); - ret = btrfs_insert_empty_item(trans, root, path, &key, copy_size); - BUG_ON(ret); - dst = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], char); -copyit: - WARN_ON(copy_size == 0); - WARN_ON(dst + copy_size > - btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], char) + - btrfs_item_size(btrfs_buffer_leaf(path->nodes[0])->items + - path->slots[0])); - btrfs_memcpy(root, path->nodes[0]->b_data, dst, - page_address(page) + offset, copy_size); - mark_buffer_dirty(path->nodes[0]); - btrfs_release_path(root, path); - pos += copy_size; - offset += copy_size; - num_written += copy_size; - write_bytes -= copy_size; - if (write_bytes) - goto again; -out: - btrfs_free_path(path); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); - return num_written ? num_written : err; -} - -static ssize_t btrfs_file_inline_write(struct file *file, - const char __user *buf, - size_t count, loff_t *ppos) -{ - loff_t pos; - size_t num_written = 0; - int err = 0; - int ret = 0; - struct inode *inode = file->f_path.dentry->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - unsigned long page_index; - - if (file->f_flags & O_DIRECT) - return -EINVAL; - pos = *ppos; - - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); - current->backing_dev_info = inode->i_mapping->backing_dev_info; - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); - if (err) - goto out; - if (count == 0) - goto out; - err = remove_suid(file->f_path.dentry); - if (err) - goto out; - file_update_time(file); - mutex_lock(&inode->i_mutex); - while(count > 0) { - size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); - struct page *page; - - page_index = pos >> PAGE_CACHE_SHIFT; - page = grab_cache_page(inode->i_mapping, page_index); - if (!PageUptodate(page)) { - ret = mpage_readpage(page, btrfs_get_block); - BUG_ON(ret); - lock_page(page); - } - ret = btrfs_copy_from_user(pos, 1, - write_bytes, &page, buf); - BUG_ON(ret); - write_bytes = inline_one_page(root, inode, page, pos, - offset, write_bytes); - SetPageUptodate(page); - if (write_bytes > 0 && pos + write_bytes > inode->i_size) { - i_size_write(inode, pos + write_bytes); - mark_inode_dirty(inode); - } - page_cache_release(page); - unlock_page(page); - if (write_bytes < 0) - goto out_unlock; - buf += write_bytes; - count -= write_bytes; - pos += write_bytes; - num_written += write_bytes; - - balance_dirty_pages_ratelimited(inode->i_mapping); - cond_resched(); - } -out_unlock: - mutex_unlock(&inode->i_mutex); -out: - *ppos = pos; - current->backing_dev_info = NULL; - return num_written ? num_written : err; -} -#endif - static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset, unsigned long size) { @@ -2422,6 +2205,7 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; int ret = 0; + struct btrfs_dir_item *di; int namelen; struct btrfs_path *path; u64 root_dirid; @@ -2440,12 +2224,12 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int return -ENOMEM; root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, + di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, root_dirid, vol_args.name, namelen, 0); mutex_unlock(&root->fs_info->fs_mutex); btrfs_free_path(path); - if (ret == 0) + if (di && !IS_ERR(di)) return -EEXIST; if (root == root->fs_info->tree_root) From 8fd17795b2261ecb1bad2a6df09ef14c4957a3fb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 19 Apr 2007 21:01:03 -0400 Subject: [PATCH 0123/2985] Btrfs: early fsync support Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/root-tree.c | 1 - fs/btrfs/super.c | 37 ++++++++++++++++++++++++++++++++++++- fs/btrfs/transaction.c | 25 ++++++++++++++++++++++--- fs/btrfs/transaction.h | 1 + 7 files changed, 62 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 8d7e5a3ed629..5ff2cef9f028 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -7,6 +7,7 @@ * Get rid of struct ctree_path, limiting tree levels held at one time * Add generation number to key pointer in nodes * Add parent pointer back to extent map. +* Implement real list of pending transactions * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d628a3b9441c..047a84762d93 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -256,6 +256,7 @@ struct btrfs_fs_info { struct inode *btree_inode; struct mutex trans_mutex; struct mutex fs_mutex; + struct list_head trans_list; struct crypto_hash *hash_tfm; spinlock_t hash_lock; struct kobject kobj; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 11e17a2f736b..b666c3571aee 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -528,6 +528,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pending_del_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); + INIT_LIST_HEAD(&fs_info->trans_list); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 72be9836932f..a42943bd9179 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -100,7 +100,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, BUG_ON(refs == 0); if (refs == 1) { ret = btrfs_del_item(trans, root, path); -printk("deleting root %Lu %Lu %u\n", key->objectid, key->offset, key->flags); } else { btrfs_set_root_refs(ri, refs - 1); printk("ref now %u root %Lu %Lu %u\n", refs -1, key->objectid, key->offset, key->flags); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index dff2db0d1dc2..f99c764a59db 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -932,6 +933,26 @@ out_unlock: return err; } +static int btrfs_sync_file(struct file *file, + struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + struct btrfs_trans_handle *trans; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto out; + } + ret = btrfs_commit_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); +out: + return ret > 0 ? EIO : ret; +} + static int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; @@ -2353,6 +2374,19 @@ static int btrfs_getattr(struct vfsmount *mnt, return 0; } +static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct btrfs_root *root = btrfs_sb(dentry->d_sb); + struct btrfs_super_block *disk_super = root->fs_info->disk_super; + + buf->f_namelen = BTRFS_NAME_LEN; + buf->f_blocks = btrfs_super_total_blocks(disk_super); + buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super); + buf->f_bavail = buf->f_bfree; + buf->f_bsize = dentry->d_sb->s_blocksize; + buf->f_type = BTRFS_SUPER_MAGIC; + return 0; +} static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2362,7 +2396,6 @@ static struct file_system_type btrfs_fs_type = { }; static struct super_operations btrfs_super_ops = { - .statfs = simple_statfs, .delete_inode = btrfs_delete_inode, .put_super = btrfs_put_super, .read_inode = btrfs_read_locked_inode, @@ -2371,6 +2404,7 @@ static struct super_operations btrfs_super_ops = { .write_inode = btrfs_write_inode, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, + .statfs = btrfs_statfs, }; static struct inode_operations btrfs_dir_inode_operations = { @@ -2413,6 +2447,7 @@ static struct file_operations btrfs_file_operations = { .mmap = generic_file_mmap, .open = generic_file_open, .ioctl = btrfs_ioctl, + .fsync = btrfs_sync_file, }; static int __init init_btrfs_fs(void) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8740752f3845..078cb9cbf9dd 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -19,6 +19,7 @@ static void put_transaction(struct btrfs_transaction *transaction) if (transaction->use_count == 0) { WARN_ON(total_trans == 0); total_trans--; + list_del_init(&transaction->list); memset(transaction, 0, sizeof(*transaction)); kmem_cache_free(btrfs_transaction_cachep, transaction); } @@ -43,6 +44,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->in_commit = 0; cur_trans->use_count = 1; cur_trans->commit_done = 0; + list_add_tail(&cur_trans->list, &root->fs_info->trans_list); } cur_trans->num_writers++; return 0; @@ -236,6 +238,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, { int ret = 0; struct btrfs_transaction *cur_trans; + struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; DEFINE_WAIT(wait); @@ -272,13 +275,29 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, BUG_ON(ret); cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; - btrfs_set_super_generation(root->fs_info->disk_super, - root->fs_info->generation + 1); + if (cur_trans->list.prev != &root->fs_info->trans_list) { + prev_trans = list_entry(cur_trans->list.prev, + struct btrfs_transaction, list); + if (prev_trans->commit_done) + prev_trans = NULL; + else + prev_trans->use_count++; + } mutex_unlock(&root->fs_info->trans_mutex); + mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); + if (prev_trans) { + mutex_lock(&root->fs_info->trans_mutex); + wait_for_commit(root, prev_trans); + put_transaction(prev_trans); + mutex_unlock(&root->fs_info->trans_mutex); + } + btrfs_set_super_generation(root->fs_info->disk_super, + cur_trans->transid); BUG_ON(ret); - write_ctree_super(trans, root); + + mutex_lock(&root->fs_info->fs_mutex); btrfs_finish_extent_commit(trans, root); mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 4f1496ae6f24..3cc29900a074 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -8,6 +8,7 @@ struct btrfs_transaction { int use_count; int commit_done; int magic; + struct list_head list; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; From e66f709b157ee8557166c14b67c01bae978ac32e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 20 Apr 2007 13:16:02 -0400 Subject: [PATCH 0124/2985] Btrfs: write barriers on commit, balance level before split Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 119 +++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 12 ++++- 3 files changed, 131 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4efcd1bd63e5..744fd728e5d9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -151,6 +151,11 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, for (i = 0; nritems > 1 && i < nritems - 2; i++) { struct btrfs_key cpukey; btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key); +if (comp_keys(&node->ptrs[i].key, &cpukey) >= 0) { + struct btrfs_key bad; + btrfs_disk_key_to_cpu(&bad, &node->ptrs[i].key); +printk("check_node level %d i is %d bad comp %Lu %u %Lu, %Lu %u %Lu\n",level, i, bad.objectid, bad.flags, bad.offset, cpukey.objectid, cpukey.flags, cpukey.offset); +} BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0); } return 0; @@ -448,6 +453,111 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +/* returns zero if the push worked, non-zero otherwise */ +static int push_nodes_for_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) +{ + struct buffer_head *right_buf; + struct buffer_head *mid_buf; + struct buffer_head *left_buf; + struct buffer_head *parent_buf = NULL; + struct btrfs_node *right = NULL; + struct btrfs_node *mid; + struct btrfs_node *left = NULL; + struct btrfs_node *parent = NULL; + int ret = 0; + int wret; + int pslot; + int orig_slot = path->slots[level]; + u64 orig_ptr; + + if (level == 0) + return 1; + + mid_buf = path->nodes[level]; + mid = btrfs_buffer_node(mid_buf); + orig_ptr = btrfs_node_blockptr(mid, orig_slot); + + if (level < BTRFS_MAX_LEVEL - 1) + parent_buf = path->nodes[level + 1]; + pslot = path->slots[level + 1]; + + if (!parent_buf) + return 1; + parent = btrfs_buffer_node(parent_buf); + + left_buf = read_node_slot(root, parent_buf, pslot - 1); + + /* first, try to make some room in the middle buffer */ + if (left_buf) { + u32 left_nr; + btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, + &left_buf); + left = btrfs_buffer_node(left_buf); + left_nr = btrfs_header_nritems(&left->header); + wret = push_node_left(trans, root, left_buf, mid_buf); + if (wret < 0) + ret = wret; + if (wret == 0) { + orig_slot += left_nr; + btrfs_memcpy(root, parent, + &parent->ptrs[pslot].key, + &mid->ptrs[0].key, + sizeof(struct btrfs_disk_key)); + btrfs_mark_buffer_dirty(parent_buf); + if (btrfs_header_nritems(&left->header) > orig_slot) { + path->nodes[level] = left_buf; + path->slots[level + 1] -= 1; + path->slots[level] = orig_slot; + btrfs_block_release(root, mid_buf); + } else { + orig_slot -= + btrfs_header_nritems(&left->header); + path->slots[level] = orig_slot; + btrfs_block_release(root, left_buf); + } + check_node(root, path, level); + return 0; + } + btrfs_block_release(root, left_buf); + } + right_buf = read_node_slot(root, parent_buf, pslot + 1); + + /* + * then try to empty the right most buffer into the middle + */ + if (right_buf) { + btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1, + &right_buf); + right = btrfs_buffer_node(right_buf); + wret = balance_node_right(trans, root, right_buf, mid_buf); + if (wret < 0) + ret = wret; + if (wret == 0) { + btrfs_memcpy(root, parent, + &parent->ptrs[pslot + 1].key, + &right->ptrs[0].key, + sizeof(struct btrfs_disk_key)); + btrfs_mark_buffer_dirty(parent_buf); + if (btrfs_header_nritems(&mid->header) <= orig_slot) { + path->nodes[level] = right_buf; + path->slots[level + 1] += 1; + path->slots[level] = orig_slot - + btrfs_header_nritems(&mid->header); + btrfs_block_release(root, mid_buf); + } else { + btrfs_block_release(root, right_buf); + } + check_node(root, path, level); + return 0; + } + btrfs_block_release(root, right_buf); + } + check_node(root, path, level); + return 1; +} + /* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -774,7 +884,16 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root ret = insert_new_root(trans, root, path, level + 1); if (ret) return ret; + } else { + ret = push_nodes_for_insert(trans, root, path, level); + t = path->nodes[level]; + c = btrfs_buffer_node(t); + if (!ret && + btrfs_header_nritems(&c->header) < + BTRFS_NODEPTRS_PER_BLOCK(root) - 1) + return 0; } + c_nritems = btrfs_header_nritems(&c->header); split_buffer = btrfs_alloc_free_block(trans, root); split = btrfs_buffer_node(split_buffer); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 047a84762d93..681b23933d9b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -259,6 +259,7 @@ struct btrfs_fs_info { struct list_head trans_list; struct crypto_hash *hash_tfm; spinlock_t hash_lock; + int do_barriers; struct kobject kobj; }; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b666c3571aee..13046295bf7a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -540,6 +540,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + fs_info->do_barriers = 1; BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -622,6 +623,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { + int ret; struct buffer_head *bh = root->fs_info->sb_buffer; btrfs_set_super_root(root->fs_info->disk_super, @@ -632,7 +634,15 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root csum_tree_block(root, bh, 0); bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(WRITE, bh); + if (root->fs_info->do_barriers) + ret = submit_bh(WRITE_BARRIER, bh); + else + ret = submit_bh(WRITE, bh); + if (ret == -EOPNOTSUPP) { + set_buffer_uptodate(bh); + root->fs_info->do_barriers = 0; + ret = submit_bh(WRITE, bh); + } wait_on_buffer(bh); if (!buffer_uptodate(bh)) { WARN_ON(1); From 33ade1f826a7c348856a98930814f33ced6d1337 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 20 Apr 2007 13:48:57 -0400 Subject: [PATCH 0125/2985] Btrfs: node balance optimizations Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 744fd728e5d9..1bae035f5171 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -151,11 +151,6 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, for (i = 0; nritems > 1 && i < nritems - 2; i++) { struct btrfs_key cpukey; btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key); -if (comp_keys(&node->ptrs[i].key, &cpukey) >= 0) { - struct btrfs_key bad; - btrfs_disk_key_to_cpu(&bad, &node->ptrs[i].key); -printk("check_node level %d i is %d bad comp %Lu %u %Lu, %Lu %u %Lu\n",level, i, bad.objectid, bad.flags, bad.offset, cpukey.objectid, cpukey.flags, cpukey.offset); -} BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0); } return 0; @@ -492,11 +487,16 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, /* first, try to make some room in the middle buffer */ if (left_buf) { u32 left_nr; - btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, - &left_buf); left = btrfs_buffer_node(left_buf); left_nr = btrfs_header_nritems(&left->header); - wret = push_node_left(trans, root, left_buf, mid_buf); + if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { + wret = 1; + } else { + btrfs_cow_block(trans, root, left_buf, parent_buf, + pslot - 1, &left_buf); + left = btrfs_buffer_node(left_buf); + wret = push_node_left(trans, root, left_buf, mid_buf); + } if (wret < 0) ret = wret; if (wret == 0) { @@ -528,10 +528,18 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, * then try to empty the right most buffer into the middle */ if (right_buf) { - btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1, - &right_buf); + u32 right_nr; right = btrfs_buffer_node(right_buf); - wret = balance_node_right(trans, root, right_buf, mid_buf); + right_nr = btrfs_header_nritems(&right->header); + if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { + wret = 1; + } else { + btrfs_cow_block(trans, root, right_buf, + parent_buf, pslot + 1, &right_buf); + right = btrfs_buffer_node(right_buf); + wret = balance_node_right(trans, root, + right_buf, mid_buf); + } if (wret < 0) ret = wret; if (wret == 0) { From 4d775673091d43b39fa9d086071009f98dec289e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 20 Apr 2007 20:23:12 -0400 Subject: [PATCH 0126/2985] Btrfs: add owner and type fields to the extents aand block headers Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 +++++ fs/btrfs/ctree.h | 45 +++++++++++++++++++++++++++++++++++++++--- fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 15 +++++++++----- fs/btrfs/super.c | 13 ++++++------ 5 files changed, 65 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1bae035f5171..b4783bf8bf4f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -68,6 +68,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); btrfs_set_header_generation(&cow_node->header, trans->transid); + btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; @@ -806,6 +807,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&c->header, level); btrfs_set_header_blocknr(&c->header, bh_blocknr(t)); btrfs_set_header_generation(&c->header, trans->transid); + btrfs_set_header_owner(&c->header, root->root_key.objectid); lower = btrfs_buffer_node(path->nodes[level-1]); memcpy(c->header.fsid, root->fs_info->disk_super->fsid, sizeof(c->header.fsid)); @@ -909,6 +911,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer)); btrfs_set_header_generation(&split->header, trans->transid); + btrfs_set_header_owner(&split->header, root->root_key.objectid); memcpy(split->header.fsid, root->fs_info->disk_super->fsid, sizeof(split->header.fsid)); mid = (c_nritems + 1) / 2; @@ -1280,6 +1283,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root memset(&right->header, 0, sizeof(right->header)); btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); btrfs_set_header_generation(&right->header, trans->transid); + btrfs_set_header_owner(&right->header, root->root_key.objectid); btrfs_set_header_level(&right->header, 0); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, sizeof(right->header.fsid)); @@ -1376,6 +1380,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root memset(&right->header, 0, sizeof(right->header)); btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); btrfs_set_header_generation(&right->header, trans->transid); + btrfs_set_header_owner(&right->header, root->root_key.objectid); btrfs_set_header_level(&right->header, 0); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, sizeof(right->header.fsid)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 681b23933d9b..78248d577290 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -62,6 +62,7 @@ struct btrfs_header { u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; + __le64 owner; __le16 nritems; __le16 flags; u8 level; @@ -151,12 +152,17 @@ struct btrfs_path { int slots[BTRFS_MAX_LEVEL]; }; +/* values for the type field in btrfs_extent_item */ +#define BTRFS_EXTENT_TREE 1 +#define BTRFS_EXTENT_FILE 2 /* * items in the extent btree are used to record the objectid of the * owner of the block and the number of references */ struct btrfs_extent_item { __le32 refs; + __le64 owner; + u8 type; } __attribute__ ((__packed__)); struct btrfs_inode_timespec { @@ -473,11 +479,32 @@ static inline void btrfs_set_extent_refs(struct btrfs_extent_item *ei, u32 val) ei->refs = cpu_to_le32(val); } +static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) +{ + return le64_to_cpu(ei->owner); +} + +static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) +{ + ei->owner = cpu_to_le64(val); +} + +static inline u8 btrfs_extent_type(struct btrfs_extent_item *ei) +{ + return ei->type; +} + +static inline void btrfs_set_extent_type(struct btrfs_extent_item *ei, u8 val) +{ + ei->type = val; +} + static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) { return le64_to_cpu(n->ptrs[nr].blockptr); } + static inline void btrfs_set_node_blockptr(struct btrfs_node *n, int nr, u64 val) { @@ -636,6 +663,17 @@ static inline void btrfs_set_header_generation(struct btrfs_header *h, h->generation = cpu_to_le64(val); } +static inline u64 btrfs_header_owner(struct btrfs_header *h) +{ + return le64_to_cpu(h->owner); +} + +static inline void btrfs_set_header_owner(struct btrfs_header *h, + u64 val) +{ + h->owner = cpu_to_le64(val); +} + static inline u16 btrfs_header_nritems(struct btrfs_header *h) { return le16_to_cpu(h->nritems); @@ -996,9 +1034,10 @@ int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins); +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 owner, + u8 type, u64 num_blocks, u64 search_start, + u64 search_end, struct btrfs_key *ins); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 13046295bf7a..7aff6bb55d91 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -319,6 +319,7 @@ static int __setup_root(int blocksize, root->last_inode_alloc = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); + root->root_key.objectid = objectid; return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b2faad3e8791..49f7cd6e067c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -460,9 +460,10 @@ error: * * returns 0 if everything worked, non-zero otherwise. */ -int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins) +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 owner, + u8 type, u64 num_blocks, u64 search_start, + u64 search_end, struct btrfs_key *ins) { int ret; int pending_ret; @@ -472,6 +473,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item extent_item; btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_extent_owner(&extent_item, owner); + btrfs_set_extent_type(&extent_item, type); if (root == extent_root) { BUG_ON(extent_root->fs_info->current_insert.offset == 0); @@ -508,13 +511,15 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root * returns the tree buffer or NULL. */ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root) { struct btrfs_key ins; int ret; struct buffer_head *buf; - ret = btrfs_alloc_extent(trans, root, 1, 0, (unsigned long)-1, &ins); + ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, + BTRFS_EXTENT_TREE, + 1, 0, (unsigned long)-1, &ins); if (ret) { BUG(); return NULL; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f99c764a59db..eba239cce212 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1819,8 +1819,9 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - ret = btrfs_alloc_extent(trans, root, num_blocks, 1, - (u64)-1, &ins); + ret = btrfs_alloc_extent(trans, root, inode->i_ino, + BTRFS_EXTENT_FILE, num_blocks, 1, + (u64)-1, &ins); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); @@ -2017,8 +2018,12 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_header_level(&leaf->header, 0); btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); btrfs_set_header_generation(&leaf->header, trans->transid); + btrfs_set_header_owner(&leaf->header, root->root_key.objectid); memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, sizeof(leaf->header.fsid)); + mark_buffer_dirty(subvol); + brelse(subvol); + subvol = NULL; inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); @@ -2031,10 +2036,6 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); - mark_buffer_dirty(subvol); - brelse(subvol); - subvol = NULL; - ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); BUG_ON(ret); From 5d0c3e60fea7e23fbfbe3ab2886b444b4c702441 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 23 Apr 2007 17:01:05 -0400 Subject: [PATCH 0127/2985] Btrfs: fix extent owner/type setting on extent tree blocks Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 49f7cd6e067c..aac3eccffb8e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -167,6 +167,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct ins.offset = 1; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); + btrfs_set_extent_type(&extent_item, BTRFS_EXTENT_TREE); + btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) { ins.objectid = extent_root->fs_info->current_insert.objectid + @@ -356,7 +358,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); - total_needed += (level + 1) * 3; + total_needed += (level + 2) * 3; if (root->fs_info->last_insert.objectid == 0 && search_end == (u64)-1) { struct btrfs_disk_key *last_key; btrfs_init_path(path); From 5e82849e63723afe720d4b325d04a989f6247cce Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 23 Apr 2007 19:10:49 -0400 Subject: [PATCH 0128/2985] Btrfs: new subvolume oops fix Signed-off-by: Chris Mason --- fs/btrfs/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index eba239cce212..6940a907654d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2013,6 +2013,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) BUG_ON(!trans); subvol = btrfs_alloc_free_block(trans, root); + if (subvol == NULL) + return -ENOSPC; leaf = btrfs_buffer_leaf(subvol); btrfs_set_header_nritems(&leaf->header, 0); btrfs_set_header_level(&leaf->header, 0); @@ -2022,8 +2024,6 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, sizeof(leaf->header.fsid)); mark_buffer_dirty(subvol); - brelse(subvol); - subvol = NULL; inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); @@ -2035,6 +2035,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); + brelse(subvol); + subvol = NULL; ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); From b51338628f604d9b4ed3c776839a484a30e12fb7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 24 Apr 2007 11:52:22 -0400 Subject: [PATCH 0129/2985] Btrfs: add dirty_inode call Signed-off-by: Chris Mason --- fs/btrfs/super.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6940a907654d..43f081593c45 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -722,17 +722,27 @@ static int btrfs_write_inode(struct inode *inode, int wait) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - int ret; + int ret = 0; + + if (wait) { + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_commit_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + } + return ret; +} + +static void btrfs_dirty_inode(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - ret = btrfs_update_inode(trans, root, inode); - if (wait) - btrfs_commit_transaction(trans, root); - else - btrfs_end_transaction(trans, root); + btrfs_update_inode(trans, root, inode); + btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - return ret; } static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, @@ -2390,6 +2400,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_type = BTRFS_SUPER_MAGIC; return 0; } + static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2405,6 +2416,7 @@ static struct super_operations btrfs_super_ops = { .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, .write_inode = btrfs_write_inode, + .dirty_inode = btrfs_dirty_inode, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .statfs = btrfs_statfs, From c62a1920ced752e86f57ab1d4ad0ec65012bce4d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 24 Apr 2007 12:07:39 -0400 Subject: [PATCH 0130/2985] Btrfs: get rid of the extent_item type field Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 16 +--------------- fs/btrfs/extent-tree.c | 5 +---- fs/btrfs/super.c | 3 +-- 3 files changed, 3 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 78248d577290..2d166ca8b104 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -152,9 +152,6 @@ struct btrfs_path { int slots[BTRFS_MAX_LEVEL]; }; -/* values for the type field in btrfs_extent_item */ -#define BTRFS_EXTENT_TREE 1 -#define BTRFS_EXTENT_FILE 2 /* * items in the extent btree are used to record the objectid of the * owner of the block and the number of references @@ -162,7 +159,6 @@ struct btrfs_path { struct btrfs_extent_item { __le32 refs; __le64 owner; - u8 type; } __attribute__ ((__packed__)); struct btrfs_inode_timespec { @@ -489,16 +485,6 @@ static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) ei->owner = cpu_to_le64(val); } -static inline u8 btrfs_extent_type(struct btrfs_extent_item *ei) -{ - return ei->type; -} - -static inline void btrfs_set_extent_type(struct btrfs_extent_item *ei, u8 val) -{ - ei->type = val; -} - static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) { return le64_to_cpu(n->ptrs[nr].blockptr); @@ -1036,7 +1022,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u8 type, u64 num_blocks, u64 search_start, + u64 num_blocks, u64 search_start, u64 search_end, struct btrfs_key *ins); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index aac3eccffb8e..116519503d0c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -167,7 +167,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct ins.offset = 1; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); - btrfs_set_extent_type(&extent_item, BTRFS_EXTENT_TREE); btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) { @@ -464,7 +463,7 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u8 type, u64 num_blocks, u64 search_start, + u64 num_blocks, u64 search_start, u64 search_end, struct btrfs_key *ins) { int ret; @@ -476,7 +475,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, owner); - btrfs_set_extent_type(&extent_item, type); if (root == extent_root) { BUG_ON(extent_root->fs_info->current_insert.offset == 0); @@ -520,7 +518,6 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - BTRFS_EXTENT_TREE, 1, 0, (unsigned long)-1, &ins); if (ret) { BUG(); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 43f081593c45..c076474c8bff 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1830,8 +1830,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { ret = btrfs_alloc_extent(trans, root, inode->i_ino, - BTRFS_EXTENT_FILE, num_blocks, 1, - (u64)-1, &ins); + num_blocks, 1, (u64)-1, &ins); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); From f68cad0f9eb3c3cc100635bd0ea191ee093cf887 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 24 Apr 2007 12:44:26 -0400 Subject: [PATCH 0131/2985] Btrfs: fixup dirty_inode related deadlocks Signed-off-by: Chris Mason --- fs/btrfs/super.c | 109 ++++++++++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 53 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c076474c8bff..5bbccbc7e3cc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -120,6 +120,58 @@ make_bad: make_bad_inode(inode); } +static void fill_inode_item(struct btrfs_inode_item *item, + struct inode *inode) +{ + btrfs_set_inode_uid(item, inode->i_uid); + btrfs_set_inode_gid(item, inode->i_gid); + btrfs_set_inode_size(item, inode->i_size); + btrfs_set_inode_mode(item, inode->i_mode); + btrfs_set_inode_nlink(item, inode->i_nlink); + btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); + btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); + btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); + btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); + btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); + btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); + btrfs_set_inode_nblocks(item, inode->i_blocks); + btrfs_set_inode_generation(item, inode->i_generation); +} + + +static int btrfs_update_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + struct btrfs_inode_item *inode_item; + struct btrfs_path *path; + int ret; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_inode(trans, root, path, + &BTRFS_I(inode)->location, 1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + goto failed; + } + + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_inode_item); + + fill_inode_item(inode_item, inode); + btrfs_mark_buffer_dirty(path->nodes[0]); + ret = 0; +failed: + btrfs_release_path(root, path); + btrfs_free_path(path); + return ret; +} + + static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, @@ -166,10 +218,11 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, dentry->d_inode->i_ctime = dir->i_ctime; err: btrfs_free_path(path); - if (ret == 0) { - inode_dec_link_count(dentry->d_inode); + if (!ret) { dir->i_size -= name_len * 2; - mark_inode_dirty(dir); + btrfs_update_inode(trans, root, dir); + drop_nlink(dentry->d_inode); + btrfs_update_inode(trans, root, dentry->d_inode); } return ret; } @@ -668,56 +721,6 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) return 0; } -static void fill_inode_item(struct btrfs_inode_item *item, - struct inode *inode) -{ - btrfs_set_inode_uid(item, inode->i_uid); - btrfs_set_inode_gid(item, inode->i_gid); - btrfs_set_inode_size(item, inode->i_size); - btrfs_set_inode_mode(item, inode->i_mode); - btrfs_set_inode_nlink(item, inode->i_nlink); - btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); - btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); - btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); - btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); - btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); - btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); - btrfs_set_inode_nblocks(item, inode->i_blocks); - btrfs_set_inode_generation(item, inode->i_generation); -} - -static int btrfs_update_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode) -{ - struct btrfs_inode_item *inode_item; - struct btrfs_path *path; - int ret; - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, path, - &BTRFS_I(inode)->location, 1); - if (ret) { - if (ret > 0) - ret = -ENOENT; - goto failed; - } - - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_inode_item); - - fill_inode_item(inode_item, inode); - btrfs_mark_buffer_dirty(path->nodes[0]); - ret = 0; -failed: - btrfs_release_path(root, path); - btrfs_free_path(path); - return ret; -} - static int btrfs_write_inode(struct inode *inode, int wait) { struct btrfs_root *root = BTRFS_I(inode)->root; From f2458e1d8c90958ed3631654cb7fd5ab01478505 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Apr 2007 15:52:25 -0400 Subject: [PATCH 0132/2985] Btrfs: change around extent-tree prealloc Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 10 ++-- fs/btrfs/disk-io.c | 3 +- fs/btrfs/extent-tree.c | 101 +++++++++++++++++++++++++++++++---------- 3 files changed, 87 insertions(+), 27 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2d166ca8b104..26d0cdd46f49 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -244,12 +244,17 @@ struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *dev_root; - struct btrfs_key current_insert; struct btrfs_key last_insert; struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; struct radix_tree_root dev_radix; + + u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; + int extent_tree_insert_nr; + u64 extent_tree_prealloc[BTRFS_MAX_LEVEL * 3]; + int extent_tree_prealloc_nr; + u64 generation; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; @@ -267,8 +272,7 @@ struct btrfs_fs_info { /* * in ram representation of the tree. extent_root is used for all allocations - * and for the extent tree extent_root root. current_insert is used - * only for the extent tree. + * and for the extent tree extent_root root. */ struct btrfs_root { struct buffer_head *node; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7aff6bb55d91..956727f015a5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -542,6 +542,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; fs_info->do_barriers = 1; + fs_info->extent_tree_insert_nr = 0; + fs_info->extent_tree_prealloc_nr = 0; BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -555,7 +557,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); __setup_root(sb->s_blocksize, dev_root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 116519503d0c..e6fe3fd38819 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -169,9 +169,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); - for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) { - ins.objectid = extent_root->fs_info->current_insert.objectid + - i; + for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) { + ins.objectid = extent_root->fs_info->extent_tree_insert[i]; super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + 1); @@ -179,7 +178,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct sizeof(extent_item)); BUG_ON(ret); } - extent_root->fs_info->current_insert.offset = 0; + extent_root->fs_info->extent_tree_insert_nr = 0; + extent_root->fs_info->extent_tree_prealloc_nr = 0; return 0; } @@ -349,7 +349,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int start_found; struct btrfs_leaf *l; struct btrfs_root * root = orig_root->fs_info->extent_root; + struct btrfs_fs_info *info = root->fs_info; int total_needed = num_blocks; + int total_found = 0; + int fill_prealloc = 0; int level; path = btrfs_alloc_path(); @@ -357,8 +360,12 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); - total_needed += (level + 2) * 3; - if (root->fs_info->last_insert.objectid == 0 && search_end == (u64)-1) { + if (num_blocks == 0) { + fill_prealloc = 1; + num_blocks = 1; + total_needed = min(level + 2, BTRFS_MAX_LEVEL) * 3; + } + if (info->last_insert.objectid == 0 && search_end == (u64)-1) { struct btrfs_disk_key *last_key; btrfs_init_path(path); ins->objectid = (u64)-1; @@ -373,8 +380,8 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root last_key = &l->items[path->slots[0]].key; search_start = btrfs_disk_key_objectid(last_key); } - if (root->fs_info->last_insert.objectid > search_start) - search_start = root->fs_info->last_insert.objectid; + if (info->last_insert.objectid > search_start) + search_start = info->last_insert.objectid; check_failed: btrfs_init_path(path); @@ -392,6 +399,10 @@ check_failed: l = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { + if (fill_prealloc) { + info->extent_tree_prealloc_nr = 0; + total_found = 0; + } ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -399,13 +410,13 @@ check_failed: goto error; if (!start_found) { ins->objectid = search_start; - ins->offset = (u64)-1; + ins->offset = (u64)-1 - search_start; start_found = 1; goto check_pending; } ins->objectid = last_block > search_start ? last_block : search_start; - ins->offset = (u64)-1; + ins->offset = (u64)-1 - ins->objectid; goto check_pending; } btrfs_disk_key_to_cpu(&key, &l->items[slot].key); @@ -414,7 +425,7 @@ check_failed: if (last_block < search_start) last_block = search_start; hole_size = key.objectid - last_block; - if (hole_size > total_needed) { + if (hole_size > num_blocks) { ins->objectid = last_block; ins->offset = hole_size; goto check_pending; @@ -433,17 +444,51 @@ check_pending: btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); for (test_block = ins->objectid; - test_block < ins->objectid + total_needed; test_block++) { - if (test_radix_bit(&root->fs_info->pinned_radix, - test_block)) { + test_block < ins->objectid + num_blocks; test_block++) { + if (test_radix_bit(&info->pinned_radix, test_block)) { search_start = test_block + 1; goto check_failed; } } - BUG_ON(root->fs_info->current_insert.offset); - root->fs_info->current_insert.offset = total_needed - num_blocks; - root->fs_info->current_insert.objectid = ins->objectid + num_blocks; - root->fs_info->current_insert.flags = 0; + if (!fill_prealloc && info->extent_tree_insert_nr) { + u64 last = + info->extent_tree_insert[info->extent_tree_insert_nr - 1]; + if (ins->objectid + num_blocks > + info->extent_tree_insert[0] && + ins->objectid <= last) { + search_start = last + 1; + WARN_ON(1); + goto check_failed; + } + } + if (!fill_prealloc && info->extent_tree_prealloc_nr) { + u64 first = + info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1]; + if (ins->objectid + num_blocks > first && + ins->objectid <= info->extent_tree_prealloc[0]) { + search_start = info->extent_tree_prealloc[0] + 1; + WARN_ON(1); + goto check_failed; + } + } + if (fill_prealloc) { + int nr; + test_block = ins->objectid; + while(test_block < ins->objectid + ins->offset && + total_found < total_needed) { + nr = total_needed - total_found - 1; + BUG_ON(nr < 0); + root->fs_info->extent_tree_prealloc[nr] = + test_block; + total_found++; + test_block++; + } + if (total_found < total_needed) { + search_start = test_block; + goto check_failed; + } + root->fs_info->extent_tree_prealloc_nr = total_found; + } root->fs_info->last_insert.objectid = ins->objectid; ins->offset = num_blocks; btrfs_free_path(path); @@ -472,25 +517,35 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; + struct btrfs_key prealloc_key; btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, owner); if (root == extent_root) { - BUG_ON(extent_root->fs_info->current_insert.offset == 0); + int nr; + BUG_ON(info->extent_tree_prealloc_nr == 0); BUG_ON(num_blocks != 1); - BUG_ON(extent_root->fs_info->current_insert.flags == - extent_root->fs_info->current_insert.offset); ins->offset = 1; - ins->objectid = extent_root->fs_info->current_insert.objectid + - extent_root->fs_info->current_insert.flags++; + info->extent_tree_prealloc_nr--; + nr = info->extent_tree_prealloc_nr; + ins->objectid = info->extent_tree_prealloc[nr]; + info->extent_tree_insert[info->extent_tree_insert_nr++] = + ins->objectid; return 0; } + /* do the real allocation */ ret = find_free_extent(trans, root, num_blocks, search_start, search_end, ins); if (ret) return ret; + /* then do prealloc for the extent tree */ + ret = find_free_extent(trans, root, 0, ins->objectid + ins->offset, + search_end, &prealloc_key); + if (ret) + return ret; + super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + num_blocks); From 9078a3e1e4e489dddc636c7bb8780349d4298743 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 26 Apr 2007 16:46:15 -0400 Subject: [PATCH 0133/2985] Btrfs: start of block group code Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 66 ++++++++++--- fs/btrfs/disk-io.c | 4 + fs/btrfs/extent-tree.c | 204 +++++++++++++++++++++++++++++++++++++---- fs/btrfs/print-tree.c | 7 ++ fs/btrfs/super.c | 1 - fs/btrfs/transaction.c | 2 + 6 files changed, 252 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 26d0cdd46f49..419917279e65 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -239,6 +239,19 @@ struct btrfs_device_item { __le64 device_id; } __attribute__ ((__packed__)); +/* tag for the radix tree of block groups in ram */ +#define BTRFS_BLOCK_GROUP_DIRTY 0 +#define BTRFS_BLOCK_GROUP_HINTS 8 +#define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) +struct btrfs_block_group_item { + __le64 used; +} __attribute__ ((__packed__)); + +struct btrfs_block_group_cache { + struct btrfs_key key; + struct btrfs_block_group_item item; +}; + struct crypto_hash; struct btrfs_fs_info { struct btrfs_root *extent_root; @@ -249,6 +262,7 @@ struct btrfs_fs_info { struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; struct radix_tree_root dev_radix; + struct radix_tree_root block_group_radix; u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; int extent_tree_insert_nr; @@ -301,49 +315,67 @@ struct btrfs_root { * info about object characteristics. There is one for every file and dir in * the FS */ -#define BTRFS_INODE_ITEM_KEY 1 +#define BTRFS_INODE_ITEM_KEY 1 + +/* reserve 2-15 close to the inode for later flexibility */ /* * dir items are the name -> inode pointers in a directory. There is one * for every name in a directory. */ -#define BTRFS_DIR_ITEM_KEY 2 -#define BTRFS_DIR_INDEX_KEY 3 +#define BTRFS_DIR_ITEM_KEY 16 +#define BTRFS_DIR_INDEX_KEY 17 /* - * inline data is file data that fits in the btree. + * extent data is for file data */ -#define BTRFS_INLINE_DATA_KEY 4 -/* - * extent data is for data that can't fit in the btree. It points to - * a (hopefully) huge chunk of disk - */ -#define BTRFS_EXTENT_DATA_KEY 5 +#define BTRFS_EXTENT_DATA_KEY 18 /* * csum items have the checksums for data in the extents */ -#define BTRFS_CSUM_ITEM_KEY 6 +#define BTRFS_CSUM_ITEM_KEY 19 + +/* reserve 20-31 for other file stuff */ /* * root items point to tree roots. There are typically in the root * tree used by the super block to find all the other trees */ -#define BTRFS_ROOT_ITEM_KEY 7 +#define BTRFS_ROOT_ITEM_KEY 32 /* * extent items are in the extent map tree. These record which blocks * are used, and how many references there are to each block */ -#define BTRFS_EXTENT_ITEM_KEY 8 +#define BTRFS_EXTENT_ITEM_KEY 33 + +/* + * block groups give us hints into the extent allocation trees. Which + * blocks are free etc etc + */ +#define BTRFS_BLOCK_GROUP_ITEM_KEY 34 /* * dev items list the devices that make up the FS */ -#define BTRFS_DEV_ITEM_KEY 9 +#define BTRFS_DEV_ITEM_KEY 35 /* * string items are for debugging. They just store a short string of * data in the FS */ -#define BTRFS_STRING_ITEM_KEY 10 +#define BTRFS_STRING_ITEM_KEY 253 + + +static inline u64 btrfs_block_group_used(struct btrfs_block_group_item *bi) +{ + return le64_to_cpu(bi->used); +} + +static inline void btrfs_set_block_group_used(struct + btrfs_block_group_item *bi, + u64 val) +{ + bi->used = cpu_to_le64(val); +} static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) { @@ -1037,6 +1069,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks); +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_free_block_groups(struct btrfs_fs_info *info); +int btrfs_read_block_groups(struct btrfs_root *root); /* ctree.c */ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 956727f015a5..1c27eb645510 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -529,6 +529,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pending_del_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); + INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; @@ -613,6 +614,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); + btrfs_read_block_groups(extent_root); + fs_info->generation = btrfs_super_generation(disk_super) + 1; memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); kobj_set_kset_s(fs_info, btrfs_subsys); @@ -741,6 +744,7 @@ int close_ctree(struct btrfs_root *root) iput(fs_info->btree_inode); free_dev_radix(fs_info); + btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); kfree(fs_info->extent_root); kfree(fs_info->tree_root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e6fe3fd38819..0bb4fc83cfd6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -127,6 +127,105 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } +static int write_one_cache_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_block_group_cache *cache) +{ + int ret; + int pending_ret; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_block_group_item *bi; + struct btrfs_key ins; + + find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins); + ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); + BUG_ON(ret); + bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + struct btrfs_block_group_item); + memcpy(bi, &cache->item, sizeof(*bi)); + mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(extent_root, path); + + finish_current_insert(trans, extent_root); + pending_ret = del_pending_extents(trans, extent_root); + if (ret) + return ret; + if (pending_ret) + return pending_ret; + return 0; + +} + +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_block_group_cache *cache[8]; + int ret; + int err = 0; + int werr = 0; + struct radix_tree_root *radix = &root->fs_info->block_group_radix; + int i; + struct btrfs_path *path; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while(1) { + ret = radix_tree_gang_lookup_tag(radix, (void **)cache, + 0, ARRAY_SIZE(cache), + BTRFS_BLOCK_GROUP_DIRTY); + if (!ret) + break; + for (i = 0; i < ret; i++) { + radix_tree_tag_clear(radix, cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_DIRTY); + err = write_one_cache_group(trans, root, + path, cache[i]); + if (err) + werr = err; + } + } + btrfs_free_path(path); + return werr; +} + +static int update_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 blocknr, u64 num, int alloc) +{ + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *info = root->fs_info; + u64 total = num; + u64 old_val; + u64 block_in_group; + int ret; + while(total) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&cache, blocknr, 1); + if (!ret) + return -1; + block_in_group = blocknr - cache->key.objectid; + WARN_ON(block_in_group > cache->key.offset); + radix_tree_tag_set(&info->block_group_radix, + cache->key.objectid + cache->key.offset - 1, + BTRFS_BLOCK_GROUP_DIRTY); + + old_val = btrfs_block_group_used(&cache->item); + num = min(total, cache->key.offset - block_in_group); + total -= num; + blocknr += num; + if (alloc) + old_val += num; + else + old_val -= num; + btrfs_set_block_group_used(&cache->item, old_val); + } + return 0; +} + int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -264,6 +363,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_del_item(trans, extent_root, path); if (ret) BUG(); + ret = update_block_group(trans, root, blocknr, num_blocks, 0); + BUG_ON(ret); } btrfs_release_path(extent_root, path); btrfs_free_path(path); @@ -365,21 +466,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_blocks = 1; total_needed = min(level + 2, BTRFS_MAX_LEVEL) * 3; } - if (info->last_insert.objectid == 0 && search_end == (u64)-1) { - struct btrfs_disk_key *last_key; - btrfs_init_path(path); - ins->objectid = (u64)-1; - ins->offset = (u64)-1; - ret = btrfs_search_slot(trans, root, ins, path, 0, 0); - if (ret < 0) - goto error; - BUG_ON(ret == 0); - if (path->slots[0] > 0) - path->slots[0]--; - l = btrfs_buffer_leaf(path->nodes[0]); - last_key = &l->items[path->slots[0]].key; - search_start = btrfs_disk_key_objectid(last_key); - } if (info->last_insert.objectid > search_start) search_start = info->last_insert.objectid; @@ -420,6 +506,8 @@ check_failed: goto check_pending; } btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) + goto next; if (key.objectid >= search_start) { if (start_found) { if (last_block < search_start) @@ -434,6 +522,7 @@ check_failed: } start_found = 1; last_block = key.objectid + key.offset; +next: path->slots[0]++; } // FIXME -ENOSPC @@ -498,7 +587,6 @@ error: btrfs_free_path(path); return ret; } - /* * finds a free extent and does all the dirty work required for allocation * returns the key for the extent through ins, and a tree buffer for @@ -532,6 +620,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ins->objectid = info->extent_tree_prealloc[nr]; info->extent_tree_insert[info->extent_tree_insert_nr++] = ins->objectid; + ret = update_block_group(trans, root, + ins->objectid, ins->offset, 1); + BUG_ON(ret); return 0; } /* do the real allocation */ @@ -558,6 +649,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, return ret; if (pending_ret) return pending_ret; + ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); return 0; } @@ -578,6 +670,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG(); return NULL; } + BUG_ON(ret); buf = btrfs_find_create_tree_block(root, ins.objectid); set_buffer_uptodate(buf); return buf; @@ -758,3 +851,82 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_free_path(path); return ret; } + +int btrfs_free_block_groups(struct btrfs_fs_info *info) +{ + int ret; + struct btrfs_block_group_cache *cache[8]; + int i; + + while(1) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)cache, 0, + ARRAY_SIZE(cache)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + radix_tree_delete(&info->block_group_radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1); + kfree(cache[i]); + } + } + return 0; +} + +int btrfs_read_block_groups(struct btrfs_root *root) +{ + struct btrfs_path *path; + int ret; + int err = 0; + struct btrfs_block_group_item *bi; + struct btrfs_block_group_cache *cache; + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_leaf *leaf; + u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; + + root = root->fs_info->extent_root; + key.objectid = 0; + key.offset = group_size_blocks; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while(1) { + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, + &key, path, 0, 0); + if (ret != 0) { + err = ret; + break; + } + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[path->slots[0]].key); + cache = kmalloc(sizeof(*cache), GFP_NOFS); + if (!cache) { + err = -1; + break; + } + bi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_block_group_item); + memcpy(&cache->item, bi, sizeof(*bi)); + memcpy(&cache->key, &found_key, sizeof(found_key)); + key.objectid = found_key.objectid + found_key.offset; + btrfs_release_path(root, path); + ret = radix_tree_insert(&root->fs_info->block_group_radix, + found_key.objectid + + found_key.offset - 1, + (void *)cache); + BUG_ON(ret); + if (key.objectid >= + btrfs_super_total_blocks(root->fs_info->disk_super)) + break; + } + + btrfs_free_path(path); + return 0; +} diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 1e7038b070ae..2f95fc67a036 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -11,6 +11,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_root_item *ri; struct btrfs_dir_item *di; struct btrfs_inode_item *ii; + struct btrfs_block_group_item *bi; u32 type; printk("leaf %Lu total ptrs %d free space %d\n", @@ -53,6 +54,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) printk("\t\textent data refs %u\n", btrfs_extent_refs(ei)); break; + case BTRFS_BLOCK_GROUP_ITEM_KEY: + bi = btrfs_item_ptr(l, i, + struct btrfs_block_group_item); + printk("\t\tblock group used %Lu\n", + btrfs_block_group_used(bi)); + break; case BTRFS_STRING_ITEM_KEY: printk("\t\titem data %.*s\n", btrfs_item_size(item), btrfs_leaf_data(l) + btrfs_item_offset(item)); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5bbccbc7e3cc..edcebf79b04a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -377,7 +377,6 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (btrfs_disk_key_objectid(found_key) != inode->i_ino) break; if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY && - btrfs_disk_key_type(found_key) != BTRFS_INLINE_DATA_KEY && btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) break; if (btrfs_disk_key_offset(found_key) < inode->i_size) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 078cb9cbf9dd..8a2545f6a5b6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -125,6 +125,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_set_super_device_root(fs_info->disk_super, bh_blocknr(dev_root->node)); } + btrfs_write_dirty_block_groups(trans, extent_root); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); if (old_extent_block == bh_blocknr(extent_root->node)) @@ -135,6 +136,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, &extent_root->root_key, &extent_root->root_item); BUG_ON(ret); + btrfs_write_dirty_block_groups(trans, extent_root); } return 0; } From cd1bc4653dc37f6390f4d6df4f987044c64f700b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 27 Apr 2007 10:08:34 -0400 Subject: [PATCH 0134/2985] Btrfs: more block allocator work Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 95 ++++++++++++++++++++++++++++++++++++------ fs/btrfs/super.c | 3 ++ 4 files changed, 90 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 419917279e65..c432222d40e3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -250,6 +250,8 @@ struct btrfs_block_group_item { struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; + u64 first_free; + u64 last_alloc; }; struct crypto_hash; @@ -257,7 +259,7 @@ struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *dev_root; - struct btrfs_key last_insert; + struct btrfs_block_group_cache *block_group_cache; struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1c27eb645510..2489ffa5fb38 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -558,7 +558,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); + fs_info->block_group_cache = NULL; __setup_root(sb->s_blocksize, dev_root, fs_info, BTRFS_DEV_TREE_OBJECTID); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0bb4fc83cfd6..71e3b311fc42 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,6 +12,63 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static int find_search_start(struct btrfs_root *root, int data) +{ + struct btrfs_block_group_cache *cache[8]; + struct btrfs_fs_info *info = root->fs_info; + u64 used; + u64 last; + int i; + int ret; + + cache[0] = info->block_group_cache; + if (!cache[0]) + goto find_new; + used = btrfs_block_group_used(&cache[0]->item); + if (used < (cache[0]->key.offset * 3 / 2)) + return 0; +find_new: + last = 0; + while(1) { + ret = radix_tree_gang_lookup_tag(&info->block_group_radix, + (void **)cache, + last, ARRAY_SIZE(cache), + BTRFS_BLOCK_GROUP_DIRTY); + if (!ret) + break; + for (i = 0; i < ret; i++) { + used = btrfs_block_group_used(&cache[i]->item); + if (used < (cache[i]->key.offset * 3 / 2)) { + info->block_group_cache = cache[i]; + cache[i]->last_alloc = cache[i]->first_free; + return 0; + } + last = cache[i]->key.objectid + + cache[i]->key.offset - 1; + } + } + last = 0; + while(1) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)cache, + last, ARRAY_SIZE(cache)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + used = btrfs_block_group_used(&cache[i]->item); + if (used < (cache[i]->key.offset * 3 / 2)) { + info->block_group_cache = cache[i]; + cache[i]->last_alloc = cache[i]->first_free; + return 0; + } + last = cache[i]->key.objectid + + cache[i]->key.offset - 1; + } + } + info->block_group_cache = NULL; + return 0; +} + int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks) @@ -205,8 +262,11 @@ static int update_block_group(struct btrfs_trans_handle *trans, while(total) { ret = radix_tree_gang_lookup(&info->block_group_radix, (void **)&cache, blocknr, 1); - if (!ret) + if (!ret) { + printk(KERN_CRIT "blocknr %Lu lookup failed\n", + blocknr); return -1; + } block_in_group = blocknr - cache->key.objectid; WARN_ON(block_in_group > cache->key.offset); radix_tree_tag_set(&info->block_group_radix, @@ -217,10 +277,15 @@ static int update_block_group(struct btrfs_trans_handle *trans, num = min(total, cache->key.offset - block_in_group); total -= num; blocknr += num; - if (alloc) + if (alloc) { old_val += num; - else + if (blocknr > cache->last_alloc) + cache->last_alloc = blocknr; + } else { old_val -= num; + if (blocknr < cache->first_free) + cache->first_free = blocknr; + } btrfs_set_block_group_used(&cache->item, old_val); } return 0; @@ -246,9 +311,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct clear_radix_bit(pinned_radix, gang[i]); } } - if (root->fs_info->last_insert.objectid > first) - root->fs_info->last_insert.objectid = first; - root->fs_info->last_insert.offset = 0; + root->fs_info->block_group_cache = NULL; return 0; } @@ -466,8 +529,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_blocks = 1; total_needed = min(level + 2, BTRFS_MAX_LEVEL) * 3; } - if (info->last_insert.objectid > search_start) - search_start = info->last_insert.objectid; + find_search_start(root, 0); + if (info->block_group_cache && + info->block_group_cache->last_alloc > search_start) + search_start = info->block_group_cache->last_alloc; check_failed: btrfs_init_path(path); @@ -567,8 +632,7 @@ check_pending: total_found < total_needed) { nr = total_needed - total_found - 1; BUG_ON(nr < 0); - root->fs_info->extent_tree_prealloc[nr] = - test_block; + info->extent_tree_prealloc[nr] = test_block; total_found++; test_block++; } @@ -576,9 +640,14 @@ check_pending: search_start = test_block; goto check_failed; } - root->fs_info->extent_tree_prealloc_nr = total_found; + info->extent_tree_prealloc_nr = total_found; + } + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&info->block_group_cache, + ins->objectid, 1); + if (ret) { + info->block_group_cache->last_alloc = ins->objectid; } - root->fs_info->last_insert.objectid = ins->objectid; ins->offset = num_blocks; btrfs_free_path(path); return 0; @@ -915,6 +984,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_block_group_item); memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); + cache->last_alloc = 0; + cache->first_free = 0; key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); ret = radix_tree_insert(&root->fs_info->block_group_radix, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index edcebf79b04a..a10e902d3102 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -223,6 +223,7 @@ err: btrfs_update_inode(trans, root, dir); drop_nlink(dentry->d_inode); btrfs_update_inode(trans, root, dentry->d_inode); + dir->i_sb->s_dirt = 1; } return ret; } @@ -411,6 +412,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, error: btrfs_release_path(root, path); btrfs_free_path(path); + inode->i_sb->s_dirt = 1; return ret; } @@ -935,6 +937,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_fail; d_instantiate(dentry, inode); drop_on_err = 0; + dir->i_sb->s_dirt = 1; out_fail: btrfs_end_transaction(trans, root); From 28b8bb9e001aa1c4abdd10a8e36972658237fe43 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 27 Apr 2007 11:42:05 -0400 Subject: [PATCH 0135/2985] Btrfs: allocator tweaks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 --- fs/btrfs/extent-tree.c | 7 +++++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2489ffa5fb38..e1b6e13a5ae8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -94,9 +94,6 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) } while (bh != head); out_unlock: unlock_page(page); - if (ret) { - touch_buffer(ret); - } page_cache_release(page); return ret; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71e3b311fc42..aca5802a8a1e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -311,7 +311,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct clear_radix_bit(pinned_radix, gang[i]); } } - root->fs_info->block_group_cache = NULL; + if (root->fs_info->block_group_cache) { + root->fs_info->block_group_cache->last_alloc = + root->fs_info->block_group_cache->first_free; + } return 0; } @@ -578,7 +581,7 @@ check_failed: if (last_block < search_start) last_block = search_start; hole_size = key.objectid - last_block; - if (hole_size > num_blocks) { + if (hole_size >= num_blocks) { ins->objectid = last_block; ins->offset = hole_size; goto check_pending; From 06a2f9fa4c12a055cc396936408a78ae0acfb6b4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 28 Apr 2007 08:48:10 -0400 Subject: [PATCH 0136/2985] Btrfs: try to drop dead cow pages from ram Signed-off-by: Chris Mason --- fs/btrfs/TODO | 3 +-- fs/btrfs/extent-tree.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 5ff2cef9f028..6a8c8cd03ca4 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -6,8 +6,7 @@ * Check compat and incompat flags on the inode * Get rid of struct ctree_path, limiting tree levels held at one time * Add generation number to key pointer in nodes -* Add parent pointer back to extent map. -* Implement real list of pending transactions +* Add generation number to inode * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index aca5802a8a1e..afc5267515e2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -291,10 +291,18 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } +static int try_remove_page(struct address_space *mapping, unsigned long index) +{ + int ret; + ret = invalidate_mapping_pages(mapping, index, index); + return ret; +} + int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { unsigned long gang[8]; + struct inode *btree_inode = root->fs_info->btree_inode; u64 first = 0; int ret; int i; @@ -309,6 +317,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); + try_remove_page(btree_inode->i_mapping, + gang[i] << (PAGE_CACHE_SHIFT - + btree_inode->i_blkbits)); } } if (root->fs_info->block_group_cache) { @@ -600,6 +611,12 @@ check_pending: */ btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); + if (ins->objectid >= btrfs_super_total_blocks(info->disk_super)) { + if (search_start == 0) + return -ENOSPC; + search_start = 0; + goto check_failed; + } for (test_block = ins->objectid; test_block < ins->objectid + num_blocks; test_block++) { if (test_radix_bit(&info->pinned_radix, test_block)) { From 7c4452b9a6ca7aabe37ea2e43d443110bdc08cd8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 28 Apr 2007 09:29:35 -0400 Subject: [PATCH 0137/2985] Btrfs: smarter transaction writeback Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 1 + fs/btrfs/super.c | 1 - fs/btrfs/transaction.c | 37 +++++++++++++++++++++++++++++++++++-- fs/btrfs/transaction.h | 1 + 4 files changed, 37 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index afc5267515e2..652cf305a967 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -762,6 +762,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG_ON(ret); buf = btrfs_find_create_tree_block(root, ins.objectid); set_buffer_uptodate(buf); + set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a10e902d3102..1890e8648dbd 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -980,7 +980,6 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8a2545f6a5b6..f9b8864dcc40 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -45,6 +45,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->use_count = 1; cur_trans->commit_done = 0; list_add_tail(&cur_trans->list, &root->fs_info->trans_list); + init_bit_radix(&cur_trans->dirty_pages); } cur_trans->num_writers++; return 0; @@ -106,8 +107,40 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); - return 0; + unsigned long gang[16]; + int ret; + int i; + int err; + int werr = 0; + struct page *page; + struct radix_tree_root *dirty_pages; + struct inode *btree_inode = root->fs_info->btree_inode; + + if (!trans || !trans->transaction) { + return filemap_write_and_wait(btree_inode->i_mapping); + } + dirty_pages = &trans->transaction->dirty_pages; + while(1) { + ret = find_first_radix_bit(dirty_pages, gang, ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + /* FIXME EIO */ + clear_radix_bit(dirty_pages, gang[i]); + page = find_lock_page(btree_inode->i_mapping, + gang[i]); + if (!page) + continue; + err = write_one_page(page, 0); + if (err) + werr = err; + page_cache_release(page); + } + } + err = filemap_fdatawait(btree_inode->i_mapping); + if (err) + werr = err; + return werr; } int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 3cc29900a074..afe42d167cee 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -9,6 +9,7 @@ struct btrfs_transaction { int commit_done; int magic; struct list_head list; + struct radix_tree_root dirty_pages; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; From 308535a05e4c39d2be26e0aeee722682deeb6f77 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 28 Apr 2007 15:17:08 -0400 Subject: [PATCH 0138/2985] Btrfs: prealloc more blocks for the extent map Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 652cf305a967..62051a36664a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -541,7 +541,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (num_blocks == 0) { fill_prealloc = 1; num_blocks = 1; - total_needed = min(level + 2, BTRFS_MAX_LEVEL) * 3; + total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } find_search_start(root, 0); if (info->block_group_cache && From 31f3c99b73483f7b738a886c552050cbd6128ff3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 30 Apr 2007 15:25:45 -0400 Subject: [PATCH 0139/2985] Btrfs: allocator improvements, inode block groups Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/ctree.c | 10 +-- fs/btrfs/ctree.h | 18 +++++- fs/btrfs/extent-tree.c | 138 +++++++++++++++++++++++++++++------------ fs/btrfs/super.c | 53 +++++++++++++--- fs/btrfs/transaction.c | 1 + fs/btrfs/transaction.h | 15 +++++ 7 files changed, 181 insertions(+), 55 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 43a4f1ddb375..6b50076b02d5 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -3,6 +3,7 @@ struct btrfs_inode { struct btrfs_root *root; + struct btrfs_block_group_cache *block_group; struct btrfs_key location; struct inode vfs_inode; }; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b4783bf8bf4f..dbd3f636dd3d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -61,7 +61,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *cow_ret = buf; return 0; } - cow = btrfs_alloc_free_block(trans, root); + cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr); cow_node = btrfs_buffer_node(cow); if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); @@ -800,7 +800,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root); + t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); c = btrfs_buffer_node(t); memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); @@ -905,7 +905,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root); + split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); @@ -1277,7 +1277,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root slot = path->slots[0]; nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); @@ -1374,7 +1374,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (!double_split) return ret; - right_buffer = btrfs_alloc_free_block(trans, root); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c432222d40e3..e6bf9919536a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -174,6 +174,7 @@ struct btrfs_inode_item { __le64 generation; __le64 size; __le64 nblocks; + __le64 block_group; __le32 nlink; __le32 uid; __le32 gid; @@ -241,6 +242,7 @@ struct btrfs_device_item { /* tag for the radix tree of block groups in ram */ #define BTRFS_BLOCK_GROUP_DIRTY 0 +#define BTRFS_BLOCK_GROUP_AVAIL 1 #define BTRFS_BLOCK_GROUP_HINTS 8 #define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) struct btrfs_block_group_item { @@ -410,6 +412,17 @@ static inline void btrfs_set_inode_nblocks(struct btrfs_inode_item *i, u64 val) i->nblocks = cpu_to_le64(val); } +static inline u64 btrfs_inode_block_group(struct btrfs_inode_item *i) +{ + return le64_to_cpu(i->block_group); +} + +static inline void btrfs_set_inode_block_group(struct btrfs_inode_item *i, + u64 val) +{ + i->block_group = cpu_to_le64(val); +} + static inline u32 btrfs_inode_nlink(struct btrfs_inode_item *i) { return le32_to_cpu(i->nlink); @@ -1054,10 +1067,13 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache + *hint, int data); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, u64 hint); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, u64 num_blocks, u64 search_start, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 62051a36664a..8b8cbe25fffb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,42 +12,57 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int find_search_start(struct btrfs_root *root, int data) +struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache + *hint, int data) { struct btrfs_block_group_cache *cache[8]; + struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; u64 used; - u64 last; + u64 last = 0; + u64 hint_last; int i; int ret; - - cache[0] = info->block_group_cache; - if (!cache[0]) - goto find_new; - used = btrfs_block_group_used(&cache[0]->item); - if (used < (cache[0]->key.offset * 3 / 2)) - return 0; -find_new: - last = 0; + int full_search = 0; + if (hint) { + used = btrfs_block_group_used(&hint->item); + if (used < (hint->key.offset * 2) / 3) { + return hint; + } + radix_tree_tag_clear(&info->block_group_radix, + hint->key.objectid + hint->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + last = hint->key.objectid + hint->key.offset; + hint_last = last; + } else { + hint_last = 0; + last = 0; + } while(1) { ret = radix_tree_gang_lookup_tag(&info->block_group_radix, (void **)cache, last, ARRAY_SIZE(cache), - BTRFS_BLOCK_GROUP_DIRTY); + BTRFS_BLOCK_GROUP_AVAIL); if (!ret) break; for (i = 0; i < ret; i++) { used = btrfs_block_group_used(&cache[i]->item); - if (used < (cache[i]->key.offset * 3 / 2)) { + if (used < (cache[i]->key.offset * 2) / 3) { info->block_group_cache = cache[i]; - cache[i]->last_alloc = cache[i]->first_free; - return 0; + found_group = cache[i]; + goto found; } + radix_tree_tag_clear(&info->block_group_radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); last = cache[i]->key.objectid + - cache[i]->key.offset - 1; + cache[i]->key.offset; } } - last = 0; + last = hint_last; +again: while(1) { ret = radix_tree_gang_lookup(&info->block_group_radix, (void **)cache, @@ -56,17 +71,32 @@ find_new: break; for (i = 0; i < ret; i++) { used = btrfs_block_group_used(&cache[i]->item); - if (used < (cache[i]->key.offset * 3 / 2)) { + if (used < cache[i]->key.offset) { info->block_group_cache = cache[i]; - cache[i]->last_alloc = cache[i]->first_free; - return 0; + found_group = cache[i]; + goto found; } + radix_tree_tag_clear(&info->block_group_radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); last = cache[i]->key.objectid + - cache[i]->key.offset - 1; + cache[i]->key.offset; } } info->block_group_cache = NULL; - return 0; + if (!full_search) { + last = 0; + full_search = 1; + goto again; + } +found: + if (!found_group) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&found_group, 0, 1); + BUG_ON(ret != 1); + } + return found_group; } int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, @@ -243,6 +273,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, path, cache[i]); if (err) werr = err; + cache[i]->last_alloc = cache[i]->first_free; } } btrfs_free_path(path); @@ -322,10 +353,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btree_inode->i_blkbits)); } } - if (root->fs_info->block_group_cache) { - root->fs_info->block_group_cache->last_alloc = - root->fs_info->block_group_cache->first_free; - } return 0; } @@ -532,22 +559,43 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int total_found = 0; int fill_prealloc = 0; int level; + int update_block_group = 0; + struct btrfs_block_group_cache *hint_block_group; path = btrfs_alloc_path(); ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); + /* find search start here */ + if (0 && search_start && num_blocks) { + u64 used; + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&hint_block_group, + search_start, 1); + if (ret) { + used = btrfs_block_group_used(&hint_block_group->item); + if (used > (hint_block_group->key.offset * 9) / 10) + search_start = 0; + else if (search_start < hint_block_group->last_alloc) + search_start = hint_block_group->last_alloc; + } else { + search_start = 0; + } + } if (num_blocks == 0) { fill_prealloc = 1; num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } - find_search_start(root, 0); - if (info->block_group_cache && - info->block_group_cache->last_alloc > search_start) - search_start = info->block_group_cache->last_alloc; - + if (1 || !search_start) { + trans->block_group = btrfs_find_block_group(root, + trans->block_group, + 0); + if (trans->block_group->last_alloc > search_start) + search_start = trans->block_group->last_alloc; + update_block_group = 1; + } check_failed: btrfs_init_path(path); ins->objectid = search_start; @@ -662,11 +710,13 @@ check_pending: } info->extent_tree_prealloc_nr = total_found; } - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&info->block_group_cache, - ins->objectid, 1); - if (ret) { - info->block_group_cache->last_alloc = ins->objectid; + if (update_block_group) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&trans->block_group, + ins->objectid, 1); + if (ret) { + trans->block_group->last_alloc = ins->objectid; + } } ins->offset = num_blocks; btrfs_free_path(path); @@ -747,14 +797,14 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * returns the tree buffer or NULL. */ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root, u64 hint) { struct btrfs_key ins; int ret; struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, 0, (unsigned long)-1, &ins); + 1, hint, (unsigned long)-1, &ins); if (ret) { BUG(); return NULL; @@ -975,6 +1025,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_key found_key; struct btrfs_leaf *leaf; u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; + u64 used; root = root->fs_info->extent_root; key.objectid = 0; @@ -1005,8 +1056,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_block_group_item); memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); - cache->last_alloc = 0; - cache->first_free = 0; + cache->last_alloc = cache->key.objectid; + cache->first_free = cache->key.objectid; key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); ret = radix_tree_insert(&root->fs_info->block_group_radix, @@ -1014,6 +1065,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) found_key.offset - 1, (void *)cache); BUG_ON(ret); + used = btrfs_block_group_used(bi); + if (used < (key.offset * 2) / 3) { + radix_tree_tag_set(&root->fs_info->block_group_radix, + found_key.objectid + + found_key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } if (key.objectid >= btrfs_super_total_blocks(root->fs_info->disk_super)) break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1890e8648dbd..7ecbe7c86186 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -52,6 +52,8 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item *inode_item; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; + struct btrfs_block_group_cache *alloc_group; + u64 alloc_group_block; int ret; path = btrfs_alloc_path(); @@ -82,6 +84,12 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); + alloc_group_block = btrfs_inode_block_group(inode_item); + ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix, + (void **)&alloc_group, + alloc_group_block, 1); + BUG_ON(!ret); + BTRFS_I(inode)->block_group = alloc_group; btrfs_free_path(path); inode_item = NULL; @@ -136,6 +144,8 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(item, inode->i_blocks); btrfs_set_inode_generation(item, inode->i_generation); + btrfs_set_inode_block_group(item, + BTRFS_I(inode)->block_group->key.objectid); } @@ -237,6 +247,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); ret = btrfs_unlink_trans(trans, root, dir, dentry); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -262,6 +273,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = (u32)-1; @@ -429,6 +441,7 @@ static void btrfs_delete_inode(struct inode *inode) inode->i_size = 0; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); if (S_ISREG(inode->i_mode)) { ret = btrfs_truncate_in_trans(trans, root, inode); BUG_ON(ret); @@ -731,6 +744,7 @@ static int btrfs_write_inode(struct inode *inode, int wait) if (wait) { mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); } @@ -744,6 +758,7 @@ static void btrfs_dirty_inode(struct inode *inode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -751,7 +766,9 @@ static void btrfs_dirty_inode(struct inode *inode) static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 objectid, int mode) + u64 objectid, + struct btrfs_block_group_cache *group, + int mode) { struct inode *inode; struct btrfs_inode_item inode_item; @@ -763,6 +780,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); BTRFS_I(inode)->root = root; + group = btrfs_find_block_group(root, group, 0); + BTRFS_I(inode)->block_group = group; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; @@ -832,6 +851,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); if (err) { @@ -839,11 +859,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } - inode = btrfs_new_inode(trans, root, objectid, mode); + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, mode); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; - // FIXME mark the inode dirty + + btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dentry, inode); if (err) drop_inode = 1; @@ -853,6 +875,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; } dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); out_unlock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -904,6 +928,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out_unlock; @@ -915,7 +940,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_unlock; } - inode = btrfs_new_inode(trans, root, objectid, S_IFDIR | mode); + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, S_IFDIR | mode); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fail; @@ -923,6 +949,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) drop_on_err = 1; inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; + btrfs_set_trans_block_group(trans, inode); err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino); if (err) @@ -938,6 +965,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); drop_on_err = 0; dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); out_fail: btrfs_end_transaction(trans, root); @@ -1349,6 +1378,7 @@ static void btrfs_truncate(struct inode *inode) /* FIXME, add redo link to tree so we don't leak on crash */ mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); BUG_ON(ret); ret = btrfs_end_transaction(trans, root); @@ -1445,6 +1475,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); bh = page_buffers(pages[i]); if (buffer_mapped(bh) && bh->b_blocknr == 0) { @@ -1481,6 +1512,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, kunmap(pages[i]); } SetPageChecked(pages[i]); + btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1821,6 +1853,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, mutex_unlock(&root->fs_info->fs_mutex); goto out_unlock; } + btrfs_set_trans_block_group(trans, inode); /* FIXME blocksize != 4096 */ inode->i_blocks += num_blocks << 3; if (start_pos < inode->i_size) { @@ -1845,6 +1878,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } BUG_ON(ret); alloc_extent_start = ins.objectid; + btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -2017,6 +2051,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) struct btrfs_leaf *leaf; struct btrfs_root *new_root; struct inode *inode; + struct inode *dir; int ret; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; @@ -2025,7 +2060,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root); + subvol = btrfs_alloc_free_block(trans, root, 0); if (subvol == NULL) return -ENOSPC; leaf = btrfs_buffer_leaf(subvol); @@ -2069,10 +2104,9 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) * insert the directory item */ key.offset = (u64)-1; + dir = root->fs_info->sb->s_root->d_inode; ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, - root->fs_info->sb->s_root->d_inode->i_ino, - &key, 0); + name, namelen, dir->i_ino, &key, 0); BUG_ON(ret); ret = btrfs_commit_transaction(trans, root); @@ -2084,7 +2118,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(new_root, 1); BUG_ON(!trans); - inode = btrfs_new_inode(trans, new_root, new_dirid, S_IFDIR | 0700); + inode = btrfs_new_inode(trans, new_root, new_dirid, + BTRFS_I(dir)->block_group, S_IFDIR | 0700); inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f9b8864dcc40..bdbf514c111f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -77,6 +77,7 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; h->blocks_used = 0; + h->block_group = NULL; root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); h->magic = h->magic2 = TRANS_MAGIC; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index afe42d167cee..b5378119e76c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -1,5 +1,6 @@ #ifndef __TRANSACTION__ #define __TRANSACTION__ +#include "btrfs_inode.h" struct btrfs_transaction { u64 transid; @@ -20,10 +21,24 @@ struct btrfs_trans_handle { unsigned long blocks_reserved; unsigned long blocks_used; struct btrfs_transaction *transaction; + struct btrfs_block_group_cache *block_group; int magic2; }; +static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, + struct inode *inode) +{ + trans->block_group = BTRFS_I(inode)->block_group; +} + +static inline void btrfs_update_inode_block_group(struct + btrfs_trans_handle *trans, + struct inode *inode) +{ + BTRFS_I(inode)->block_group = trans->block_group; +} + int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, From 090d18753c7fb73f7d846c3a89a50cd35136d144 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 1 May 2007 08:53:32 -0400 Subject: [PATCH 0140/2985] Btrfs: directory readahead Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 29 ++++++++++++++++++++++++++++- fs/btrfs/disk-io.h | 6 ++++++ fs/btrfs/extent-tree.c | 1 + fs/btrfs/super.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e1b6e13a5ae8..354524adf984 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -253,6 +254,29 @@ static struct address_space_operations btree_aops = { .sync_page = block_sync_page, }; +int readahead_tree_block(struct btrfs_root *root, u64 blocknr) +{ + struct buffer_head *bh = NULL; + + bh = btrfs_find_create_tree_block(root, blocknr); + if (!bh) + return 0; + if (buffer_uptodate(bh)) + goto done; + if (test_set_buffer_locked(bh)) + goto done; + if (!buffer_uptodate(bh)) { + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh); + } else { + unlock_buffer(bh); + } +done: + brelse(bh); + return 0; +} + struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { struct buffer_head *bh = NULL; @@ -270,11 +294,14 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto fail; - csum_tree_block(root, bh, 1); } else { unlock_buffer(bh); } uptodate: + if (!buffer_checked(bh)) { + csum_tree_block(root, bh, 1); + set_buffer_checked(bh); + } if (check_tree_block(root, bh)) BUG(); return bh; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 444ebb0141ae..1ee7d2a55b5b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -5,6 +5,11 @@ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) +enum btrfs_bh_state_bits { + BH_Checked = BH_PrivateStart, +}; +BUFFER_FNS(Checked, checked); + static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) { return (struct btrfs_node *)bh->b_data; @@ -21,6 +26,7 @@ static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) } struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); +int readahead_tree_block(struct btrfs_root *root, u64 blocknr); struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr); int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8b8cbe25fffb..2837fddf7b1c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -812,6 +812,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG_ON(ret); buf = btrfs_find_create_tree_block(root, ins.objectid); set_buffer_uptodate(buf); + set_buffer_checked(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7ecbe7c86186..a29a781b86c2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -585,6 +585,31 @@ printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_r return d_splice_alias(inode, dentry); } +static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path) +{ + struct btrfs_node *node; + int i; + int nritems; + u64 objectid; + u64 item_objectid; + u64 blocknr; + int slot; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1]; + objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key); + nritems = btrfs_header_nritems(&node->header); + for (i = slot; i < nritems; i++) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid != objectid) + break; + blocknr = btrfs_node_blockptr(node, i); + readahead_tree_block(root, blocknr); + } +} + static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; @@ -619,6 +644,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (ret < 0) goto err; advance = 0; + reada_leaves(root, path); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); @@ -631,6 +657,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path->slots[0]; + if (path->slots[1] == 0) + reada_leaves(root, path); } else { slot++; path->slots[0]++; From 35b7e476107e3d54f03384e0f2fa3dfd68933353 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 2 May 2007 15:53:43 -0400 Subject: [PATCH 0141/2985] Btrfs: fix page cache memory leak Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 2 -- fs/btrfs/ctree.h | 3 +++ fs/btrfs/dir-item.c | 11 +++++---- fs/btrfs/disk-io.c | 8 ++++-- fs/btrfs/disk-io.h | 2 ++ fs/btrfs/extent-tree.c | 12 +-------- fs/btrfs/hash.c | 1 + fs/btrfs/print-tree.c | 1 + fs/btrfs/super.c | 56 +++++++++++++++++++++++++----------------- fs/btrfs/transaction.c | 8 +++--- 10 files changed, 59 insertions(+), 45 deletions(-) diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 9fc42e99c7df..e9ace32d7bae 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -22,9 +22,7 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) return -ENOMEM; memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); bits[0] = slot; - radix_tree_preload(GFP_NOFS); ret = radix_tree_insert(radix, slot, bits); - radix_tree_preload_end(); if (ret) return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e6bf9919536a..b5855a5365ef 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -8,6 +8,9 @@ struct btrfs_trans_handle; struct btrfs_transaction; +extern struct kmem_cache *btrfs_trans_handle_cachep; +extern struct kmem_cache *btrfs_transaction_cachep; +extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_BtRfS_M" diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 7a7e9846860a..00a28d90fea6 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -4,11 +4,12 @@ #include "hash.h" #include "transaction.h" -struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_key *cpu_key, - u32 data_size) +static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle + *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, + u32 data_size) { int ret; char *ptr; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 354524adf984..5828a104dfef 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -694,7 +694,7 @@ static int free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) return 0; } -int del_fs_roots(struct btrfs_fs_info *fs_info) +static int del_fs_roots(struct btrfs_fs_info *fs_info) { int ret; struct btrfs_root *gang[8]; @@ -781,3 +781,7 @@ void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) brelse(buf); } +void btrfs_btree_balance_dirty(struct btrfs_root *root) +{ + balance_dirty_pages_ratelimited(root->fs_info->btree_inode->i_mapping); +} diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 1ee7d2a55b5b..822ccb8aa4ae 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -55,4 +55,6 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, u64 num_blocks); int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical); +int btrfs_releasepage(struct page *page, gfp_t flags); +void btrfs_btree_balance_dirty(struct btrfs_root *root); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2837fddf7b1c..0e20d1c42fca 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -322,18 +322,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } -static int try_remove_page(struct address_space *mapping, unsigned long index) -{ - int ret; - ret = invalidate_mapping_pages(mapping, index, index); - return ret; -} - int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { unsigned long gang[8]; - struct inode *btree_inode = root->fs_info->btree_inode; u64 first = 0; int ret; int i; @@ -348,9 +340,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); - try_remove_page(btree_inode->i_mapping, - gang[i] << (PAGE_CACHE_SHIFT - - btree_inode->i_blkbits)); } } return 0; @@ -983,6 +972,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; + btrfs_btree_balance_dirty(root); } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 22519b8e0cf2..32de1ea1b64e 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -11,6 +11,7 @@ */ #include +#include "hash.h" #define DELTA 0x9E3779B9 static void TEA_transform(__u32 buf[2], __u32 const in[]) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 2f95fc67a036..28813411de66 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -1,6 +1,7 @@ #include #include "ctree.h" #include "disk-io.h" +#include "print-tree.h" void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a29a781b86c2..130a1d3d9f5f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -18,14 +18,14 @@ #include "btrfs_inode.h" #include "ioctl.h" -void btrfs_fsinfo_release(struct kobject *obj) +static void btrfs_fsinfo_release(struct kobject *obj) { struct btrfs_fs_info *fsinfo = container_of(obj, struct btrfs_fs_info, kobj); kfree(fsinfo); } -struct kobj_type btrfs_fsinfo_ktype = { +static struct kobj_type btrfs_fsinfo_ktype = { .release = btrfs_fsinfo_release, }; @@ -148,7 +148,6 @@ static void fill_inode_item(struct btrfs_inode_item *item, BTRFS_I(inode)->block_group->key.objectid); } - static int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) @@ -251,6 +250,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_trans(trans, root, dir, dentry); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); return ret; } @@ -324,6 +324,7 @@ out: btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root); if (ret && !err) err = ret; return err; @@ -449,6 +450,7 @@ static void btrfs_delete_inode(struct inode *inode) btrfs_free_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); return; no_delete: clear_inode(inode); @@ -481,7 +483,7 @@ out: return ret; } -int fixup_tree_root_location(struct btrfs_root *root, +static int fixup_tree_root_location(struct btrfs_root *root, struct btrfs_key *location, struct btrfs_root **sub_root) { @@ -512,7 +514,7 @@ int fixup_tree_root_location(struct btrfs_root *root, return 0; } -int btrfs_init_locked_inode(struct inode *inode, void *p) +static int btrfs_init_locked_inode(struct inode *inode, void *p) { struct btrfs_iget_args *args = p; inode->i_ino = args->ino; @@ -520,15 +522,15 @@ int btrfs_init_locked_inode(struct inode *inode, void *p) return 0; } -int btrfs_find_actor(struct inode *inode, void *opaque) +static int btrfs_find_actor(struct inode *inode, void *opaque) { struct btrfs_iget_args *args = opaque; return (args->ino == inode->i_ino && args->root == BTRFS_I(inode)->root); } -struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, - struct btrfs_root *root) +static struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, + struct btrfs_root *root) { struct inode *inode; struct btrfs_iget_args args; @@ -790,6 +792,7 @@ static void btrfs_dirty_inode(struct inode *inode) btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); } static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, @@ -913,6 +916,7 @@ out_unlock: inode_dec_link_count(inode); iput(inode); } + btrfs_btree_balance_dirty(root); return err; } @@ -1002,6 +1006,7 @@ out_unlock: mutex_unlock(&root->fs_info->fs_mutex); if (drop_on_err) iput(inode); + btrfs_btree_balance_dirty(root); return err; } @@ -1099,7 +1104,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, found_type != BTRFS_EXTENT_DATA_KEY) { extent_end = 0; extent_start = 0; - btrfs_release_path(root, path); goto out; } found_type = btrfs_file_extent_type(item); @@ -1135,7 +1139,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, btrfs_map_bh_to_logical(root, result, 0); } out: - btrfs_release_path(root, path); btrfs_free_path(path); return err; } @@ -1231,13 +1234,13 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { WARN_ON(bh->b_size != blocksize); err = btrfs_get_block(inode, block, bh, 0); - if (err) + if (err) { +printk("writepage going to recovery err %d\n", err); goto recover; + } if (buffer_new(bh)) { /* blockdev mappings never come here */ clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); } } bh = bh->b_this_page; @@ -1303,11 +1306,6 @@ done: if (uptodate) SetPageUptodate(page); end_page_writeback(page); - /* - * The page and buffer_heads can be released at any time from - * here on. - */ - wbc->pages_skipped++; /* We didn't write this page */ } return err; @@ -1409,10 +1407,11 @@ static void btrfs_truncate(struct inode *inode) btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); BUG_ON(ret); + btrfs_update_inode(trans, root, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - mark_inode_dirty(inode); + btrfs_btree_balance_dirty(root); } /* @@ -1777,10 +1776,15 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; goto failed_release; } + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); + wait_on_page_writeback(pages[i]); offset = pos & (PAGE_CACHE_SIZE -1); this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); - create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, - (1 << BH_Uptodate)); + if (!page_has_buffers(pages[i])) { + create_empty_buffers(pages[i], + root->fs_info->sb->s_blocksize, + (1 << BH_Uptodate)); + } head = page_buffers(pages[i]); bh = head; do { @@ -1820,7 +1824,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct page *pages[8]; - struct page *pinned[2] = { NULL, NULL }; + struct page *pinned[2]; unsigned long first_index; unsigned long last_index; u64 start_pos; @@ -1829,6 +1833,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct btrfs_trans_handle *trans; struct btrfs_key ins; + pinned[0] = NULL; + pinned[1] = NULL; if (file->f_flags & O_DIRECT) return -EINVAL; pos = *ppos; @@ -1858,6 +1864,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (!PageUptodate(pinned[0])) { ret = mpage_readpage(pinned[0], btrfs_get_block); BUG_ON(ret); + wait_on_page_locked(pinned[0]); } else { unlock_page(pinned[0]); } @@ -1869,6 +1876,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (!PageUptodate(pinned[1])) { ret = mpage_readpage(pinned[1], btrfs_get_block); BUG_ON(ret); + wait_on_page_locked(pinned[1]); } else { unlock_page(pinned[1]); } @@ -1940,6 +1948,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, num_written += write_bytes; balance_dirty_pages_ratelimited(inode->i_mapping); + btrfs_btree_balance_dirty(root); cond_resched(); } out_unlock: @@ -2165,6 +2174,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) iput(inode); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); return 0; } @@ -2220,6 +2230,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); return 0; } @@ -2295,6 +2306,7 @@ out: mutex_unlock(&root->fs_info->fs_mutex); out_nolock: btrfs_free_path(path); + btrfs_btree_balance_dirty(root); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bdbf514c111f..8bbe9107ff7e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -199,8 +199,9 @@ struct dirty_root { struct btrfs_root *root; }; -int add_dirty_roots(struct btrfs_trans_handle *trans, - struct radix_tree_root *radix, struct list_head *list) +static int add_dirty_roots(struct btrfs_trans_handle *trans, + struct radix_tree_root *radix, + struct list_head *list) { struct dirty_root *dirty; struct btrfs_root *gang[8]; @@ -246,7 +247,8 @@ int add_dirty_roots(struct btrfs_trans_handle *trans, return 0; } -int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) +static int drop_dirty_roots(struct btrfs_root *tree_root, + struct list_head *list) { struct dirty_root *dirty; struct btrfs_trans_handle *trans; From be08c1b9f8e679d45e086728445ac36cf250e92e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 May 2007 09:06:49 -0400 Subject: [PATCH 0142/2985] Btrfs: early metadata/data split Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 95 +++++++++++++++++++++--------------------- fs/btrfs/super.c | 2 +- 3 files changed, 50 insertions(+), 49 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b5855a5365ef..179a046ce5a4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1080,7 +1080,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, u64 num_blocks, u64 search_start, - u64 search_end, struct btrfs_key *ins); + u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0e20d1c42fca..c5ae51893f78 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6,7 +6,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins); + search_end, struct btrfs_key *ins, int data); static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -25,7 +25,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int i; int ret; int full_search = 0; - if (hint) { + if (!data && hint) { used = btrfs_block_group_used(&hint->item); if (used < (hint->key.offset * 2) / 3) { return hint; @@ -47,6 +47,14 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (!ret) break; for (i = 0; i < ret; i++) { + last = cache[i]->key.objectid + + cache[i]->key.offset; + if (!full_search && !data && + (cache[i]->key.objectid & cache[i]->key.offset)) + continue; + if (!full_search && data && + (cache[i]->key.objectid & cache[i]->key.offset) == 0) + continue; used = btrfs_block_group_used(&cache[i]->item); if (used < (cache[i]->key.offset * 2) / 3) { info->block_group_cache = cache[i]; @@ -57,8 +65,6 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, cache[i]->key.objectid + cache[i]->key.offset - 1, BTRFS_BLOCK_GROUP_AVAIL); - last = cache[i]->key.objectid + - cache[i]->key.offset; } } last = hint_last; @@ -70,6 +76,14 @@ again: if (!ret) break; for (i = 0; i < ret; i++) { + last = cache[i]->key.objectid + + cache[i]->key.offset; + if (!full_search && !data && + (cache[i]->key.objectid & cache[i]->key.offset)) + continue; + if (!full_search && data && + (cache[i]->key.objectid & cache[i]->key.offset) == 0) + continue; used = btrfs_block_group_used(&cache[i]->item); if (used < cache[i]->key.offset) { info->block_group_cache = cache[i]; @@ -80,8 +94,6 @@ again: cache[i]->key.objectid + cache[i]->key.offset - 1, BTRFS_BLOCK_GROUP_AVAIL); - last = cache[i]->key.objectid + - cache[i]->key.offset; } } info->block_group_cache = NULL; @@ -112,7 +124,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u32 refs; find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, - &ins); + &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); @@ -225,7 +237,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins); + find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins, 0); ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); BUG_ON(ret); bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], @@ -322,10 +334,18 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } +static int try_remove_page(struct address_space *mapping, unsigned long index) +{ + int ret; + ret = invalidate_mapping_pages(mapping, index, index); + return ret; +} + int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { unsigned long gang[8]; + struct inode *btree_inode = root->fs_info->btree_inode; u64 first = 0; int ret; int i; @@ -340,6 +360,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); + try_remove_page(btree_inode->i_mapping, + gang[i] << (PAGE_CACHE_SHIFT - + btree_inode->i_blkbits)); } } return 0; @@ -424,7 +447,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; - find_free_extent(trans, root, 0, 0, (u64)-1, &ins); + find_free_extent(trans, root, 0, 0, (u64)-1, &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); @@ -531,7 +554,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root */ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins) + search_end, struct btrfs_key *ins, int data) { struct btrfs_path *path; struct btrfs_key key; @@ -548,43 +571,21 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int total_found = 0; int fill_prealloc = 0; int level; - int update_block_group = 0; - struct btrfs_block_group_cache *hint_block_group; + struct btrfs_block_group_cache *block_group; path = btrfs_alloc_path(); ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); - /* find search start here */ - if (0 && search_start && num_blocks) { - u64 used; - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&hint_block_group, - search_start, 1); - if (ret) { - used = btrfs_block_group_used(&hint_block_group->item); - if (used > (hint_block_group->key.offset * 9) / 10) - search_start = 0; - else if (search_start < hint_block_group->last_alloc) - search_start = hint_block_group->last_alloc; - } else { - search_start = 0; - } - } if (num_blocks == 0) { fill_prealloc = 1; num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } - if (1 || !search_start) { - trans->block_group = btrfs_find_block_group(root, - trans->block_group, - 0); - if (trans->block_group->last_alloc > search_start) - search_start = trans->block_group->last_alloc; - update_block_group = 1; - } + block_group = btrfs_find_block_group(root, trans->block_group, data); + if (block_group->last_alloc > search_start) + search_start = block_group->last_alloc; check_failed: btrfs_init_path(path); ins->objectid = search_start; @@ -699,13 +700,13 @@ check_pending: } info->extent_tree_prealloc_nr = total_found; } - if (update_block_group) { - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&trans->block_group, - ins->objectid, 1); - if (ret) { - trans->block_group->last_alloc = ins->objectid; - } + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&block_group, + ins->objectid, 1); + if (ret) { + block_group->last_alloc = ins->objectid; + if (!data) + trans->block_group = block_group; } ins->offset = num_blocks; btrfs_free_path(path); @@ -725,7 +726,7 @@ error: int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, u64 num_blocks, u64 search_start, - u64 search_end, struct btrfs_key *ins) + u64 search_end, struct btrfs_key *ins, int data) { int ret; int pending_ret; @@ -755,13 +756,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } /* do the real allocation */ ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, ins); + search_end, ins, data); if (ret) return ret; /* then do prealloc for the extent tree */ ret = find_free_extent(trans, root, 0, ins->objectid + ins->offset, - search_end, &prealloc_key); + search_end, &prealloc_key, 0); if (ret) return ret; @@ -793,7 +794,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, hint, (unsigned long)-1, &ins); + 1, 0, (unsigned long)-1, &ins, 0); if (ret) { BUG(); return NULL; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 130a1d3d9f5f..23ba4e979fc9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1903,7 +1903,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { ret = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 1, (u64)-1, &ins); + num_blocks, 1, (u64)-1, &ins, 1); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); From be74417553f4b2ee46be2088007a674ef2f02330 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 6 May 2007 10:15:01 -0400 Subject: [PATCH 0143/2985] Btrfs: more allocator enhancements Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 6 +- fs/btrfs/ctree.h | 7 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 317 +++++++++++++++++++++++++++++++---------- fs/btrfs/super.c | 2 +- 5 files changed, 249 insertions(+), 85 deletions(-) diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index e9ace32d7bae..1a0271445dfb 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -26,8 +26,10 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) if (ret) return ret; } - set_bit(bit_slot, bits + 1); - return 0; + ret = test_and_set_bit(bit_slot, bits + 1); + if (ret < 0) + ret = 1; + return ret; } int test_radix_bit(struct radix_tree_root *radix, unsigned long bit) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 179a046ce5a4..086e7dea3c92 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -257,6 +257,8 @@ struct btrfs_block_group_cache { struct btrfs_block_group_item item; u64 first_free; u64 last_alloc; + u64 pinned; + int data; }; struct crypto_hash; @@ -264,12 +266,12 @@ struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *dev_root; - struct btrfs_block_group_cache *block_group_cache; struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; struct radix_tree_root dev_radix; struct radix_tree_root block_group_radix; + struct radix_tree_root block_group_data_radix; u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; int extent_tree_insert_nr; @@ -1072,7 +1074,8 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) /* extent-tree.c */ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache - *hint, int data); + *hint, u64 search_start, + int data); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5828a104dfef..7930458c227e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -554,6 +554,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); + INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; @@ -582,7 +583,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - fs_info->block_group_cache = NULL; __setup_root(sb->s_blocksize, dev_root, fs_info, BTRFS_DEV_TREE_OBJECTID); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c5ae51893f78..2937fd9aba74 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,36 +12,88 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static struct btrfs_block_group_cache *lookup_block_group(struct + btrfs_fs_info *info, + u64 blocknr) +{ + struct btrfs_block_group_cache *block_group; + int ret; + + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&block_group, + blocknr, 1); + if (ret) { + if (block_group->key.objectid <= blocknr && blocknr < + block_group->key.objectid + block_group->key.offset) + return block_group; + } + ret = radix_tree_gang_lookup(&info->block_group_data_radix, + (void **)&block_group, + blocknr, 1); + if (ret) { + if (block_group->key.objectid <= blocknr && blocknr < + block_group->key.objectid + block_group->key.offset) + return block_group; + } +printk("lookup_block_group fails for blocknr %Lu\n", blocknr); + return NULL; +} + struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache - *hint, int data) + *hint, u64 search_start, + int data) { struct btrfs_block_group_cache *cache[8]; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; + struct radix_tree_root *radix; u64 used; u64 last = 0; u64 hint_last; int i; int ret; int full_search = 0; - if (!data && hint) { + + if (data) + radix = &info->block_group_data_radix; + else + radix = &info->block_group_radix; + + if (search_start) { + struct btrfs_block_group_cache *shint; + shint = lookup_block_group(info, search_start); + if (shint->data == data) { + used = btrfs_block_group_used(&shint->item); + if (used + shint->pinned < + (shint->key.offset * 8) / 10) { + return shint; + } + } + } + if (hint && hint->data == data) { used = btrfs_block_group_used(&hint->item); - if (used < (hint->key.offset * 2) / 3) { + if (used + hint->pinned < (hint->key.offset * 8) / 10) { return hint; } - radix_tree_tag_clear(&info->block_group_radix, - hint->key.objectid + hint->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - last = hint->key.objectid + hint->key.offset; + if (used >= (hint->key.offset * 8) / 10) { + radix_tree_tag_clear(radix, + hint->key.objectid + + hint->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } + last = hint->key.offset * 2; + if (hint->key.objectid >= last) + last = max(search_start, hint->key.objectid - last); + else + last = hint->key.objectid + hint->key.offset; hint_last = last; } else { - hint_last = 0; - last = 0; + hint_last = search_start; + last = search_start; } while(1) { - ret = radix_tree_gang_lookup_tag(&info->block_group_radix, - (void **)cache, + ret = radix_tree_gang_lookup_tag(radix, (void **)cache, last, ARRAY_SIZE(cache), BTRFS_BLOCK_GROUP_AVAIL); if (!ret) @@ -49,65 +101,54 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, for (i = 0; i < ret; i++) { last = cache[i]->key.objectid + cache[i]->key.offset; - if (!full_search && !data && - (cache[i]->key.objectid & cache[i]->key.offset)) - continue; - if (!full_search && data && - (cache[i]->key.objectid & cache[i]->key.offset) == 0) - continue; used = btrfs_block_group_used(&cache[i]->item); - if (used < (cache[i]->key.offset * 2) / 3) { - info->block_group_cache = cache[i]; + if (used + cache[i]->pinned < + (cache[i]->key.offset * 8) / 10) { found_group = cache[i]; goto found; } - radix_tree_tag_clear(&info->block_group_radix, - cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); + if (used >= (cache[i]->key.offset * 8) / 10) { + radix_tree_tag_clear(radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } } } last = hint_last; again: while(1) { - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)cache, - last, ARRAY_SIZE(cache)); + ret = radix_tree_gang_lookup(radix, (void **)cache, + last, ARRAY_SIZE(cache)); if (!ret) break; for (i = 0; i < ret; i++) { last = cache[i]->key.objectid + cache[i]->key.offset; - if (!full_search && !data && - (cache[i]->key.objectid & cache[i]->key.offset)) - continue; - if (!full_search && data && - (cache[i]->key.objectid & cache[i]->key.offset) == 0) - continue; used = btrfs_block_group_used(&cache[i]->item); - if (used < cache[i]->key.offset) { - info->block_group_cache = cache[i]; + if (used + cache[i]->pinned < cache[i]->key.offset) { found_group = cache[i]; goto found; } - radix_tree_tag_clear(&info->block_group_radix, - cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); + if (used >= cache[i]->key.offset) { + radix_tree_tag_clear(radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } } } - info->block_group_cache = NULL; if (!full_search) { - last = 0; + last = search_start; full_search = 1; goto again; } -found: if (!found_group) { - ret = radix_tree_gang_lookup(&info->block_group_radix, + ret = radix_tree_gang_lookup(radix, (void **)&found_group, 0, 1); BUG_ON(ret != 1); } +found: return found_group; } @@ -252,18 +293,20 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, return ret; if (pending_ret) return pending_ret; + if (cache->data) + cache->last_alloc = cache->first_free; return 0; } -int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static int write_dirty_block_radix(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct radix_tree_root *radix) { struct btrfs_block_group_cache *cache[8]; int ret; int err = 0; int werr = 0; - struct radix_tree_root *radix = &root->fs_info->block_group_radix; int i; struct btrfs_path *path; @@ -285,35 +328,74 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, path, cache[i]); if (err) werr = err; - cache[i]->last_alloc = cache[i]->first_free; } } btrfs_free_path(path); return werr; } +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + int ret2; + ret = write_dirty_block_radix(trans, root, + &root->fs_info->block_group_radix); + ret2 = write_dirty_block_radix(trans, root, + &root->fs_info->block_group_data_radix); + if (ret) + return ret; + if (ret2) + return ret2; + return 0; +} + static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num, int alloc) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; + struct radix_tree_root *radix; u64 total = num; u64 old_val; u64 block_in_group; int ret; + if (num != 1) + radix = &info->block_group_data_radix; + else + radix = &info->block_group_radix; while(total) { - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&cache, blocknr, 1); + ret = radix_tree_gang_lookup(radix, (void **)&cache, + blocknr, 1); if (!ret) { printk(KERN_CRIT "blocknr %Lu lookup failed\n", blocknr); return -1; } block_in_group = blocknr - cache->key.objectid; + if (block_in_group > cache->key.offset || cache->key.objectid > + blocknr) { + if (radix == &info->block_group_data_radix) + radix = &info->block_group_radix; + else + radix = &info->block_group_data_radix; + ret = radix_tree_gang_lookup(radix, (void **)&cache, + blocknr, 1); + if (!ret) { + printk(KERN_CRIT "blocknr %Lu lookup failed\n", + blocknr); + return -1; + } + block_in_group = blocknr - cache->key.objectid; + if (block_in_group > cache->key.offset || + cache->key.objectid > blocknr) { + BUG(); + } + } WARN_ON(block_in_group > cache->key.offset); - radix_tree_tag_set(&info->block_group_radix, - cache->key.objectid + cache->key.offset - 1, + radix_tree_tag_set(radix, cache->key.objectid + + cache->key.offset - 1, BTRFS_BLOCK_GROUP_DIRTY); old_val = btrfs_block_group_used(&cache->item); @@ -346,6 +428,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct { unsigned long gang[8]; struct inode *btree_inode = root->fs_info->btree_inode; + struct btrfs_block_group_cache *block_group; u64 first = 0; int ret; int i; @@ -360,6 +443,14 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); + block_group = lookup_block_group(root->fs_info, + gang[i]); + if (block_group) { + WARN_ON(block_group->pinned == 0); + block_group->pinned--; + if (gang[i] < block_group->last_alloc) + block_group->last_alloc = gang[i]; + } try_remove_page(btree_inode->i_mapping, gang[i] << (PAGE_CACHE_SHIFT - btree_inode->i_blkbits)); @@ -420,10 +511,16 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) btrfs_block_release(root, bh); } err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); + if (!err) { + struct btrfs_block_group_cache *cache; + cache = lookup_block_group(root->fs_info, blocknr); + if (cache) + cache->pinned++; + } } else { err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); } - BUG_ON(err); + BUG_ON(err < 0); return 0; } @@ -502,6 +599,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int i; struct radix_tree_root *pending_radix; struct radix_tree_root *pinned_radix; + struct btrfs_block_group_cache *cache; pending_radix = &extent_root->fs_info->pending_del_radix; pinned_radix = &extent_root->fs_info->pinned_radix; @@ -513,7 +611,17 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct break; for (i = 0; i < ret; i++) { wret = set_radix_bit(pinned_radix, gang[i]); - BUG_ON(wret); + if (wret == 0) { + cache = lookup_block_group(extent_root->fs_info, + gang[i]); + if (cache) + cache->pinned++; + } + if (wret < 0) { + printk(KERN_CRIT "set_radix_bit, err %d\n", + wret); + BUG_ON(wret < 0); + } wret = clear_radix_bit(pending_radix, gang[i]); BUG_ON(wret); wret = __free_extent(trans, extent_root, @@ -563,6 +671,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int slot = 0; u64 last_block = 0; u64 test_block; + u64 orig_search_start = search_start; int start_found; struct btrfs_leaf *l; struct btrfs_root * root = orig_root->fs_info->extent_root; @@ -572,6 +681,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int fill_prealloc = 0; int level; struct btrfs_block_group_cache *block_group; + int full_scan = 0; path = btrfs_alloc_path(); ins->flags = 0; @@ -583,10 +693,21 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } - block_group = btrfs_find_block_group(root, trans->block_group, data); + if (search_start) { + block_group = lookup_block_group(info, search_start); + block_group = btrfs_find_block_group(root, block_group, + search_start, data); + } else { + block_group = btrfs_find_block_group(root, + trans->block_group, 0, + data); + } + +check_failed: + if (block_group->data != data) + WARN_ON(1); if (block_group->last_alloc > search_start) search_start = block_group->last_alloc; -check_failed: btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; @@ -639,6 +760,13 @@ check_failed: } start_found = 1; last_block = key.objectid + key.offset; + if (last_block >= block_group->key.objectid + + block_group->key.offset) { + btrfs_release_path(root, path); + search_start = block_group->key.objectid + + block_group->key.offset * 2; + goto new_group; + } next: path->slots[0]++; } @@ -650,16 +778,17 @@ check_pending: btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); if (ins->objectid >= btrfs_super_total_blocks(info->disk_super)) { - if (search_start == 0) + if (full_scan) return -ENOSPC; - search_start = 0; - goto check_failed; + search_start = orig_search_start; + full_scan = 1; + goto new_group; } for (test_block = ins->objectid; test_block < ins->objectid + num_blocks; test_block++) { if (test_radix_bit(&info->pinned_radix, test_block)) { search_start = test_block + 1; - goto check_failed; + goto new_group; } } if (!fill_prealloc && info->extent_tree_insert_nr) { @@ -670,7 +799,7 @@ check_pending: ins->objectid <= last) { search_start = last + 1; WARN_ON(1); - goto check_failed; + goto new_group; } } if (!fill_prealloc && info->extent_tree_prealloc_nr) { @@ -680,7 +809,7 @@ check_pending: ins->objectid <= info->extent_tree_prealloc[0]) { search_start = info->extent_tree_prealloc[0] + 1; WARN_ON(1); - goto check_failed; + goto new_group; } } if (fill_prealloc) { @@ -696,14 +825,12 @@ check_pending: } if (total_found < total_needed) { search_start = test_block; - goto check_failed; + goto new_group; } info->extent_tree_prealloc_nr = total_found; } - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&block_group, - ins->objectid, 1); - if (ret) { + block_group = lookup_block_group(info, ins->objectid); + if (block_group) { block_group->last_alloc = ins->objectid; if (!data) trans->block_group = block_group; @@ -711,6 +838,18 @@ check_pending: ins->offset = num_blocks; btrfs_free_path(path); return 0; + +new_group: + if (search_start >= btrfs_super_total_blocks(info->disk_super)) { + search_start = orig_search_start; + full_scan = 1; + } + block_group = lookup_block_group(info, search_start); + if (!full_scan) + block_group = btrfs_find_block_group(root, block_group, + search_start, data); + goto check_failed; + error: btrfs_release_path(root, path); btrfs_free_path(path); @@ -794,7 +933,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, 0, (unsigned long)-1, &ins, 0); + 1, hint, (unsigned long)-1, &ins, 0); if (ret) { BUG(); return NULL; @@ -984,21 +1123,19 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } -int btrfs_free_block_groups(struct btrfs_fs_info *info) +static int free_block_group_radix(struct radix_tree_root *radix) { int ret; struct btrfs_block_group_cache *cache[8]; int i; while(1) { - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)cache, 0, + ret = radix_tree_gang_lookup(radix, (void **)cache, 0, ARRAY_SIZE(cache)); if (!ret) break; for (i = 0; i < ret; i++) { - radix_tree_delete(&info->block_group_radix, - cache[i]->key.objectid + + radix_tree_delete(radix, cache[i]->key.objectid + cache[i]->key.offset - 1); kfree(cache[i]); } @@ -1006,6 +1143,20 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +int btrfs_free_block_groups(struct btrfs_fs_info *info) +{ + int ret; + int ret2; + + ret = free_block_group_radix(&info->block_group_radix); + ret2 = free_block_group_radix(&info->block_group_data_radix); + if (ret) + return ret; + if (ret2) + return ret2; + return 0; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -1013,13 +1164,16 @@ int btrfs_read_block_groups(struct btrfs_root *root) int err = 0; struct btrfs_block_group_item *bi; struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *info = root->fs_info; + struct radix_tree_root *radix; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_leaf *leaf; u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; u64 used; + u64 nr = 0; - root = root->fs_info->extent_root; + root = info->extent_root; key.objectid = 0; key.offset = group_size_blocks; key.flags = 0; @@ -1030,7 +1184,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) return -ENOMEM; while(1) { - ret = btrfs_search_slot(NULL, root->fs_info->extent_root, + ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0); if (ret != 0) { err = ret; @@ -1050,23 +1204,28 @@ int btrfs_read_block_groups(struct btrfs_root *root) memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; + cache->pinned = 0; + cache->data = (nr & 1); key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); - ret = radix_tree_insert(&root->fs_info->block_group_radix, - found_key.objectid + + if (nr & 1) + radix = &info->block_group_data_radix; + else + radix = &info->block_group_radix; + ret = radix_tree_insert(radix, found_key.objectid + found_key.offset - 1, (void *)cache); BUG_ON(ret); used = btrfs_block_group_used(bi); - if (used < (key.offset * 2) / 3) { - radix_tree_tag_set(&root->fs_info->block_group_radix, - found_key.objectid + + if (used < (key.offset * 8) / 10) { + radix_tree_tag_set(radix, found_key.objectid + found_key.offset - 1, BTRFS_BLOCK_GROUP_AVAIL); } if (key.objectid >= - btrfs_super_total_blocks(root->fs_info->disk_super)) + btrfs_super_total_blocks(info->disk_super)) break; + nr++; } btrfs_free_path(path); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 23ba4e979fc9..d5ac0d8ee3c9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -811,7 +811,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); BTRFS_I(inode)->root = root; - group = btrfs_find_block_group(root, group, 0); + group = btrfs_find_block_group(root, group, 0, 0); BTRFS_I(inode)->block_group = group; inode->i_uid = current->fsuid; From 3e1ad54fe2839319c1aa66b954da0753f5b1f906 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 7 May 2007 20:03:49 -0400 Subject: [PATCH 0144/2985] Btrfs: allocator and tuning Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 75 +++++++++++++++++++----------------------- 2 files changed, 34 insertions(+), 42 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 086e7dea3c92..cdb7c23c41f9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -255,6 +255,7 @@ struct btrfs_block_group_item { struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; + struct radix_tree_root *radix; u64 first_free; u64 last_alloc; u64 pinned; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2937fd9aba74..3edfc300289f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -23,7 +23,7 @@ static struct btrfs_block_group_cache *lookup_block_group(struct (void **)&block_group, blocknr, 1); if (ret) { - if (block_group->key.objectid <= blocknr && blocknr < + if (block_group->key.objectid <= blocknr && blocknr <= block_group->key.objectid + block_group->key.offset) return block_group; } @@ -31,11 +31,16 @@ static struct btrfs_block_group_cache *lookup_block_group(struct (void **)&block_group, blocknr, 1); if (ret) { - if (block_group->key.objectid <= blocknr && blocknr < + if (block_group->key.objectid <= blocknr && blocknr <= block_group->key.objectid + block_group->key.offset) return block_group; } -printk("lookup_block_group fails for blocknr %Lu\n", blocknr); + WARN_ON(1); + printk("lookup_block_group fails for blocknr %Lu\n", blocknr); + printk("last ret was %d\n", ret); + if (ret) { + printk("last block group was %Lu %Lu\n", block_group->key.objectid, block_group->key.offset); + } return NULL; } @@ -356,45 +361,20 @@ static int update_block_group(struct btrfs_trans_handle *trans, { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - struct radix_tree_root *radix; u64 total = num; u64 old_val; u64 block_in_group; - int ret; - if (num != 1) - radix = &info->block_group_data_radix; - else - radix = &info->block_group_radix; + while(total) { - ret = radix_tree_gang_lookup(radix, (void **)&cache, - blocknr, 1); - if (!ret) { + cache = lookup_block_group(info, blocknr); + if (!cache) { printk(KERN_CRIT "blocknr %Lu lookup failed\n", blocknr); return -1; } block_in_group = blocknr - cache->key.objectid; - if (block_in_group > cache->key.offset || cache->key.objectid > - blocknr) { - if (radix == &info->block_group_data_radix) - radix = &info->block_group_radix; - else - radix = &info->block_group_data_radix; - ret = radix_tree_gang_lookup(radix, (void **)&cache, - blocknr, 1); - if (!ret) { - printk(KERN_CRIT "blocknr %Lu lookup failed\n", - blocknr); - return -1; - } - block_in_group = blocknr - cache->key.objectid; - if (block_in_group > cache->key.offset || - cache->key.objectid > blocknr) { - BUG(); - } - } WARN_ON(block_in_group > cache->key.offset); - radix_tree_tag_set(radix, cache->key.objectid + + radix_tree_tag_set(cache->radix, cache->key.objectid + cache->key.offset - 1, BTRFS_BLOCK_GROUP_DIRTY); @@ -693,6 +673,8 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } + if (search_end == (u64)-1) + search_end = btrfs_super_total_blocks(info->disk_super); if (search_start) { block_group = lookup_block_group(info, search_start); block_group = btrfs_find_block_group(root, block_group, @@ -704,7 +686,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root } check_failed: - if (block_group->data != data) + if (!full_scan && block_group->data != data) WARN_ON(1); if (block_group->last_alloc > search_start) search_start = block_group->last_alloc; @@ -734,13 +716,13 @@ check_failed: goto error; if (!start_found) { ins->objectid = search_start; - ins->offset = (u64)-1 - search_start; + ins->offset = search_end - search_start; start_found = 1; goto check_pending; } ins->objectid = last_block > search_start ? last_block : search_start; - ins->offset = (u64)-1 - ins->objectid; + ins->offset = search_end - ins->objectid; goto check_pending; } btrfs_disk_key_to_cpu(&key, &l->items[slot].key); @@ -777,7 +759,7 @@ check_pending: */ btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); - if (ins->objectid >= btrfs_super_total_blocks(info->disk_super)) { + if (ins->objectid + num_blocks >= search_end) { if (full_scan) return -ENOSPC; search_start = orig_search_start; @@ -840,7 +822,7 @@ check_pending: return 0; new_group: - if (search_start >= btrfs_super_total_blocks(info->disk_super)) { + if (search_start + num_blocks >= search_end) { search_start = orig_search_start; full_scan = 1; } @@ -900,7 +882,12 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, return ret; /* then do prealloc for the extent tree */ - ret = find_free_extent(trans, root, 0, ins->objectid + ins->offset, + if (ins->objectid + ins->offset >= search_end) + search_end = ins->objectid - 1; + else + search_start = ins->objectid + ins->offset; + + ret = find_free_extent(trans, root, 0, search_start, search_end, &prealloc_key, 0); if (ret) return ret; @@ -1198,6 +1185,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) err = -1; break; } + + if (nr & 1) + radix = &info->block_group_data_radix; + else + radix = &info->block_group_radix; + bi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_block_group_item); memcpy(&cache->item, bi, sizeof(*bi)); @@ -1206,12 +1199,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->first_free = cache->key.objectid; cache->pinned = 0; cache->data = (nr & 1); + cache->radix = radix; + key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); - if (nr & 1) - radix = &info->block_group_data_radix; - else - radix = &info->block_group_radix; ret = radix_tree_insert(radix, found_key.objectid + found_key.offset - 1, (void *)cache); From e37c9e6921207cf503634b06bee37ecb7904408d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 May 2007 20:13:14 -0400 Subject: [PATCH 0145/2985] Btrfs: many allocator fixes, pretty solid Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/bit-radix.c | 10 +- fs/btrfs/bit-radix.h | 2 +- fs/btrfs/ctree.h | 3 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 375 +++++++++++++++++++++++++++++++++++------ fs/btrfs/super.c | 4 +- fs/btrfs/transaction.c | 3 +- 8 files changed, 340 insertions(+), 59 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 6a8c8cd03ca4..f6df246f26c3 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -7,6 +7,7 @@ * Get rid of struct ctree_path, limiting tree levels held at one time * Add generation number to key pointer in nodes * Add generation number to inode +* Add ability to switch a block group from data to metadata or vice versa * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 1a0271445dfb..8f9cd4277231 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -77,7 +77,7 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) } int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, - int nr) + unsigned long start, int nr) { unsigned long *bits; unsigned long *gang[4]; @@ -85,10 +85,13 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, int ret; int i; int total_found = 0; + unsigned long slot; - ret = radix_tree_gang_lookup(radix, (void **)gang, 0, ARRAY_SIZE(gang)); + slot = start / BIT_RADIX_BITS_PER_ARRAY; + ret = radix_tree_gang_lookup(radix, (void **)gang, slot, + ARRAY_SIZE(gang)); + found = start % BIT_RADIX_BITS_PER_ARRAY; for (i = 0; i < ret && nr > 0; i++) { - found = 0; bits = gang[i]; while(nr > 0) { found = find_next_bit(bits + 1, @@ -104,6 +107,7 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, } else break; } + found = 0; } return total_found; } diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h index 56aad4c7d7f7..4e717e30db4f 100644 --- a/fs/btrfs/bit-radix.h +++ b/fs/btrfs/bit-radix.h @@ -6,7 +6,7 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit); int test_radix_bit(struct radix_tree_root *radix, unsigned long bit); int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit); int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, - int nr); + unsigned long start, int nr); static inline void init_bit_radix(struct radix_tree_root *radix) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cdb7c23c41f9..92a6078de827 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -259,7 +259,9 @@ struct btrfs_block_group_cache { u64 first_free; u64 last_alloc; u64 pinned; + u64 last_prealloc; int data; + int cached; }; struct crypto_hash; @@ -273,6 +275,7 @@ struct btrfs_fs_info { struct radix_tree_root dev_radix; struct radix_tree_root block_group_radix; struct radix_tree_root block_group_data_radix; + struct radix_tree_root extent_map_radix; u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; int extent_tree_insert_nr; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7930458c227e..2dbf422a2b9a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -551,6 +551,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); + init_bit_radix(&fs_info->extent_map_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3edfc300289f..3ac9da453472 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,6 +12,97 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static int cache_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache *block_group) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct btrfs_leaf *leaf; + struct radix_tree_root *extent_radix; + int slot; + u64 i; + u64 last = 0; + u64 hole_size; + int found = 0; + + root = root->fs_info->extent_root; + extent_radix = &root->fs_info->extent_map_radix; + + if (block_group->cached) + return 0; + if (block_group->data) + return 0; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; +printk("cache block group %Lu\n", block_group->key.objectid); + key.objectid = block_group->key.objectid; + key.flags = 0; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + if (ret && path->slots[0] > 0) + path->slots[0]--; + while(1) { + leaf = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(&leaf->header)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + else { + if (found) { + hole_size = block_group->key.objectid + + block_group->key.offset - last; + } else { + last = block_group->key.objectid; + hole_size = block_group->key.offset; + } + for (i = 0; i < hole_size; i++) { + set_radix_bit(extent_radix, + last + i); + } + break; + } + } + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (key.objectid >= block_group->key.objectid + + block_group->key.offset) { + if (found) { + hole_size = block_group->key.objectid + + block_group->key.offset - last; + } else { + last = block_group->key.objectid; + hole_size = block_group->key.offset; + } + for (i = 0; i < hole_size; i++) { + set_radix_bit(extent_radix, last + i); + } + break; + } + if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { + if (!found) { + last = key.objectid + key.offset; + found = 1; + } else { + hole_size = key.objectid - last; + for (i = 0; i < hole_size; i++) { + set_radix_bit(extent_radix, last + i); + } + last = key.objectid + key.offset; + } + } + path->slots[0]++; + } + + block_group->cached = 1; + btrfs_free_path(path); + return 0; +} + static struct btrfs_block_group_cache *lookup_block_group(struct btrfs_fs_info *info, u64 blocknr) @@ -44,6 +135,63 @@ static struct btrfs_block_group_cache *lookup_block_group(struct return NULL; } +static u64 leaf_range(struct btrfs_root *root) +{ + u64 size = BTRFS_LEAF_DATA_SIZE(root); + size = size / (sizeof(struct btrfs_extent_item) + + sizeof(struct btrfs_item)); + return size; +} + +static u64 find_search_start(struct btrfs_root *root, + struct btrfs_block_group_cache **cache_ret, + u64 search_start, int num) +{ + unsigned long gang[8]; + int ret; + struct btrfs_block_group_cache *cache = *cache_ret; + u64 last = max(search_start, cache->key.objectid); + + if (cache->data) + goto out; + if (num > 1) { + last = max(last, cache->last_prealloc); + } +again: + cache_block_group(root, cache); + while(1) { + ret = find_first_radix_bit(&root->fs_info->extent_map_radix, + gang, last, ARRAY_SIZE(gang)); + if (!ret) + goto out; + last = gang[ret-1] + 1; + if (num > 1) { + if (ret != ARRAY_SIZE(gang)) { + goto new_group; + } + if (gang[ret-1] - gang[0] > leaf_range(root)) { + continue; + } + } + if (gang[0] >= cache->key.objectid + cache->key.offset) { + goto new_group; + } + return gang[0]; + } +out: + return max(cache->last_alloc, search_start); + +new_group: + cache = lookup_block_group(root->fs_info, last + cache->key.offset - 1); + if (!cache) { + return max((*cache_ret)->last_alloc, search_start); + } + cache = btrfs_find_block_group(root, cache, + last + cache->key.offset - 1, 0); + *cache_ret = cache; + goto again; +} + struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, @@ -89,13 +237,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } last = hint->key.offset * 2; if (hint->key.objectid >= last) - last = max(search_start, hint->key.objectid - last); + last = max(search_start + hint->key.offset - 1, + hint->key.objectid - last); else last = hint->key.objectid + hint->key.offset; hint_last = last; } else { - hint_last = search_start; - last = search_start; + if (hint) + hint_last = max(hint->key.objectid, search_start); + else + hint_last = search_start; + + last = hint_last; } while(1) { ret = radix_tree_gang_lookup_tag(radix, (void **)cache, @@ -357,13 +510,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num, int alloc) + u64 blocknr, u64 num, int alloc, int mark_free) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; u64 total = num; u64 old_val; u64 block_in_group; + u64 i; while(total) { cache = lookup_block_group(info, blocknr); @@ -380,18 +534,38 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val = btrfs_block_group_used(&cache->item); num = min(total, cache->key.offset - block_in_group); - total -= num; - blocknr += num; if (alloc) { old_val += num; if (blocknr > cache->last_alloc) cache->last_alloc = blocknr; + if (!cache->data) { + for (i = 0; i < num; i++) { + clear_radix_bit(&info->extent_map_radix, + blocknr + i); + } + } } else { old_val -= num; if (blocknr < cache->first_free) cache->first_free = blocknr; + if (!cache->data && mark_free) { + for (i = 0; i < num; i++) { + set_radix_bit(&info->extent_map_radix, + blocknr + i); + } + } + if (old_val < (cache->key.offset * 8) / 10 && + old_val + num >= (cache->key.offset * 8) / 10) { +printk("group %Lu now available\n", cache->key.objectid); + radix_tree_tag_set(cache->radix, + cache->key.objectid + + cache->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } } btrfs_set_block_group_used(&cache->item, old_val); + total -= num; + blocknr += num; } return 0; } @@ -413,9 +587,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct int ret; int i; struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; + struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix; while(1) { - ret = find_first_radix_bit(pinned_radix, gang, + ret = find_first_radix_bit(pinned_radix, gang, 0, ARRAY_SIZE(gang)); if (!ret) break; @@ -430,6 +605,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct block_group->pinned--; if (gang[i] < block_group->last_alloc) block_group->last_alloc = gang[i]; + if (gang[i] < block_group->last_prealloc) + block_group->last_prealloc = gang[i]; + if (!block_group->data) + set_radix_bit(extent_radix, gang[i]); } try_remove_page(btree_inode->i_mapping, gang[i] << (PAGE_CACHE_SHIFT - @@ -508,7 +687,8 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin) + *root, u64 blocknr, u64 num_blocks, int pin, + int mark_free) { struct btrfs_path *path; struct btrfs_key key; @@ -556,10 +736,10 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_del_item(trans, extent_root, path); if (ret) BUG(); - ret = update_block_group(trans, root, blocknr, num_blocks, 0); + ret = update_block_group(trans, root, blocknr, num_blocks, 0, + mark_free); BUG_ON(ret); } - btrfs_release_path(extent_root, path); btrfs_free_path(path); finish_current_insert(trans, extent_root); return ret; @@ -585,7 +765,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct pinned_radix = &extent_root->fs_info->pinned_radix; while(1) { - ret = find_first_radix_bit(pending_radix, gang, + ret = find_first_radix_bit(pending_radix, gang, 0, ARRAY_SIZE(gang)); if (!ret) break; @@ -605,7 +785,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct wret = clear_radix_bit(pending_radix, gang[i]); BUG_ON(wret); wret = __free_extent(trans, extent_root, - gang[i], 1, 0); + gang[i], 1, 0, 0); if (wret) err = wret; } @@ -627,7 +807,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root pin_down_block(root, blocknr, 1); return 0; } - ret = __free_extent(trans, root, blocknr, num_blocks, pin); + ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } @@ -688,18 +868,45 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: if (!full_scan && block_group->data != data) WARN_ON(1); - if (block_group->last_alloc > search_start) - search_start = block_group->last_alloc; + + if (!data) + search_start = find_search_start(root, &block_group, + search_start, total_needed); + else + search_start = max(block_group->last_alloc, search_start); + btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; start_found = 0; + ret = btrfs_search_slot(trans, root, ins, path, 0, 0); if (ret < 0) goto error; - if (path->slots[0] > 0) + if (path->slots[0] > 0) { path->slots[0]--; + } + + l = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key); + /* + * a rare case, go back one key if we hit a block group item + * instead of an extent item + */ + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY && + key.objectid + key.offset >= search_start) { + ins->objectid = key.objectid; + ins->offset = key.offset - 1; + btrfs_release_path(root, path); + ret = btrfs_search_slot(trans, root, ins, path, 0, 0); + if (ret < 0) + goto error; + + if (path->slots[0] > 0) { + path->slots[0]--; + } + } while (1) { l = btrfs_buffer_leaf(path->nodes[0]); @@ -725,21 +932,23 @@ check_failed: ins->offset = search_end - ins->objectid; goto check_pending; } + btrfs_disk_key_to_cpu(&key, &l->items[slot].key); - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) - goto next; - if (key.objectid >= search_start) { - if (start_found) { - if (last_block < search_start) - last_block = search_start; - hole_size = key.objectid - last_block; - if (hole_size >= num_blocks) { - ins->objectid = last_block; - ins->offset = hole_size; - goto check_pending; - } + if (key.objectid >= search_start && key.objectid > last_block && + start_found) { + if (last_block < search_start) + last_block = search_start; + hole_size = key.objectid - last_block; + if (hole_size >= num_blocks) { + ins->objectid = last_block; + ins->offset = hole_size; + goto check_pending; } } + + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) + goto next; + start_found = 1; last_block = key.objectid + key.offset; if (last_block >= block_group->key.objectid + @@ -759,6 +968,7 @@ check_pending: */ btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); + if (ins->objectid + num_blocks >= search_end) { if (full_scan) return -ENOSPC; @@ -780,7 +990,7 @@ check_pending: info->extent_tree_insert[0] && ins->objectid <= last) { search_start = last + 1; - WARN_ON(1); + WARN_ON(!full_scan); goto new_group; } } @@ -790,13 +1000,18 @@ check_pending: if (ins->objectid + num_blocks > first && ins->objectid <= info->extent_tree_prealloc[0]) { search_start = info->extent_tree_prealloc[0] + 1; - WARN_ON(1); + WARN_ON(!full_scan); goto new_group; } } if (fill_prealloc) { int nr; test_block = ins->objectid; + if (test_block - info->extent_tree_prealloc[total_needed - 1] >= + leaf_range(root)) { + total_found = 0; + info->extent_tree_prealloc_nr = total_found; + } while(test_block < ins->objectid + ins->offset && total_found < total_needed) { nr = total_needed - total_found - 1; @@ -811,11 +1026,15 @@ check_pending: } info->extent_tree_prealloc_nr = total_found; } - block_group = lookup_block_group(info, ins->objectid); - if (block_group) { - block_group->last_alloc = ins->objectid; - if (!data) - trans->block_group = block_group; + if (!data) { + block_group = lookup_block_group(info, ins->objectid); + if (block_group) { + if (fill_prealloc) + block_group->last_prealloc = + info->extent_tree_prealloc[total_needed-1]; + else + trans->block_group = block_group; + } } ins->offset = num_blocks; btrfs_free_path(path); @@ -824,6 +1043,7 @@ check_pending: new_group: if (search_start + num_blocks >= search_end) { search_start = orig_search_start; +printk("doing full scan!\n"); full_scan = 1; } block_group = lookup_block_group(info, search_start); @@ -871,26 +1091,57 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, info->extent_tree_insert[info->extent_tree_insert_nr++] = ins->objectid; ret = update_block_group(trans, root, - ins->objectid, ins->offset, 1); + ins->objectid, ins->offset, 1, 0); BUG_ON(ret); return 0; } + + /* + * if we're doing a data allocation, preallocate room in the + * extent tree first. This way the extent tree blocks end up + * in the correct block group. + */ + if (data) { + ret = find_free_extent(trans, root, 0, search_start, + search_end, &prealloc_key, 0); + if (ret) { + return ret; + } + if (prealloc_key.objectid + prealloc_key.offset >= search_end) { + int nr = info->extent_tree_prealloc_nr; + search_end = info->extent_tree_prealloc[nr - 1] - 1; + } else { + search_start = info->extent_tree_prealloc[0] + 1; + } + } /* do the real allocation */ ret = find_free_extent(trans, root, num_blocks, search_start, search_end, ins, data); - if (ret) + if (ret) { return ret; + } - /* then do prealloc for the extent tree */ - if (ins->objectid + ins->offset >= search_end) - search_end = ins->objectid - 1; - else - search_start = ins->objectid + ins->offset; + /* + * if we're doing a metadata allocation, preallocate space in the + * extent tree second. This way, we don't create a tiny hole + * in the allocation map between any unused preallocation blocks + * and the metadata block we're actually allocating. On disk, + * it'll go: + * [block we've allocated], [used prealloc 1], [ unused prealloc ] + * The unused prealloc will get reused the next time around. + */ + if (!data) { + if (ins->objectid + ins->offset >= search_end) + search_end = ins->objectid - 1; + else + search_start = ins->objectid + ins->offset; - ret = find_free_extent(trans, root, 0, search_start, - search_end, &prealloc_key, 0); - if (ret) - return ret; + ret = find_free_extent(trans, root, 0, search_start, + search_end, &prealloc_key, 0); + if (ret) { + return ret; + } + } super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + @@ -900,11 +1151,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); - if (ret) + if (ret) { return ret; - if (pending_ret) + } + if (pending_ret) { return pending_ret; - ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); + } + ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); return 0; } @@ -920,7 +1173,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, hint, (unsigned long)-1, &ins, 0); + 1, 0, (unsigned long)-1, &ins, 0); if (ret) { BUG(); return NULL; @@ -1134,6 +1387,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) { int ret; int ret2; + unsigned long gang[16]; + int i; ret = free_block_group_radix(&info->block_group_radix); ret2 = free_block_group_radix(&info->block_group_data_radix); @@ -1141,6 +1396,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return ret; if (ret2) return ret2; + + while(1) { + ret = find_first_radix_bit(&info->extent_map_radix, + gang, 0, ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + clear_radix_bit(&info->extent_map_radix, gang[i]); + } + } return 0; } @@ -1186,7 +1451,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) break; } - if (nr & 1) + if (nr % 3) radix = &info->block_group_data_radix; else radix = &info->block_group_radix; @@ -1197,8 +1462,14 @@ int btrfs_read_block_groups(struct btrfs_root *root) memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; + cache->last_prealloc = cache->key.objectid; cache->pinned = 0; - cache->data = (nr & 1); + cache->cached = 0; + + if (nr % 3) + cache->data = 1; + else + cache->data = 0; cache->radix = radix; key.objectid = found_key.objectid + found_key.offset; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d5ac0d8ee3c9..4c2870e2d12f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1539,7 +1539,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, kunmap(pages[i]); } SetPageChecked(pages[i]); - btrfs_update_inode_block_group(trans, inode); + // btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1914,7 +1914,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } BUG_ON(ret); alloc_extent_start = ins.objectid; - btrfs_update_inode_block_group(trans, inode); + // btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8bbe9107ff7e..f0f03121b7b2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -122,7 +122,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, } dirty_pages = &trans->transaction->dirty_pages; while(1) { - ret = find_first_radix_bit(dirty_pages, gang, ARRAY_SIZE(gang)); + ret = find_first_radix_bit(dirty_pages, gang, + 0, ARRAY_SIZE(gang)); if (!ret) break; for (i = 0; i < ret; i++) { From 8d7be552a733e53135a91d2d28f57ab87dc0e889 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 10 May 2007 11:24:42 -0400 Subject: [PATCH 0146/2985] Btrfs: fix check_node and check_leaf to use less cpu Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 44 +++++++++++++++++++++++++----------------- fs/btrfs/extent-tree.c | 6 +++--- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index dbd3f636dd3d..990d297a7a13 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -130,15 +130,17 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) static int check_node(struct btrfs_root *root, struct btrfs_path *path, int level) { - int i; struct btrfs_node *parent = NULL; struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); int parent_slot; + int slot; + struct btrfs_key cpukey; u32 nritems = btrfs_header_nritems(&node->header); if (path->nodes[level + 1]) parent = btrfs_buffer_node(path->nodes[level + 1]); parent_slot = path->slots[level + 1]; + slot = path->slots[level]; BUG_ON(nritems == 0); if (parent) { struct btrfs_disk_key *parent_key; @@ -149,10 +151,13 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, btrfs_header_blocknr(&node->header)); } BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); - for (i = 0; nritems > 1 && i < nritems - 2; i++) { - struct btrfs_key cpukey; - btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key); - BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0); + if (slot != 0) { + btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot - 1].key); + BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) <= 0); + } + if (slot < nritems - 1) { + btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot + 1].key); + BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) >= 0); } return 0; } @@ -160,10 +165,12 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { - int i; struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]); struct btrfs_node *parent = NULL; int parent_slot; + int slot = path->slots[0]; + struct btrfs_key cpukey; + u32 nritems = btrfs_header_nritems(&leaf->header); if (path->nodes[level + 1]) @@ -182,19 +189,20 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(btrfs_node_blockptr(parent, parent_slot) != btrfs_header_blocknr(&leaf->header)); } - for (i = 0; nritems > 1 && i < nritems - 2; i++) { - struct btrfs_key cpukey; - btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key); - BUG_ON(comp_keys(&leaf->items[i].key, - &cpukey) >= 0); - BUG_ON(btrfs_item_offset(leaf->items + i) != - btrfs_item_end(leaf->items + i + 1)); - if (i == 0) { - BUG_ON(btrfs_item_offset(leaf->items + i) + - btrfs_item_size(leaf->items + i) != - BTRFS_LEAF_DATA_SIZE(root)); - } + if (slot != 0) { + btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot - 1].key); + BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) <= 0); + BUG_ON(btrfs_item_offset(leaf->items + slot - 1) != + btrfs_item_end(leaf->items + slot)); } + if (slot < nritems - 1) { + btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot + 1].key); + BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) >= 0); + BUG_ON(btrfs_item_offset(leaf->items + slot) != + btrfs_item_end(leaf->items + slot + 1)); + } + BUG_ON(btrfs_item_offset(leaf->items) + + btrfs_item_size(leaf->items) != BTRFS_LEAF_DATA_SIZE(root)); return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3ac9da453472..1497ff98f0d3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -235,7 +235,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, hint->key.offset - 1, BTRFS_BLOCK_GROUP_AVAIL); } - last = hint->key.offset * 2; + last = hint->key.offset * 3; if (hint->key.objectid >= last) last = max(search_start + hint->key.offset - 1, hint->key.objectid - last); @@ -554,8 +554,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, blocknr + i); } } - if (old_val < (cache->key.offset * 8) / 10 && - old_val + num >= (cache->key.offset * 8) / 10) { + if (old_val < (cache->key.offset * 6) / 10 && + old_val + num >= (cache->key.offset * 6) / 10) { printk("group %Lu now available\n", cache->key.objectid); radix_tree_tag_set(cache->radix, cache->key.objectid + From 509659cde578d891445afd67d87121dd13e71596 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 10 May 2007 12:36:17 -0400 Subject: [PATCH 0147/2985] Btrfs: switch to crc32c instead of sha256 Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 +++- fs/btrfs/disk-io.c | 12 ++++++------ fs/btrfs/file-item.c | 35 +++++++++++++++++++---------------- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 92a6078de827..d8caeba5d758 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -30,6 +30,8 @@ extern struct kmem_cache *btrfs_path_cachep; /* 32 bytes in various csum fields */ #define BTRFS_CSUM_SIZE 32 +/* four bytes for CRC32 */ +#define BTRFS_CRC32_SIZE 4 /* * the key defines the order in the tree, and so it also defines (optimal) @@ -235,7 +237,7 @@ struct btrfs_file_extent_item { } __attribute__ ((__packed__)); struct btrfs_csum_item { - u8 csum[BTRFS_CSUM_SIZE]; + u8 csum; } __attribute__ ((__packed__)); struct btrfs_device_item { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2dbf422a2b9a..678456d1e2f9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -196,14 +196,14 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, ret = crypto_hash_digest(&desc, &sg, 1, result); spin_unlock(&root->fs_info->hash_lock); if (ret) { - printk("sha256 digest failed\n"); + printk("digest failed\n"); } return ret; } static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, int verify) { - char result[BTRFS_CSUM_SIZE]; + char result[BTRFS_CRC32_SIZE]; int ret; struct btrfs_node *node; @@ -212,14 +212,14 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, if (ret) return ret; if (verify) { - if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) { + if (memcmp(bh->b_data, result, BTRFS_CRC32_SIZE)) { printk("checksum verify failed on %Lu\n", bh_blocknr(bh)); return 1; } } else { node = btrfs_buffer_node(bh); - memcpy(node->header.csum, result, BTRFS_CSUM_SIZE); + memcpy(node->header.csum, result, BTRFS_CRC32_SIZE); } return 0; } @@ -576,10 +576,10 @@ struct btrfs_root *open_ctree(struct super_block *sb) sizeof(struct btrfs_key)); insert_inode_hash(fs_info->btree_inode); mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); - fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); + fs_info->hash_tfm = crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { - printk("failed to allocate sha256 hash\n"); + printk("failed to allocate digest hash\n"); return NULL; } mutex_init(&fs_info->trans_mutex); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 10e4cf08e9ed..a66709e6d036 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -4,8 +4,8 @@ #include "transaction.h" #define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \ - sizeof(struct btrfs_item) * 2) / \ - sizeof(struct btrfs_csum_item)) - 1)) + sizeof(struct btrfs_item) * 2) / \ + BTRFS_CRC32_SIZE) - 1)) int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, @@ -78,7 +78,7 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; csums_in_item = btrfs_item_size(leaf->items + path->slots[0]); - csums_in_item /= sizeof(struct btrfs_csum_item); + csums_in_item /= BTRFS_CRC32_SIZE; if (csum_offset >= csums_in_item) { ret = -EFBIG; @@ -86,7 +86,8 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, } } item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); - item += csum_offset; + item = (struct btrfs_csum_item *)((unsigned char *)item + + csum_offset * BTRFS_CRC32_SIZE); return item; fail: if (ret > 0) @@ -143,8 +144,7 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, /* we found one, but it isn't big enough yet */ leaf = btrfs_buffer_leaf(path->nodes[0]); item_size = btrfs_item_size(leaf->items + path->slots[0]); - if ((item_size / sizeof(struct btrfs_csum_item)) >= - MAX_CSUM_ITEMS(root)) { + if ((item_size / BTRFS_CRC32_SIZE) >= MAX_CSUM_ITEMS(root)) { /* already at max size, make a new one */ goto insert; } @@ -159,7 +159,7 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, */ btrfs_release_path(root, path); ret = btrfs_search_slot(trans, root, &file_key, path, - sizeof(struct btrfs_csum_item), 1); + BTRFS_CRC32_SIZE, 1); if (ret < 0) goto fail; if (ret == 0) { @@ -180,10 +180,10 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, goto insert; } if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / - sizeof(struct btrfs_csum_item)) { - u32 diff = (csum_offset + 1) * sizeof(struct btrfs_csum_item); + BTRFS_CRC32_SIZE) { + u32 diff = (csum_offset + 1) * BTRFS_CRC32_SIZE; diff = diff - btrfs_item_size(leaf->items + path->slots[0]); - WARN_ON(diff != sizeof(struct btrfs_csum_item)); + WARN_ON(diff != BTRFS_CRC32_SIZE); ret = btrfs_extend_item(trans, root, path, diff); BUG_ON(ret); goto csum; @@ -193,7 +193,7 @@ insert: btrfs_release_path(root, path); csum_offset = 0; ret = btrfs_insert_empty_item(trans, root, path, &file_key, - sizeof(struct btrfs_csum_item)); + BTRFS_CRC32_SIZE); if (ret != 0) { printk("at insert for %Lu %u %Lu ret is %d\n", file_key.objectid, file_key.flags, file_key.offset, ret); WARN_ON(1); @@ -203,10 +203,13 @@ csum: item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_csum_item); ret = 0; - item += csum_offset; + item = (struct btrfs_csum_item *)((unsigned char *)item + + csum_offset * BTRFS_CRC32_SIZE); found: - btrfs_check_bounds(item->csum, BTRFS_CSUM_SIZE, path->nodes[0]->b_data, root->fs_info->sb->s_blocksize); - ret = btrfs_csum_data(root, data, len, item->csum); + btrfs_check_bounds(&item->csum, BTRFS_CRC32_SIZE, + path->nodes[0]->b_data, + root->fs_info->sb->s_blocksize); + ret = btrfs_csum_data(root, data, len, &item->csum); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); @@ -222,7 +225,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, struct btrfs_key file_key; struct btrfs_path *path; struct btrfs_csum_item *item; - char result[BTRFS_CSUM_SIZE]; + char result[BTRFS_CRC32_SIZE]; path = btrfs_alloc_path(); BUG_ON(!path); @@ -244,7 +247,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, ret = btrfs_csum_data(root, data, len, result); WARN_ON(ret); - if (memcmp(result, item->csum, BTRFS_CSUM_SIZE)) + if (memcmp(result, &item->csum, BTRFS_CRC32_SIZE)) ret = 1; fail: btrfs_release_path(root, path); From 098f59c2512426926722a96f82af127bd91fb5e4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 11 May 2007 11:33:21 -0400 Subject: [PATCH 0148/2985] Btrfs: patch queue: fix corruption when splitting large items Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 10 ++++++++-- fs/btrfs/super.c | 8 ++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 990d297a7a13..20e01f95ce6e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -871,6 +871,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); btrfs_mark_buffer_dirty(path->nodes[level]); + check_node(root, path, level); return 0; } @@ -1101,6 +1102,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root } else { btrfs_block_release(root, right_buf); } + if (path->nodes[1]) + check_node(root, path, 1); return 0; } /* @@ -1216,6 +1219,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(t); btrfs_mark_buffer_dirty(right_buf); + wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) ret = wret; @@ -1231,6 +1235,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); + if (path->nodes[1]) + check_node(root, path, 1); return ret; } @@ -1326,13 +1332,12 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root wret = insert_ptr(trans, root, path, &disk_key, bh_blocknr(right_buffer), - path->slots[1] - 1, 1); + path->slots[1], 1); if (wret) ret = wret; btrfs_block_release(root, path->nodes[0]); path->nodes[0] = right_buffer; path->slots[0] = 0; - path->slots[1] -= 1; if (path->slots[1] == 0) { wret = fixup_low_keys(trans, root, path, &disk_key, 1); @@ -1379,6 +1384,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root } else btrfs_block_release(root, right_buffer); BUG_ON(path->slots[0] < 0); + check_node(root, path, 1); if (!double_split) return ret; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4c2870e2d12f..eaa48f09d1c4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1529,7 +1529,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_set_file_extent_type(ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - memcpy(ptr, bh->b_data, offset + write_bytes); + btrfs_memcpy(root, path->nodes[0]->b_data, + ptr, bh->b_data, offset + write_bytes); mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); } else { @@ -1686,9 +1687,9 @@ static int drop_extents(struct btrfs_trans_handle *trans, ret = btrfs_del_item(trans, root, path); BUG_ON(ret); btrfs_release_path(root, path); + extent = NULL; if (found_extent) { - inode->i_blocks -= - btrfs_file_extent_num_blocks(extent) << 3; + inode->i_blocks -= extent_num_blocks << 3; ret = btrfs_free_extent(trans, root, disk_blocknr, disk_num_blocks, 0); @@ -1832,7 +1833,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, u64 alloc_extent_start; struct btrfs_trans_handle *trans; struct btrfs_key ins; - pinned[0] = NULL; pinned[1] = NULL; if (file->f_flags & O_DIRECT) From de428b63b16f9c74a24228a517713e8c65f79475 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 May 2007 13:28:27 -0400 Subject: [PATCH 0149/2985] Btrfs: allocator optimizations, truncate readahead Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 12 +++++-- fs/btrfs/extent-tree.c | 76 ++++++++++++++++++++++++++++++++++-------- fs/btrfs/super.c | 76 +++++++++++++++++++++++++++++++----------- 4 files changed, 129 insertions(+), 37 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d8caeba5d758..5830b4333d83 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1081,7 +1081,7 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, - int data); + int data, int owner); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 678456d1e2f9..4c0262b57664 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -257,24 +257,30 @@ static struct address_space_operations btree_aops = { int readahead_tree_block(struct btrfs_root *root, u64 blocknr) { struct buffer_head *bh = NULL; + int ret = 0; bh = btrfs_find_create_tree_block(root, blocknr); if (!bh) return 0; - if (buffer_uptodate(bh)) + if (buffer_uptodate(bh)) { + ret = 1; goto done; - if (test_set_buffer_locked(bh)) + } + if (test_set_buffer_locked(bh)) { + ret = 1; goto done; + } if (!buffer_uptodate(bh)) { get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); } else { unlock_buffer(bh); + ret = 1; } done: brelse(bh); - return 0; + return ret; } struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1497ff98f0d3..e3c6bfea3751 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,6 +12,33 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static void reada_extent_leaves(struct btrfs_root *root, + struct btrfs_path *path, u64 limit) +{ + struct btrfs_node *node; + int i; + int nritems; + u64 item_objectid; + u64 blocknr; + int slot; + int ret; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1] + 1; + nritems = btrfs_header_nritems(&node->header); + for (i = slot; i < nritems && i < slot + 8; i++) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid > limit) + break; + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} + static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) { @@ -24,6 +51,7 @@ static int cache_block_group(struct btrfs_root *root, u64 i; u64 last = 0; u64 hole_size; + u64 limit; int found = 0; root = root->fs_info->extent_root; @@ -46,14 +74,17 @@ printk("cache block group %Lu\n", block_group->key.objectid); return ret; if (ret && path->slots[0] > 0) path->slots[0]--; + limit = block_group->key.objectid + block_group->key.offset; + reada_extent_leaves(root, path, limit); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&leaf->header)) { + reada_extent_leaves(root, path, limit); ret = btrfs_next_leaf(root, path); - if (ret == 0) + if (ret == 0) { continue; - else { + } else { if (found) { hole_size = block_group->key.objectid + block_group->key.offset - last; @@ -187,7 +218,7 @@ new_group: return max((*cache_ret)->last_alloc, search_start); } cache = btrfs_find_block_group(root, cache, - last + cache->key.offset - 1, 0); + last + cache->key.offset - 1, 0, 0); *cache_ret = cache; goto again; } @@ -195,7 +226,7 @@ new_group: struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, - int data) + int data, int owner) { struct btrfs_block_group_cache *cache[8]; struct btrfs_block_group_cache *found_group = NULL; @@ -207,6 +238,10 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int i; int ret; int full_search = 0; + int factor = 8; + + if (!owner) + factor = 5; if (data) radix = &info->block_group_data_radix; @@ -219,14 +254,14 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (shint->data == data) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < - (shint->key.offset * 8) / 10) { + (shint->key.offset * factor) / 10) { return shint; } } } if (hint && hint->data == data) { used = btrfs_block_group_used(&hint->item); - if (used + hint->pinned < (hint->key.offset * 8) / 10) { + if (used + hint->pinned < (hint->key.offset * factor) / 10) { return hint; } if (used >= (hint->key.offset * 8) / 10) { @@ -261,7 +296,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, cache[i]->key.offset; used = btrfs_block_group_used(&cache[i]->item); if (used + cache[i]->pinned < - (cache[i]->key.offset * 8) / 10) { + (cache[i]->key.offset * factor) / 10) { found_group = cache[i]; goto found; } @@ -272,6 +307,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, BTRFS_BLOCK_GROUP_AVAIL); } } + cond_resched(); } last = hint_last; again: @@ -295,13 +331,16 @@ again: BTRFS_BLOCK_GROUP_AVAIL); } } + cond_resched(); } if (!full_search) { +printk("find block group doing full search data %d start %Lu\n", data, search_start); last = search_start; full_search = 1; goto again; } if (!found_group) { +printk("find block group bailing to zero data %d\n", data); ret = radix_tree_gang_lookup(radix, (void **)&found_group, 0, 1); BUG_ON(ret != 1); @@ -554,8 +593,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, blocknr + i); } } - if (old_val < (cache->key.offset * 6) / 10 && - old_val + num >= (cache->key.offset * 6) / 10) { + if (old_val < (cache->key.offset * 5) / 10 && + old_val + num >= (cache->key.offset * 5) / 10) { printk("group %Lu now available\n", cache->key.objectid); radix_tree_tag_set(cache->radix, cache->key.objectid + @@ -842,6 +881,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; + u64 limit; path = btrfs_alloc_path(); ins->flags = 0; @@ -858,11 +898,11 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (search_start) { block_group = lookup_block_group(info, search_start); block_group = btrfs_find_block_group(root, block_group, - search_start, data); + search_start, data, 1); } else { block_group = btrfs_find_block_group(root, trans->block_group, 0, - data); + data, 1); } check_failed: @@ -916,6 +956,12 @@ check_failed: info->extent_tree_prealloc_nr = 0; total_found = 0; } + if (start_found) + limit = last_block + + block_group->key.offset / 2; + else + limit = search_start + + block_group->key.offset / 2; ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -960,6 +1006,7 @@ check_failed: } next: path->slots[0]++; + cond_resched(); } // FIXME -ENOSPC check_pending: @@ -1049,7 +1096,8 @@ printk("doing full scan!\n"); block_group = lookup_block_group(info, search_start); if (!full_scan) block_group = btrfs_find_block_group(root, block_group, - search_start, data); + search_start, data, 0); + cond_resched(); goto check_failed; error: @@ -1102,7 +1150,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * in the correct block group. */ if (data) { - ret = find_free_extent(trans, root, 0, search_start, + ret = find_free_extent(trans, root, 0, 0, search_end, &prealloc_key, 0); if (ret) { return ret; @@ -1173,7 +1221,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, 0, (unsigned long)-1, &ins, 0); + 1, hint, (unsigned long)-1, &ins, 0); if (ret) { BUG(); return NULL; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index eaa48f09d1c4..0f79490123cf 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -351,6 +351,35 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, return ret; } +static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path, + u64 objectid) +{ + struct btrfs_node *node; + int i; + int nritems; + u64 item_objectid; + u64 blocknr; + int slot; + int ret; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1]; + if (slot == 0) + return; + nritems = btrfs_header_nritems(&node->header); + for (i = slot - 1; i >= 0; i--) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid != objectid) + break; + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} + static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) @@ -386,6 +415,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(path->slots[0] == 0); path->slots[0]--; } + reada_truncate(root, path, inode->i_ino); leaf = btrfs_buffer_leaf(path->nodes[0]); found_key = &leaf->items[path->slots[0]].key; if (btrfs_disk_key_objectid(found_key) != inode->i_ino) @@ -587,28 +617,30 @@ printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_r return d_splice_alias(inode, dentry); } -static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path) +static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path, + u64 objectid) { struct btrfs_node *node; int i; - int nritems; - u64 objectid; + u32 nritems; u64 item_objectid; u64 blocknr; int slot; + int ret; if (!path->nodes[1]) return; node = btrfs_buffer_node(path->nodes[1]); slot = path->slots[1]; - objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key); nritems = btrfs_header_nritems(&node->header); - for (i = slot; i < nritems; i++) { + for (i = slot + 1; i < nritems; i++) { item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); if (item_objectid != objectid) break; blocknr = btrfs_node_blockptr(node, i); - readahead_tree_block(root, blocknr); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; } } @@ -646,21 +678,20 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (ret < 0) goto err; advance = 0; - reada_leaves(root, path); + reada_leaves(root, path, inode->i_ino); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { + reada_leaves(root, path, inode->i_ino); ret = btrfs_next_leaf(root, path); if (ret) break; leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path->slots[0]; - if (path->slots[1] == 0) - reada_leaves(root, path); } else { slot++; path->slots[0]++; @@ -805,13 +836,18 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_inode_item inode_item; struct btrfs_key *location; int ret; + int owner; inode = new_inode(root->fs_info->sb); if (!inode) return ERR_PTR(-ENOMEM); BTRFS_I(inode)->root = root; - group = btrfs_find_block_group(root, group, 0, 0); + if (mode & S_IFDIR) + owner = 0; + else + owner = 1; + group = btrfs_find_block_group(root, group, 0, 0, owner); BTRFS_I(inode)->block_group = group; inode->i_uid = current->fsuid; @@ -1562,7 +1598,7 @@ failed: static int drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end) + u64 start, u64 end, u64 *hint_block) { int ret; struct btrfs_key key; @@ -1659,17 +1695,14 @@ static int drop_extents(struct btrfs_trans_handle *trans, new_num = (start - key.offset) >> inode->i_blkbits; old_num = btrfs_file_extent_num_blocks(extent); + *hint_block = + btrfs_file_extent_disk_blocknr(extent); inode->i_blocks -= (old_num - new_num) << 3; btrfs_set_file_extent_num_blocks(extent, new_num); mark_buffer_dirty(path->nodes[0]); } else { WARN_ON(1); - /* - ret = btrfs_truncate_item(trans, root, path, - start - key.offset); - BUG_ON(ret); - */ } } if (!keep) { @@ -1683,6 +1716,8 @@ static int drop_extents(struct btrfs_trans_handle *trans, btrfs_file_extent_disk_num_blocks(extent); extent_num_blocks = btrfs_file_extent_num_blocks(extent); + *hint_block = + btrfs_file_extent_disk_blocknr(extent); } ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -1831,6 +1866,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, u64 start_pos; u64 num_blocks; u64 alloc_extent_start; + u64 hint_block; struct btrfs_trans_handle *trans; struct btrfs_key ins; pinned[0] = NULL; @@ -1871,6 +1907,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } if (first_index != last_index && (last_index << PAGE_CACHE_SHIFT) < inode->i_size && + pos + count < inode->i_size && (count & (PAGE_CACHE_SIZE - 1))) { pinned[1] = grab_cache_page(inode->i_mapping, last_index); if (!PageUptodate(pinned[1])) { @@ -1892,18 +1929,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, btrfs_set_trans_block_group(trans, inode); /* FIXME blocksize != 4096 */ inode->i_blocks += num_blocks << 3; + hint_block = 0; if (start_pos < inode->i_size) { /* FIXME blocksize != pagesize */ ret = drop_extents(trans, root, inode, start_pos, (pos + count + root->blocksize -1) & - ~((u64)root->blocksize - 1)); + ~((u64)root->blocksize - 1), &hint_block); BUG_ON(ret); } if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { ret = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 1, (u64)-1, &ins, 1); + num_blocks, hint_block, (u64)-1, + &ins, 1); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); @@ -2455,7 +2494,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, btrfs_fill_super, mnt); } - static int btrfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { From f9f3c6b666f717510b67036c314ec915b9059eaa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 21 May 2007 14:05:12 -0400 Subject: [PATCH 0150/2985] Btrfs: 2.6.21-git fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 -- fs/btrfs/disk-io.c | 3 --- fs/btrfs/super.c | 21 +-------------------- 3 files changed, 1 insertion(+), 25 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5830b4333d83..da12d8275817 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1197,6 +1197,4 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, u64 objectid, u64 offset, int cow); /* super.c */ -extern struct subsystem btrfs_subsys; - #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4c0262b57664..3da057f78bc8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -649,9 +649,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->generation = btrfs_super_generation(disk_super) + 1; memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); - kobj_set_kset_s(fs_info, btrfs_subsys); - kobject_set_name(&fs_info->kobj, "%s", sb->s_id); - kobject_register(&fs_info->kobj); mutex_unlock(&fs_info->fs_mutex); return tree_root; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0f79490123cf..0220c82841ad 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -18,24 +18,11 @@ #include "btrfs_inode.h" #include "ioctl.h" -static void btrfs_fsinfo_release(struct kobject *obj) -{ - struct btrfs_fs_info *fsinfo = container_of(obj, - struct btrfs_fs_info, kobj); - kfree(fsinfo); -} - -static struct kobj_type btrfs_fsinfo_ktype = { - .release = btrfs_fsinfo_release, -}; - struct btrfs_iget_args { u64 ino; struct btrfs_root *root; }; -decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL); - #define BTRFS_SUPER_MAGIC 0x9123682E static struct inode_operations btrfs_dir_inode_operations; @@ -2438,7 +2425,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, { struct btrfs_inode *ei = (struct btrfs_inode *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + if ((flags & (SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&ei->vfs_inode); } @@ -2588,12 +2575,7 @@ static int __init init_btrfs_fs(void) err = init_inodecache(); if (err) return err; - kset_set_kset_s(&btrfs_subsys, fs_subsys); - err = subsystem_register(&btrfs_subsys); - if (err) - goto out; return register_filesystem(&btrfs_fs_type); -out: destroy_inodecache(); return err; } @@ -2602,7 +2584,6 @@ static void __exit exit_btrfs_fs(void) { destroy_inodecache(); unregister_filesystem(&btrfs_fs_type); - subsystem_unregister(&btrfs_subsys); printk("btrfs unloaded\n"); } From e06afa839e726959be2166bec4cb85c117e213f1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 23 May 2007 15:44:28 -0400 Subject: [PATCH 0151/2985] Btrfs: rename Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/ctree.h | 1 + fs/btrfs/dir-item.c | 27 +++++++-- fs/btrfs/super.c | 132 ++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 143 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index f6df246f26c3..861e34ae0979 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,6 +8,7 @@ * Add generation number to key pointer in nodes * Add generation number to inode * Add ability to switch a block group from data to metadata or vice versa +* forbid cross subvolume renames and hardlinks * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index da12d8275817..3330004af1a6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -32,6 +32,7 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_CSUM_SIZE 32 /* four bytes for CRC32 */ #define BTRFS_CRC32_SIZE 4 +#define BTRFS_EMPTY_DIR_SIZE 6 /* * the key defines the order in the tree, and so it also defines (optimal) diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 00a28d90fea6..b408a3d20c7b 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -9,7 +9,9 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *cpu_key, - u32 data_size) + u32 data_size, + const char *name, + int name_len) { int ret; char *ptr; @@ -18,6 +20,10 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (ret == -EEXIST) { + struct btrfs_dir_item *di; + di = btrfs_match_dir_item_name(root, path, name, name_len); + if (di) + return ERR_PTR(-EEXIST); ret = btrfs_extend_item(trans, root, path, data_size); WARN_ON(ret > 0); if (ret) @@ -37,6 +43,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key *location, u8 type) { int ret = 0; + int ret2 = 0; struct btrfs_path *path; struct btrfs_dir_item *dir_item; char *name_ptr; @@ -51,9 +58,12 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); btrfs_init_path(path); data_size = sizeof(*dir_item) + name_len; - dir_item = insert_with_overflow(trans, root, path, &key, data_size); + dir_item = insert_with_overflow(trans, root, path, &key, data_size, + name, name_len); if (IS_ERR(dir_item)) { ret = PTR_ERR(dir_item); + if (ret == -EEXIST) + goto second_insert; goto out; } @@ -66,19 +76,20 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); btrfs_mark_buffer_dirty(path->nodes[0]); +second_insert: /* FIXME, use some real flag for selecting the extra index */ if (root == root->fs_info->tree_root) { ret = 0; goto out; } - btrfs_release_path(root, path); btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); key.offset = location->objectid; - dir_item = insert_with_overflow(trans, root, path, &key, data_size); + dir_item = insert_with_overflow(trans, root, path, &key, data_size, + name, name_len); if (IS_ERR(dir_item)) { - ret = PTR_ERR(dir_item); + ret2 = PTR_ERR(dir_item); goto out; } btrfs_cpu_key_to_disk(&dir_item->location, location); @@ -90,7 +101,11 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_free_path(path); - return ret; + if (ret) + return ret; + if (ret2) + return ret2; + return 0; } struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0220c82841ad..f49cad603ee8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -375,6 +375,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key key; struct btrfs_disk_key *found_key; + u32 found_type; struct btrfs_leaf *leaf; struct btrfs_file_extent_item *fi = NULL; u64 extent_start = 0; @@ -386,12 +387,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = 0; - /* - * use BTRFS_CSUM_ITEM_KEY because it is larger than inline keys - * or extent data - */ - btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY); + key.flags = (u32)-1; while(1) { btrfs_init_path(path); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); @@ -405,10 +401,13 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, reada_truncate(root, path, inode->i_ino); leaf = btrfs_buffer_leaf(path->nodes[0]); found_key = &leaf->items[path->slots[0]].key; + found_type = btrfs_disk_key_type(found_key); if (btrfs_disk_key_objectid(found_key) != inode->i_ino) break; - if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY && - btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) + if (found_type != BTRFS_CSUM_ITEM_KEY && + found_type != BTRFS_DIR_ITEM_KEY && + found_type != BTRFS_DIR_INDEX_KEY && + found_type != BTRFS_EXTENT_DATA_KEY) break; if (btrfs_disk_key_offset(found_key) < inode->i_size) break; @@ -460,10 +459,8 @@ static void btrfs_delete_inode(struct inode *inode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); - if (S_ISREG(inode->i_mode)) { - ret = btrfs_truncate_in_trans(trans, root, inode); - BUG_ON(ret); - } + ret = btrfs_truncate_in_trans(trans, root, inode); + BUG_ON(ret); btrfs_free_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -2504,6 +2501,116 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, + struct inode * new_dir,struct dentry *new_dentry) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(old_dir)->root; + struct inode *new_inode = new_dentry->d_inode; + struct inode *old_inode = old_dentry->d_inode; + struct timespec ctime = CURRENT_TIME; + struct btrfs_path *path; + struct btrfs_dir_item *di; + int ret; + + if (S_ISDIR(old_inode->i_mode) && new_inode && + new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { + return -ENOTEMPTY; + } + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, new_dir); + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out_fail; + } + + old_dentry->d_inode->i_nlink++; + old_dir->i_ctime = old_dir->i_mtime = ctime; + new_dir->i_ctime = new_dir->i_mtime = ctime; + old_inode->i_ctime = ctime; + if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { + struct btrfs_key *location = &BTRFS_I(new_dir)->location; + u64 old_parent_oid; + di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino, + "..", 2, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out_fail; + } + if (!di) { + ret = -ENOENT; + goto out_fail; + } + old_parent_oid = btrfs_disk_key_objectid(&di->location); + ret = btrfs_del_item(trans, root, path); + if (ret) { + ret = -EIO; + goto out_fail; + } + btrfs_release_path(root, path); + + di = btrfs_lookup_dir_index_item(trans, root, path, + old_inode->i_ino, + old_parent_oid, + "..", 2, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out_fail; + } + if (!di) { + ret = -ENOENT; + goto out_fail; + } + ret = btrfs_del_item(trans, root, path); + if (ret) { + ret = -EIO; + goto out_fail; + } + btrfs_release_path(root, path); + + ret = btrfs_insert_dir_item(trans, root, "..", 2, + old_inode->i_ino, location, 0); + if (ret) + goto out_fail; + } + + + ret = btrfs_add_link(trans, new_dentry, old_inode); + if (ret == -EEXIST && new_inode) + ret = 0; + else if (ret) + goto out_fail; + + ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry); + if (ret) + goto out_fail; + + if (new_inode) { + new_inode->i_ctime = CURRENT_TIME; + di = btrfs_lookup_dir_index_item(trans, root, path, + new_dir->i_ino, + new_inode->i_ino, + new_dentry->d_name.name, + new_dentry->d_name.len, -1); + if (di && !IS_ERR(di)) { + btrfs_del_item(trans, root, path); + btrfs_release_path(root, path); + } + if (S_ISDIR(new_inode->i_mode)) + clear_nlink(new_inode); + else + drop_nlink(new_inode); + btrfs_update_inode(trans, root, new_inode); + } +out_fail: + btrfs_free_path(path); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2531,6 +2638,7 @@ static struct inode_operations btrfs_dir_inode_operations = { .unlink = btrfs_unlink, .mkdir = btrfs_mkdir, .rmdir = btrfs_rmdir, + .rename = btrfs_rename, }; static struct inode_operations btrfs_dir_ro_inode_operations = { From 2b8d99a723a3186a69b5bd941a272dbecae9205b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 May 2007 08:34:10 -0400 Subject: [PATCH 0152/2985] Btrfs: symlinks and hard links Signed-off-by: Chris Mason --- fs/btrfs/super.c | 163 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 147 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f49cad603ee8..c1a0adde9b1c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -26,11 +26,13 @@ struct btrfs_iget_args { #define BTRFS_SUPER_MAGIC 0x9123682E static struct inode_operations btrfs_dir_inode_operations; +static struct inode_operations btrfs_symlink_inode_operations; static struct inode_operations btrfs_dir_ro_inode_operations; static struct super_operations btrfs_super_ops; static struct file_operations btrfs_dir_file_operations; static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; +static struct address_space_operations btrfs_symlink_aops; static struct file_operations btrfs_file_operations; static void btrfs_read_locked_inode(struct inode *inode) @@ -103,7 +105,8 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_op = &btrfs_dir_inode_operations; break; case S_IFLNK: - // inode->i_op = &page_symlink_inode_operations; + inode->i_op = &btrfs_symlink_inode_operations; + inode->i_mapping->a_ops = &btrfs_symlink_aops; break; } return; @@ -940,6 +943,41 @@ out_unlock: return err; } +static int btrfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct inode *inode = old_dentry->d_inode; + int err; + int drop_inode = 0; + + if (inode->i_nlink == 0) + return -ENOENT; + + inc_nlink(inode); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + atomic_inc(&inode->i_count); + err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, dir); + btrfs_update_inode(trans, root, inode); + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } + btrfs_btree_balance_dirty(root); + return err; +} + static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 dirid) @@ -2577,33 +2615,25 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, } - ret = btrfs_add_link(trans, new_dentry, old_inode); - if (ret == -EEXIST && new_inode) - ret = 0; - else if (ret) - goto out_fail; - ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry); if (ret) goto out_fail; if (new_inode) { new_inode->i_ctime = CURRENT_TIME; - di = btrfs_lookup_dir_index_item(trans, root, path, - new_dir->i_ino, - new_inode->i_ino, - new_dentry->d_name.name, - new_dentry->d_name.len, -1); - if (di && !IS_ERR(di)) { - btrfs_del_item(trans, root, path); - btrfs_release_path(root, path); - } + ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry); + if (ret) + goto out_fail; if (S_ISDIR(new_inode->i_mode)) clear_nlink(new_inode); else drop_nlink(new_inode); btrfs_update_inode(trans, root, new_inode); } + ret = btrfs_add_link(trans, new_dentry, old_inode); + if (ret) + goto out_fail; + out_fail: btrfs_free_path(path); btrfs_end_transaction(trans, root); @@ -2611,6 +2641,94 @@ out_fail: return ret; } +static int btrfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_path *path; + struct btrfs_key key; + struct inode *inode; + int err; + int drop_inode = 0; + u64 objectid; + int name_len; + int datasize; + char *ptr; + struct btrfs_file_extent_item *ei; + + name_len = strlen(symname) + 1; + if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) + return -ENAMETOOLONG; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_unlock; + + btrfs_set_trans_block_group(trans, inode); + err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; + else { + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + } + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); + if (drop_inode) + goto out_unlock; + + path = btrfs_alloc_path(); + BUG_ON(!path); + key.objectid = inode->i_ino; + key.offset = 0; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + datasize = btrfs_file_extent_calc_inline_size(name_len); + err = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + BUG_ON(err); + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = btrfs_file_extent_inline_start(ei); + btrfs_memcpy(root, path->nodes[0]->b_data, + ptr, symname, name_len); + mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + inode->i_op = &btrfs_symlink_inode_operations; + inode->i_mapping->a_ops = &btrfs_symlink_aops; + inode->i_size = name_len - 1; + btrfs_update_inode(trans, root, inode); + err = 0; + +out_unlock: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } + btrfs_btree_balance_dirty(root); + return err; +} + static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2636,9 +2754,11 @@ static struct inode_operations btrfs_dir_inode_operations = { .lookup = btrfs_lookup, .create = btrfs_create, .unlink = btrfs_unlink, + .link = btrfs_link, .mkdir = btrfs_mkdir, .rmdir = btrfs_rmdir, .rename = btrfs_rename, + .symlink = btrfs_symlink, }; static struct inode_operations btrfs_dir_ro_inode_operations = { @@ -2660,6 +2780,11 @@ static struct address_space_operations btrfs_aops = { .commit_write = btrfs_commit_write, }; +static struct address_space_operations btrfs_symlink_aops = { + .readpage = btrfs_readpage, + .writepage = btrfs_writepage, +}; + static struct inode_operations btrfs_file_inode_operations = { .truncate = btrfs_truncate, .getattr = btrfs_getattr, @@ -2676,6 +2801,12 @@ static struct file_operations btrfs_file_operations = { .fsync = btrfs_sync_file, }; +static struct inode_operations btrfs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, +}; + static int __init init_btrfs_fs(void) { int err; From 3a686375629da5d2e2ad019265b66ef113c87455 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 May 2007 13:35:57 -0400 Subject: [PATCH 0153/2985] Btrfs: sparse files! Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 14 +++-- fs/btrfs/file-item.c | 11 ++-- fs/btrfs/super.c | 121 +++++++++++++++++++++++++++++++++-------- 4 files changed, 116 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3330004af1a6..e8f741167c6b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1180,6 +1180,7 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, u64 offset, + u64 disk_num_blocks, u64 num_blocks); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e3c6bfea3751..a366415e03a6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -443,6 +443,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, buf_leaf = btrfs_buffer_leaf(buf); for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { if (leaf) { + u64 disk_blocknr; key = &buf_leaf->items[i].key; if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -451,8 +452,10 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE) continue; - ret = btrfs_inc_extent_ref(trans, root, - btrfs_file_extent_disk_blocknr(fi), + disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + if (disk_blocknr == 0) + continue; + ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, btrfs_file_extent_disk_num_blocks(fi)); BUG_ON(ret); } else { @@ -1248,6 +1251,7 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, leaf = btrfs_buffer_leaf(cur); nritems = btrfs_header_nritems(&leaf->header); for (i = 0; i < nritems; i++) { + u64 disk_blocknr; key = &leaf->items[i].key; if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -1258,8 +1262,10 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, * FIXME make sure to insert a trans record that * repeats the snapshot del on crash */ - ret = btrfs_free_extent(trans, root, - btrfs_file_extent_disk_blocknr(fi), + disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + if (disk_blocknr == 0) + continue; + ret = btrfs_free_extent(trans, root, disk_blocknr, btrfs_file_extent_disk_num_blocks(fi), 0); BUG_ON(ret); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a66709e6d036..7990b5742114 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -9,7 +9,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, - u64 offset, u64 num_blocks) + u64 offset, u64 disk_num_blocks, + u64 num_blocks) { int ret = 0; struct btrfs_file_extent_item *item; @@ -30,7 +31,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_disk_blocknr(item, offset); - btrfs_set_file_extent_disk_num_blocks(item, num_blocks); + btrfs_set_file_extent_disk_num_blocks(item, disk_num_blocks); btrfs_set_file_extent_offset(item, 0); btrfs_set_file_extent_num_blocks(item, num_blocks); btrfs_set_file_extent_generation(item, trans->transid); @@ -176,14 +177,14 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid || csum_offset >= MAX_CSUM_ITEMS(root)) { - WARN_ON(1); goto insert; } if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / BTRFS_CRC32_SIZE) { u32 diff = (csum_offset + 1) * BTRFS_CRC32_SIZE; diff = diff - btrfs_item_size(leaf->items + path->slots[0]); - WARN_ON(diff != BTRFS_CRC32_SIZE); + if (diff != BTRFS_CRC32_SIZE) + goto insert; ret = btrfs_extend_item(trans, root, path, diff); BUG_ON(ret); goto csum; @@ -241,7 +242,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, ret = PTR_ERR(item); /* a csum that isn't present is a preallocated region. */ if (ret == -ENOENT || ret == -EFBIG) - ret = 1; + ret = -ENOENT; goto fail; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c1a0adde9b1c..5b87c4e9d491 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -17,6 +17,7 @@ #include "transaction.h" #include "btrfs_inode.h" #include "ioctl.h" +#include "print-tree.h" struct btrfs_iget_args { u64 ino; @@ -421,14 +422,17 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item); if (btrfs_file_extent_type(fi) != BTRFS_FILE_EXTENT_INLINE) { + u64 num_dec; extent_start = btrfs_file_extent_disk_blocknr(fi); extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi); /* FIXME blocksize != 4096 */ - inode->i_blocks -= - btrfs_file_extent_num_blocks(fi) << 3; - found_extent = 1; + num_dec = btrfs_file_extent_num_blocks(fi) << 3; + if (extent_start != 0) { + found_extent = 1; + inode->i_blocks -= num_dec; + } } } ret = btrfs_del_item(trans, root, path); @@ -448,6 +452,43 @@ error: return ret; } +static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int err; + + err = inode_change_ok(inode, attr); + if (err) + return err; + + if (S_ISREG(inode->i_mode) && + attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 mask = root->blocksize - 1; + u64 pos = (inode->i_size + mask) & ~mask; + u64 hole_size; + + if (attr->ia_size < pos) + goto out; + hole_size = (attr->ia_size - pos + mask) & ~mask; + hole_size >>= inode->i_blkbits; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, + pos, 0, 0, hole_size); + BUG_ON(err); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + } + + err = inode_setattr(inode, attr); + +out: + return err; +} static void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; @@ -1169,8 +1210,10 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (found_type == BTRFS_FILE_EXTENT_REG) { extent_start = extent_start >> inode->i_blkbits; extent_end = extent_start + btrfs_file_extent_num_blocks(item); + err = 0; + if (blocknr == 0) + goto out; if (iblock >= extent_start && iblock < extent_end) { - err = 0; btrfs_map_bh_to_logical(root, result, blocknr + iblock - extent_start); goto out; @@ -1591,7 +1634,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, ptr, bh->b_data, offset + write_bytes); mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); - } else { + } else if (buffer_mapped(bh)) { btrfs_csum_file_block(trans, root, inode->i_ino, pages[i]->index << PAGE_CACHE_SHIFT, kmap(pages[i]), PAGE_CACHE_SIZE); @@ -1693,15 +1736,24 @@ static int drop_extents(struct btrfs_trans_handle *trans, goto out; } - search_start = extent_end; + if (found_inline) { + u64 mask = root->blocksize - 1; + search_start = (extent_end + mask) & ~mask; + } else + search_start = extent_end; if (end < extent_end && end >= key.offset) { if (found_extent) { + u64 disk_blocknr = + btrfs_file_extent_disk_blocknr(extent); + u64 disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); memcpy(&old, extent, sizeof(old)); - ret = btrfs_inc_extent_ref(trans, root, - btrfs_file_extent_disk_blocknr(&old), - btrfs_file_extent_disk_num_blocks(&old)); - BUG_ON(ret); + if (disk_blocknr != 0) { + ret = btrfs_inc_extent_ref(trans, root, + disk_blocknr, disk_num_blocks); + BUG_ON(ret); + } } WARN_ON(found_inline); bookend = 1; @@ -1719,7 +1771,10 @@ static int drop_extents(struct btrfs_trans_handle *trans, old_num = btrfs_file_extent_num_blocks(extent); *hint_block = btrfs_file_extent_disk_blocknr(extent); - inode->i_blocks -= (old_num - new_num) << 3; + if (btrfs_file_extent_disk_blocknr(extent)) { + inode->i_blocks -= + (old_num - new_num) << 3; + } btrfs_set_file_extent_num_blocks(extent, new_num); mark_buffer_dirty(path->nodes[0]); @@ -1745,7 +1800,7 @@ static int drop_extents(struct btrfs_trans_handle *trans, BUG_ON(ret); btrfs_release_path(root, path); extent = NULL; - if (found_extent) { + if (found_extent && disk_blocknr != 0) { inode->i_blocks -= extent_num_blocks << 3; ret = btrfs_free_extent(trans, root, disk_blocknr, @@ -1785,18 +1840,19 @@ static int drop_extents(struct btrfs_trans_handle *trans, btrfs_file_extent_offset(&old) + ((end - key.offset) >> inode->i_blkbits)); WARN_ON(btrfs_file_extent_num_blocks(&old) < - (end - key.offset) >> inode->i_blkbits); + (extent_end - end) >> inode->i_blkbits); btrfs_set_file_extent_num_blocks(extent, - btrfs_file_extent_num_blocks(&old) - - ((end - key.offset) >> inode->i_blkbits)); + (extent_end - end) >> inode->i_blkbits); btrfs_set_file_extent_type(extent, BTRFS_FILE_EXTENT_REG); btrfs_set_file_extent_generation(extent, btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); - inode->i_blocks += - btrfs_file_extent_num_blocks(extent) << 3; + if (btrfs_file_extent_disk_blocknr(&old) != 0) { + inode->i_blocks += + btrfs_file_extent_num_blocks(extent) << 3; + } ret = 0; goto out; } @@ -1960,6 +2016,21 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ~((u64)root->blocksize - 1), &hint_block); BUG_ON(ret); } + if (inode->i_size < start_pos) { + u64 last_pos_in_file; + u64 hole_size; + u64 mask = root->blocksize - 1; + last_pos_in_file = (inode->i_size + mask) & ~mask; + hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + ret = btrfs_insert_file_extent(trans, root, + inode->i_ino, + last_pos_in_file, + 0, 0, hole_size); + } + BUG_ON(ret); + } if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { ret = btrfs_alloc_extent(trans, root, inode->i_ino, @@ -1967,7 +2038,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, &ins, 1); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset); + start_pos, ins.objectid, ins.offset, + ins.offset); BUG_ON(ret); } else { ins.offset = 0; @@ -2044,10 +2116,13 @@ static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, page->index << PAGE_CACHE_SHIFT, kmap(page), PAGE_CACHE_SIZE); if (ret) { - printk("failed to verify ino %lu page %lu\n", - page->mapping->host->i_ino, - page->index); - memset(page_address(page), 0, PAGE_CACHE_SIZE); + if (ret != -ENOENT) { + printk("failed to verify ino %lu page %lu\n", + page->mapping->host->i_ino, + page->index); + memset(page_address(page), 0, PAGE_CACHE_SIZE); + flush_dcache_page(page); + } } SetPageChecked(page); kunmap(page); @@ -2759,6 +2834,7 @@ static struct inode_operations btrfs_dir_inode_operations = { .rmdir = btrfs_rmdir, .rename = btrfs_rename, .symlink = btrfs_symlink, + .setattr = btrfs_setattr, }; static struct inode_operations btrfs_dir_ro_inode_operations = { @@ -2788,6 +2864,7 @@ static struct address_space_operations btrfs_symlink_aops = { static struct inode_operations btrfs_file_inode_operations = { .truncate = btrfs_truncate, .getattr = btrfs_getattr, + .setattr = btrfs_setattr, }; static struct file_operations btrfs_file_operations = { From 1de037a43edf67f3a9f66dd197195b3c08febb16 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 May 2007 15:17:08 -0400 Subject: [PATCH 0154/2985] Btrfs: fixup various fsx failures Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 + fs/btrfs/file-item.c | 30 ++++++ fs/btrfs/super.c | 213 ++++++++++++++++++++++++++++++++++++------- 3 files changed, 214 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e8f741167c6b..756ea9d0123b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1198,5 +1198,8 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_path *path, u64 objectid, u64 offset, int cow); +int btrfs_csum_truncate(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u64 isize); /* super.c */ #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 7990b5742114..0782e924ddeb 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -2,6 +2,7 @@ #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "print-tree.h" #define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) * 2) / \ @@ -27,6 +28,10 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(*item)); + if (ret) { +printk("failed to insert %Lu %Lu ret %d\n", objectid, pos, ret); +btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); + } BUG_ON(ret); item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); @@ -218,6 +223,31 @@ fail: return ret; } +int btrfs_csum_truncate(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u64 isize) +{ + struct btrfs_key key; + struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[0]); + int slot = path->slots[0]; + int ret; + u32 new_item_size; + u64 new_item_span; + u64 blocks; + + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (isize <= key.offset) + return 0; + new_item_span = isize - key.offset; + blocks = (new_item_span + root->blocksize - 1) / root->blocksize; + new_item_size = blocks * BTRFS_CRC32_SIZE; + if (new_item_size >= btrfs_item_size(leaf->items + slot)) + return 0; + ret = btrfs_truncate_item(trans, root, path, new_item_size); + BUG_ON(ret); + return ret; +} + int btrfs_csum_verify_file_block(struct btrfs_root *root, u64 objectid, u64 offset, char *data, size_t len) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5b87c4e9d491..0eb64d6eaf9d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -36,6 +36,13 @@ static struct address_space_operations btrfs_aops; static struct address_space_operations btrfs_symlink_aops; static struct file_operations btrfs_file_operations; +static int drop_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 start, u64 end, u64 *hint_block); +static int btrfs_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create); + static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -381,10 +388,12 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_disk_key *found_key; u32 found_type; struct btrfs_leaf *leaf; - struct btrfs_file_extent_item *fi = NULL; + struct btrfs_file_extent_item *fi; u64 extent_start = 0; u64 extent_num_blocks = 0; + u64 item_end = 0; int found_extent; + int del_item; path = btrfs_alloc_path(); BUG_ON(!path); @@ -394,6 +403,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, key.flags = (u32)-1; while(1) { btrfs_init_path(path); + fi = NULL; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { goto error; @@ -413,16 +423,52 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, found_type != BTRFS_DIR_INDEX_KEY && found_type != BTRFS_EXTENT_DATA_KEY) break; - if (btrfs_disk_key_offset(found_key) < inode->i_size) - break; - found_extent = 0; - if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) { + item_end = btrfs_disk_key_offset(found_key); + if (found_type == BTRFS_EXTENT_DATA_KEY) { fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); if (btrfs_file_extent_type(fi) != BTRFS_FILE_EXTENT_INLINE) { - u64 num_dec; + item_end += btrfs_file_extent_num_blocks(fi) << + inode->i_blkbits; + } + } + if (found_type == BTRFS_CSUM_ITEM_KEY) { + ret = btrfs_csum_truncate(trans, root, path, + inode->i_size); + BUG_ON(ret); + } + if (item_end < inode->i_size) { + if (found_type) { + btrfs_set_key_type(&key, found_type - 1); + continue; + } + break; + } + if (btrfs_disk_key_offset(found_key) >= inode->i_size) + del_item = 1; + else + del_item = 0; + found_extent = 0; + + if (found_type == BTRFS_EXTENT_DATA_KEY && + btrfs_file_extent_type(fi) != + BTRFS_FILE_EXTENT_INLINE) { + u64 num_dec; + if (!del_item) { + u64 orig_num_blocks = + btrfs_file_extent_num_blocks(fi); + extent_num_blocks = inode->i_size - + btrfs_disk_key_offset(found_key) + + root->blocksize - 1; + extent_num_blocks >>= inode->i_blkbits; + btrfs_set_file_extent_num_blocks(fi, + extent_num_blocks); + inode->i_blocks -= (orig_num_blocks - + extent_num_blocks) << 3; + mark_buffer_dirty(path->nodes[0]); + } else { extent_start = btrfs_file_extent_disk_blocknr(fi); extent_num_blocks = @@ -435,8 +481,12 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, } } } - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); + if (del_item) { + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + } else { + break; + } btrfs_release_path(root, path); if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, @@ -452,6 +502,68 @@ error: return ret; } +static int btrfs_truncate_page(struct address_space *mapping, loff_t from) +{ + struct inode *inode = mapping->host; + unsigned blocksize = 1 << inode->i_blkbits; + pgoff_t index = from >> PAGE_CACHE_SHIFT; + unsigned offset = from & (PAGE_CACHE_SIZE-1); + struct page *page; + char *kaddr; + int ret = 0; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 alloc_hint; + struct btrfs_key ins; + struct btrfs_trans_handle *trans; + + if ((offset & (blocksize - 1)) == 0) + goto out; + + ret = -ENOMEM; + page = grab_cache_page(mapping, index); + if (!page) + goto out; + + if (!PageUptodate(page)) { + ret = mpage_readpage(page, btrfs_get_block); + lock_page(page); + if (!PageUptodate(page)) { + ret = -EIO; + goto out; + } + } + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + ret = drop_extents(trans, root, inode, page->index << PAGE_CACHE_SHIFT, + (page->index + 1) << PAGE_CACHE_SHIFT, &alloc_hint); + BUG_ON(ret); + ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, + alloc_hint, (u64)-1, &ins, 1); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + page->index << PAGE_CACHE_SHIFT, + ins.objectid, 1, 1); + BUG_ON(ret); + SetPageChecked(page); + kaddr = kmap(page); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + btrfs_csum_file_block(trans, root, inode->i_ino, + page->index << PAGE_CACHE_SHIFT, + kaddr, PAGE_CACHE_SIZE); + kunmap(page); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); +out: + return ret; +} + static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; @@ -469,8 +581,11 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) u64 pos = (inode->i_size + mask) & ~mask; u64 hole_size; - if (attr->ia_size < pos) + if (attr->ia_size <= pos) goto out; + + btrfs_truncate_page(inode->i_mapping, inode->i_size); + hole_size = (attr->ia_size - pos + mask) & ~mask; hole_size >>= inode->i_blkbits; @@ -483,10 +598,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); } - +out: err = inode_setattr(inode, attr); -out: return err; } static void btrfs_delete_inode(struct inode *inode) @@ -1161,17 +1275,30 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, u64 extent_end = 0; u64 objectid = inode->i_ino; u32 found_type; + u64 alloc_hint = 0; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; struct btrfs_leaf *leaf; struct btrfs_disk_key *found_key; + struct btrfs_trans_handle *trans = NULL; path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); if (create) { WARN_ON(1); + /* this almost but not quite works */ + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + goto out; + } + ret = drop_extents(trans, root, inode, + iblock << inode->i_blkbits, + (iblock + 1) << inode->i_blkbits, + &alloc_hint); + BUG_ON(ret); } ret = btrfs_lookup_file_extent(NULL, root, path, @@ -1185,7 +1312,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (ret != 0) { if (path->slots[0] == 0) { btrfs_release_path(root, path); - goto out; + goto not_found; } path->slots[0]--; } @@ -1203,7 +1330,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, found_type != BTRFS_EXTENT_DATA_KEY) { extent_end = 0; extent_start = 0; - goto out; + goto not_found; } found_type = btrfs_file_extent_type(item); extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); @@ -1211,7 +1338,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, extent_start = extent_start >> inode->i_blkbits; extent_end = extent_start + btrfs_file_extent_num_blocks(item); err = 0; - if (blocknr == 0) + if (btrfs_file_extent_disk_blocknr(item) == 0) goto out; if (iblock >= extent_start && iblock < extent_end) { btrfs_map_bh_to_logical(root, result, blocknr + @@ -1227,7 +1354,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, extent_end = (extent_start + size) >> inode->i_blkbits; extent_start >>= inode->i_blkbits; if (iblock < extent_start || iblock > extent_end) { - goto out; + goto not_found; } ptr = btrfs_file_extent_inline_start(item); map = kmap(result->b_page); @@ -1239,7 +1366,24 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, SetPageChecked(result->b_page); btrfs_map_bh_to_logical(root, result, 0); } +not_found: + if (create) { + struct btrfs_key ins; + ret = btrfs_alloc_extent(trans, root, inode->i_ino, + 1, alloc_hint, (u64)-1, + &ins, 1); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + iblock << inode->i_blkbits, + ins.objectid, ins.offset, + ins.offset); + BUG_ON(ret); + SetPageChecked(result->b_page); + btrfs_map_bh_to_logical(root, result, ins.objectid); + } out: + if (trans) + err = btrfs_end_transaction(trans, root); btrfs_free_path(path); return err; } @@ -1258,7 +1402,7 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { - return nobh_prepare_write(page, from, to, btrfs_get_block); + return block_prepare_write(page, from, to, btrfs_get_block); } static void btrfs_write_super(struct super_block *sb) @@ -1500,12 +1644,13 @@ static void btrfs_truncate(struct inode *inode) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; - nobh_truncate_page(inode->i_mapping, inode->i_size); + btrfs_truncate_page(inode->i_mapping, inode->i_size); - /* FIXME, add redo link to tree so we don't leak on crash */ mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); + + /* FIXME, add redo link to tree so we don't leak on crash */ ret = btrfs_truncate_in_trans(trans, root, inode); BUG_ON(ret); btrfs_update_inode(trans, root, inode); @@ -1515,10 +1660,6 @@ static void btrfs_truncate(struct inode *inode) btrfs_btree_balance_dirty(root); } -/* - * Make sure any changes to nobh_commit_write() are reflected in - * nobh_truncate_page(), since it doesn't call commit_write(). - */ static int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { @@ -1528,6 +1669,7 @@ static int btrfs_commit_write(struct file *file, struct page *page, SetPageUptodate(page); bh = page_buffers(page); + set_buffer_uptodate(bh); if (buffer_mapped(bh) && bh->b_blocknr != 0) { set_page_dirty(page); } @@ -1972,8 +2114,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; - if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size && - (pos & (PAGE_CACHE_SIZE - 1))) { + if ((pos & (PAGE_CACHE_SIZE - 1))) { pinned[0] = grab_cache_page(inode->i_mapping, first_index); if (!PageUptodate(pinned[0])) { ret = mpage_readpage(pinned[0], btrfs_get_block); @@ -1983,10 +2124,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, unlock_page(pinned[0]); } } - if (first_index != last_index && - (last_index << PAGE_CACHE_SHIFT) < inode->i_size && - pos + count < inode->i_size && - (count & (PAGE_CACHE_SIZE - 1))) { + if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { pinned[1] = grab_cache_page(inode->i_mapping, last_index); if (!PageUptodate(pinned[1])) { ret = mpage_readpage(pinned[1], btrfs_get_block); @@ -2110,23 +2248,34 @@ static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, if (!PageChecked(page)) { /* FIXME, do it per block */ struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + struct buffer_head *bh; - int ret = btrfs_csum_verify_file_block(root, + if (page_has_buffers(page)) { + bh = page_buffers(page); + if (!buffer_mapped(bh)) { + SetPageChecked(page); + goto checked; + } + } + + ret = btrfs_csum_verify_file_block(root, page->mapping->host->i_ino, page->index << PAGE_CACHE_SHIFT, kmap(page), PAGE_CACHE_SIZE); if (ret) { if (ret != -ENOENT) { - printk("failed to verify ino %lu page %lu\n", + printk("failed to verify ino %lu page %lu ret %d\n", page->mapping->host->i_ino, - page->index); - memset(page_address(page), 0, PAGE_CACHE_SIZE); + page->index, ret); + memset(page_address(page), 1, PAGE_CACHE_SIZE); flush_dcache_page(page); } } SetPageChecked(page); kunmap(page); } +checked: /* * Faults on the destination of a read are common, so do it before * taking the kmap. From 1e2677e000fbc7f1892b1ea9f952f1667f4443f1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 May 2007 16:52:18 -0400 Subject: [PATCH 0155/2985] Btrfs: block group switching Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 5 ++- fs/btrfs/extent-tree.c | 87 ++++++++++++++++++++++++++++++------------ 2 files changed, 66 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 756ea9d0123b..1bb62867deda 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -249,10 +249,13 @@ struct btrfs_device_item { /* tag for the radix tree of block groups in ram */ #define BTRFS_BLOCK_GROUP_DIRTY 0 #define BTRFS_BLOCK_GROUP_AVAIL 1 -#define BTRFS_BLOCK_GROUP_HINTS 8 #define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) + + +#define BTRFS_BLOCK_GROUP_DATA 1 struct btrfs_block_group_item { __le64 used; + u8 flags; } __attribute__ ((__packed__)); struct btrfs_block_group_cache { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a366415e03a6..b6dc020bdde9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -232,6 +232,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; struct radix_tree_root *radix; + struct radix_tree_root *swap_radix; u64 used; u64 last = 0; u64 hint_last; @@ -239,14 +240,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int ret; int full_search = 0; int factor = 8; + int data_swap = 0; if (!owner) factor = 5; - if (data) + if (data) { radix = &info->block_group_data_radix; - else + swap_radix = &info->block_group_radix; + } else { radix = &info->block_group_radix; + swap_radix = &info->block_group_data_radix; + } if (search_start) { struct btrfs_block_group_cache *shint; @@ -334,15 +339,27 @@ again: cond_resched(); } if (!full_search) { -printk("find block group doing full search data %d start %Lu\n", data, search_start); last = search_start; full_search = 1; goto again; } + if (!data_swap) { + struct radix_tree_root *tmp = radix; + data_swap = 1; + radix = swap_radix; + swap_radix = tmp; + last = search_start; + goto again; + } if (!found_group) { printk("find block group bailing to zero data %d\n", data); ret = radix_tree_gang_lookup(radix, (void **)&found_group, 0, 1); + if (ret == 0) { + ret = radix_tree_gang_lookup(swap_radix, + (void **)&found_group, + 0, 1); + } BUG_ON(ret != 1); } found: @@ -552,7 +569,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num, int alloc, int mark_free) + u64 blocknr, u64 num, int alloc, int mark_free, + int data) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; @@ -560,6 +578,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 old_val; u64 block_in_group; u64 i; + int ret; while(total) { cache = lookup_block_group(info, blocknr); @@ -577,7 +596,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val = btrfs_block_group_used(&cache->item); num = min(total, cache->key.offset - block_in_group); if (alloc) { - old_val += num; if (blocknr > cache->last_alloc) cache->last_alloc = blocknr; if (!cache->data) { @@ -586,6 +604,30 @@ static int update_block_group(struct btrfs_trans_handle *trans, blocknr + i); } } + if (cache->data != data && + old_val < cache->key.offset / 2) { +printk("changing block group %Lu from %d to %d\n", cache->key.objectid, cache->data, data); + cache->data = data; + radix_tree_delete(cache->radix, + cache->key.objectid + + cache->key.offset - 1); + + if (data) { + cache->radix = + &info->block_group_data_radix; + cache->item.flags |= + BTRFS_BLOCK_GROUP_DATA; + } else { + cache->radix = &info->block_group_radix; + cache->item.flags &= + ~BTRFS_BLOCK_GROUP_DATA; + } + ret = radix_tree_insert(cache->radix, + cache->key.objectid + + cache->key.offset - 1, + (void *)cache); + } + old_val += num; } else { old_val -= num; if (blocknr < cache->first_free) @@ -596,8 +638,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, blocknr + i); } } - if (old_val < (cache->key.offset * 5) / 10 && - old_val + num >= (cache->key.offset * 5) / 10) { + if (old_val < cache->key.offset / 2 && + old_val + num >= cache->key.offset / 2) { printk("group %Lu now available\n", cache->key.objectid); radix_tree_tag_set(cache->radix, cache->key.objectid + @@ -779,7 +821,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) BUG(); ret = update_block_group(trans, root, blocknr, num_blocks, 0, - mark_free); + mark_free, 0); BUG_ON(ret); } btrfs_free_path(path); @@ -909,10 +951,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root } check_failed: - if (!full_scan && block_group->data != data) - WARN_ON(1); - - if (!data) + if (!block_group->data) search_start = find_search_start(root, &block_group, search_start, total_needed); else @@ -1142,7 +1181,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, info->extent_tree_insert[info->extent_tree_insert_nr++] = ins->objectid; ret = update_block_group(trans, root, - ins->objectid, ins->offset, 1, 0); + ins->objectid, ins->offset, 1, 0, 0); BUG_ON(ret); return 0; } @@ -1208,7 +1247,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (pending_ret) { return pending_ret; } - ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); + ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, + data); return 0; } @@ -1477,7 +1517,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_leaf *leaf; u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; u64 used; - u64 nr = 0; root = info->extent_root; key.objectid = 0; @@ -1505,13 +1544,16 @@ int btrfs_read_block_groups(struct btrfs_root *root) break; } - if (nr % 3) - radix = &info->block_group_data_radix; - else - radix = &info->block_group_radix; - bi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_block_group_item); + if (bi->flags & BTRFS_BLOCK_GROUP_DATA) { + radix = &info->block_group_data_radix; + cache->data = 1; + } else { + radix = &info->block_group_radix; + cache->data = 0; + } + memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; @@ -1520,10 +1562,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->pinned = 0; cache->cached = 0; - if (nr % 3) - cache->data = 1; - else - cache->data = 0; cache->radix = radix; key.objectid = found_key.objectid + found_key.offset; @@ -1541,7 +1579,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) if (key.objectid >= btrfs_super_total_blocks(info->disk_super)) break; - nr++; } btrfs_free_path(path); From fbdc762b4e1833b5d75cada5aabeadccd8379792 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 30 May 2007 10:22:12 -0400 Subject: [PATCH 0156/2985] Btrfs: use a separate flag for search_start vs a hint in find_free_extent Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 69 +++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b6dc020bdde9..85616b458e18 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5,8 +5,9 @@ #include "transaction.h" static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins, int data); + *orig_root, u64 num_blocks, u64 search_start, + u64 search_end, u64 hint_block, + struct btrfs_key *ins, int data); static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -157,12 +158,6 @@ static struct btrfs_block_group_cache *lookup_block_group(struct block_group->key.objectid + block_group->key.offset) return block_group; } - WARN_ON(1); - printk("lookup_block_group fails for blocknr %Lu\n", blocknr); - printk("last ret was %d\n", ret); - if (ret) { - printk("last block group was %Lu %Lu\n", block_group->key.objectid, block_group->key.offset); - } return NULL; } @@ -378,7 +373,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_key ins; u32 refs; - find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, + find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, 0, &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); @@ -495,7 +490,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins, 0); + find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0); ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); BUG_ON(ret); bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], @@ -788,7 +783,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; - find_free_extent(trans, root, 0, 0, (u64)-1, &ins, 0); + find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); @@ -906,7 +901,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root */ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins, int data) + search_end, u64 hint_block, + struct btrfs_key *ins, int data) { struct btrfs_path *path; struct btrfs_key key; @@ -926,6 +922,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; + int wrapped = 0; u64 limit; path = btrfs_alloc_path(); @@ -940,10 +937,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root } if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(info->disk_super); - if (search_start) { - block_group = lookup_block_group(info, search_start); + if (hint_block) { + block_group = lookup_block_group(info, hint_block); block_group = btrfs_find_block_group(root, block_group, - search_start, data, 1); + hint_block, data, 1); } else { block_group = btrfs_find_block_group(root, trans->block_group, 0, @@ -954,7 +951,7 @@ check_failed: if (!block_group->data) search_start = find_search_start(root, &block_group, search_start, total_needed); - else + else if (!full_scan) search_start = max(block_group->last_alloc, search_start); btrfs_init_path(path); @@ -1039,7 +1036,7 @@ check_failed: start_found = 1; last_block = key.objectid + key.offset; - if (last_block >= block_group->key.objectid + + if (!full_scan && last_block >= block_group->key.objectid + block_group->key.offset) { btrfs_release_path(root, path); search_start = block_group->key.objectid + @@ -1059,10 +1056,15 @@ check_pending: BUG_ON(ins->objectid < search_start); if (ins->objectid + num_blocks >= search_end) { - if (full_scan) - return -ENOSPC; + if (full_scan) { + ret = -ENOSPC; + goto error; + } search_start = orig_search_start; - full_scan = 1; + if (wrapped) + full_scan = 1; + else + wrapped = 1; goto new_group; } for (test_block = ins->objectid; @@ -1132,14 +1134,20 @@ check_pending: new_group: if (search_start + num_blocks >= search_end) { search_start = orig_search_start; -printk("doing full scan!\n"); - full_scan = 1; + if (full_scan) { + ret = -ENOSPC; + goto error; + } + if (wrapped) + full_scan = 1; + else + wrapped = 1; } block_group = lookup_block_group(info, search_start); + cond_resched(); if (!full_scan) block_group = btrfs_find_block_group(root, block_group, search_start, data, 0); - cond_resched(); goto check_failed; error: @@ -1156,12 +1164,13 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 search_start, + u64 num_blocks, u64 hint_block, u64 search_end, struct btrfs_key *ins, int data) { int ret; int pending_ret; u64 super_blocks_used; + u64 search_start = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; @@ -1193,7 +1202,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, */ if (data) { ret = find_free_extent(trans, root, 0, 0, - search_end, &prealloc_key, 0); + search_end, 0, &prealloc_key, 0); if (ret) { return ret; } @@ -1204,9 +1213,11 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_start = info->extent_tree_prealloc[0] + 1; } } + if (hint_block < search_start) + hint_block = search_start; /* do the real allocation */ ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, ins, data); + search_end, hint_block, ins, data); if (ret) { return ret; } @@ -1226,8 +1237,12 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, else search_start = ins->objectid + ins->offset; + if (hint_block < search_start) + hint_block = search_start; + ret = find_free_extent(trans, root, 0, search_start, - search_end, &prealloc_key, 0); + search_end, hint_block, + &prealloc_key, 0); if (ret) { return ret; } From fabb568183de7996257080260d3537fa75b3667e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 7 Jun 2007 22:13:21 -0400 Subject: [PATCH 0157/2985] Btrfs: d_type optimization Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 10 +++++++ fs/btrfs/extent-tree.c | 1 + fs/btrfs/super.c | 66 ++++++++++++++++++++++++++++++++++++------ 3 files changed, 68 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1bb62867deda..a1c95c980fdc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -34,6 +34,16 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_CRC32_SIZE 4 #define BTRFS_EMPTY_DIR_SIZE 6 +#define BTRFS_FT_UNKNOWN 0 +#define BTRFS_FT_REG_FILE 1 +#define BTRFS_FT_DIR 2 +#define BTRFS_FT_CHRDEV 3 +#define BTRFS_FT_BLKDEV 4 +#define BTRFS_FT_FIFO 5 +#define BTRFS_FT_SOCK 6 +#define BTRFS_FT_SYMLINK 7 +#define BTRFS_FT_MAX 8 + /* * the key defines the order in the tree, and so it also defines (optimal) * block layout. objectid corresonds to the inode number. The flags diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 85616b458e18..645a4228bfe2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1264,6 +1264,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, data); + BUG_ON(ret); return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0eb64d6eaf9d..b58b4cf66766 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -43,6 +43,18 @@ static int drop_extents(struct btrfs_trans_handle *trans, static int btrfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *result, int create); + +#define S_SHIFT 12 +static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, + [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK, + [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, +}; + static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -785,6 +797,9 @@ static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path, break; } } +static unsigned char btrfs_filetype_table[] = { + DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK +}; static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { @@ -799,7 +814,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct btrfs_leaf *leaf; int slot; int advance; - unsigned char d_type = DT_UNKNOWN; + unsigned char d_type; int over = 0; u32 di_cur; u32 di_total; @@ -853,6 +868,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) di_cur = 0; di_total = btrfs_item_size(leaf->items + slot); while(di_cur < di_total) { + d_type = btrfs_filetype_table[btrfs_dir_type(di)]; over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), btrfs_disk_key_offset(&item->key), @@ -1012,6 +1028,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return inode; } +static inline u8 btrfs_inode_type(struct inode *inode) +{ + return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; +} + static int btrfs_add_link(struct btrfs_trans_handle *trans, struct dentry *dentry, struct inode *inode) { @@ -1026,7 +1047,7 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_insert_dir_item(trans, root, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, - &key, 0); + &key, btrfs_inode_type(inode)); if (ret == 0) { dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; ret = btrfs_update_inode(trans, root, @@ -1150,12 +1171,12 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, - &key, 1); + &key, BTRFS_FT_DIR); if (ret) goto error; key.objectid = dirid; ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, - &key, 1); + &key, BTRFS_FT_DIR); if (ret) goto error; error: @@ -1265,6 +1286,10 @@ printk("btrfs sync_fs\n"); return 0; } +#define BTRFS_GET_BLOCK_NO_CREATE 0 +#define BTRFS_GET_BLOCK_CREATE 1 +#define BTRFS_GET_BLOCK_NO_DIRECT 2 + static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { @@ -1286,7 +1311,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - if (create) { + if (create & BTRFS_GET_BLOCK_CREATE) { WARN_ON(1); /* this almost but not quite works */ trans = btrfs_start_transaction(root, 1); @@ -1349,6 +1374,11 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, char *ptr; char *map; u32 size; + + if (create & BTRFS_GET_BLOCK_NO_DIRECT) { + err = -EINVAL; + goto out; + } size = btrfs_file_extent_inline_len(leaf->items + path->slots[0]); extent_end = (extent_start + size) >> inode->i_blkbits; @@ -1367,7 +1397,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, btrfs_map_bh_to_logical(root, result, 0); } not_found: - if (create) { + if (create & BTRFS_GET_BLOCK_CREATE) { struct btrfs_key ins; ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, alloc_hint, (u64)-1, @@ -1399,6 +1429,21 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, return err; } +static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + mutex_lock(&root->fs_info->fs_mutex); + btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + +static sector_t btrfs_bmap(struct address_space *as, sector_t block) +{ + return generic_block_bmap(as, block, btrfs_get_block_bmap); +} + static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { @@ -2428,7 +2473,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) key.offset = (u64)-1; dir = root->fs_info->sb->s_root->d_inode; ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, dir->i_ino, &key, 0); + name, namelen, dir->i_ino, &key, + BTRFS_FT_DIR); BUG_ON(ret); ret = btrfs_commit_transaction(trans, root); @@ -2505,7 +2551,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, name, namelen, root->fs_info->sb->s_root->d_inode->i_ino, - &key, 0); + &key, BTRFS_FT_DIR); BUG_ON(ret); @@ -2833,7 +2879,8 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, btrfs_release_path(root, path); ret = btrfs_insert_dir_item(trans, root, "..", 2, - old_inode->i_ino, location, 0); + old_inode->i_ino, location, + BTRFS_FT_DIR); if (ret) goto out_fail; } @@ -3003,6 +3050,7 @@ static struct address_space_operations btrfs_aops = { .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, .commit_write = btrfs_commit_write, + .bmap = btrfs_bmap, }; static struct address_space_operations btrfs_symlink_aops = { From 08607c1b182b3b8b54d7141a0c012cda17d201e6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 8 Jun 2007 15:33:54 -0400 Subject: [PATCH 0158/2985] Btrfs: add compat ioctl Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 - fs/btrfs/ctree.h | 5 ++-- fs/btrfs/disk-io.c | 4 +-- fs/btrfs/super.c | 27 +++++++++++++++++-- fs/btrfs/transaction.c | 60 ++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.h | 8 ++++++ 6 files changed, 98 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 861e34ae0979..61d25b5de287 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -7,7 +7,6 @@ * Get rid of struct ctree_path, limiting tree levels held at one time * Add generation number to key pointer in nodes * Add generation number to inode -* Add ability to switch a block group from data to metadata or vice versa * forbid cross subvolume renames and hardlinks * Release * Do real tree locking diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a1c95c980fdc..74e6aadf6bb7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include "bit-radix.h" struct btrfs_trans_handle; @@ -281,6 +281,7 @@ struct btrfs_block_group_cache { }; struct crypto_hash; + struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; @@ -308,9 +309,9 @@ struct btrfs_fs_info { struct mutex fs_mutex; struct list_head trans_list; struct crypto_hash *hash_tfm; + struct delayed_work trans_work; spinlock_t hash_lock; int do_barriers; - struct kobject kobj; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3da057f78bc8..048282f06afb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -577,6 +577,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->extent_tree_insert_nr = 0; fs_info->extent_tree_prealloc_nr = 0; + INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -648,7 +649,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; - memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); mutex_unlock(&fs_info->fs_mutex); return tree_root; } @@ -746,6 +746,7 @@ int close_ctree(struct btrfs_root *root) struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = root->fs_info; + btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); @@ -776,7 +777,6 @@ int close_ctree(struct btrfs_root *root) del_fs_roots(fs_info); kfree(fs_info->extent_root); kfree(fs_info->tree_root); - kobject_unregister(&fs_info->kobj); return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b58b4cf66766..5864917953a4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -950,7 +951,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) return -ENOMEM; } sb->s_root = root_dentry; - + btrfs_transaction_queue_work(tree_root, HZ * 30); return 0; } @@ -1452,7 +1453,7 @@ static int btrfs_prepare_write(struct file *file, struct page *page, static void btrfs_write_super(struct super_block *sb) { - btrfs_sync_fs(sb, 1); + sb->s_dirt = 0; } static int btrfs_readpage(struct file *file, struct page *page) @@ -2698,6 +2699,20 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int return ret; } +#ifdef CONFIG_COMPAT +static long btrfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct inode *inode = file->f_path.dentry->d_inode; + int ret; + lock_kernel(); + ret = btrfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; + +} +#endif + static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; struct kmem_cache *btrfs_transaction_cachep; @@ -3042,6 +3057,9 @@ static struct file_operations btrfs_dir_file_operations = { .read = generic_read_dir, .readdir = btrfs_readdir, .ioctl = btrfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = btrfs_compat_ioctl, +#endif }; static struct address_space_operations btrfs_aops = { @@ -3073,6 +3091,9 @@ static struct file_operations btrfs_file_operations = { .open = generic_file_open, .ioctl = btrfs_ioctl, .fsync = btrfs_sync_file, +#ifdef CONFIG_COMPAT + .compat_ioctl = btrfs_compat_ioctl, +#endif }; static struct inode_operations btrfs_symlink_inode_operations = { @@ -3085,6 +3106,7 @@ static int __init init_btrfs_fs(void) { int err; printk("btrfs loaded!\n"); + btrfs_init_transaction_sys(); err = init_inodecache(); if (err) return err; @@ -3095,6 +3117,7 @@ static int __init init_btrfs_fs(void) static void __exit exit_btrfs_fs(void) { + btrfs_exit_transaction_sys(); destroy_inodecache(); unregister_filesystem(&btrfs_fs_type); printk("btrfs unloaded\n"); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f0f03121b7b2..bf7eef67ba0b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -8,6 +8,8 @@ static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; +static struct workqueue_struct *trans_wq; + #define BTRFS_ROOT_TRANS_TAG 0 #define TRANS_MAGIC 0xE1E10E @@ -44,6 +46,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->in_commit = 0; cur_trans->use_count = 1; cur_trans->commit_done = 0; + cur_trans->start_time = get_seconds(); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); init_bit_radix(&cur_trans->dirty_pages); } @@ -350,3 +353,60 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } +void btrfs_transaction_cleaner(struct work_struct *work) +{ + struct btrfs_fs_info *fs_info = container_of(work, + struct btrfs_fs_info, + trans_work.work); + + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_transaction *cur; + struct btrfs_trans_handle *trans; + unsigned long now; + unsigned long delay = HZ * 30; + int ret; + +printk("btrfs transaction cleaner\n"); + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->trans_mutex); + cur = root->fs_info->running_transaction; + if (!cur) { + mutex_unlock(&root->fs_info->trans_mutex); + goto out; + } + now = get_seconds(); + if (now < cur->start_time || now - cur->start_time < 30) { + mutex_unlock(&root->fs_info->trans_mutex); + delay = HZ * 5; + goto out; + } + mutex_unlock(&root->fs_info->trans_mutex); +printk("forcing commit\n"); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_commit_transaction(trans, root); +out: + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_transaction_queue_work(root, delay); +} + +void btrfs_transaction_queue_work(struct btrfs_root *root, int delay) +{ + queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay); +} + +void btrfs_transaction_flush_work(struct btrfs_root *root) +{ + cancel_rearming_delayed_workqueue(trans_wq, &root->fs_info->trans_work); + flush_workqueue(trans_wq); +} + +void __init btrfs_init_transaction_sys(void) +{ + trans_wq = create_workqueue("btrfs"); +} + +void __exit btrfs_exit_transaction_sys(void) +{ + destroy_workqueue(trans_wq); +} + diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index b5378119e76c..0b08208be853 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -11,6 +11,7 @@ struct btrfs_transaction { int magic; struct list_head list; struct radix_tree_root dirty_pages; + unsigned long start_time; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; @@ -47,4 +48,11 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); + +void btrfs_transaction_cleaner(struct work_struct *work); +void btrfs_transaction_flush_work(struct btrfs_root *root); +void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); +void btrfs_init_transaction_sys(void); +void btrfs_exit_transaction_sys(void); + #endif From facda1e787d43191a3368c322f682054991c41b8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 8 Jun 2007 18:11:48 -0400 Subject: [PATCH 0159/2985] Btrfs: get forced transaction commits via workqueue Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 +++- fs/btrfs/disk-io.c | 4 ++++ fs/btrfs/transaction.c | 30 ++++++++++++++++++++---------- fs/btrfs/transaction.h | 3 --- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 74e6aadf6bb7..e93ba1a5c812 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -283,6 +283,7 @@ struct btrfs_block_group_cache { struct crypto_hash; struct btrfs_fs_info { + spinlock_t hash_lock; struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *dev_root; @@ -308,10 +309,11 @@ struct btrfs_fs_info { struct mutex trans_mutex; struct mutex fs_mutex; struct list_head trans_list; + struct list_head dead_roots; struct crypto_hash *hash_tfm; struct delayed_work trans_work; - spinlock_t hash_lock; int do_barriers; + int closing; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 048282f06afb..751069c0e9f5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -563,6 +563,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); + INIT_LIST_HEAD(&fs_info->dead_roots); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; @@ -577,6 +578,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->extent_tree_insert_nr = 0; fs_info->extent_tree_prealloc_nr = 0; + fs_info->closing = 0; + INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, @@ -746,6 +749,7 @@ int close_ctree(struct btrfs_root *root) struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = root->fs_info; + fs_info->closing = 1; btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bf7eef67ba0b..b859db395fd5 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -12,12 +12,10 @@ static struct workqueue_struct *trans_wq; #define BTRFS_ROOT_TRANS_TAG 0 -#define TRANS_MAGIC 0xE1E10E static void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(transaction->use_count == 0); transaction->use_count--; - WARN_ON(transaction->magic != TRANS_MAGIC); if (transaction->use_count == 0) { WARN_ON(total_trans == 0); total_trans--; @@ -42,7 +40,6 @@ static int join_transaction(struct btrfs_root *root) cur_trans->transid = root->fs_info->generation; init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); - cur_trans->magic = TRANS_MAGIC; cur_trans->in_commit = 0; cur_trans->use_count = 1; cur_trans->commit_done = 0; @@ -83,7 +80,6 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, h->block_group = NULL; root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); - h->magic = h->magic2 = TRANS_MAGIC; return h; } @@ -92,8 +88,6 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans; - WARN_ON(trans->magic != TRANS_MAGIC); - WARN_ON(trans->magic2 != TRANS_MAGIC); mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; WARN_ON(cur_trans->num_writers < 1); @@ -257,8 +251,8 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, struct dirty_root *dirty; struct btrfs_trans_handle *trans; int ret; - while(!list_empty(list)) { + mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); trans = btrfs_start_transaction(tree_root, 1); @@ -271,6 +265,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); kfree(dirty); + mutex_unlock(&tree_root->fs_info->fs_mutex); } return 0; } @@ -346,10 +341,18 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); put_transaction(cur_trans); + if (root->fs_info->closing) + list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots); + else + list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); mutex_unlock(&root->fs_info->trans_mutex); kmem_cache_free(btrfs_trans_handle_cachep, trans); - drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); + if (root->fs_info->closing) { + mutex_unlock(&root->fs_info->fs_mutex); + drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); + mutex_lock(&root->fs_info->fs_mutex); + } return ret; } @@ -362,11 +365,19 @@ void btrfs_transaction_cleaner(struct work_struct *work) struct btrfs_root *root = fs_info->tree_root; struct btrfs_transaction *cur; struct btrfs_trans_handle *trans; + struct list_head dirty_roots; unsigned long now; unsigned long delay = HZ * 30; int ret; -printk("btrfs transaction cleaner\n"); + INIT_LIST_HEAD(&dirty_roots); + mutex_lock(&root->fs_info->trans_mutex); + list_splice_init(&root->fs_info->dead_roots, &dirty_roots); + mutex_unlock(&root->fs_info->trans_mutex); + + if (!list_empty(&dirty_roots)) { + drop_dirty_roots(root, &dirty_roots); + } mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; @@ -381,7 +392,6 @@ printk("btrfs transaction cleaner\n"); goto out; } mutex_unlock(&root->fs_info->trans_mutex); -printk("forcing commit\n"); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); out: diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 0b08208be853..f25b4900db45 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -8,7 +8,6 @@ struct btrfs_transaction { int in_commit; int use_count; int commit_done; - int magic; struct list_head list; struct radix_tree_root dirty_pages; unsigned long start_time; @@ -17,13 +16,11 @@ struct btrfs_transaction { }; struct btrfs_trans_handle { - int magic; u64 transid; unsigned long blocks_reserved; unsigned long blocks_used; struct btrfs_transaction *transaction; struct btrfs_block_group_cache *block_group; - int magic2; }; From ad693af684757478e5fcb225aef319cab7ba6c75 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 9 Jun 2007 08:19:57 -0400 Subject: [PATCH 0160/2985] Btrfs: reap dead roots right after commit Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b859db395fd5..a2c5820f1d3d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -371,13 +371,6 @@ void btrfs_transaction_cleaner(struct work_struct *work) int ret; INIT_LIST_HEAD(&dirty_roots); - mutex_lock(&root->fs_info->trans_mutex); - list_splice_init(&root->fs_info->dead_roots, &dirty_roots); - mutex_unlock(&root->fs_info->trans_mutex); - - if (!list_empty(&dirty_roots)) { - drop_dirty_roots(root, &dirty_roots); - } mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; @@ -396,6 +389,14 @@ void btrfs_transaction_cleaner(struct work_struct *work) ret = btrfs_commit_transaction(trans, root); out: mutex_unlock(&root->fs_info->fs_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + list_splice_init(&root->fs_info->dead_roots, &dirty_roots); + mutex_unlock(&root->fs_info->trans_mutex); + + if (!list_empty(&dirty_roots)) { + drop_dirty_roots(root, &dirty_roots); + } btrfs_transaction_queue_work(root, delay); } From 0cf6c620176a294bdf1bedddc492f6ae857e0019 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 9 Jun 2007 09:22:25 -0400 Subject: [PATCH 0161/2985] Btrfs: remove device tree Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 105 +------------------ fs/btrfs/disk-io.c | 224 ++--------------------------------------- fs/btrfs/super.c | 88 ---------------- fs/btrfs/transaction.c | 6 -- 4 files changed, 10 insertions(+), 413 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e93ba1a5c812..5ab25a0cb16a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -16,11 +16,10 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_BtRfS_M" #define BTRFS_ROOT_TREE_OBJECTID 1ULL -#define BTRFS_DEV_TREE_OBJECTID 2ULL -#define BTRFS_EXTENT_TREE_OBJECTID 3ULL -#define BTRFS_FS_TREE_OBJECTID 4ULL -#define BTRFS_ROOT_TREE_DIR_OBJECTID 5ULL -#define BTRFS_FIRST_FREE_OBJECTID 6ULL +#define BTRFS_EXTENT_TREE_OBJECTID 2ULL +#define BTRFS_FS_TREE_OBJECTID 3ULL +#define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL +#define BTRFS_FIRST_FREE_OBJECTID 5ULL /* * we can actually store much bigger names, but lets not confuse the rest @@ -111,12 +110,6 @@ struct btrfs_super_block { __le64 total_blocks; __le64 blocks_used; __le64 root_dir_objectid; - __le64 last_device_id; - /* fields below here vary with the underlying disk */ - __le64 device_block_start; - __le64 device_num_blocks; - __le64 device_root; - __le64 device_id; } __attribute__ ((__packed__)); /* @@ -251,11 +244,6 @@ struct btrfs_csum_item { u8 csum; } __attribute__ ((__packed__)); -struct btrfs_device_item { - __le16 pathlen; - __le64 device_id; -} __attribute__ ((__packed__)); - /* tag for the radix tree of block groups in ram */ #define BTRFS_BLOCK_GROUP_DIRTY 0 #define BTRFS_BLOCK_GROUP_AVAIL 1 @@ -286,11 +274,9 @@ struct btrfs_fs_info { spinlock_t hash_lock; struct btrfs_root *extent_root; struct btrfs_root *tree_root; - struct btrfs_root *dev_root; struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; - struct radix_tree_root dev_radix; struct radix_tree_root block_group_radix; struct radix_tree_root block_group_data_radix; struct radix_tree_root extent_map_radix; @@ -385,11 +371,6 @@ struct btrfs_root { */ #define BTRFS_BLOCK_GROUP_ITEM_KEY 34 -/* - * dev items list the devices that make up the FS - */ -#define BTRFS_DEV_ITEM_KEY 35 - /* * string items are for debugging. They just store a short string of * data in the FS @@ -880,62 +861,6 @@ static inline void btrfs_set_super_root_dir(struct btrfs_super_block *s, u64 s->root_dir_objectid = cpu_to_le64(val); } -static inline u64 btrfs_super_last_device_id(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->last_device_id); -} - -static inline void btrfs_set_super_last_device_id(struct btrfs_super_block *s, - u64 val) -{ - s->last_device_id = cpu_to_le64(val); -} - -static inline u64 btrfs_super_device_id(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->device_id); -} - -static inline void btrfs_set_super_device_id(struct btrfs_super_block *s, - u64 val) -{ - s->device_id = cpu_to_le64(val); -} - -static inline u64 btrfs_super_device_block_start(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->device_block_start); -} - -static inline void btrfs_set_super_device_block_start(struct btrfs_super_block - *s, u64 val) -{ - s->device_block_start = cpu_to_le64(val); -} - -static inline u64 btrfs_super_device_num_blocks(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->device_num_blocks); -} - -static inline void btrfs_set_super_device_num_blocks(struct btrfs_super_block - *s, u64 val) -{ - s->device_num_blocks = cpu_to_le64(val); -} - -static inline u64 btrfs_super_device_root(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->device_root); -} - -static inline void btrfs_set_super_device_root(struct btrfs_super_block - *s, u64 val) -{ - s->device_root = cpu_to_le64(val); -} - - static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) { return (u8 *)l->items; @@ -1031,28 +956,6 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } -static inline u16 btrfs_device_pathlen(struct btrfs_device_item *d) -{ - return le16_to_cpu(d->pathlen); -} - -static inline void btrfs_set_device_pathlen(struct btrfs_device_item *d, - u16 val) -{ - d->pathlen = cpu_to_le16(val); -} - -static inline u64 btrfs_device_id(struct btrfs_device_item *d) -{ - return le64_to_cpu(d->device_id); -} - -static inline void btrfs_set_device_id(struct btrfs_device_item *d, - u64 val) -{ - d->device_id = cpu_to_le64(val); -} - static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 751069c0e9f5..8a88404525e8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -11,47 +11,9 @@ #include "transaction.h" #include "btrfs_inode.h" -struct dev_lookup { - u64 block_start; - u64 num_blocks; - u64 device_id; - struct block_device *bdev; -}; - -int btrfs_insert_dev_radix(struct btrfs_root *root, - struct block_device *bdev, - u64 device_id, - u64 block_start, - u64 num_blocks) -{ - struct dev_lookup *lookup; - int ret; - - lookup = kmalloc(sizeof(*lookup), GFP_NOFS); - if (!lookup) - return -ENOMEM; - lookup->block_start = block_start; - lookup->num_blocks = num_blocks; - lookup->bdev = bdev; - lookup->device_id = device_id; - - ret = radix_tree_insert(&root->fs_info->dev_radix, block_start + - num_blocks - 1, lookup); - return ret; -} - u64 bh_blocknr(struct buffer_head *bh) { - int blkbits = bh->b_page->mapping->host->i_blkbits; - u64 blocknr = bh->b_page->index << (PAGE_CACHE_SHIFT - blkbits); - unsigned long offset; - - if (PageHighMem(bh->b_page)) - offset = (unsigned long)bh->b_data; - else - offset = bh->b_data - (char *)page_address(bh->b_page); - blocknr += offset >> (PAGE_CACHE_SHIFT - blkbits); - return blocknr; + return bh->b_blocknr; } static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) @@ -102,32 +64,14 @@ out_unlock: int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical) { - struct dev_lookup *lookup[2]; - - int ret; - if (logical == 0) { bh->b_bdev = NULL; bh->b_blocknr = 0; set_buffer_mapped(bh); - return 0; + } else { + map_bh(bh, root->fs_info->sb, logical); } - root = root->fs_info->dev_root; - ret = radix_tree_gang_lookup(&root->fs_info->dev_radix, - (void **)lookup, - (unsigned long)logical, - ARRAY_SIZE(lookup)); - if (ret == 0 || lookup[0]->block_start > logical || - lookup[0]->block_start + lookup[0]->num_blocks <= logical) { - ret = -ENOENT; - goto out; - } - bh->b_bdev = lookup[0]->bdev; - bh->b_blocknr = logical - lookup[0]->block_start; - set_buffer_mapped(bh); - ret = 0; -out: - return ret; + return 0; } struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, @@ -382,24 +326,18 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, u64 highest_inode; int ret = 0; -printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags); root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); - if (root) { -printk("found %p in cache\n", root); + if (root) return root; - } root = kmalloc(sizeof(*root), GFP_NOFS); - if (!root) { -printk("failed1\n"); + if (!root) return ERR_PTR(-ENOMEM); - } if (location->offset == (u64)-1) { ret = find_and_setup_root(fs_info->sb->s_blocksize, fs_info->tree_root, fs_info, location->objectid, root); if (ret) { -printk("failed2\n"); kfree(root); return ERR_PTR(ret); } @@ -413,7 +351,6 @@ printk("failed2\n"); BUG_ON(!path); ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); if (ret != 0) { -printk("internal search_slot gives us %d\n", ret); if (ret > 0) ret = -ENOENT; goto out; @@ -435,13 +372,11 @@ out: btrfs_root_blocknr(&root->root_item)); BUG_ON(!root->node); insert: -printk("inserting %p\n", root); root->ref_cows = 1; ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, root); if (ret) { -printk("radix_tree_insert gives us %d\n", ret); brelse(root->node); kfree(root); return ERR_PTR(ret); @@ -450,116 +385,25 @@ printk("radix_tree_insert gives us %d\n", ret); if (ret == 0) { root->highest_inode = highest_inode; root->last_inode_alloc = highest_inode; -printk("highest inode is %Lu\n", highest_inode); } -printk("all worked\n"); return root; } -static int btrfs_open_disk(struct btrfs_root *root, u64 device_id, - u64 block_start, u64 num_blocks, - char *filename, int name_len) -{ - char *null_filename; - struct block_device *bdev; - int ret; - - null_filename = kmalloc(name_len + 1, GFP_NOFS); - if (!null_filename) - return -ENOMEM; - memcpy(null_filename, filename, name_len); - null_filename[name_len] = '\0'; - - bdev = open_bdev_excl(null_filename, O_RDWR, root->fs_info->sb); - if (IS_ERR(bdev)) { - ret = PTR_ERR(bdev); - goto out; - } - set_blocksize(bdev, root->fs_info->sb->s_blocksize); - ret = btrfs_insert_dev_radix(root, bdev, device_id, - block_start, num_blocks); - BUG_ON(ret); - ret = 0; -out: - kfree(null_filename); - return ret; -} - -static int read_device_info(struct btrfs_root *root) -{ - struct btrfs_path *path; - int ret; - struct btrfs_key key; - struct btrfs_leaf *leaf; - struct btrfs_device_item *dev_item; - int nritems; - int slot; - - root = root->fs_info->dev_root; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - key.objectid = 0; - key.offset = 0; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY); - - mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); - while(1) { - slot = path->slots[0]; - if (slot >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret) - break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); - slot = path->slots[0]; - } - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); - if (btrfs_key_type(&key) != BTRFS_DEV_ITEM_KEY) { - path->slots[0]++; - continue; - } - dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_device_item); -printk("found key %Lu %Lu\n", key.objectid, key.offset); - if (btrfs_device_id(dev_item) != - btrfs_super_device_id(root->fs_info->disk_super)) { - ret = btrfs_open_disk(root, btrfs_device_id(dev_item), - key.objectid, key.offset, - (char *)(dev_item + 1), - btrfs_device_pathlen(dev_item)); - BUG_ON(ret); - } - path->slots[0]++; - } - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - return 0; -} - struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), - GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; struct btrfs_super_block *disk_super; - struct dev_lookup *dev_lookup; init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_map_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); - INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); @@ -568,7 +412,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; - fs_info->dev_root = dev_root; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -595,19 +438,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - __setup_root(sb->s_blocksize, dev_root, - fs_info, BTRFS_DEV_TREE_OBJECTID); - __setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - dev_lookup = kmalloc(sizeof(*dev_lookup), GFP_NOFS); - dev_lookup->block_start = 0; - dev_lookup->num_blocks = (u32)-2; - dev_lookup->bdev = sb->s_bdev; - dev_lookup->device_id = 0; - ret = radix_tree_insert(&fs_info->dev_radix, (u32)-2, dev_lookup); - BUG_ON(ret); fs_info->sb_buffer = read_tree_block(tree_root, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); @@ -622,24 +455,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_super_total_blocks(disk_super) << fs_info->btree_inode->i_blkbits); - radix_tree_delete(&fs_info->dev_radix, (u32)-2); - dev_lookup->block_start = btrfs_super_device_block_start(disk_super); - dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); - dev_lookup->device_id = btrfs_super_device_id(disk_super); - - ret = radix_tree_insert(&fs_info->dev_radix, - dev_lookup->block_start + - dev_lookup->num_blocks - 1, dev_lookup); - BUG_ON(ret); - fs_info->disk_super = disk_super; - - dev_root->node = read_tree_block(tree_root, - btrfs_super_device_root(disk_super)); - - ret = read_device_info(dev_root); - BUG_ON(ret); - tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); @@ -719,30 +535,6 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) return 0; } -static int free_dev_radix(struct btrfs_fs_info *fs_info) -{ - struct dev_lookup *lookup[8]; - struct block_device *super_bdev = fs_info->sb->s_bdev; - int ret; - int i; - while(1) { - ret = radix_tree_gang_lookup(&fs_info->dev_radix, - (void **)lookup, 0, - ARRAY_SIZE(lookup)); - if (!ret) - break; - for (i = 0; i < ret; i++) { - if (lookup[i]->bdev != super_bdev) - close_bdev_excl(lookup[i]->bdev); - radix_tree_delete(&fs_info->dev_radix, - lookup[i]->block_start + - lookup[i]->num_blocks - 1); - kfree(lookup[i]); - } - } - return 0; -} - int close_ctree(struct btrfs_root *root) { int ret; @@ -765,9 +557,6 @@ int close_ctree(struct btrfs_root *root) if (fs_info->extent_root->node) btrfs_block_release(fs_info->extent_root, fs_info->extent_root->node); - if (fs_info->dev_root->node) - btrfs_block_release(fs_info->dev_root, - fs_info->dev_root->node); if (fs_info->tree_root->node) btrfs_block_release(fs_info->tree_root, fs_info->tree_root->node); @@ -776,7 +565,6 @@ int close_ctree(struct btrfs_root *root) truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); - free_dev_radix(fs_info); btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); kfree(fs_info->extent_root); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5864917953a4..2b24a5a2be63 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2566,83 +2566,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) return 0; } -static int add_disk(struct btrfs_root *root, char *name, int namelen) -{ - struct block_device *bdev; - struct btrfs_path *path; - struct super_block *sb = root->fs_info->sb; - struct btrfs_root *dev_root = root->fs_info->dev_root; - struct btrfs_trans_handle *trans; - struct btrfs_device_item *dev_item; - struct btrfs_key key; - u16 item_size; - u64 num_blocks; - u64 new_blocks; - u64 device_id; - int ret; - -printk("adding disk %s\n", name); - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super); - bdev = open_bdev_excl(name, O_RDWR, sb); - if (IS_ERR(bdev)) { - ret = PTR_ERR(bdev); -printk("open bdev excl failed ret %d\n", ret); - goto out_nolock; - } - set_blocksize(bdev, sb->s_blocksize); - new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits; - key.objectid = num_blocks; - key.offset = new_blocks; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY); - - mutex_lock(&dev_root->fs_info->fs_mutex); - trans = btrfs_start_transaction(dev_root, 1); - item_size = sizeof(*dev_item) + namelen; -printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size); - ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size); - if (ret) { -printk("insert failed %d\n", ret); - close_bdev_excl(bdev); - if (ret > 0) - ret = -EEXIST; - goto out; - } - dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_device_item); - btrfs_set_device_pathlen(dev_item, namelen); - memcpy(dev_item + 1, name, namelen); - - device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1; - btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id); - btrfs_set_device_id(dev_item, device_id); - mark_buffer_dirty(path->nodes[0]); - - ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks, - new_blocks); - - if (!ret) { - btrfs_set_super_total_blocks(root->fs_info->disk_super, - num_blocks + new_blocks); - i_size_write(root->fs_info->btree_inode, - (num_blocks + new_blocks) << - root->fs_info->btree_inode->i_blkbits); - } - -out: - ret = btrfs_commit_transaction(trans, dev_root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); -out_nolock: - btrfs_free_path(path); - btrfs_btree_balance_dirty(root); - - return ret; -} - static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { @@ -2682,17 +2605,6 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int ret = create_snapshot(root, vol_args.name, namelen); WARN_ON(ret); break; - case BTRFS_IOC_ADD_DISK: - if (copy_from_user(&vol_args, - (struct btrfs_ioctl_vol_args __user *)arg, - sizeof(vol_args))) - return -EFAULT; - namelen = strlen(vol_args.name); - if (namelen > BTRFS_VOL_NAME_MAX) - return -EINVAL; - vol_args.name[namelen] = '\0'; - ret = add_disk(root, vol_args.name, namelen); - break; default: return -ENOTTY; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a2c5820f1d3d..4f3c8ddcb401 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -150,13 +150,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_root *dev_root = fs_info->dev_root; - if (btrfs_super_device_root(fs_info->disk_super) != - bh_blocknr(dev_root->node)) { - btrfs_set_super_device_root(fs_info->disk_super, - bh_blocknr(dev_root->node)); - } btrfs_write_dirty_block_groups(trans, extent_root); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); From 5276aedab0baacfb3c5483208b8be85a8416bd5f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 11 Jun 2007 21:33:38 -0400 Subject: [PATCH 0162/2985] Btrfs: fix oops after block group lookup Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 - fs/btrfs/ctree.h | 3 +++ fs/btrfs/extent-tree.c | 29 ++++++++++++++++------------- fs/btrfs/super.c | 8 ++------ 4 files changed, 21 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 61d25b5de287..d9b6d38c603a 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -16,6 +16,5 @@ * Use relocation to try and fix write errors * Make allocator much smarter * xattrs (directory streams for regular files) -* fsck * Scrub & defrag diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5ab25a0cb16a..4e136b7b03ae 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -998,6 +998,9 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +struct btrfs_block_group_cache *btrfs_lookup_block_group(struct + btrfs_fs_info *info, + u64 blocknr); struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 645a4228bfe2..f509ffa38d03 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -135,9 +135,9 @@ printk("cache block group %Lu\n", block_group->key.objectid); return 0; } -static struct btrfs_block_group_cache *lookup_block_group(struct - btrfs_fs_info *info, - u64 blocknr) +struct btrfs_block_group_cache *btrfs_lookup_block_group(struct + btrfs_fs_info *info, + u64 blocknr) { struct btrfs_block_group_cache *block_group; int ret; @@ -208,7 +208,8 @@ out: return max(cache->last_alloc, search_start); new_group: - cache = lookup_block_group(root->fs_info, last + cache->key.offset - 1); + cache = btrfs_lookup_block_group(root->fs_info, + last + cache->key.offset - 1); if (!cache) { return max((*cache_ret)->last_alloc, search_start); } @@ -250,7 +251,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (search_start) { struct btrfs_block_group_cache *shint; - shint = lookup_block_group(info, search_start); + shint = btrfs_lookup_block_group(info, search_start); if (shint->data == data) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < @@ -576,7 +577,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, int ret; while(total) { - cache = lookup_block_group(info, blocknr); + cache = btrfs_lookup_block_group(info, blocknr); if (!cache) { printk(KERN_CRIT "blocknr %Lu lookup failed\n", blocknr); @@ -677,8 +678,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); - block_group = lookup_block_group(root->fs_info, - gang[i]); + block_group = btrfs_lookup_block_group(root->fs_info, + gang[i]); if (block_group) { WARN_ON(block_group->pinned == 0); block_group->pinned--; @@ -751,7 +752,8 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); if (!err) { struct btrfs_block_group_cache *cache; - cache = lookup_block_group(root->fs_info, blocknr); + cache = btrfs_lookup_block_group(root->fs_info, + blocknr); if (cache) cache->pinned++; } @@ -851,7 +853,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct for (i = 0; i < ret; i++) { wret = set_radix_bit(pinned_radix, gang[i]); if (wret == 0) { - cache = lookup_block_group(extent_root->fs_info, + cache = + btrfs_lookup_block_group(extent_root->fs_info, gang[i]); if (cache) cache->pinned++; @@ -938,7 +941,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(info->disk_super); if (hint_block) { - block_group = lookup_block_group(info, hint_block); + block_group = btrfs_lookup_block_group(info, hint_block); block_group = btrfs_find_block_group(root, block_group, hint_block, data, 1); } else { @@ -1118,7 +1121,7 @@ check_pending: info->extent_tree_prealloc_nr = total_found; } if (!data) { - block_group = lookup_block_group(info, ins->objectid); + block_group = btrfs_lookup_block_group(info, ins->objectid); if (block_group) { if (fill_prealloc) block_group->last_prealloc = @@ -1143,7 +1146,7 @@ new_group: else wrapped = 1; } - block_group = lookup_block_group(info, search_start); + block_group = btrfs_lookup_block_group(info, search_start); cond_resched(); if (!full_scan) block_group = btrfs_find_block_group(root, block_group, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2b24a5a2be63..9749090cfbb8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -62,7 +62,6 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item *inode_item; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; - struct btrfs_block_group_cache *alloc_group; u64 alloc_group_block; int ret; @@ -95,11 +94,8 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); alloc_group_block = btrfs_inode_block_group(inode_item); - ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix, - (void **)&alloc_group, - alloc_group_block, 1); - BUG_ON(!ret); - BTRFS_I(inode)->block_group = alloc_group; + BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, + alloc_group_block); btrfs_free_path(path); inode_item = NULL; From 39279cc3d2704cfbf9c35dcb5bdd392159ae4625 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 06:35:45 -0400 Subject: [PATCH 0163/2985] Btrfs: split up super.c Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 4 +- fs/btrfs/TODO | 1 + fs/btrfs/ctree.h | 25 +- fs/btrfs/disk-io.c | 46 +- fs/btrfs/file.c | 783 ++++++++++++ fs/btrfs/inode.c | 2210 ++++++++++++++++++++++++++++++++++ fs/btrfs/super.c | 2879 +------------------------------------------- 7 files changed, 3070 insertions(+), 2878 deletions(-) create mode 100644 fs/btrfs/file.c create mode 100644 fs/btrfs/inode.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 5346f706b2c5..5af2426116d3 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ - transaction.o bit-radix.o + transaction.o bit-radix.o inode.o file.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ @@ -16,7 +16,7 @@ else KERNELDIR := /lib/modules/`uname -r`/build all:: - $(MAKE) C=1 -C $(KERNELDIR) M=`pwd` modules + $(MAKE) -C $(KERNELDIR) M=`pwd` modules clean:: rm *.o btrfs.ko endif diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index d9b6d38c603a..4a48a2365d57 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,3 +1,4 @@ +* fix printk warnings * cleanup, add more error checking, get rid of BUG_ONs * Fix ENOSPC handling * Make allocator smarter diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4e136b7b03ae..24410d42e3ce 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1123,5 +1123,28 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); -/* super.c */ +/* inode.c */ +void btrfs_delete_inode(struct inode *inode); +void btrfs_read_locked_inode(struct inode *inode); +int btrfs_write_inode(struct inode *inode, int wait); +void btrfs_dirty_inode(struct inode *inode); +struct inode *btrfs_alloc_inode(struct super_block *sb); +void btrfs_destroy_inode(struct inode *inode); +int btrfs_init_cachep(void); +void btrfs_destroy_cachep(void); +int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, + unsigned long arg); +long btrfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg); +struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, + struct btrfs_root *root); +int btrfs_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to); +int btrfs_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create); +/* file.c */ +extern struct file_operations btrfs_file_operations; +int btrfs_drop_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + u64 start, u64 end, u64 *hint_block); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8a88404525e8..96bf3ef3a798 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -22,7 +22,7 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { printk(KERN_CRIT "bh_blocknr(buf) is %Lu, header is %Lu\n", bh_blocknr(buf), btrfs_header_blocknr(&node->header)); - BUG(); + return 1; } return 0; } @@ -253,7 +253,7 @@ uptodate: set_buffer_checked(bh); } if (check_tree_block(root, bh)) - BUG(); + goto fail; return bh; fail: brelse(bh); @@ -398,8 +398,13 @@ struct btrfs_root *open_ctree(struct super_block *sb) struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; + int err = -EIO; struct btrfs_super_block *disk_super; + if (!extent_root || !tree_root || !fs_info) { + err = -ENOMEM; + goto fail; + } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_map_radix); @@ -431,9 +436,11 @@ struct btrfs_root *open_ctree(struct super_block *sb) mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->hash_tfm = crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); + if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { - printk("failed to allocate digest hash\n"); - return NULL; + printk("btrfs: failed hash setup, modprobe cryptomgr?\n"); + err = -ENOMEM; + goto fail_iput; } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); @@ -446,30 +453,53 @@ struct btrfs_root *open_ctree(struct super_block *sb) sb->s_blocksize); if (!fs_info->sb_buffer) - return NULL; + goto fail_iput; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + if (!btrfs_super_root(disk_super)) - return NULL; + goto fail_sb_buffer; i_size_write(fs_info->btree_inode, btrfs_super_total_blocks(disk_super) << fs_info->btree_inode->i_blkbits); fs_info->disk_super = disk_super; + + if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, + sizeof(disk_super->magic))) { + printk("btrfs: valid FS not found on %s\n", sb->s_id); + goto fail_sb_buffer; + } tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); - BUG_ON(!tree_root->node); + if (!tree_root->node) + goto fail_sb_buffer; mutex_lock(&fs_info->fs_mutex); ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); - BUG_ON(ret); + if (ret) { + mutex_unlock(&fs_info->fs_mutex); + goto fail_tree_root; + } btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; mutex_unlock(&fs_info->fs_mutex); return tree_root; + +fail_tree_root: + btrfs_block_release(tree_root, tree_root->node); +fail_sb_buffer: + btrfs_block_release(tree_root, fs_info->sb_buffer); +fail_iput: + iput(fs_info->btree_inode); +fail: + kfree(extent_root); + kfree(tree_root); + kfree(fs_info); + return ERR_PTR(err); } int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c new file mode 100644 index 000000000000..7f8e3035d99e --- /dev/null +++ b/fs/btrfs/file.c @@ -0,0 +1,783 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "btrfs_inode.h" +#include "ioctl.h" +#include "print-tree.h" + + +static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, + struct page **prepared_pages, + const char __user * buf) +{ + long page_fault = 0; + int i; + int offset = pos & (PAGE_CACHE_SIZE - 1); + + for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { + size_t count = min_t(size_t, + PAGE_CACHE_SIZE - offset, write_bytes); + struct page *page = prepared_pages[i]; + fault_in_pages_readable(buf, count); + + /* Copy data from userspace to the current page */ + kmap(page); + page_fault = __copy_from_user(page_address(page) + offset, + buf, count); + /* Flush processor's dcache for this page */ + flush_dcache_page(page); + kunmap(page); + buf += count; + write_bytes -= count; + + if (page_fault) + break; + } + return page_fault ? -EFAULT : 0; +} + +static void btrfs_drop_pages(struct page **pages, size_t num_pages) +{ + size_t i; + for (i = 0; i < num_pages; i++) { + if (!pages[i]) + break; + unlock_page(pages[i]); + mark_page_accessed(pages[i]); + page_cache_release(pages[i]); + } +} + +static int dirty_and_release_pages(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct file *file, + struct page **pages, + size_t num_pages, + loff_t pos, + size_t write_bytes) +{ + int i; + int offset; + int err = 0; + int ret; + int this_write; + struct inode *inode = file->f_path.dentry->d_inode; + struct buffer_head *bh; + struct btrfs_file_extent_item *ei; + + for (i = 0; i < num_pages; i++) { + offset = pos & (PAGE_CACHE_SIZE -1); + this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + /* FIXME, one block at a time */ + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + bh = page_buffers(pages[i]); + + if (buffer_mapped(bh) && bh->b_blocknr == 0) { + struct btrfs_key key; + struct btrfs_path *path; + char *ptr; + u32 datasize; + + /* create an inline extent, and copy the data in */ + path = btrfs_alloc_path(); + BUG_ON(!path); + key.objectid = inode->i_ino; + key.offset = pages[i]->index << PAGE_CACHE_SHIFT; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + BUG_ON(write_bytes >= PAGE_CACHE_SIZE); + datasize = offset + + btrfs_file_extent_calc_inline_size(write_bytes); + + ret = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + BUG_ON(ret); + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = btrfs_file_extent_inline_start(ei); + btrfs_memcpy(root, path->nodes[0]->b_data, + ptr, bh->b_data, offset + write_bytes); + mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + } else if (buffer_mapped(bh)) { + /* csum the file data */ + btrfs_csum_file_block(trans, root, inode->i_ino, + pages[i]->index << PAGE_CACHE_SHIFT, + kmap(pages[i]), PAGE_CACHE_SIZE); + kunmap(pages[i]); + } + SetPageChecked(pages[i]); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + + ret = btrfs_commit_write(file, pages[i], offset, + offset + this_write); + pos += this_write; + if (ret) { + err = ret; + goto failed; + } + WARN_ON(this_write > write_bytes); + write_bytes -= this_write; + } +failed: + return err; +} + +/* + * this is very complex, but the basic idea is to drop all extents + * in the range start - end. hint_block is filled in with a block number + * that would be a good hint to the block allocator for this file. + * + * If an extent intersects the range but is not entirely inside the range + * it is either truncated or split. Anything entirely inside the range + * is deleted from the tree. + */ +int btrfs_drop_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + u64 start, u64 end, u64 *hint_block) +{ + int ret; + struct btrfs_key key; + struct btrfs_leaf *leaf; + int slot; + struct btrfs_file_extent_item *extent; + u64 extent_end = 0; + int keep; + struct btrfs_file_extent_item old; + struct btrfs_path *path; + u64 search_start = start; + int bookend; + int found_type; + int found_extent; + int found_inline; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + while(1) { + btrfs_release_path(root, path); + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + search_start, -1); + if (ret < 0) + goto out; + if (ret > 0) { + if (path->slots[0] == 0) { + ret = 0; + goto out; + } + path->slots[0]--; + } + keep = 0; + bookend = 0; + found_extent = 0; + found_inline = 0; + extent = NULL; + leaf = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (key.offset >= end || key.objectid != inode->i_ino) { + ret = 0; + goto out; + } + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) { + ret = 0; + goto out; + } + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); + found_extent = 1; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_inline = 1; + extent_end = key.offset + + btrfs_file_extent_inline_len(leaf->items + slot); + } + + /* we found nothing we can drop */ + if (!found_extent && !found_inline) { + ret = 0; + goto out; + } + + /* we found nothing inside the range */ + if (search_start >= extent_end) { + ret = 0; + goto out; + } + + /* FIXME, there's only one inline extent allowed right now */ + if (found_inline) { + u64 mask = root->blocksize - 1; + search_start = (extent_end + mask) & ~mask; + } else + search_start = extent_end; + + if (end < extent_end && end >= key.offset) { + if (found_extent) { + u64 disk_blocknr = + btrfs_file_extent_disk_blocknr(extent); + u64 disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); + memcpy(&old, extent, sizeof(old)); + if (disk_blocknr != 0) { + ret = btrfs_inc_extent_ref(trans, root, + disk_blocknr, disk_num_blocks); + BUG_ON(ret); + } + } + WARN_ON(found_inline); + bookend = 1; + } + + /* truncate existing extent */ + if (start > key.offset) { + u64 new_num; + u64 old_num; + keep = 1; + WARN_ON(start & (root->blocksize - 1)); + if (found_extent) { + new_num = (start - key.offset) >> + inode->i_blkbits; + old_num = btrfs_file_extent_num_blocks(extent); + *hint_block = + btrfs_file_extent_disk_blocknr(extent); + if (btrfs_file_extent_disk_blocknr(extent)) { + inode->i_blocks -= + (old_num - new_num) << 3; + } + btrfs_set_file_extent_num_blocks(extent, + new_num); + mark_buffer_dirty(path->nodes[0]); + } else { + WARN_ON(1); + } + } + /* delete the entire extent */ + if (!keep) { + u64 disk_blocknr = 0; + u64 disk_num_blocks = 0; + u64 extent_num_blocks = 0; + if (found_extent) { + disk_blocknr = + btrfs_file_extent_disk_blocknr(extent); + disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); + extent_num_blocks = + btrfs_file_extent_num_blocks(extent); + *hint_block = + btrfs_file_extent_disk_blocknr(extent); + } + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + btrfs_release_path(root, path); + extent = NULL; + if (found_extent && disk_blocknr != 0) { + inode->i_blocks -= extent_num_blocks << 3; + ret = btrfs_free_extent(trans, root, + disk_blocknr, + disk_num_blocks, 0); + } + + BUG_ON(ret); + if (!bookend && search_start >= end) { + ret = 0; + goto out; + } + if (!bookend) + continue; + } + /* create bookend, splitting the extent in two */ + if (bookend && found_extent) { + struct btrfs_key ins; + ins.objectid = inode->i_ino; + ins.offset = end; + ins.flags = 0; + btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); + + btrfs_release_path(root, path); + ret = btrfs_insert_empty_item(trans, root, path, &ins, + sizeof(*extent)); + BUG_ON(ret); + extent = btrfs_item_ptr( + btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_disk_blocknr(extent, + btrfs_file_extent_disk_blocknr(&old)); + btrfs_set_file_extent_disk_num_blocks(extent, + btrfs_file_extent_disk_num_blocks(&old)); + + btrfs_set_file_extent_offset(extent, + btrfs_file_extent_offset(&old) + + ((end - key.offset) >> inode->i_blkbits)); + WARN_ON(btrfs_file_extent_num_blocks(&old) < + (extent_end - end) >> inode->i_blkbits); + btrfs_set_file_extent_num_blocks(extent, + (extent_end - end) >> inode->i_blkbits); + + btrfs_set_file_extent_type(extent, + BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_generation(extent, + btrfs_file_extent_generation(&old)); + btrfs_mark_buffer_dirty(path->nodes[0]); + if (btrfs_file_extent_disk_blocknr(&old) != 0) { + inode->i_blocks += + btrfs_file_extent_num_blocks(extent) << 3; + } + ret = 0; + goto out; + } + } +out: + btrfs_free_path(path); + return ret; +} + +/* + * this gets pages into the page cache and locks them down + */ +static int prepare_pages(struct btrfs_root *root, + struct file *file, + struct page **pages, + size_t num_pages, + loff_t pos, + unsigned long first_index, + unsigned long last_index, + size_t write_bytes, + u64 alloc_extent_start) +{ + int i; + unsigned long index = pos >> PAGE_CACHE_SHIFT; + struct inode *inode = file->f_path.dentry->d_inode; + int offset; + int err = 0; + int this_write; + struct buffer_head *bh; + struct buffer_head *head; + loff_t isize = i_size_read(inode); + + memset(pages, 0, num_pages * sizeof(struct page *)); + + for (i = 0; i < num_pages; i++) { + pages[i] = grab_cache_page(inode->i_mapping, index + i); + if (!pages[i]) { + err = -ENOMEM; + goto failed_release; + } + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); + wait_on_page_writeback(pages[i]); + offset = pos & (PAGE_CACHE_SIZE -1); + this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + if (!page_has_buffers(pages[i])) { + create_empty_buffers(pages[i], + root->fs_info->sb->s_blocksize, + (1 << BH_Uptodate)); + } + head = page_buffers(pages[i]); + bh = head; + do { + err = btrfs_map_bh_to_logical(root, bh, + alloc_extent_start); + BUG_ON(err); + if (err) + goto failed_truncate; + bh = bh->b_this_page; + if (alloc_extent_start) + alloc_extent_start++; + } while (bh != head); + pos += this_write; + WARN_ON(this_write > write_bytes); + write_bytes -= this_write; + } + return 0; + +failed_release: + btrfs_drop_pages(pages, num_pages); + return err; + +failed_truncate: + btrfs_drop_pages(pages, num_pages); + if (pos > isize) + vmtruncate(inode, isize); + return err; +} + +static ssize_t btrfs_file_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + loff_t pos; + size_t num_written = 0; + int err = 0; + int ret = 0; + struct inode *inode = file->f_path.dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct page *pages[8]; + struct page *pinned[2]; + unsigned long first_index; + unsigned long last_index; + u64 start_pos; + u64 num_blocks; + u64 alloc_extent_start; + u64 hint_block; + struct btrfs_trans_handle *trans; + struct btrfs_key ins; + pinned[0] = NULL; + pinned[1] = NULL; + if (file->f_flags & O_DIRECT) + return -EINVAL; + pos = *ppos; + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + current->backing_dev_info = inode->i_mapping->backing_dev_info; + err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + if (err) + goto out; + if (count == 0) + goto out; + err = remove_suid(file->f_path.dentry); + if (err) + goto out; + file_update_time(file); + + start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); + num_blocks = (count + pos - start_pos + root->blocksize - 1) >> + inode->i_blkbits; + + mutex_lock(&inode->i_mutex); + first_index = pos >> PAGE_CACHE_SHIFT; + last_index = (pos + count) >> PAGE_CACHE_SHIFT; + + /* + * there are lots of better ways to do this, but this code + * makes sure the first and last page in the file range are + * up to date and ready for cow + */ + if ((pos & (PAGE_CACHE_SIZE - 1))) { + pinned[0] = grab_cache_page(inode->i_mapping, first_index); + if (!PageUptodate(pinned[0])) { + ret = mpage_readpage(pinned[0], btrfs_get_block); + BUG_ON(ret); + wait_on_page_locked(pinned[0]); + } else { + unlock_page(pinned[0]); + } + } + if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { + pinned[1] = grab_cache_page(inode->i_mapping, last_index); + if (!PageUptodate(pinned[1])) { + ret = mpage_readpage(pinned[1], btrfs_get_block); + BUG_ON(ret); + wait_on_page_locked(pinned[1]); + } else { + unlock_page(pinned[1]); + } + } + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + mutex_unlock(&root->fs_info->fs_mutex); + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); + /* FIXME blocksize != 4096 */ + inode->i_blocks += num_blocks << 3; + hint_block = 0; + + /* FIXME...EIEIO, ENOSPC and more */ + + /* step one, delete the existing extents in this range */ + if (start_pos < inode->i_size) { + /* FIXME blocksize != pagesize */ + ret = btrfs_drop_extents(trans, root, inode, + start_pos, + (pos + count + root->blocksize -1) & + ~((u64)root->blocksize - 1), + &hint_block); + BUG_ON(ret); + } + + /* insert any holes we need to create */ + if (inode->i_size < start_pos) { + u64 last_pos_in_file; + u64 hole_size; + u64 mask = root->blocksize - 1; + last_pos_in_file = (inode->i_size + mask) & ~mask; + hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + ret = btrfs_insert_file_extent(trans, root, + inode->i_ino, + last_pos_in_file, + 0, 0, hole_size); + } + BUG_ON(ret); + } + + /* + * either allocate an extent for the new bytes or setup the key + * to show we are doing inline data in the extent + */ + if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || + pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + ret = btrfs_alloc_extent(trans, root, inode->i_ino, + num_blocks, hint_block, (u64)-1, + &ins, 1); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + start_pos, ins.objectid, ins.offset, + ins.offset); + BUG_ON(ret); + } else { + ins.offset = 0; + ins.objectid = 0; + } + BUG_ON(ret); + alloc_extent_start = ins.objectid; + ret = btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + while(count > 0) { + size_t offset = pos & (PAGE_CACHE_SIZE - 1); + size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); + size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + + memset(pages, 0, sizeof(pages)); + ret = prepare_pages(root, file, pages, num_pages, + pos, first_index, last_index, + write_bytes, alloc_extent_start); + BUG_ON(ret); + + /* FIXME blocks != pagesize */ + if (alloc_extent_start) + alloc_extent_start += num_pages; + ret = btrfs_copy_from_user(pos, num_pages, + write_bytes, pages, buf); + BUG_ON(ret); + + ret = dirty_and_release_pages(NULL, root, file, pages, + num_pages, pos, write_bytes); + BUG_ON(ret); + btrfs_drop_pages(pages, num_pages); + + buf += write_bytes; + count -= write_bytes; + pos += write_bytes; + num_written += write_bytes; + + balance_dirty_pages_ratelimited(inode->i_mapping); + btrfs_btree_balance_dirty(root); + cond_resched(); + } +out_unlock: + mutex_unlock(&inode->i_mutex); +out: + if (pinned[0]) + page_cache_release(pinned[0]); + if (pinned[1]) + page_cache_release(pinned[1]); + *ppos = pos; + current->backing_dev_info = NULL; + mark_inode_dirty(inode); + return num_written ? num_written : err; +} + +/* + * FIXME, do this by stuffing the csum we want in the info hanging off + * page->private. For now, verify file csums on read + */ +static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, + unsigned long offset, unsigned long size) +{ + char *kaddr; + unsigned long left, count = desc->count; + struct inode *inode = page->mapping->host; + + if (size > count) + size = count; + + if (!PageChecked(page)) { + /* FIXME, do it per block */ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + struct buffer_head *bh; + + if (page_has_buffers(page)) { + bh = page_buffers(page); + if (!buffer_mapped(bh)) { + SetPageChecked(page); + goto checked; + } + } + + ret = btrfs_csum_verify_file_block(root, + page->mapping->host->i_ino, + page->index << PAGE_CACHE_SHIFT, + kmap(page), PAGE_CACHE_SIZE); + if (ret) { + if (ret != -ENOENT) { + printk("failed to verify ino %lu page %lu ret %d\n", + page->mapping->host->i_ino, + page->index, ret); + memset(page_address(page), 1, PAGE_CACHE_SIZE); + flush_dcache_page(page); + } + } + SetPageChecked(page); + kunmap(page); + } +checked: + /* + * Faults on the destination of a read are common, so do it before + * taking the kmap. + */ + if (!fault_in_pages_writeable(desc->arg.buf, size)) { + kaddr = kmap_atomic(page, KM_USER0); + left = __copy_to_user_inatomic(desc->arg.buf, + kaddr + offset, size); + kunmap_atomic(kaddr, KM_USER0); + if (left == 0) + goto success; + } + + /* Do it the slow way */ + kaddr = kmap(page); + left = __copy_to_user(desc->arg.buf, kaddr + offset, size); + kunmap(page); + + if (left) { + size -= left; + desc->error = -EFAULT; + } +success: + desc->count = count - size; + desc->written += size; + desc->arg.buf += size; + return size; +} + +/** + * btrfs_file_aio_read - filesystem read routine, with a mod to csum verify + * @iocb: kernel I/O control block + * @iov: io vector request + * @nr_segs: number of segments in the iovec + * @pos: current file position + */ +static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct file *filp = iocb->ki_filp; + ssize_t retval; + unsigned long seg; + size_t count; + loff_t *ppos = &iocb->ki_pos; + + count = 0; + for (seg = 0; seg < nr_segs; seg++) { + const struct iovec *iv = &iov[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. + */ + count += iv->iov_len; + if (unlikely((ssize_t)(count|iv->iov_len) < 0)) + return -EINVAL; + if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len)) + continue; + if (seg == 0) + return -EFAULT; + nr_segs = seg; + count -= iv->iov_len; /* This segment is no good */ + break; + } + retval = 0; + if (count) { + for (seg = 0; seg < nr_segs; seg++) { + read_descriptor_t desc; + + desc.written = 0; + desc.arg.buf = iov[seg].iov_base; + desc.count = iov[seg].iov_len; + if (desc.count == 0) + continue; + desc.error = 0; + do_generic_file_read(filp, ppos, &desc, + btrfs_read_actor); + retval += desc.written; + if (desc.error) { + retval = retval ?: desc.error; + break; + } + } + } + return retval; +} + +static int btrfs_sync_file(struct file *file, + struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + struct btrfs_trans_handle *trans; + + /* + * FIXME, use inode generation number to check if we can skip the + * commit + */ + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto out; + } + ret = btrfs_commit_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); +out: + return ret > 0 ? EIO : ret; +} + +struct file_operations btrfs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .aio_read = btrfs_file_aio_read, + .write = btrfs_file_write, + .mmap = generic_file_mmap, + .open = generic_file_open, + .ioctl = btrfs_ioctl, + .fsync = btrfs_sync_file, +#ifdef CONFIG_COMPAT + .compat_ioctl = btrfs_compat_ioctl, +#endif +}; + diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c new file mode 100644 index 000000000000..62a3a778d37d --- /dev/null +++ b/fs/btrfs/inode.c @@ -0,0 +1,2210 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "btrfs_inode.h" +#include "ioctl.h" +#include "print-tree.h" + +struct btrfs_iget_args { + u64 ino; + struct btrfs_root *root; +}; + +static struct inode_operations btrfs_dir_inode_operations; +static struct inode_operations btrfs_symlink_inode_operations; +static struct inode_operations btrfs_dir_ro_inode_operations; +static struct inode_operations btrfs_file_inode_operations; +static struct address_space_operations btrfs_aops; +static struct address_space_operations btrfs_symlink_aops; +static struct file_operations btrfs_dir_file_operations; + +static struct kmem_cache *btrfs_inode_cachep; +struct kmem_cache *btrfs_trans_handle_cachep; +struct kmem_cache *btrfs_transaction_cachep; +struct kmem_cache *btrfs_bit_radix_cachep; +struct kmem_cache *btrfs_path_cachep; + +#define S_SHIFT 12 +static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, + [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK, + [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, +}; + +void btrfs_read_locked_inode(struct inode *inode) +{ + struct btrfs_path *path; + struct btrfs_inode_item *inode_item; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_key location; + u64 alloc_group_block; + int ret; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + mutex_lock(&root->fs_info->fs_mutex); + + memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); + ret = btrfs_lookup_inode(NULL, root, path, &location, 0); + if (ret) { + btrfs_free_path(path); + goto make_bad; + } + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_inode_item); + + inode->i_mode = btrfs_inode_mode(inode_item); + inode->i_nlink = btrfs_inode_nlink(inode_item); + inode->i_uid = btrfs_inode_uid(inode_item); + inode->i_gid = btrfs_inode_gid(inode_item); + inode->i_size = btrfs_inode_size(inode_item); + inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime); + inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime); + inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime); + inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime); + inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime); + inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); + inode->i_blocks = btrfs_inode_nblocks(inode_item); + inode->i_generation = btrfs_inode_generation(inode_item); + alloc_group_block = btrfs_inode_block_group(inode_item); + BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, + alloc_group_block); + + btrfs_free_path(path); + inode_item = NULL; + + mutex_unlock(&root->fs_info->fs_mutex); + + switch (inode->i_mode & S_IFMT) { +#if 0 + default: + init_special_inode(inode, inode->i_mode, + btrfs_inode_rdev(inode_item)); + break; +#endif + case S_IFREG: + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + break; + case S_IFDIR: + inode->i_fop = &btrfs_dir_file_operations; + if (root == root->fs_info->tree_root) + inode->i_op = &btrfs_dir_ro_inode_operations; + else + inode->i_op = &btrfs_dir_inode_operations; + break; + case S_IFLNK: + inode->i_op = &btrfs_symlink_inode_operations; + inode->i_mapping->a_ops = &btrfs_symlink_aops; + break; + } + return; + +make_bad: + btrfs_release_path(root, path); + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + make_bad_inode(inode); +} + +static void fill_inode_item(struct btrfs_inode_item *item, + struct inode *inode) +{ + btrfs_set_inode_uid(item, inode->i_uid); + btrfs_set_inode_gid(item, inode->i_gid); + btrfs_set_inode_size(item, inode->i_size); + btrfs_set_inode_mode(item, inode->i_mode); + btrfs_set_inode_nlink(item, inode->i_nlink); + btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); + btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); + btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); + btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); + btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); + btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); + btrfs_set_inode_nblocks(item, inode->i_blocks); + btrfs_set_inode_generation(item, inode->i_generation); + btrfs_set_inode_block_group(item, + BTRFS_I(inode)->block_group->key.objectid); +} + +static int btrfs_update_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + struct btrfs_inode_item *inode_item; + struct btrfs_path *path; + int ret; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_inode(trans, root, path, + &BTRFS_I(inode)->location, 1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + goto failed; + } + + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_inode_item); + + fill_inode_item(inode_item, inode); + btrfs_mark_buffer_dirty(path->nodes[0]); + ret = 0; +failed: + btrfs_release_path(root, path); + btrfs_free_path(path); + return ret; +} + + +static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, + struct dentry *dentry) +{ + struct btrfs_path *path; + const char *name = dentry->d_name.name; + int name_len = dentry->d_name.len; + int ret = 0; + u64 objectid; + struct btrfs_dir_item *di; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + name, name_len, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto err; + } + if (!di) { + ret = -ENOENT; + goto err; + } + objectid = btrfs_disk_key_objectid(&di->location); + ret = btrfs_delete_one_dir_name(trans, root, path, di); + BUG_ON(ret); + btrfs_release_path(root, path); + + di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + objectid, name, name_len, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto err; + } + if (!di) { + ret = -ENOENT; + goto err; + } + ret = btrfs_delete_one_dir_name(trans, root, path, di); + BUG_ON(ret); + + dentry->d_inode->i_ctime = dir->i_ctime; +err: + btrfs_free_path(path); + if (!ret) { + dir->i_size -= name_len * 2; + btrfs_update_inode(trans, root, dir); + drop_nlink(dentry->d_inode); + btrfs_update_inode(trans, root, dentry->d_inode); + dir->i_sb->s_dirt = 1; + } + return ret; +} + +static int btrfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct btrfs_root *root; + struct btrfs_trans_handle *trans; + int ret; + + root = BTRFS_I(dir)->root; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + ret = btrfs_unlink_trans(trans, root, dir, dentry); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); + return ret; +} + +static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + int err; + int ret; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_trans_handle *trans; + struct btrfs_key found_key; + int found_type; + struct btrfs_leaf *leaf; + char *goodnames = ".."; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + key.objectid = inode->i_ino; + key.offset = (u64)-1; + key.flags = (u32)-1; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) { + err = ret; + goto out; + } + BUG_ON(ret == 0); + if (path->slots[0] == 0) { + err = -ENOENT; + goto out; + } + path->slots[0]--; + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[path->slots[0]].key); + found_type = btrfs_key_type(&found_key); + if (found_key.objectid != inode->i_ino) { + err = -ENOENT; + goto out; + } + if ((found_type != BTRFS_DIR_ITEM_KEY && + found_type != BTRFS_DIR_INDEX_KEY) || + (!btrfs_match_dir_item_name(root, path, goodnames, 2) && + !btrfs_match_dir_item_name(root, path, goodnames, 1))) { + err = -ENOTEMPTY; + goto out; + } + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + + if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1) + break; + btrfs_release_path(root, path); + } + ret = 0; + btrfs_release_path(root, path); + + /* now the directory is empty */ + err = btrfs_unlink_trans(trans, root, dir, dentry); + if (!err) { + inode->i_size = 0; + } +out: + btrfs_release_path(root, path); + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + ret = btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root); + if (ret && !err) + err = ret; + return err; +} + +static int btrfs_free_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + struct btrfs_path *path; + int ret; + + clear_inode(inode); + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_inode(trans, root, path, + &BTRFS_I(inode)->location, -1); + BUG_ON(ret); + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + btrfs_free_path(path); + return ret; +} + +/* + * truncates go from a high offset to a low offset. So, walk + * from hi to lo in the node and issue readas. Stop when you find + * keys from a different objectid + */ +static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path, + u64 objectid) +{ + struct btrfs_node *node; + int i; + int nritems; + u64 item_objectid; + u64 blocknr; + int slot; + int ret; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1]; + if (slot == 0) + return; + nritems = btrfs_header_nritems(&node->header); + for (i = slot - 1; i >= 0; i--) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid != objectid) + break; + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} + +/* + * this can truncate away extent items, csum items and directory items. + * It starts at a high offset and removes keys until it can't find + * any higher than i_size. + * + * csum items that cross the new i_size are truncated to the new size + * as well. + */ +static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + int ret; + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_disk_key *found_key; + u32 found_type; + struct btrfs_leaf *leaf; + struct btrfs_file_extent_item *fi; + u64 extent_start = 0; + u64 extent_num_blocks = 0; + u64 item_end = 0; + int found_extent; + int del_item; + + path = btrfs_alloc_path(); + BUG_ON(!path); + /* FIXME, add redo link to tree so we don't leak on crash */ + key.objectid = inode->i_ino; + key.offset = (u64)-1; + key.flags = (u32)-1; + while(1) { + btrfs_init_path(path); + fi = NULL; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) { + goto error; + } + if (ret > 0) { + BUG_ON(path->slots[0] == 0); + path->slots[0]--; + } + reada_truncate(root, path, inode->i_ino); + leaf = btrfs_buffer_leaf(path->nodes[0]); + found_key = &leaf->items[path->slots[0]].key; + found_type = btrfs_disk_key_type(found_key); + + if (btrfs_disk_key_objectid(found_key) != inode->i_ino) + break; + if (found_type != BTRFS_CSUM_ITEM_KEY && + found_type != BTRFS_DIR_ITEM_KEY && + found_type != BTRFS_DIR_INDEX_KEY && + found_type != BTRFS_EXTENT_DATA_KEY) + break; + + item_end = btrfs_disk_key_offset(found_key); + if (found_type == BTRFS_EXTENT_DATA_KEY) { + fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(fi) != + BTRFS_FILE_EXTENT_INLINE) { + item_end += btrfs_file_extent_num_blocks(fi) << + inode->i_blkbits; + } + } + if (found_type == BTRFS_CSUM_ITEM_KEY) { + ret = btrfs_csum_truncate(trans, root, path, + inode->i_size); + BUG_ON(ret); + } + if (item_end < inode->i_size) { + if (found_type) { + btrfs_set_key_type(&key, found_type - 1); + continue; + } + break; + } + if (btrfs_disk_key_offset(found_key) >= inode->i_size) + del_item = 1; + else + del_item = 0; + found_extent = 0; + + /* FIXME, shrink the extent if the ref count is only 1 */ + if (found_type == BTRFS_EXTENT_DATA_KEY && + btrfs_file_extent_type(fi) != + BTRFS_FILE_EXTENT_INLINE) { + u64 num_dec; + if (!del_item) { + u64 orig_num_blocks = + btrfs_file_extent_num_blocks(fi); + extent_num_blocks = inode->i_size - + btrfs_disk_key_offset(found_key) + + root->blocksize - 1; + extent_num_blocks >>= inode->i_blkbits; + btrfs_set_file_extent_num_blocks(fi, + extent_num_blocks); + inode->i_blocks -= (orig_num_blocks - + extent_num_blocks) << 3; + mark_buffer_dirty(path->nodes[0]); + } else { + extent_start = + btrfs_file_extent_disk_blocknr(fi); + extent_num_blocks = + btrfs_file_extent_disk_num_blocks(fi); + /* FIXME blocksize != 4096 */ + num_dec = btrfs_file_extent_num_blocks(fi) << 3; + if (extent_start != 0) { + found_extent = 1; + inode->i_blocks -= num_dec; + } + } + } + if (del_item) { + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + } else { + break; + } + btrfs_release_path(root, path); + if (found_extent) { + ret = btrfs_free_extent(trans, root, extent_start, + extent_num_blocks, 0); + BUG_ON(ret); + } + } + ret = 0; +error: + btrfs_release_path(root, path); + btrfs_free_path(path); + inode->i_sb->s_dirt = 1; + return ret; +} + +/* + * taken from block_truncate_page, but does cow as it zeros out + * any bytes left in the last page in the file. + */ +static int btrfs_truncate_page(struct address_space *mapping, loff_t from) +{ + struct inode *inode = mapping->host; + unsigned blocksize = 1 << inode->i_blkbits; + pgoff_t index = from >> PAGE_CACHE_SHIFT; + unsigned offset = from & (PAGE_CACHE_SIZE-1); + struct page *page; + char *kaddr; + int ret = 0; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 alloc_hint = 0; + struct btrfs_key ins; + struct btrfs_trans_handle *trans; + + if ((offset & (blocksize - 1)) == 0) + goto out; + + ret = -ENOMEM; + page = grab_cache_page(mapping, index); + if (!page) + goto out; + + if (!PageUptodate(page)) { + ret = mpage_readpage(page, btrfs_get_block); + lock_page(page); + if (!PageUptodate(page)) { + ret = -EIO; + goto out; + } + } + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + ret = btrfs_drop_extents(trans, root, inode, + page->index << PAGE_CACHE_SHIFT, + (page->index + 1) << PAGE_CACHE_SHIFT, + &alloc_hint); + BUG_ON(ret); + ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, + alloc_hint, (u64)-1, &ins, 1); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + page->index << PAGE_CACHE_SHIFT, + ins.objectid, 1, 1); + BUG_ON(ret); + SetPageChecked(page); + kaddr = kmap(page); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + btrfs_csum_file_block(trans, root, inode->i_ino, + page->index << PAGE_CACHE_SHIFT, + kaddr, PAGE_CACHE_SIZE); + kunmap(page); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); +out: + return ret; +} + +static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int err; + + err = inode_change_ok(inode, attr); + if (err) + return err; + + if (S_ISREG(inode->i_mode) && + attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 mask = root->blocksize - 1; + u64 pos = (inode->i_size + mask) & ~mask; + u64 hole_size; + + if (attr->ia_size <= pos) + goto out; + + btrfs_truncate_page(inode->i_mapping, inode->i_size); + + hole_size = (attr->ia_size - pos + mask) & ~mask; + hole_size >>= inode->i_blkbits; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, + pos, 0, 0, hole_size); + BUG_ON(err); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + } +out: + err = inode_setattr(inode, attr); + + return err; +} +void btrfs_delete_inode(struct inode *inode) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + truncate_inode_pages(&inode->i_data, 0); + if (is_bad_inode(inode)) { + goto no_delete; + } + inode->i_size = 0; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + ret = btrfs_truncate_in_trans(trans, root, inode); + BUG_ON(ret); + btrfs_free_inode(trans, root, inode); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); + return; +no_delete: + clear_inode(inode); +} + +/* + * this returns the key found in the dir entry in the location pointer. + * If no dir entries were found, location->objectid is 0. + */ +static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + struct btrfs_key *location) +{ + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + struct btrfs_dir_item *di; + struct btrfs_path *path; + struct btrfs_root *root = BTRFS_I(dir)->root; + int ret; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, + namelen, 0); + if (!di || IS_ERR(di)) { + location->objectid = 0; + ret = 0; + goto out; + } + btrfs_disk_key_to_cpu(location, &di->location); +out: + btrfs_release_path(root, path); + btrfs_free_path(path); + return ret; +} + +/* + * when we hit a tree root in a directory, the btrfs part of the inode + * needs to be changed to reflect the root directory of the tree root. This + * is kind of like crossing a mount point. + */ +static int fixup_tree_root_location(struct btrfs_root *root, + struct btrfs_key *location, + struct btrfs_root **sub_root) +{ + struct btrfs_path *path; + struct btrfs_root_item *ri; + + if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) + return 0; + if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) + return 0; + + path = btrfs_alloc_path(); + BUG_ON(!path); + mutex_lock(&root->fs_info->fs_mutex); + + *sub_root = btrfs_read_fs_root(root->fs_info, location); + if (IS_ERR(*sub_root)) + return PTR_ERR(*sub_root); + + ri = &(*sub_root)->root_item; + location->objectid = btrfs_root_dirid(ri); + location->flags = 0; + btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); + location->offset = 0; + + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + +static int btrfs_init_locked_inode(struct inode *inode, void *p) +{ + struct btrfs_iget_args *args = p; + inode->i_ino = args->ino; + BTRFS_I(inode)->root = args->root; + return 0; +} + +static int btrfs_find_actor(struct inode *inode, void *opaque) +{ + struct btrfs_iget_args *args = opaque; + return (args->ino == inode->i_ino && + args->root == BTRFS_I(inode)->root); +} + +struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, + struct btrfs_root *root) +{ + struct inode *inode; + struct btrfs_iget_args args; + args.ino = objectid; + args.root = root; + + inode = iget5_locked(s, objectid, btrfs_find_actor, + btrfs_init_locked_inode, + (void *)&args); + return inode; +} + +static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode * inode; + struct btrfs_inode *bi = BTRFS_I(dir); + struct btrfs_root *root = bi->root; + struct btrfs_root *sub_root = root; + struct btrfs_key location; + int ret; + + if (dentry->d_name.len > BTRFS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_inode_by_name(dir, dentry, &location); + mutex_unlock(&root->fs_info->fs_mutex); + if (ret < 0) + return ERR_PTR(ret); + inode = NULL; + if (location.objectid) { + ret = fixup_tree_root_location(root, &location, &sub_root); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) + return ERR_PTR(-ENOENT); + inode = btrfs_iget_locked(dir->i_sb, location.objectid, + sub_root); + if (!inode) + return ERR_PTR(-EACCES); + if (inode->i_state & I_NEW) { + /* the inode and parent dir are two different roots */ + if (sub_root != root) { + igrab(inode); + sub_root->inode = inode; + } + BTRFS_I(inode)->root = sub_root; + memcpy(&BTRFS_I(inode)->location, &location, + sizeof(location)); + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + } + } + return d_splice_alias(inode, dentry); +} + +/* + * readahead one full node of leaves as long as their keys include + * the objectid supplied + */ +static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path, + u64 objectid) +{ + struct btrfs_node *node; + int i; + u32 nritems; + u64 item_objectid; + u64 blocknr; + int slot; + int ret; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1]; + nritems = btrfs_header_nritems(&node->header); + for (i = slot + 1; i < nritems; i++) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid != objectid) + break; + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} +static unsigned char btrfs_filetype_table[] = { + DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK +}; + +static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_item *item; + struct btrfs_dir_item *di; + struct btrfs_key key; + struct btrfs_path *path; + int ret; + u32 nritems; + struct btrfs_leaf *leaf; + int slot; + int advance; + unsigned char d_type; + int over = 0; + u32 di_cur; + u32 di_total; + u32 di_len; + int key_type = BTRFS_DIR_INDEX_KEY; + + /* FIXME, use a real flag for deciding about the key type */ + if (root->fs_info->tree_root == root) + key_type = BTRFS_DIR_ITEM_KEY; + mutex_lock(&root->fs_info->fs_mutex); + key.objectid = inode->i_ino; + key.flags = 0; + btrfs_set_key_type(&key, key_type); + key.offset = filp->f_pos; + path = btrfs_alloc_path(); + btrfs_init_path(path); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto err; + advance = 0; + reada_leaves(root, path, inode->i_ino); + while(1) { + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path->slots[0]; + if (advance || slot >= nritems) { + if (slot >= nritems -1) { + reada_leaves(root, path, inode->i_ino); + ret = btrfs_next_leaf(root, path); + if (ret) + break; + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path->slots[0]; + } else { + slot++; + path->slots[0]++; + } + } + advance = 1; + item = leaf->items + slot; + if (btrfs_disk_key_objectid(&item->key) != key.objectid) + break; + if (btrfs_disk_key_type(&item->key) != key_type) + break; + if (btrfs_disk_key_offset(&item->key) < filp->f_pos) + continue; + filp->f_pos = btrfs_disk_key_offset(&item->key); + advance = 1; + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); + di_cur = 0; + di_total = btrfs_item_size(leaf->items + slot); + while(di_cur < di_total) { + d_type = btrfs_filetype_table[btrfs_dir_type(di)]; + over = filldir(dirent, (const char *)(di + 1), + btrfs_dir_name_len(di), + btrfs_disk_key_offset(&item->key), + btrfs_disk_key_objectid(&di->location), + d_type); + if (over) + goto nopos; + di_len = btrfs_dir_name_len(di) + sizeof(*di); + di_cur += di_len; + di = (struct btrfs_dir_item *)((char *)di + di_len); + } + } + filp->f_pos++; +nopos: + ret = 0; +err: + btrfs_release_path(root, path); + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +int btrfs_write_inode(struct inode *inode, int wait) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + int ret = 0; + + if (wait) { + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + ret = btrfs_commit_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + } + return ret; +} + +/* + * This is somewhat expense, updating the tree every time the + * inode changes. But, it is most likely to find the inode in cache. + * FIXME, needs more benchmarking...there are no reasons other than performance + * to keep or drop this code. + */ +void btrfs_dirty_inode(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + btrfs_update_inode(trans, root, inode); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); +} + +static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, + struct btrfs_block_group_cache *group, + int mode) +{ + struct inode *inode; + struct btrfs_inode_item inode_item; + struct btrfs_key *location; + int ret; + int owner; + + inode = new_inode(root->fs_info->sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + BTRFS_I(inode)->root = root; + if (mode & S_IFDIR) + owner = 0; + else + owner = 1; + group = btrfs_find_block_group(root, group, 0, 0, owner); + BTRFS_I(inode)->block_group = group; + + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_mode = mode; + inode->i_ino = objectid; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + fill_inode_item(&inode_item, inode); + location = &BTRFS_I(inode)->location; + location->objectid = objectid; + location->flags = 0; + location->offset = 0; + btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); + + ret = btrfs_insert_inode(trans, root, objectid, &inode_item); + BUG_ON(ret); + + insert_inode_hash(inode); + return inode; +} + +static inline u8 btrfs_inode_type(struct inode *inode) +{ + return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; +} + +static int btrfs_add_link(struct btrfs_trans_handle *trans, + struct dentry *dentry, struct inode *inode) +{ + int ret; + struct btrfs_key key; + struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; + key.objectid = inode->i_ino; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + + ret = btrfs_insert_dir_item(trans, root, + dentry->d_name.name, dentry->d_name.len, + dentry->d_parent->d_inode->i_ino, + &key, btrfs_inode_type(inode)); + if (ret == 0) { + dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; + ret = btrfs_update_inode(trans, root, + dentry->d_parent->d_inode); + } + return ret; +} + +static int btrfs_add_nondir(struct btrfs_trans_handle *trans, + struct dentry *dentry, struct inode *inode) +{ + int err = btrfs_add_link(trans, dentry, inode); + if (!err) { + d_instantiate(dentry, inode); + return 0; + } + if (err > 0) + err = -EEXIST; + return err; +} + +static int btrfs_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct inode *inode; + int err; + int drop_inode = 0; + u64 objectid; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_unlock; + + btrfs_set_trans_block_group(trans, inode); + err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; + else { + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + } + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); +out_unlock: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } + btrfs_btree_balance_dirty(root); + return err; +} + +static int btrfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct inode *inode = old_dentry->d_inode; + int err; + int drop_inode = 0; + + if (inode->i_nlink == 0) + return -ENOENT; + + inc_nlink(inode); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + atomic_inc(&inode->i_count); + err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, dir); + btrfs_update_inode(trans, root, inode); + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } + btrfs_btree_balance_dirty(root); + return err; +} + +static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 dirid) +{ + int ret; + char buf[2]; + struct btrfs_key key; + + buf[0] = '.'; + buf[1] = '.'; + + key.objectid = objectid; + key.offset = 0; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + + ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, + &key, BTRFS_FT_DIR); + if (ret) + goto error; + key.objectid = dirid; + ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, + &key, BTRFS_FT_DIR); + if (ret) + goto error; +error: + return ret; +} + +static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct inode *inode; + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + int err = 0; + int drop_on_err = 0; + u64 objectid; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out_unlock; + } + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, S_IFDIR | mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_fail; + } + drop_on_err = 1; + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; + btrfs_set_trans_block_group(trans, inode); + + err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino); + if (err) + goto out_fail; + + inode->i_size = 6; + err = btrfs_update_inode(trans, root, inode); + if (err) + goto out_fail; + err = btrfs_add_link(trans, dentry, inode); + if (err) + goto out_fail; + d_instantiate(dentry, inode); + drop_on_err = 0; + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); + +out_fail: + btrfs_end_transaction(trans, root); +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); + if (drop_on_err) + iput(inode); + btrfs_btree_balance_dirty(root); + return err; +} + +/* + * FIBMAP and others want to pass in a fake buffer head. They need to + * use BTRFS_GET_BLOCK_NO_DIRECT to make sure we don't try to memcpy + * any packed file data into the fake bh + */ +#define BTRFS_GET_BLOCK_NO_CREATE 0 +#define BTRFS_GET_BLOCK_CREATE 1 +#define BTRFS_GET_BLOCK_NO_DIRECT 2 + +/* + * FIXME create==1 doe not work. + */ +static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + int ret; + int err = 0; + u64 blocknr; + u64 extent_start = 0; + u64 extent_end = 0; + u64 objectid = inode->i_ino; + u32 found_type; + u64 alloc_hint = 0; + struct btrfs_path *path; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_file_extent_item *item; + struct btrfs_leaf *leaf; + struct btrfs_disk_key *found_key; + struct btrfs_trans_handle *trans = NULL; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + if (create & BTRFS_GET_BLOCK_CREATE) { + WARN_ON(1); + /* this almost but not quite works */ + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + goto out; + } + ret = btrfs_drop_extents(trans, root, inode, + iblock << inode->i_blkbits, + (iblock + 1) << inode->i_blkbits, + &alloc_hint); + BUG_ON(ret); + } + + ret = btrfs_lookup_file_extent(NULL, root, path, + inode->i_ino, + iblock << inode->i_blkbits, 0); + if (ret < 0) { + err = ret; + goto out; + } + + if (ret != 0) { + if (path->slots[0] == 0) { + btrfs_release_path(root, path); + goto not_found; + } + path->slots[0]--; + } + + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + struct btrfs_file_extent_item); + leaf = btrfs_buffer_leaf(path->nodes[0]); + blocknr = btrfs_file_extent_disk_blocknr(item); + blocknr += btrfs_file_extent_offset(item); + + /* are we inside the extent that was found? */ + found_key = &leaf->items[path->slots[0]].key; + found_type = btrfs_disk_key_type(found_key); + if (btrfs_disk_key_objectid(found_key) != objectid || + found_type != BTRFS_EXTENT_DATA_KEY) { + extent_end = 0; + extent_start = 0; + goto not_found; + } + found_type = btrfs_file_extent_type(item); + extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_start = extent_start >> inode->i_blkbits; + extent_end = extent_start + btrfs_file_extent_num_blocks(item); + err = 0; + if (btrfs_file_extent_disk_blocknr(item) == 0) + goto out; + if (iblock >= extent_start && iblock < extent_end) { + btrfs_map_bh_to_logical(root, result, blocknr + + iblock - extent_start); + goto out; + } + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + char *ptr; + char *map; + u32 size; + + if (create & BTRFS_GET_BLOCK_NO_DIRECT) { + err = -EINVAL; + goto out; + } + size = btrfs_file_extent_inline_len(leaf->items + + path->slots[0]); + extent_end = (extent_start + size) >> inode->i_blkbits; + extent_start >>= inode->i_blkbits; + if (iblock < extent_start || iblock > extent_end) { + goto not_found; + } + ptr = btrfs_file_extent_inline_start(item); + map = kmap(result->b_page); + memcpy(map, ptr, size); + memset(map + size, 0, PAGE_CACHE_SIZE - size); + flush_dcache_page(result->b_page); + kunmap(result->b_page); + set_buffer_uptodate(result); + SetPageChecked(result->b_page); + btrfs_map_bh_to_logical(root, result, 0); + } +not_found: + if (create & BTRFS_GET_BLOCK_CREATE) { + struct btrfs_key ins; + ret = btrfs_alloc_extent(trans, root, inode->i_ino, + 1, alloc_hint, (u64)-1, + &ins, 1); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + iblock << inode->i_blkbits, + ins.objectid, ins.offset, + ins.offset); + BUG_ON(ret); + SetPageChecked(result->b_page); + btrfs_map_bh_to_logical(root, result, ins.objectid); + } +out: + if (trans) + err = btrfs_end_transaction(trans, root); + btrfs_free_path(path); + return err; +} + +int btrfs_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + int err; + struct btrfs_root *root = BTRFS_I(inode)->root; + mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_get_block_lock(inode, iblock, result, create); + mutex_unlock(&root->fs_info->fs_mutex); + return err; +} + +static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + mutex_lock(&root->fs_info->fs_mutex); + btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + +static sector_t btrfs_bmap(struct address_space *as, sector_t block) +{ + return generic_block_bmap(as, block, btrfs_get_block_bmap); +} + +static int btrfs_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return block_prepare_write(page, from, to, btrfs_get_block); +} + +static int btrfs_readpage(struct file *file, struct page *page) +{ + return mpage_readpage(page, btrfs_get_block); +} + +/* + * Aside from a tiny bit of packed file data handling, this is the + * same as the generic code. + * + * While block_write_full_page is writing back the dirty buffers under + * the page lock, whoever dirtied the buffers may decide to clean them + * again at any time. We handle that by only looking at the buffer + * state inside lock_buffer(). + * + * If block_write_full_page() is called for regular writeback + * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a + * locked buffer. This only can happen if someone has written the buffer + * directly, with submit_bh(). At the address_space level PageWriteback + * prevents this contention from occurring. + */ +static int __btrfs_write_full_page(struct inode *inode, struct page *page, + struct writeback_control *wbc) +{ + int err; + sector_t block; + sector_t last_block; + struct buffer_head *bh, *head; + const unsigned blocksize = 1 << inode->i_blkbits; + int nr_underway = 0; + + BUG_ON(!PageLocked(page)); + + last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; + + if (!page_has_buffers(page)) { + create_empty_buffers(page, blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + + /* + * Be very careful. We have no exclusion from __set_page_dirty_buffers + * here, and the (potentially unmapped) buffers may become dirty at + * any time. If a buffer becomes dirty here after we've inspected it + * then we just miss that fact, and the page stays dirty. + * + * Buffers outside i_size may be dirtied by __set_page_dirty_buffers; + * handle that here by just cleaning them. + */ + + block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + head = page_buffers(page); + bh = head; + + /* + * Get all the dirty buffers mapped to disk addresses and + * handle any aliases from the underlying blockdev's mapping. + */ + do { + if (block > last_block) { + /* + * mapped buffers outside i_size will occur, because + * this page can be outside i_size when there is a + * truncate in progress. + */ + /* + * The buffer was zeroed by block_write_full_page() + */ + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); + } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { + WARN_ON(bh->b_size != blocksize); + err = btrfs_get_block(inode, block, bh, 0); + if (err) { + goto recover; + } + if (buffer_new(bh)) { + /* blockdev mappings never come here */ + clear_buffer_new(bh); + } + } + bh = bh->b_this_page; + block++; + } while (bh != head); + + do { + if (!buffer_mapped(bh)) + continue; + /* + * If it's a fully non-blocking write attempt and we cannot + * lock the buffer then redirty the page. Note that this can + * potentially cause a busy-wait loop from pdflush and kswapd + * activity, but those code paths have their own higher-level + * throttling. + */ + if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { + lock_buffer(bh); + } else if (test_set_buffer_locked(bh)) { + redirty_page_for_writepage(wbc, page); + continue; + } + if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) { + mark_buffer_async_write(bh); + } else { + unlock_buffer(bh); + } + } while ((bh = bh->b_this_page) != head); + + /* + * The page and its buffers are protected by PageWriteback(), so we can + * drop the bh refcounts early. + */ + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + submit_bh(WRITE, bh); + nr_underway++; + } + bh = next; + } while (bh != head); + unlock_page(page); + + err = 0; +done: + if (nr_underway == 0) { + /* + * The page was marked dirty, but the buffers were + * clean. Someone wrote them back by hand with + * ll_rw_block/submit_bh. A rare case. + */ + int uptodate = 1; + do { + if (!buffer_uptodate(bh)) { + uptodate = 0; + break; + } + bh = bh->b_this_page; + } while (bh != head); + if (uptodate) + SetPageUptodate(page); + end_page_writeback(page); + } + return err; + +recover: + /* + * ENOSPC, or some other error. We may already have added some + * blocks to the file, so we need to write these out to avoid + * exposing stale data. + * The page is currently locked and not marked for writeback + */ + bh = head; + /* Recovery: lock and submit the mapped buffers */ + do { + if (buffer_mapped(bh) && buffer_dirty(bh)) { + lock_buffer(bh); + mark_buffer_async_write(bh); + } else { + /* + * The buffer may have been set dirty during + * attachment to a dirty page. + */ + clear_buffer_dirty(bh); + } + } while ((bh = bh->b_this_page) != head); + SetPageError(page); + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + clear_buffer_dirty(bh); + submit_bh(WRITE, bh); + nr_underway++; + } + bh = next; + } while (bh != head); + unlock_page(page); + goto done; +} + +static int btrfs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode * const inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned offset; + void *kaddr; + + /* Is the page fully inside i_size? */ + if (page->index < end_index) + return __btrfs_write_full_page(inode, page, wbc); + + /* Is the page fully outside i_size? (truncate in progress) */ + offset = i_size & (PAGE_CACHE_SIZE-1); + if (page->index >= end_index+1 || !offset) { + /* + * The page may have dirty, unmapped buffers. For example, + * they may have been added in ext3_writepage(). Make them + * freeable here, so the page does not leak. + */ + block_invalidatepage(page, 0); + unlock_page(page); + return 0; /* don't care */ + } + + /* + * The page straddles i_size. It must be zeroed out on each and every + * writepage invokation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." + */ + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + return __btrfs_write_full_page(inode, page, wbc); +} + +static void btrfs_truncate(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + struct btrfs_trans_handle *trans; + + if (!S_ISREG(inode->i_mode)) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + + btrfs_truncate_page(inode->i_mapping, inode->i_size); + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + /* FIXME, add redo link to tree so we don't leak on crash */ + ret = btrfs_truncate_in_trans(trans, root, inode); + BUG_ON(ret); + btrfs_update_inode(trans, root, inode); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); +} + +int btrfs_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + struct buffer_head *bh; + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + SetPageUptodate(page); + bh = page_buffers(page); + set_buffer_uptodate(bh); + if (buffer_mapped(bh) && bh->b_blocknr != 0) { + set_page_dirty(page); + } + if (pos > inode->i_size) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } + return 0; +} + +static int create_subvol(struct btrfs_root *root, char *name, int namelen) +{ + struct btrfs_trans_handle *trans; + struct btrfs_key key; + struct btrfs_root_item root_item; + struct btrfs_inode_item *inode_item; + struct buffer_head *subvol; + struct btrfs_leaf *leaf; + struct btrfs_root *new_root; + struct inode *inode; + struct inode *dir; + int ret; + u64 objectid; + u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + subvol = btrfs_alloc_free_block(trans, root, 0); + if (subvol == NULL) + return -ENOSPC; + leaf = btrfs_buffer_leaf(subvol); + btrfs_set_header_nritems(&leaf->header, 0); + btrfs_set_header_level(&leaf->header, 0); + btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); + btrfs_set_header_generation(&leaf->header, trans->transid); + btrfs_set_header_owner(&leaf->header, root->root_key.objectid); + memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, + sizeof(leaf->header.fsid)); + mark_buffer_dirty(subvol); + + inode_item = &root_item.inode; + memset(inode_item, 0, sizeof(*inode_item)); + btrfs_set_inode_generation(inode_item, 1); + btrfs_set_inode_size(inode_item, 3); + btrfs_set_inode_nlink(inode_item, 1); + btrfs_set_inode_nblocks(inode_item, 1); + btrfs_set_inode_mode(inode_item, S_IFDIR | 0755); + + btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); + btrfs_set_root_refs(&root_item, 1); + brelse(subvol); + subvol = NULL; + + ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, + 0, &objectid); + BUG_ON(ret); + + btrfs_set_root_dirid(&root_item, new_dirid); + + key.objectid = objectid; + key.offset = 1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, + &root_item); + BUG_ON(ret); + + /* + * insert the directory item + */ + key.offset = (u64)-1; + dir = root->fs_info->sb->s_root->d_inode; + ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, + name, namelen, dir->i_ino, &key, + BTRFS_FT_DIR); + BUG_ON(ret); + + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + + new_root = btrfs_read_fs_root(root->fs_info, &key); + BUG_ON(!new_root); + + trans = btrfs_start_transaction(new_root, 1); + BUG_ON(!trans); + + inode = btrfs_new_inode(trans, new_root, new_dirid, + BTRFS_I(dir)->block_group, S_IFDIR | 0700); + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; + + ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid); + BUG_ON(ret); + + inode->i_nlink = 1; + inode->i_size = 6; + ret = btrfs_update_inode(trans, new_root, inode); + BUG_ON(ret); + + ret = btrfs_commit_transaction(trans, new_root); + BUG_ON(ret); + + iput(inode); + + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); + return 0; +} + +static int create_snapshot(struct btrfs_root *root, char *name, int namelen) +{ + struct btrfs_trans_handle *trans; + struct btrfs_key key; + struct btrfs_root_item new_root_item; + int ret; + u64 objectid; + + if (!root->ref_cows) + return -EINVAL; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + ret = btrfs_update_inode(trans, root, root->inode); + BUG_ON(ret); + + ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, + 0, &objectid); + BUG_ON(ret); + + memcpy(&new_root_item, &root->root_item, + sizeof(new_root_item)); + + key.objectid = objectid; + key.offset = 1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node)); + + ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, + &new_root_item); + BUG_ON(ret); + + /* + * insert the directory item + */ + key.offset = (u64)-1; + ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, + name, namelen, + root->fs_info->sb->s_root->d_inode->i_ino, + &key, BTRFS_FT_DIR); + + BUG_ON(ret); + + ret = btrfs_inc_root_ref(trans, root); + BUG_ON(ret); + + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); + return 0; +} + +int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int + cmd, unsigned long arg) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_ioctl_vol_args vol_args; + int ret = 0; + struct btrfs_dir_item *di; + int namelen; + struct btrfs_path *path; + u64 root_dirid; + + switch (cmd) { + case BTRFS_IOC_SNAP_CREATE: + if (copy_from_user(&vol_args, + (struct btrfs_ioctl_vol_args __user *)arg, + sizeof(vol_args))) + return -EFAULT; + namelen = strlen(vol_args.name); + if (namelen > BTRFS_VOL_NAME_MAX) + return -EINVAL; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, + mutex_lock(&root->fs_info->fs_mutex); + di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, + path, root_dirid, + vol_args.name, namelen, 0); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_free_path(path); + if (di && !IS_ERR(di)) + return -EEXIST; + + if (root == root->fs_info->tree_root) + ret = create_subvol(root, vol_args.name, namelen); + else + ret = create_snapshot(root, vol_args.name, namelen); + WARN_ON(ret); + break; + default: + return -ENOTTY; + } + return ret; +} + +#ifdef CONFIG_COMPAT +long btrfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct inode *inode = file->f_path.dentry->d_inode; + int ret; + lock_kernel(); + ret = btrfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; + +} +#endif + +/* + * Called inside transaction, so use GFP_NOFS + */ +struct inode *btrfs_alloc_inode(struct super_block *sb) +{ + struct btrfs_inode *ei; + + ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); + if (!ei) + return NULL; + return &ei->vfs_inode; +} + +void btrfs_destroy_inode(struct inode *inode) +{ + WARN_ON(!list_empty(&inode->i_dentry)); + WARN_ON(inode->i_data.nrpages); + + kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); +} + +static void init_once(void * foo, struct kmem_cache * cachep, + unsigned long flags) +{ + struct btrfs_inode *ei = (struct btrfs_inode *) foo; + + inode_init_once(&ei->vfs_inode); +} + +void btrfs_destroy_cachep(void) +{ + if (btrfs_inode_cachep) + kmem_cache_destroy(btrfs_inode_cachep); + if (btrfs_trans_handle_cachep) + kmem_cache_destroy(btrfs_trans_handle_cachep); + if (btrfs_transaction_cachep) + kmem_cache_destroy(btrfs_transaction_cachep); + if (btrfs_bit_radix_cachep) + kmem_cache_destroy(btrfs_bit_radix_cachep); + if (btrfs_path_cachep) + kmem_cache_destroy(btrfs_path_cachep); +} + +int btrfs_init_cachep(void) +{ + btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", + sizeof(struct btrfs_inode), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once, NULL); + if (!btrfs_inode_cachep) + goto fail; + btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", + sizeof(struct btrfs_trans_handle), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + if (!btrfs_trans_handle_cachep) + goto fail; + btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", + sizeof(struct btrfs_transaction), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + if (!btrfs_transaction_cachep) + goto fail; + btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", + sizeof(struct btrfs_transaction), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + if (!btrfs_path_cachep) + goto fail; + btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix", + 256, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD | + SLAB_DESTROY_BY_RCU), + NULL, NULL); + if (!btrfs_bit_radix_cachep) + goto fail; + return 0; +fail: + btrfs_destroy_cachep(); + return -ENOMEM; +} + +static int btrfs_getattr(struct vfsmount *mnt, + struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + generic_fillattr(inode, stat); + stat->blksize = 256 * 1024; + return 0; +} + +static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, + struct inode * new_dir,struct dentry *new_dentry) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(old_dir)->root; + struct inode *new_inode = new_dentry->d_inode; + struct inode *old_inode = old_dentry->d_inode; + struct timespec ctime = CURRENT_TIME; + struct btrfs_path *path; + struct btrfs_dir_item *di; + int ret; + + if (S_ISDIR(old_inode->i_mode) && new_inode && + new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { + return -ENOTEMPTY; + } + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, new_dir); + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out_fail; + } + + old_dentry->d_inode->i_nlink++; + old_dir->i_ctime = old_dir->i_mtime = ctime; + new_dir->i_ctime = new_dir->i_mtime = ctime; + old_inode->i_ctime = ctime; + if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { + struct btrfs_key *location = &BTRFS_I(new_dir)->location; + u64 old_parent_oid; + di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino, + "..", 2, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out_fail; + } + if (!di) { + ret = -ENOENT; + goto out_fail; + } + old_parent_oid = btrfs_disk_key_objectid(&di->location); + ret = btrfs_del_item(trans, root, path); + if (ret) { + ret = -EIO; + goto out_fail; + } + btrfs_release_path(root, path); + + di = btrfs_lookup_dir_index_item(trans, root, path, + old_inode->i_ino, + old_parent_oid, + "..", 2, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out_fail; + } + if (!di) { + ret = -ENOENT; + goto out_fail; + } + ret = btrfs_del_item(trans, root, path); + if (ret) { + ret = -EIO; + goto out_fail; + } + btrfs_release_path(root, path); + + ret = btrfs_insert_dir_item(trans, root, "..", 2, + old_inode->i_ino, location, + BTRFS_FT_DIR); + if (ret) + goto out_fail; + } + + + ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry); + if (ret) + goto out_fail; + + if (new_inode) { + new_inode->i_ctime = CURRENT_TIME; + ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry); + if (ret) + goto out_fail; + if (S_ISDIR(new_inode->i_mode)) + clear_nlink(new_inode); + else + drop_nlink(new_inode); + btrfs_update_inode(trans, root, new_inode); + } + ret = btrfs_add_link(trans, new_dentry, old_inode); + if (ret) + goto out_fail; + +out_fail: + btrfs_free_path(path); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +static int btrfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_path *path; + struct btrfs_key key; + struct inode *inode; + int err; + int drop_inode = 0; + u64 objectid; + int name_len; + int datasize; + char *ptr; + struct btrfs_file_extent_item *ei; + + name_len = strlen(symname) + 1; + if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) + return -ENAMETOOLONG; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_unlock; + + btrfs_set_trans_block_group(trans, inode); + err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; + else { + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + } + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); + if (drop_inode) + goto out_unlock; + + path = btrfs_alloc_path(); + BUG_ON(!path); + key.objectid = inode->i_ino; + key.offset = 0; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + datasize = btrfs_file_extent_calc_inline_size(name_len); + err = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + BUG_ON(err); + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = btrfs_file_extent_inline_start(ei); + btrfs_memcpy(root, path->nodes[0]->b_data, + ptr, symname, name_len); + mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + inode->i_op = &btrfs_symlink_inode_operations; + inode->i_mapping->a_ops = &btrfs_symlink_aops; + inode->i_size = name_len - 1; + btrfs_update_inode(trans, root, inode); + err = 0; + +out_unlock: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } + btrfs_btree_balance_dirty(root); + return err; +} + +static struct inode_operations btrfs_dir_inode_operations = { + .lookup = btrfs_lookup, + .create = btrfs_create, + .unlink = btrfs_unlink, + .link = btrfs_link, + .mkdir = btrfs_mkdir, + .rmdir = btrfs_rmdir, + .rename = btrfs_rename, + .symlink = btrfs_symlink, + .setattr = btrfs_setattr, +}; + +static struct inode_operations btrfs_dir_ro_inode_operations = { + .lookup = btrfs_lookup, +}; + +static struct file_operations btrfs_dir_file_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = btrfs_readdir, + .ioctl = btrfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = btrfs_compat_ioctl, +#endif +}; + +static struct address_space_operations btrfs_aops = { + .readpage = btrfs_readpage, + .writepage = btrfs_writepage, + .sync_page = block_sync_page, + .prepare_write = btrfs_prepare_write, + .commit_write = btrfs_commit_write, + .bmap = btrfs_bmap, +}; + +static struct address_space_operations btrfs_symlink_aops = { + .readpage = btrfs_readpage, + .writepage = btrfs_writepage, +}; + +static struct inode_operations btrfs_file_inode_operations = { + .truncate = btrfs_truncate, + .getattr = btrfs_getattr, + .setattr = btrfs_setattr, +}; + +static struct inode_operations btrfs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, +}; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9749090cfbb8..94b3535a2a11 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -20,873 +20,9 @@ #include "ioctl.h" #include "print-tree.h" -struct btrfs_iget_args { - u64 ino; - struct btrfs_root *root; -}; - #define BTRFS_SUPER_MAGIC 0x9123682E -static struct inode_operations btrfs_dir_inode_operations; -static struct inode_operations btrfs_symlink_inode_operations; -static struct inode_operations btrfs_dir_ro_inode_operations; static struct super_operations btrfs_super_ops; -static struct file_operations btrfs_dir_file_operations; -static struct inode_operations btrfs_file_inode_operations; -static struct address_space_operations btrfs_aops; -static struct address_space_operations btrfs_symlink_aops; -static struct file_operations btrfs_file_operations; - -static int drop_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u64 start, u64 end, u64 *hint_block); -static int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create); - - -#define S_SHIFT 12 -static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, - [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK, - [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, -}; - -static void btrfs_read_locked_inode(struct inode *inode) -{ - struct btrfs_path *path; - struct btrfs_inode_item *inode_item; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_key location; - u64 alloc_group_block; - int ret; - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - mutex_lock(&root->fs_info->fs_mutex); - - memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); - ret = btrfs_lookup_inode(NULL, root, path, &location, 0); - if (ret) { - btrfs_free_path(path); - goto make_bad; - } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_inode_item); - - inode->i_mode = btrfs_inode_mode(inode_item); - inode->i_nlink = btrfs_inode_nlink(inode_item); - inode->i_uid = btrfs_inode_uid(inode_item); - inode->i_gid = btrfs_inode_gid(inode_item); - inode->i_size = btrfs_inode_size(inode_item); - inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime); - inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime); - inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime); - inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime); - inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime); - inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); - inode->i_blocks = btrfs_inode_nblocks(inode_item); - inode->i_generation = btrfs_inode_generation(inode_item); - alloc_group_block = btrfs_inode_block_group(inode_item); - BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, - alloc_group_block); - - btrfs_free_path(path); - inode_item = NULL; - - mutex_unlock(&root->fs_info->fs_mutex); - - switch (inode->i_mode & S_IFMT) { -#if 0 - default: - init_special_inode(inode, inode->i_mode, - btrfs_inode_rdev(inode_item)); - break; -#endif - case S_IFREG: - inode->i_mapping->a_ops = &btrfs_aops; - inode->i_fop = &btrfs_file_operations; - inode->i_op = &btrfs_file_inode_operations; - break; - case S_IFDIR: - inode->i_fop = &btrfs_dir_file_operations; - if (root == root->fs_info->tree_root) - inode->i_op = &btrfs_dir_ro_inode_operations; - else - inode->i_op = &btrfs_dir_inode_operations; - break; - case S_IFLNK: - inode->i_op = &btrfs_symlink_inode_operations; - inode->i_mapping->a_ops = &btrfs_symlink_aops; - break; - } - return; - -make_bad: - btrfs_release_path(root, path); - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - make_bad_inode(inode); -} - -static void fill_inode_item(struct btrfs_inode_item *item, - struct inode *inode) -{ - btrfs_set_inode_uid(item, inode->i_uid); - btrfs_set_inode_gid(item, inode->i_gid); - btrfs_set_inode_size(item, inode->i_size); - btrfs_set_inode_mode(item, inode->i_mode); - btrfs_set_inode_nlink(item, inode->i_nlink); - btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); - btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); - btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); - btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); - btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); - btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); - btrfs_set_inode_nblocks(item, inode->i_blocks); - btrfs_set_inode_generation(item, inode->i_generation); - btrfs_set_inode_block_group(item, - BTRFS_I(inode)->block_group->key.objectid); -} - -static int btrfs_update_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode) -{ - struct btrfs_inode_item *inode_item; - struct btrfs_path *path; - int ret; - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, path, - &BTRFS_I(inode)->location, 1); - if (ret) { - if (ret > 0) - ret = -ENOENT; - goto failed; - } - - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_inode_item); - - fill_inode_item(inode_item, inode); - btrfs_mark_buffer_dirty(path->nodes[0]); - ret = 0; -failed: - btrfs_release_path(root, path); - btrfs_free_path(path); - return ret; -} - - -static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *dir, - struct dentry *dentry) -{ - struct btrfs_path *path; - const char *name = dentry->d_name.name; - int name_len = dentry->d_name.len; - int ret = 0; - u64 objectid; - struct btrfs_dir_item *di; - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, - name, name_len, -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto err; - } - if (!di) { - ret = -ENOENT; - goto err; - } - objectid = btrfs_disk_key_objectid(&di->location); - ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); - btrfs_release_path(root, path); - - di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, - objectid, name, name_len, -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto err; - } - if (!di) { - ret = -ENOENT; - goto err; - } - ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); - - dentry->d_inode->i_ctime = dir->i_ctime; -err: - btrfs_free_path(path); - if (!ret) { - dir->i_size -= name_len * 2; - btrfs_update_inode(trans, root, dir); - drop_nlink(dentry->d_inode); - btrfs_update_inode(trans, root, dentry->d_inode); - dir->i_sb->s_dirt = 1; - } - return ret; -} - -static int btrfs_unlink(struct inode *dir, struct dentry *dentry) -{ - struct btrfs_root *root; - struct btrfs_trans_handle *trans; - int ret; - - root = BTRFS_I(dir)->root; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - ret = btrfs_unlink_trans(trans, root, dir, dentry); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); - return ret; -} - -static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - int err; - int ret; - struct btrfs_root *root = BTRFS_I(dir)->root; - struct btrfs_path *path; - struct btrfs_key key; - struct btrfs_trans_handle *trans; - struct btrfs_key found_key; - int found_type; - struct btrfs_leaf *leaf; - char *goodnames = ".."; - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - key.objectid = inode->i_ino; - key.offset = (u64)-1; - key.flags = (u32)-1; - while(1) { - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { - err = ret; - goto out; - } - BUG_ON(ret == 0); - if (path->slots[0] == 0) { - err = -ENOENT; - goto out; - } - path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); - found_type = btrfs_key_type(&found_key); - if (found_key.objectid != inode->i_ino) { - err = -ENOENT; - goto out; - } - if ((found_type != BTRFS_DIR_ITEM_KEY && - found_type != BTRFS_DIR_INDEX_KEY) || - (!btrfs_match_dir_item_name(root, path, goodnames, 2) && - !btrfs_match_dir_item_name(root, path, goodnames, 1))) { - err = -ENOTEMPTY; - goto out; - } - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); - - if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1) - break; - btrfs_release_path(root, path); - } - ret = 0; - btrfs_release_path(root, path); - - /* now the directory is empty */ - err = btrfs_unlink_trans(trans, root, dir, dentry); - if (!err) { - inode->i_size = 0; - } -out: - btrfs_release_path(root, path); - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - ret = btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root); - if (ret && !err) - err = ret; - return err; -} - -static int btrfs_free_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode) -{ - struct btrfs_path *path; - int ret; - - clear_inode(inode); - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, path, - &BTRFS_I(inode)->location, -1); - BUG_ON(ret); - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); - btrfs_free_path(path); - return ret; -} - -static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path, - u64 objectid) -{ - struct btrfs_node *node; - int i; - int nritems; - u64 item_objectid; - u64 blocknr; - int slot; - int ret; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1]; - if (slot == 0) - return; - nritems = btrfs_header_nritems(&node->header); - for (i = slot - 1; i >= 0; i--) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid != objectid) - break; - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); - if (ret) - break; - } -} - -static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode) -{ - int ret; - struct btrfs_path *path; - struct btrfs_key key; - struct btrfs_disk_key *found_key; - u32 found_type; - struct btrfs_leaf *leaf; - struct btrfs_file_extent_item *fi; - u64 extent_start = 0; - u64 extent_num_blocks = 0; - u64 item_end = 0; - int found_extent; - int del_item; - - path = btrfs_alloc_path(); - BUG_ON(!path); - /* FIXME, add redo link to tree so we don't leak on crash */ - key.objectid = inode->i_ino; - key.offset = (u64)-1; - key.flags = (u32)-1; - while(1) { - btrfs_init_path(path); - fi = NULL; - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { - goto error; - } - if (ret > 0) { - BUG_ON(path->slots[0] == 0); - path->slots[0]--; - } - reada_truncate(root, path, inode->i_ino); - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); - if (btrfs_disk_key_objectid(found_key) != inode->i_ino) - break; - if (found_type != BTRFS_CSUM_ITEM_KEY && - found_type != BTRFS_DIR_ITEM_KEY && - found_type != BTRFS_DIR_INDEX_KEY && - found_type != BTRFS_EXTENT_DATA_KEY) - break; - item_end = btrfs_disk_key_offset(found_key); - if (found_type == BTRFS_EXTENT_DATA_KEY) { - fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) != - BTRFS_FILE_EXTENT_INLINE) { - item_end += btrfs_file_extent_num_blocks(fi) << - inode->i_blkbits; - } - } - if (found_type == BTRFS_CSUM_ITEM_KEY) { - ret = btrfs_csum_truncate(trans, root, path, - inode->i_size); - BUG_ON(ret); - } - if (item_end < inode->i_size) { - if (found_type) { - btrfs_set_key_type(&key, found_type - 1); - continue; - } - break; - } - if (btrfs_disk_key_offset(found_key) >= inode->i_size) - del_item = 1; - else - del_item = 0; - found_extent = 0; - - if (found_type == BTRFS_EXTENT_DATA_KEY && - btrfs_file_extent_type(fi) != - BTRFS_FILE_EXTENT_INLINE) { - u64 num_dec; - if (!del_item) { - u64 orig_num_blocks = - btrfs_file_extent_num_blocks(fi); - extent_num_blocks = inode->i_size - - btrfs_disk_key_offset(found_key) + - root->blocksize - 1; - extent_num_blocks >>= inode->i_blkbits; - btrfs_set_file_extent_num_blocks(fi, - extent_num_blocks); - inode->i_blocks -= (orig_num_blocks - - extent_num_blocks) << 3; - mark_buffer_dirty(path->nodes[0]); - } else { - extent_start = - btrfs_file_extent_disk_blocknr(fi); - extent_num_blocks = - btrfs_file_extent_disk_num_blocks(fi); - /* FIXME blocksize != 4096 */ - num_dec = btrfs_file_extent_num_blocks(fi) << 3; - if (extent_start != 0) { - found_extent = 1; - inode->i_blocks -= num_dec; - } - } - } - if (del_item) { - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); - } else { - break; - } - btrfs_release_path(root, path); - if (found_extent) { - ret = btrfs_free_extent(trans, root, extent_start, - extent_num_blocks, 0); - BUG_ON(ret); - } - } - ret = 0; -error: - btrfs_release_path(root, path); - btrfs_free_path(path); - inode->i_sb->s_dirt = 1; - return ret; -} - -static int btrfs_truncate_page(struct address_space *mapping, loff_t from) -{ - struct inode *inode = mapping->host; - unsigned blocksize = 1 << inode->i_blkbits; - pgoff_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); - struct page *page; - char *kaddr; - int ret = 0; - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 alloc_hint; - struct btrfs_key ins; - struct btrfs_trans_handle *trans; - - if ((offset & (blocksize - 1)) == 0) - goto out; - - ret = -ENOMEM; - page = grab_cache_page(mapping, index); - if (!page) - goto out; - - if (!PageUptodate(page)) { - ret = mpage_readpage(page, btrfs_get_block); - lock_page(page); - if (!PageUptodate(page)) { - ret = -EIO; - goto out; - } - } - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - - ret = drop_extents(trans, root, inode, page->index << PAGE_CACHE_SHIFT, - (page->index + 1) << PAGE_CACHE_SHIFT, &alloc_hint); - BUG_ON(ret); - ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, - alloc_hint, (u64)-1, &ins, 1); - BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - page->index << PAGE_CACHE_SHIFT, - ins.objectid, 1, 1); - BUG_ON(ret); - SetPageChecked(page); - kaddr = kmap(page); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - btrfs_csum_file_block(trans, root, inode->i_ino, - page->index << PAGE_CACHE_SHIFT, - kaddr, PAGE_CACHE_SIZE); - kunmap(page); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); -out: - return ret; -} - -static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) -{ - struct inode *inode = dentry->d_inode; - int err; - - err = inode_change_ok(inode, attr); - if (err) - return err; - - if (S_ISREG(inode->i_mode) && - attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 mask = root->blocksize - 1; - u64 pos = (inode->i_size + mask) & ~mask; - u64 hole_size; - - if (attr->ia_size <= pos) - goto out; - - btrfs_truncate_page(inode->i_mapping, inode->i_size); - - hole_size = (attr->ia_size - pos + mask) & ~mask; - hole_size >>= inode->i_blkbits; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - err = btrfs_insert_file_extent(trans, root, inode->i_ino, - pos, 0, 0, hole_size); - BUG_ON(err); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - } -out: - err = inode_setattr(inode, attr); - - return err; -} -static void btrfs_delete_inode(struct inode *inode) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - - truncate_inode_pages(&inode->i_data, 0); - if (is_bad_inode(inode)) { - goto no_delete; - } - inode->i_size = 0; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - ret = btrfs_truncate_in_trans(trans, root, inode); - BUG_ON(ret); - btrfs_free_inode(trans, root, inode); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); - return; -no_delete: - clear_inode(inode); -} - -static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, - struct btrfs_key *location) -{ - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; - struct btrfs_dir_item *di; - struct btrfs_path *path; - struct btrfs_root *root = BTRFS_I(dir)->root; - int ret; - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, - namelen, 0); - if (!di || IS_ERR(di)) { - location->objectid = 0; - ret = 0; - goto out; - } - btrfs_disk_key_to_cpu(location, &di->location); -out: - btrfs_release_path(root, path); - btrfs_free_path(path); - return ret; -} - -static int fixup_tree_root_location(struct btrfs_root *root, - struct btrfs_key *location, - struct btrfs_root **sub_root) -{ - struct btrfs_path *path; - struct btrfs_root_item *ri; - - if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) - return 0; - if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) - return 0; - - path = btrfs_alloc_path(); - BUG_ON(!path); - mutex_lock(&root->fs_info->fs_mutex); - - *sub_root = btrfs_read_fs_root(root->fs_info, location); - if (IS_ERR(*sub_root)) - return PTR_ERR(*sub_root); - - ri = &(*sub_root)->root_item; - location->objectid = btrfs_root_dirid(ri); - location->flags = 0; - btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - location->offset = 0; - - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - return 0; -} - -static int btrfs_init_locked_inode(struct inode *inode, void *p) -{ - struct btrfs_iget_args *args = p; - inode->i_ino = args->ino; - BTRFS_I(inode)->root = args->root; - return 0; -} - -static int btrfs_find_actor(struct inode *inode, void *opaque) -{ - struct btrfs_iget_args *args = opaque; - return (args->ino == inode->i_ino && - args->root == BTRFS_I(inode)->root); -} - -static struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, - struct btrfs_root *root) -{ - struct inode *inode; - struct btrfs_iget_args args; - args.ino = objectid; - args.root = root; - - inode = iget5_locked(s, objectid, btrfs_find_actor, - btrfs_init_locked_inode, - (void *)&args); - return inode; -} - -static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - struct inode * inode; - struct btrfs_inode *bi = BTRFS_I(dir); - struct btrfs_root *root = bi->root; - struct btrfs_root *sub_root = root; - struct btrfs_key location; - int ret; - - if (dentry->d_name.len > BTRFS_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_inode_by_name(dir, dentry, &location); - mutex_unlock(&root->fs_info->fs_mutex); - if (ret < 0) - return ERR_PTR(ret); - inode = NULL; - if (location.objectid) { - ret = fixup_tree_root_location(root, &location, &sub_root); - if (ret < 0) - return ERR_PTR(ret); - if (ret > 0) - return ERR_PTR(-ENOENT); - inode = btrfs_iget_locked(dir->i_sb, location.objectid, - sub_root); - if (!inode) - return ERR_PTR(-EACCES); - if (inode->i_state & I_NEW) { - if (sub_root != root) { -printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root); - igrab(inode); - sub_root->inode = inode; - } - BTRFS_I(inode)->root = sub_root; - memcpy(&BTRFS_I(inode)->location, &location, - sizeof(location)); - btrfs_read_locked_inode(inode); - unlock_new_inode(inode); - } - } - return d_splice_alias(inode, dentry); -} - -static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path, - u64 objectid) -{ - struct btrfs_node *node; - int i; - u32 nritems; - u64 item_objectid; - u64 blocknr; - int slot; - int ret; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1]; - nritems = btrfs_header_nritems(&node->header); - for (i = slot + 1; i < nritems; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid != objectid) - break; - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); - if (ret) - break; - } -} -static unsigned char btrfs_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK -}; - -static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_item *item; - struct btrfs_dir_item *di; - struct btrfs_key key; - struct btrfs_path *path; - int ret; - u32 nritems; - struct btrfs_leaf *leaf; - int slot; - int advance; - unsigned char d_type; - int over = 0; - u32 di_cur; - u32 di_total; - u32 di_len; - int key_type = BTRFS_DIR_INDEX_KEY; - - /* FIXME, use a real flag for deciding about the key type */ - if (root->fs_info->tree_root == root) - key_type = BTRFS_DIR_ITEM_KEY; - mutex_lock(&root->fs_info->fs_mutex); - key.objectid = inode->i_ino; - key.flags = 0; - btrfs_set_key_type(&key, key_type); - key.offset = filp->f_pos; - path = btrfs_alloc_path(); - btrfs_init_path(path); - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto err; - advance = 0; - reada_leaves(root, path, inode->i_ino); - while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); - slot = path->slots[0]; - if (advance || slot >= nritems) { - if (slot >= nritems -1) { - reada_leaves(root, path, inode->i_ino); - ret = btrfs_next_leaf(root, path); - if (ret) - break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); - slot = path->slots[0]; - } else { - slot++; - path->slots[0]++; - } - } - advance = 1; - item = leaf->items + slot; - if (btrfs_disk_key_objectid(&item->key) != key.objectid) - break; - if (btrfs_disk_key_type(&item->key) != key_type) - break; - if (btrfs_disk_key_offset(&item->key) < filp->f_pos) - continue; - filp->f_pos = btrfs_disk_key_offset(&item->key); - advance = 1; - di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); - di_cur = 0; - di_total = btrfs_item_size(leaf->items + slot); - while(di_cur < di_total) { - d_type = btrfs_filetype_table[btrfs_dir_type(di)]; - over = filldir(dirent, (const char *)(di + 1), - btrfs_dir_name_len(di), - btrfs_disk_key_offset(&item->key), - btrfs_disk_key_objectid(&di->location), - d_type); - if (over) - goto nopos; - di_len = btrfs_dir_name_len(di) + sizeof(*di); - di_cur += di_len; - di = (struct btrfs_dir_item *)((char *)di + di_len); - } - } - filp->f_pos++; -nopos: - ret = 0; -err: - btrfs_release_path(root, path); - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - return ret; -} static void btrfs_put_super (struct super_block * sb) { @@ -907,6 +43,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) struct btrfs_super_block *disk_super; struct btrfs_root *tree_root; struct btrfs_inode *bi; + int err; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = BTRFS_SUPER_MAGIC; @@ -915,16 +52,12 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) tree_root = open_ctree(sb); - if (!tree_root) { + if (!tree_root || IS_ERR(tree_root)) { printk("btrfs: open_ctree failed\n"); return -EIO; } sb->s_fs_info = tree_root; disk_super = tree_root->fs_info->disk_super; - printk("read in super total blocks %Lu root %Lu\n", - btrfs_super_total_blocks(disk_super), - btrfs_super_root_dir(disk_super)); - inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), tree_root); bi = BTRFS_I(inode); @@ -934,8 +67,10 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) bi->root = tree_root; btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); - if (!inode) - return -ENOMEM; + if (!inode) { + err = -ENOMEM; + goto fail_close; + } if (inode->i_state & I_NEW) { btrfs_read_locked_inode(inode); unlock_new_inode(inode); @@ -944,323 +79,18 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) root_dentry = d_alloc_root(inode); if (!root_dentry) { iput(inode); - return -ENOMEM; + err = -ENOMEM; + goto fail_close; } sb->s_root = root_dentry; btrfs_transaction_queue_work(tree_root, HZ * 30); return 0; -} -static int btrfs_write_inode(struct inode *inode, int wait) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; - int ret = 0; - - if (wait) { - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - ret = btrfs_commit_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - } - return ret; -} - -static void btrfs_dirty_inode(struct inode *inode) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - btrfs_update_inode(trans, root, inode); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); -} - -static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, - struct btrfs_block_group_cache *group, - int mode) -{ - struct inode *inode; - struct btrfs_inode_item inode_item; - struct btrfs_key *location; - int ret; - int owner; - - inode = new_inode(root->fs_info->sb); - if (!inode) - return ERR_PTR(-ENOMEM); - - BTRFS_I(inode)->root = root; - if (mode & S_IFDIR) - owner = 0; - else - owner = 1; - group = btrfs_find_block_group(root, group, 0, 0, owner); - BTRFS_I(inode)->block_group = group; - - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_mode = mode; - inode->i_ino = objectid; - inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - fill_inode_item(&inode_item, inode); - location = &BTRFS_I(inode)->location; - location->objectid = objectid; - location->flags = 0; - location->offset = 0; - btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - - ret = btrfs_insert_inode(trans, root, objectid, &inode_item); - BUG_ON(ret); - - insert_inode_hash(inode); - return inode; -} - -static inline u8 btrfs_inode_type(struct inode *inode) -{ - return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; -} - -static int btrfs_add_link(struct btrfs_trans_handle *trans, - struct dentry *dentry, struct inode *inode) -{ - int ret; - struct btrfs_key key; - struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; - key.objectid = inode->i_ino; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - key.offset = 0; - - ret = btrfs_insert_dir_item(trans, root, - dentry->d_name.name, dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, - &key, btrfs_inode_type(inode)); - if (ret == 0) { - dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; - ret = btrfs_update_inode(trans, root, - dentry->d_parent->d_inode); - } - return ret; -} - -static int btrfs_add_nondir(struct btrfs_trans_handle *trans, - struct dentry *dentry, struct inode *inode) -{ - int err = btrfs_add_link(trans, dentry, inode); - if (!err) { - d_instantiate(dentry, inode); - return 0; - } - if (err > 0) - err = -EEXIST; +fail_close: + close_ctree(tree_root); return err; } -static int btrfs_create(struct inode *dir, struct dentry *dentry, - int mode, struct nameidata *nd) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(dir)->root; - struct inode *inode; - int err; - int drop_inode = 0; - u64 objectid; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - - err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); - if (err) { - err = -ENOSPC; - goto out_unlock; - } - - inode = btrfs_new_inode(trans, root, objectid, - BTRFS_I(dir)->block_group, mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_unlock; - - btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode); - if (err) - drop_inode = 1; - else { - inode->i_mapping->a_ops = &btrfs_aops; - inode->i_fop = &btrfs_file_operations; - inode->i_op = &btrfs_file_inode_operations; - } - dir->i_sb->s_dirt = 1; - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); -out_unlock: - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - if (drop_inode) { - inode_dec_link_count(inode); - iput(inode); - } - btrfs_btree_balance_dirty(root); - return err; -} - -static int btrfs_link(struct dentry *old_dentry, struct inode *dir, - struct dentry *dentry) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(dir)->root; - struct inode *inode = old_dentry->d_inode; - int err; - int drop_inode = 0; - - if (inode->i_nlink == 0) - return -ENOENT; - - inc_nlink(inode); - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - atomic_inc(&inode->i_count); - err = btrfs_add_nondir(trans, dentry, inode); - if (err) - drop_inode = 1; - dir->i_sb->s_dirt = 1; - btrfs_update_inode_block_group(trans, dir); - btrfs_update_inode(trans, root, inode); - - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - if (drop_inode) { - inode_dec_link_count(inode); - iput(inode); - } - btrfs_btree_balance_dirty(root); - return err; -} - -static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, u64 dirid) -{ - int ret; - char buf[2]; - struct btrfs_key key; - - buf[0] = '.'; - buf[1] = '.'; - - key.objectid = objectid; - key.offset = 0; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - - ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, - &key, BTRFS_FT_DIR); - if (ret) - goto error; - key.objectid = dirid; - ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, - &key, BTRFS_FT_DIR); - if (ret) - goto error; -error: - return ret; -} - -static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) -{ - struct inode *inode; - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(dir)->root; - int err = 0; - int drop_on_err = 0; - u64 objectid; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - if (IS_ERR(trans)) { - err = PTR_ERR(trans); - goto out_unlock; - } - - err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); - if (err) { - err = -ENOSPC; - goto out_unlock; - } - - inode = btrfs_new_inode(trans, root, objectid, - BTRFS_I(dir)->block_group, S_IFDIR | mode); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto out_fail; - } - drop_on_err = 1; - inode->i_op = &btrfs_dir_inode_operations; - inode->i_fop = &btrfs_dir_file_operations; - btrfs_set_trans_block_group(trans, inode); - - err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino); - if (err) - goto out_fail; - - inode->i_size = 6; - err = btrfs_update_inode(trans, root, inode); - if (err) - goto out_fail; - err = btrfs_add_link(trans, dentry, inode); - if (err) - goto out_fail; - d_instantiate(dentry, inode); - drop_on_err = 0; - dir->i_sb->s_dirt = 1; - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); - -out_fail: - btrfs_end_transaction(trans, root); -out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); - if (drop_on_err) - iput(inode); - btrfs_btree_balance_dirty(root); - return err; -} - -static int btrfs_sync_file(struct file *file, - struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - struct btrfs_trans_handle *trans; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (!trans) { - ret = -ENOMEM; - goto out; - } - ret = btrfs_commit_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); -out: - return ret > 0 ? EIO : ret; -} - static int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; @@ -1278,1430 +108,15 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; BUG_ON(ret); -printk("btrfs sync_fs\n"); mutex_unlock(&root->fs_info->fs_mutex); return 0; } -#define BTRFS_GET_BLOCK_NO_CREATE 0 -#define BTRFS_GET_BLOCK_CREATE 1 -#define BTRFS_GET_BLOCK_NO_DIRECT 2 - -static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - int ret; - int err = 0; - u64 blocknr; - u64 extent_start = 0; - u64 extent_end = 0; - u64 objectid = inode->i_ino; - u32 found_type; - u64 alloc_hint = 0; - struct btrfs_path *path; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_file_extent_item *item; - struct btrfs_leaf *leaf; - struct btrfs_disk_key *found_key; - struct btrfs_trans_handle *trans = NULL; - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - if (create & BTRFS_GET_BLOCK_CREATE) { - WARN_ON(1); - /* this almost but not quite works */ - trans = btrfs_start_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - goto out; - } - ret = drop_extents(trans, root, inode, - iblock << inode->i_blkbits, - (iblock + 1) << inode->i_blkbits, - &alloc_hint); - BUG_ON(ret); - } - - ret = btrfs_lookup_file_extent(NULL, root, path, - inode->i_ino, - iblock << inode->i_blkbits, 0); - if (ret < 0) { - err = ret; - goto out; - } - - if (ret != 0) { - if (path->slots[0] == 0) { - btrfs_release_path(root, path); - goto not_found; - } - path->slots[0]--; - } - - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_file_extent_item); - leaf = btrfs_buffer_leaf(path->nodes[0]); - blocknr = btrfs_file_extent_disk_blocknr(item); - blocknr += btrfs_file_extent_offset(item); - - /* are we inside the extent that was found? */ - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); - if (btrfs_disk_key_objectid(found_key) != objectid || - found_type != BTRFS_EXTENT_DATA_KEY) { - extent_end = 0; - extent_start = 0; - goto not_found; - } - found_type = btrfs_file_extent_type(item); - extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); - if (found_type == BTRFS_FILE_EXTENT_REG) { - extent_start = extent_start >> inode->i_blkbits; - extent_end = extent_start + btrfs_file_extent_num_blocks(item); - err = 0; - if (btrfs_file_extent_disk_blocknr(item) == 0) - goto out; - if (iblock >= extent_start && iblock < extent_end) { - btrfs_map_bh_to_logical(root, result, blocknr + - iblock - extent_start); - goto out; - } - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - char *ptr; - char *map; - u32 size; - - if (create & BTRFS_GET_BLOCK_NO_DIRECT) { - err = -EINVAL; - goto out; - } - size = btrfs_file_extent_inline_len(leaf->items + - path->slots[0]); - extent_end = (extent_start + size) >> inode->i_blkbits; - extent_start >>= inode->i_blkbits; - if (iblock < extent_start || iblock > extent_end) { - goto not_found; - } - ptr = btrfs_file_extent_inline_start(item); - map = kmap(result->b_page); - memcpy(map, ptr, size); - memset(map + size, 0, PAGE_CACHE_SIZE - size); - flush_dcache_page(result->b_page); - kunmap(result->b_page); - set_buffer_uptodate(result); - SetPageChecked(result->b_page); - btrfs_map_bh_to_logical(root, result, 0); - } -not_found: - if (create & BTRFS_GET_BLOCK_CREATE) { - struct btrfs_key ins; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, - 1, alloc_hint, (u64)-1, - &ins, 1); - BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - iblock << inode->i_blkbits, - ins.objectid, ins.offset, - ins.offset); - BUG_ON(ret); - SetPageChecked(result->b_page); - btrfs_map_bh_to_logical(root, result, ins.objectid); - } -out: - if (trans) - err = btrfs_end_transaction(trans, root); - btrfs_free_path(path); - return err; -} - -static int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - int err; - struct btrfs_root *root = BTRFS_I(inode)->root; - mutex_lock(&root->fs_info->fs_mutex); - err = btrfs_get_block_lock(inode, iblock, result, create); - mutex_unlock(&root->fs_info->fs_mutex); - return err; -} - -static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - mutex_lock(&root->fs_info->fs_mutex); - btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT); - mutex_unlock(&root->fs_info->fs_mutex); - return 0; -} - -static sector_t btrfs_bmap(struct address_space *as, sector_t block) -{ - return generic_block_bmap(as, block, btrfs_get_block_bmap); -} - -static int btrfs_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - return block_prepare_write(page, from, to, btrfs_get_block); -} - static void btrfs_write_super(struct super_block *sb) { sb->s_dirt = 0; } -static int btrfs_readpage(struct file *file, struct page *page) -{ - return mpage_readpage(page, btrfs_get_block); -} - -/* - * While block_write_full_page is writing back the dirty buffers under - * the page lock, whoever dirtied the buffers may decide to clean them - * again at any time. We handle that by only looking at the buffer - * state inside lock_buffer(). - * - * If block_write_full_page() is called for regular writeback - * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a - * locked buffer. This only can happen if someone has written the buffer - * directly, with submit_bh(). At the address_space level PageWriteback - * prevents this contention from occurring. - */ -static int __btrfs_write_full_page(struct inode *inode, struct page *page, - struct writeback_control *wbc) -{ - int err; - sector_t block; - sector_t last_block; - struct buffer_head *bh, *head; - const unsigned blocksize = 1 << inode->i_blkbits; - int nr_underway = 0; - - BUG_ON(!PageLocked(page)); - - last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; - - if (!page_has_buffers(page)) { - create_empty_buffers(page, blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - - /* - * Be very careful. We have no exclusion from __set_page_dirty_buffers - * here, and the (potentially unmapped) buffers may become dirty at - * any time. If a buffer becomes dirty here after we've inspected it - * then we just miss that fact, and the page stays dirty. - * - * Buffers outside i_size may be dirtied by __set_page_dirty_buffers; - * handle that here by just cleaning them. - */ - - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - head = page_buffers(page); - bh = head; - - /* - * Get all the dirty buffers mapped to disk addresses and - * handle any aliases from the underlying blockdev's mapping. - */ - do { - if (block > last_block) { - /* - * mapped buffers outside i_size will occur, because - * this page can be outside i_size when there is a - * truncate in progress. - */ - /* - * The buffer was zeroed by block_write_full_page() - */ - clear_buffer_dirty(bh); - set_buffer_uptodate(bh); - } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { - WARN_ON(bh->b_size != blocksize); - err = btrfs_get_block(inode, block, bh, 0); - if (err) { -printk("writepage going to recovery err %d\n", err); - goto recover; - } - if (buffer_new(bh)) { - /* blockdev mappings never come here */ - clear_buffer_new(bh); - } - } - bh = bh->b_this_page; - block++; - } while (bh != head); - - do { - if (!buffer_mapped(bh)) - continue; - /* - * If it's a fully non-blocking write attempt and we cannot - * lock the buffer then redirty the page. Note that this can - * potentially cause a busy-wait loop from pdflush and kswapd - * activity, but those code paths have their own higher-level - * throttling. - */ - if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { - lock_buffer(bh); - } else if (test_set_buffer_locked(bh)) { - redirty_page_for_writepage(wbc, page); - continue; - } - if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) { - mark_buffer_async_write(bh); - } else { - unlock_buffer(bh); - } - } while ((bh = bh->b_this_page) != head); - - /* - * The page and its buffers are protected by PageWriteback(), so we can - * drop the bh refcounts early. - */ - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - submit_bh(WRITE, bh); - nr_underway++; - } - bh = next; - } while (bh != head); - unlock_page(page); - - err = 0; -done: - if (nr_underway == 0) { - /* - * The page was marked dirty, but the buffers were - * clean. Someone wrote them back by hand with - * ll_rw_block/submit_bh. A rare case. - */ - int uptodate = 1; - do { - if (!buffer_uptodate(bh)) { - uptodate = 0; - break; - } - bh = bh->b_this_page; - } while (bh != head); - if (uptodate) - SetPageUptodate(page); - end_page_writeback(page); - } - return err; - -recover: - /* - * ENOSPC, or some other error. We may already have added some - * blocks to the file, so we need to write these out to avoid - * exposing stale data. - * The page is currently locked and not marked for writeback - */ - bh = head; - /* Recovery: lock and submit the mapped buffers */ - do { - if (buffer_mapped(bh) && buffer_dirty(bh)) { - lock_buffer(bh); - mark_buffer_async_write(bh); - } else { - /* - * The buffer may have been set dirty during - * attachment to a dirty page. - */ - clear_buffer_dirty(bh); - } - } while ((bh = bh->b_this_page) != head); - SetPageError(page); - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - clear_buffer_dirty(bh); - submit_bh(WRITE, bh); - nr_underway++; - } - bh = next; - } while (bh != head); - unlock_page(page); - goto done; -} - -/* - * The generic ->writepage function for buffer-backed address_spaces - */ -static int btrfs_writepage(struct page *page, struct writeback_control *wbc) -{ - struct inode * const inode = page->mapping->host; - loff_t i_size = i_size_read(inode); - const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; - unsigned offset; - void *kaddr; - - /* Is the page fully inside i_size? */ - if (page->index < end_index) - return __btrfs_write_full_page(inode, page, wbc); - - /* Is the page fully outside i_size? (truncate in progress) */ - offset = i_size & (PAGE_CACHE_SIZE-1); - if (page->index >= end_index+1 || !offset) { - /* - * The page may have dirty, unmapped buffers. For example, - * they may have been added in ext3_writepage(). Make them - * freeable here, so the page does not leak. - */ - block_invalidatepage(page, 0); - unlock_page(page); - return 0; /* don't care */ - } - - /* - * The page straddles i_size. It must be zeroed out on each and every - * writepage invokation because it may be mmapped. "A file is mapped - * in multiples of the page size. For a file that is not a multiple of - * the page size, the remaining memory is zeroed when mapped, and - * writes to that region are not written out to the file." - */ - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - return __btrfs_write_full_page(inode, page, wbc); -} - -static void btrfs_truncate(struct inode *inode) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - struct btrfs_trans_handle *trans; - - if (!S_ISREG(inode->i_mode)) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - - btrfs_truncate_page(inode->i_mapping, inode->i_size); - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - - /* FIXME, add redo link to tree so we don't leak on crash */ - ret = btrfs_truncate_in_trans(trans, root, inode); - BUG_ON(ret); - btrfs_update_inode(trans, root, inode); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); -} - -static int btrfs_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct inode *inode = page->mapping->host; - struct buffer_head *bh; - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - - SetPageUptodate(page); - bh = page_buffers(page); - set_buffer_uptodate(bh); - if (buffer_mapped(bh) && bh->b_blocknr != 0) { - set_page_dirty(page); - } - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} - -static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, - struct page **prepared_pages, - const char __user * buf) -{ - long page_fault = 0; - int i; - int offset = pos & (PAGE_CACHE_SIZE - 1); - - for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { - size_t count = min_t(size_t, - PAGE_CACHE_SIZE - offset, write_bytes); - struct page *page = prepared_pages[i]; - fault_in_pages_readable(buf, count); - - /* Copy data from userspace to the current page */ - kmap(page); - page_fault = __copy_from_user(page_address(page) + offset, - buf, count); - /* Flush processor's dcache for this page */ - flush_dcache_page(page); - kunmap(page); - buf += count; - write_bytes -= count; - - if (page_fault) - break; - } - return page_fault ? -EFAULT : 0; -} - -static void btrfs_drop_pages(struct page **pages, size_t num_pages) -{ - size_t i; - for (i = 0; i < num_pages; i++) { - if (!pages[i]) - break; - unlock_page(pages[i]); - mark_page_accessed(pages[i]); - page_cache_release(pages[i]); - } -} -static int dirty_and_release_pages(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct file *file, - struct page **pages, - size_t num_pages, - loff_t pos, - size_t write_bytes) -{ - int i; - int offset; - int err = 0; - int ret; - int this_write; - struct inode *inode = file->f_path.dentry->d_inode; - struct buffer_head *bh; - struct btrfs_file_extent_item *ei; - - for (i = 0; i < num_pages; i++) { - offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); - /* FIXME, one block at a time */ - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - - bh = page_buffers(pages[i]); - if (buffer_mapped(bh) && bh->b_blocknr == 0) { - struct btrfs_key key; - struct btrfs_path *path; - char *ptr; - u32 datasize; - - path = btrfs_alloc_path(); - BUG_ON(!path); - key.objectid = inode->i_ino; - key.offset = pages[i]->index << PAGE_CACHE_SHIFT; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - BUG_ON(write_bytes >= PAGE_CACHE_SIZE); - datasize = offset + - btrfs_file_extent_calc_inline_size(write_bytes); - ret = btrfs_insert_empty_item(trans, root, path, &key, - datasize); - BUG_ON(ret); - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); - ptr = btrfs_file_extent_inline_start(ei); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, bh->b_data, offset + write_bytes); - mark_buffer_dirty(path->nodes[0]); - btrfs_free_path(path); - } else if (buffer_mapped(bh)) { - btrfs_csum_file_block(trans, root, inode->i_ino, - pages[i]->index << PAGE_CACHE_SHIFT, - kmap(pages[i]), PAGE_CACHE_SIZE); - kunmap(pages[i]); - } - SetPageChecked(pages[i]); - // btrfs_update_inode_block_group(trans, inode); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); - - ret = btrfs_commit_write(file, pages[i], offset, - offset + this_write); - pos += this_write; - if (ret) { - err = ret; - goto failed; - } - WARN_ON(this_write > write_bytes); - write_bytes -= this_write; - } -failed: - return err; -} - -static int drop_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u64 start, u64 end, u64 *hint_block) -{ - int ret; - struct btrfs_key key; - struct btrfs_leaf *leaf; - int slot; - struct btrfs_file_extent_item *extent; - u64 extent_end = 0; - int keep; - struct btrfs_file_extent_item old; - struct btrfs_path *path; - u64 search_start = start; - int bookend; - int found_type; - int found_extent; - int found_inline; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - while(1) { - btrfs_release_path(root, path); - ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, - search_start, -1); - if (ret < 0) - goto out; - if (ret > 0) { - if (path->slots[0] == 0) { - ret = 0; - goto out; - } - path->slots[0]--; - } - keep = 0; - bookend = 0; - found_extent = 0; - found_inline = 0; - extent = NULL; - leaf = btrfs_buffer_leaf(path->nodes[0]); - slot = path->slots[0]; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); - if (key.offset >= end || key.objectid != inode->i_ino) { - ret = 0; - goto out; - } - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) { - ret = 0; - goto out; - } - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(extent); - if (found_type == BTRFS_FILE_EXTENT_REG) { - extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << - inode->i_blkbits); - found_extent = 1; - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - found_inline = 1; - extent_end = key.offset + - btrfs_file_extent_inline_len(leaf->items + slot); - } - - if (!found_extent && !found_inline) { - ret = 0; - goto out; - } - - if (search_start >= extent_end) { - ret = 0; - goto out; - } - - if (found_inline) { - u64 mask = root->blocksize - 1; - search_start = (extent_end + mask) & ~mask; - } else - search_start = extent_end; - - if (end < extent_end && end >= key.offset) { - if (found_extent) { - u64 disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); - u64 disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); - memcpy(&old, extent, sizeof(old)); - if (disk_blocknr != 0) { - ret = btrfs_inc_extent_ref(trans, root, - disk_blocknr, disk_num_blocks); - BUG_ON(ret); - } - } - WARN_ON(found_inline); - bookend = 1; - } - - if (start > key.offset) { - u64 new_num; - u64 old_num; - /* truncate existing extent */ - keep = 1; - WARN_ON(start & (root->blocksize - 1)); - if (found_extent) { - new_num = (start - key.offset) >> - inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(extent); - *hint_block = - btrfs_file_extent_disk_blocknr(extent); - if (btrfs_file_extent_disk_blocknr(extent)) { - inode->i_blocks -= - (old_num - new_num) << 3; - } - btrfs_set_file_extent_num_blocks(extent, - new_num); - mark_buffer_dirty(path->nodes[0]); - } else { - WARN_ON(1); - } - } - if (!keep) { - u64 disk_blocknr = 0; - u64 disk_num_blocks = 0; - u64 extent_num_blocks = 0; - if (found_extent) { - disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); - disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); - extent_num_blocks = - btrfs_file_extent_num_blocks(extent); - *hint_block = - btrfs_file_extent_disk_blocknr(extent); - } - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); - btrfs_release_path(root, path); - extent = NULL; - if (found_extent && disk_blocknr != 0) { - inode->i_blocks -= extent_num_blocks << 3; - ret = btrfs_free_extent(trans, root, - disk_blocknr, - disk_num_blocks, 0); - } - - BUG_ON(ret); - if (!bookend && search_start >= end) { - ret = 0; - goto out; - } - if (!bookend) - continue; - } - if (bookend && found_extent) { - /* create bookend */ - struct btrfs_key ins; - ins.objectid = inode->i_ino; - ins.offset = end; - ins.flags = 0; - btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); - - btrfs_release_path(root, path); - ret = btrfs_insert_empty_item(trans, root, path, &ins, - sizeof(*extent)); - BUG_ON(ret); - extent = btrfs_item_ptr( - btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(extent, - btrfs_file_extent_disk_blocknr(&old)); - btrfs_set_file_extent_disk_num_blocks(extent, - btrfs_file_extent_disk_num_blocks(&old)); - - btrfs_set_file_extent_offset(extent, - btrfs_file_extent_offset(&old) + - ((end - key.offset) >> inode->i_blkbits)); - WARN_ON(btrfs_file_extent_num_blocks(&old) < - (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_num_blocks(extent, - (extent_end - end) >> inode->i_blkbits); - - btrfs_set_file_extent_type(extent, - BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_generation(extent, - btrfs_file_extent_generation(&old)); - btrfs_mark_buffer_dirty(path->nodes[0]); - if (btrfs_file_extent_disk_blocknr(&old) != 0) { - inode->i_blocks += - btrfs_file_extent_num_blocks(extent) << 3; - } - ret = 0; - goto out; - } - } -out: - btrfs_free_path(path); - return ret; -} - -static int prepare_pages(struct btrfs_root *root, - struct file *file, - struct page **pages, - size_t num_pages, - loff_t pos, - unsigned long first_index, - unsigned long last_index, - size_t write_bytes, - u64 alloc_extent_start) -{ - int i; - unsigned long index = pos >> PAGE_CACHE_SHIFT; - struct inode *inode = file->f_path.dentry->d_inode; - int offset; - int err = 0; - int this_write; - struct buffer_head *bh; - struct buffer_head *head; - loff_t isize = i_size_read(inode); - - memset(pages, 0, num_pages * sizeof(struct page *)); - - for (i = 0; i < num_pages; i++) { - pages[i] = grab_cache_page(inode->i_mapping, index + i); - if (!pages[i]) { - err = -ENOMEM; - goto failed_release; - } - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); - wait_on_page_writeback(pages[i]); - offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); - if (!page_has_buffers(pages[i])) { - create_empty_buffers(pages[i], - root->fs_info->sb->s_blocksize, - (1 << BH_Uptodate)); - } - head = page_buffers(pages[i]); - bh = head; - do { - err = btrfs_map_bh_to_logical(root, bh, - alloc_extent_start); - BUG_ON(err); - if (err) - goto failed_truncate; - bh = bh->b_this_page; - if (alloc_extent_start) - alloc_extent_start++; - } while (bh != head); - pos += this_write; - WARN_ON(this_write > write_bytes); - write_bytes -= this_write; - } - return 0; - -failed_release: - btrfs_drop_pages(pages, num_pages); - return err; - -failed_truncate: - btrfs_drop_pages(pages, num_pages); - if (pos > isize) - vmtruncate(inode, isize); - return err; -} - -static ssize_t btrfs_file_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - loff_t pos; - size_t num_written = 0; - int err = 0; - int ret = 0; - struct inode *inode = file->f_path.dentry->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct page *pages[8]; - struct page *pinned[2]; - unsigned long first_index; - unsigned long last_index; - u64 start_pos; - u64 num_blocks; - u64 alloc_extent_start; - u64 hint_block; - struct btrfs_trans_handle *trans; - struct btrfs_key ins; - pinned[0] = NULL; - pinned[1] = NULL; - if (file->f_flags & O_DIRECT) - return -EINVAL; - pos = *ppos; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); - current->backing_dev_info = inode->i_mapping->backing_dev_info; - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); - if (err) - goto out; - if (count == 0) - goto out; - err = remove_suid(file->f_path.dentry); - if (err) - goto out; - file_update_time(file); - - start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); - num_blocks = (count + pos - start_pos + root->blocksize - 1) >> - inode->i_blkbits; - - mutex_lock(&inode->i_mutex); - first_index = pos >> PAGE_CACHE_SHIFT; - last_index = (pos + count) >> PAGE_CACHE_SHIFT; - - if ((pos & (PAGE_CACHE_SIZE - 1))) { - pinned[0] = grab_cache_page(inode->i_mapping, first_index); - if (!PageUptodate(pinned[0])) { - ret = mpage_readpage(pinned[0], btrfs_get_block); - BUG_ON(ret); - wait_on_page_locked(pinned[0]); - } else { - unlock_page(pinned[0]); - } - } - if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { - pinned[1] = grab_cache_page(inode->i_mapping, last_index); - if (!PageUptodate(pinned[1])) { - ret = mpage_readpage(pinned[1], btrfs_get_block); - BUG_ON(ret); - wait_on_page_locked(pinned[1]); - } else { - unlock_page(pinned[1]); - } - } - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - mutex_unlock(&root->fs_info->fs_mutex); - goto out_unlock; - } - btrfs_set_trans_block_group(trans, inode); - /* FIXME blocksize != 4096 */ - inode->i_blocks += num_blocks << 3; - hint_block = 0; - if (start_pos < inode->i_size) { - /* FIXME blocksize != pagesize */ - ret = drop_extents(trans, root, inode, - start_pos, - (pos + count + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); - BUG_ON(ret); - } - if (inode->i_size < start_pos) { - u64 last_pos_in_file; - u64 hole_size; - u64 mask = root->blocksize - 1; - last_pos_in_file = (inode->i_size + mask) & ~mask; - hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - hole_size >>= inode->i_blkbits; - if (last_pos_in_file < start_pos) { - ret = btrfs_insert_file_extent(trans, root, - inode->i_ino, - last_pos_in_file, - 0, 0, hole_size); - } - BUG_ON(ret); - } - if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || - pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - ret = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, hint_block, (u64)-1, - &ins, 1); - BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset, - ins.offset); - BUG_ON(ret); - } else { - ins.offset = 0; - ins.objectid = 0; - } - BUG_ON(ret); - alloc_extent_start = ins.objectid; - // btrfs_update_inode_block_group(trans, inode); - ret = btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - while(count > 0) { - size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); - size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - - memset(pages, 0, sizeof(pages)); - ret = prepare_pages(root, file, pages, num_pages, - pos, first_index, last_index, - write_bytes, alloc_extent_start); - BUG_ON(ret); - - /* FIXME blocks != pagesize */ - if (alloc_extent_start) - alloc_extent_start += num_pages; - ret = btrfs_copy_from_user(pos, num_pages, - write_bytes, pages, buf); - BUG_ON(ret); - - ret = dirty_and_release_pages(NULL, root, file, pages, - num_pages, pos, write_bytes); - BUG_ON(ret); - btrfs_drop_pages(pages, num_pages); - - buf += write_bytes; - count -= write_bytes; - pos += write_bytes; - num_written += write_bytes; - - balance_dirty_pages_ratelimited(inode->i_mapping); - btrfs_btree_balance_dirty(root); - cond_resched(); - } -out_unlock: - mutex_unlock(&inode->i_mutex); -out: - if (pinned[0]) - page_cache_release(pinned[0]); - if (pinned[1]) - page_cache_release(pinned[1]); - *ppos = pos; - current->backing_dev_info = NULL; - mark_inode_dirty(inode); - return num_written ? num_written : err; -} - -static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, - unsigned long offset, unsigned long size) -{ - char *kaddr; - unsigned long left, count = desc->count; - struct inode *inode = page->mapping->host; - - if (size > count) - size = count; - - if (!PageChecked(page)) { - /* FIXME, do it per block */ - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - struct buffer_head *bh; - - if (page_has_buffers(page)) { - bh = page_buffers(page); - if (!buffer_mapped(bh)) { - SetPageChecked(page); - goto checked; - } - } - - ret = btrfs_csum_verify_file_block(root, - page->mapping->host->i_ino, - page->index << PAGE_CACHE_SHIFT, - kmap(page), PAGE_CACHE_SIZE); - if (ret) { - if (ret != -ENOENT) { - printk("failed to verify ino %lu page %lu ret %d\n", - page->mapping->host->i_ino, - page->index, ret); - memset(page_address(page), 1, PAGE_CACHE_SIZE); - flush_dcache_page(page); - } - } - SetPageChecked(page); - kunmap(page); - } -checked: - /* - * Faults on the destination of a read are common, so do it before - * taking the kmap. - */ - if (!fault_in_pages_writeable(desc->arg.buf, size)) { - kaddr = kmap_atomic(page, KM_USER0); - left = __copy_to_user_inatomic(desc->arg.buf, - kaddr + offset, size); - kunmap_atomic(kaddr, KM_USER0); - if (left == 0) - goto success; - } - - /* Do it the slow way */ - kaddr = kmap(page); - left = __copy_to_user(desc->arg.buf, kaddr + offset, size); - kunmap(page); - - if (left) { - size -= left; - desc->error = -EFAULT; - } -success: - desc->count = count - size; - desc->written += size; - desc->arg.buf += size; - return size; -} - -/** - * btrfs_file_aio_read - filesystem read routine - * @iocb: kernel I/O control block - * @iov: io vector request - * @nr_segs: number of segments in the iovec - * @pos: current file position - */ -static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct file *filp = iocb->ki_filp; - ssize_t retval; - unsigned long seg; - size_t count; - loff_t *ppos = &iocb->ki_pos; - - count = 0; - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *iv = &iov[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - count += iv->iov_len; - if (unlikely((ssize_t)(count|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len)) - continue; - if (seg == 0) - return -EFAULT; - nr_segs = seg; - count -= iv->iov_len; /* This segment is no good */ - break; - } - retval = 0; - if (count) { - for (seg = 0; seg < nr_segs; seg++) { - read_descriptor_t desc; - - desc.written = 0; - desc.arg.buf = iov[seg].iov_base; - desc.count = iov[seg].iov_len; - if (desc.count == 0) - continue; - desc.error = 0; - do_generic_file_read(filp, ppos, &desc, - btrfs_read_actor); - retval += desc.written; - if (desc.error) { - retval = retval ?: desc.error; - break; - } - } - } - return retval; -} - -static int create_subvol(struct btrfs_root *root, char *name, int namelen) -{ - struct btrfs_trans_handle *trans; - struct btrfs_key key; - struct btrfs_root_item root_item; - struct btrfs_inode_item *inode_item; - struct buffer_head *subvol; - struct btrfs_leaf *leaf; - struct btrfs_root *new_root; - struct inode *inode; - struct inode *dir; - int ret; - u64 objectid; - u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); - - subvol = btrfs_alloc_free_block(trans, root, 0); - if (subvol == NULL) - return -ENOSPC; - leaf = btrfs_buffer_leaf(subvol); - btrfs_set_header_nritems(&leaf->header, 0); - btrfs_set_header_level(&leaf->header, 0); - btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); - btrfs_set_header_generation(&leaf->header, trans->transid); - btrfs_set_header_owner(&leaf->header, root->root_key.objectid); - memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, - sizeof(leaf->header.fsid)); - mark_buffer_dirty(subvol); - - inode_item = &root_item.inode; - memset(inode_item, 0, sizeof(*inode_item)); - btrfs_set_inode_generation(inode_item, 1); - btrfs_set_inode_size(inode_item, 3); - btrfs_set_inode_nlink(inode_item, 1); - btrfs_set_inode_nblocks(inode_item, 1); - btrfs_set_inode_mode(inode_item, S_IFDIR | 0755); - - btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); - btrfs_set_root_refs(&root_item, 1); - brelse(subvol); - subvol = NULL; - - ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, - 0, &objectid); - BUG_ON(ret); - - btrfs_set_root_dirid(&root_item, new_dirid); - - key.objectid = objectid; - key.offset = 1; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, - &root_item); - BUG_ON(ret); - - /* - * insert the directory item - */ - key.offset = (u64)-1; - dir = root->fs_info->sb->s_root->d_inode; - ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, dir->i_ino, &key, - BTRFS_FT_DIR); - BUG_ON(ret); - - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); - - new_root = btrfs_read_fs_root(root->fs_info, &key); - BUG_ON(!new_root); - - trans = btrfs_start_transaction(new_root, 1); - BUG_ON(!trans); - - inode = btrfs_new_inode(trans, new_root, new_dirid, - BTRFS_I(dir)->block_group, S_IFDIR | 0700); - inode->i_op = &btrfs_dir_inode_operations; - inode->i_fop = &btrfs_dir_file_operations; - - ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid); - BUG_ON(ret); - - inode->i_nlink = 1; - inode->i_size = 6; - ret = btrfs_update_inode(trans, new_root, inode); - BUG_ON(ret); - - ret = btrfs_commit_transaction(trans, new_root); - BUG_ON(ret); - - iput(inode); - - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); - return 0; -} - -static int create_snapshot(struct btrfs_root *root, char *name, int namelen) -{ - struct btrfs_trans_handle *trans; - struct btrfs_key key; - struct btrfs_root_item new_root_item; - int ret; - u64 objectid; - - if (!root->ref_cows) - return -EINVAL; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); - - ret = btrfs_update_inode(trans, root, root->inode); - BUG_ON(ret); - - ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, - 0, &objectid); - BUG_ON(ret); - - memcpy(&new_root_item, &root->root_item, - sizeof(new_root_item)); - - key.objectid = objectid; - key.offset = 1; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node)); - - ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, - &new_root_item); - BUG_ON(ret); - - /* - * insert the directory item - */ - key.offset = (u64)-1; - ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, - root->fs_info->sb->s_root->d_inode->i_ino, - &key, BTRFS_FT_DIR); - - BUG_ON(ret); - - ret = btrfs_inc_root_ref(trans, root); - BUG_ON(ret); - - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); - return 0; -} - -static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int - cmd, unsigned long arg) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_ioctl_vol_args vol_args; - int ret = 0; - struct btrfs_dir_item *di; - int namelen; - struct btrfs_path *path; - u64 root_dirid; - - switch (cmd) { - case BTRFS_IOC_SNAP_CREATE: - if (copy_from_user(&vol_args, - (struct btrfs_ioctl_vol_args __user *)arg, - sizeof(vol_args))) - return -EFAULT; - namelen = strlen(vol_args.name); - if (namelen > BTRFS_VOL_NAME_MAX) - return -EINVAL; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, - mutex_lock(&root->fs_info->fs_mutex); - di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, - path, root_dirid, - vol_args.name, namelen, 0); - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_free_path(path); - if (di && !IS_ERR(di)) - return -EEXIST; - - if (root == root->fs_info->tree_root) - ret = create_subvol(root, vol_args.name, namelen); - else - ret = create_snapshot(root, vol_args.name, namelen); - WARN_ON(ret); - break; - default: - return -ENOTTY; - } - return ret; -} - -#ifdef CONFIG_COMPAT -static long btrfs_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct inode *inode = file->f_path.dentry->d_inode; - int ret; - lock_kernel(); - ret = btrfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); - return ret; - -} -#endif - -static struct kmem_cache *btrfs_inode_cachep; -struct kmem_cache *btrfs_trans_handle_cachep; -struct kmem_cache *btrfs_transaction_cachep; -struct kmem_cache *btrfs_bit_radix_cachep; -struct kmem_cache *btrfs_path_cachep; - -/* - * Called inside transaction, so use GFP_NOFS - */ -static struct inode *btrfs_alloc_inode(struct super_block *sb) -{ - struct btrfs_inode *ei; - - ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); - if (!ei) - return NULL; - return &ei->vfs_inode; -} - -static void btrfs_destroy_inode(struct inode *inode) -{ - WARN_ON(!list_empty(&inode->i_dentry)); - WARN_ON(inode->i_data.nrpages); - - kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); -} - -static void init_once(void * foo, struct kmem_cache * cachep, - unsigned long flags) -{ - struct btrfs_inode *ei = (struct btrfs_inode *) foo; - - if ((flags & (SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&ei->vfs_inode); - } -} - -static int init_inodecache(void) -{ - btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", - sizeof(struct btrfs_inode), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - init_once, NULL); - btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", - sizeof(struct btrfs_trans_handle), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - NULL, NULL); - btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", - sizeof(struct btrfs_transaction), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - NULL, NULL); - btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", - sizeof(struct btrfs_transaction), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - NULL, NULL); - btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix", - 256, - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD | - SLAB_DESTROY_BY_RCU), - NULL, NULL); - if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL || - btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL) - return -ENOMEM; - return 0; -} - -static void destroy_inodecache(void) -{ - kmem_cache_destroy(btrfs_inode_cachep); - kmem_cache_destroy(btrfs_trans_handle_cachep); - kmem_cache_destroy(btrfs_transaction_cachep); - kmem_cache_destroy(btrfs_bit_radix_cachep); - kmem_cache_destroy(btrfs_path_cachep); -} - static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -2709,15 +124,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, btrfs_fill_super, mnt); } -static int btrfs_getattr(struct vfsmount *mnt, - struct dentry *dentry, struct kstat *stat) -{ - struct inode *inode = dentry->d_inode; - generic_fillattr(inode, stat); - stat->blksize = 256 * 1024; - return 0; -} - static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root *root = btrfs_sb(dentry->d_sb); @@ -2732,197 +138,6 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, - struct inode * new_dir,struct dentry *new_dentry) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(old_dir)->root; - struct inode *new_inode = new_dentry->d_inode; - struct inode *old_inode = old_dentry->d_inode; - struct timespec ctime = CURRENT_TIME; - struct btrfs_path *path; - struct btrfs_dir_item *di; - int ret; - - if (S_ISDIR(old_inode->i_mode) && new_inode && - new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { - return -ENOTEMPTY; - } - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, new_dir); - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out_fail; - } - - old_dentry->d_inode->i_nlink++; - old_dir->i_ctime = old_dir->i_mtime = ctime; - new_dir->i_ctime = new_dir->i_mtime = ctime; - old_inode->i_ctime = ctime; - if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { - struct btrfs_key *location = &BTRFS_I(new_dir)->location; - u64 old_parent_oid; - di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino, - "..", 2, -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out_fail; - } - if (!di) { - ret = -ENOENT; - goto out_fail; - } - old_parent_oid = btrfs_disk_key_objectid(&di->location); - ret = btrfs_del_item(trans, root, path); - if (ret) { - ret = -EIO; - goto out_fail; - } - btrfs_release_path(root, path); - - di = btrfs_lookup_dir_index_item(trans, root, path, - old_inode->i_ino, - old_parent_oid, - "..", 2, -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out_fail; - } - if (!di) { - ret = -ENOENT; - goto out_fail; - } - ret = btrfs_del_item(trans, root, path); - if (ret) { - ret = -EIO; - goto out_fail; - } - btrfs_release_path(root, path); - - ret = btrfs_insert_dir_item(trans, root, "..", 2, - old_inode->i_ino, location, - BTRFS_FT_DIR); - if (ret) - goto out_fail; - } - - - ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry); - if (ret) - goto out_fail; - - if (new_inode) { - new_inode->i_ctime = CURRENT_TIME; - ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry); - if (ret) - goto out_fail; - if (S_ISDIR(new_inode->i_mode)) - clear_nlink(new_inode); - else - drop_nlink(new_inode); - btrfs_update_inode(trans, root, new_inode); - } - ret = btrfs_add_link(trans, new_dentry, old_inode); - if (ret) - goto out_fail; - -out_fail: - btrfs_free_path(path); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - return ret; -} - -static int btrfs_symlink(struct inode *dir, struct dentry *dentry, - const char *symname) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(dir)->root; - struct btrfs_path *path; - struct btrfs_key key; - struct inode *inode; - int err; - int drop_inode = 0; - u64 objectid; - int name_len; - int datasize; - char *ptr; - struct btrfs_file_extent_item *ei; - - name_len = strlen(symname) + 1; - if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) - return -ENAMETOOLONG; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - - err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); - if (err) { - err = -ENOSPC; - goto out_unlock; - } - - inode = btrfs_new_inode(trans, root, objectid, - BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_unlock; - - btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode); - if (err) - drop_inode = 1; - else { - inode->i_mapping->a_ops = &btrfs_aops; - inode->i_fop = &btrfs_file_operations; - inode->i_op = &btrfs_file_inode_operations; - } - dir->i_sb->s_dirt = 1; - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); - if (drop_inode) - goto out_unlock; - - path = btrfs_alloc_path(); - BUG_ON(!path); - key.objectid = inode->i_ino; - key.offset = 0; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - datasize = btrfs_file_extent_calc_inline_size(name_len); - err = btrfs_insert_empty_item(trans, root, path, &key, - datasize); - BUG_ON(err); - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); - ptr = btrfs_file_extent_inline_start(ei); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, symname, name_len); - mark_buffer_dirty(path->nodes[0]); - btrfs_free_path(path); - inode->i_op = &btrfs_symlink_inode_operations; - inode->i_mapping->a_ops = &btrfs_symlink_aops; - inode->i_size = name_len - 1; - btrfs_update_inode(trans, root, inode); - err = 0; - -out_unlock: - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - if (drop_inode) { - inode_dec_link_count(inode); - iput(inode); - } - btrfs_btree_balance_dirty(root); - return err; -} - static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2944,91 +159,21 @@ static struct super_operations btrfs_super_ops = { .statfs = btrfs_statfs, }; -static struct inode_operations btrfs_dir_inode_operations = { - .lookup = btrfs_lookup, - .create = btrfs_create, - .unlink = btrfs_unlink, - .link = btrfs_link, - .mkdir = btrfs_mkdir, - .rmdir = btrfs_rmdir, - .rename = btrfs_rename, - .symlink = btrfs_symlink, - .setattr = btrfs_setattr, -}; - -static struct inode_operations btrfs_dir_ro_inode_operations = { - .lookup = btrfs_lookup, -}; - -static struct file_operations btrfs_dir_file_operations = { - .llseek = generic_file_llseek, - .read = generic_read_dir, - .readdir = btrfs_readdir, - .ioctl = btrfs_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = btrfs_compat_ioctl, -#endif -}; - -static struct address_space_operations btrfs_aops = { - .readpage = btrfs_readpage, - .writepage = btrfs_writepage, - .sync_page = block_sync_page, - .prepare_write = btrfs_prepare_write, - .commit_write = btrfs_commit_write, - .bmap = btrfs_bmap, -}; - -static struct address_space_operations btrfs_symlink_aops = { - .readpage = btrfs_readpage, - .writepage = btrfs_writepage, -}; - -static struct inode_operations btrfs_file_inode_operations = { - .truncate = btrfs_truncate, - .getattr = btrfs_getattr, - .setattr = btrfs_setattr, -}; - -static struct file_operations btrfs_file_operations = { - .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = btrfs_file_aio_read, - .write = btrfs_file_write, - .mmap = generic_file_mmap, - .open = generic_file_open, - .ioctl = btrfs_ioctl, - .fsync = btrfs_sync_file, -#ifdef CONFIG_COMPAT - .compat_ioctl = btrfs_compat_ioctl, -#endif -}; - -static struct inode_operations btrfs_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = page_follow_link_light, - .put_link = page_put_link, -}; - static int __init init_btrfs_fs(void) { int err; - printk("btrfs loaded!\n"); btrfs_init_transaction_sys(); - err = init_inodecache(); + err = btrfs_init_cachep(); if (err) return err; return register_filesystem(&btrfs_fs_type); - destroy_inodecache(); - return err; } static void __exit exit_btrfs_fs(void) { btrfs_exit_transaction_sys(); - destroy_inodecache(); + btrfs_destroy_cachep(); unregister_filesystem(&btrfs_fs_type); - printk("btrfs unloaded\n"); } module_init(init_btrfs_fs) From 84f54cfa78c81991e087309a9b379f25f1ffdb10 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 07:43:08 -0400 Subject: [PATCH 0164/2985] Btrfs: 64 bit div fixes Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 43 +++++++++++++++++++++++------------------- fs/btrfs/file-item.c | 3 ++- fs/btrfs/file.c | 7 ++++--- fs/btrfs/root-tree.c | 2 +- 4 files changed, 31 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f509ffa38d03..6949eebc9dc1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -65,7 +65,6 @@ static int cache_block_group(struct btrfs_root *root, path = btrfs_alloc_path(); if (!path) return -ENOMEM; -printk("cache block group %Lu\n", block_group->key.objectid); key.objectid = block_group->key.objectid; key.flags = 0; key.offset = 0; @@ -164,8 +163,8 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct static u64 leaf_range(struct btrfs_root *root) { u64 size = BTRFS_LEAF_DATA_SIZE(root); - size = size / (sizeof(struct btrfs_extent_item) + - sizeof(struct btrfs_item)); + do_div(size, sizeof(struct btrfs_extent_item) + + sizeof(struct btrfs_item)); return size; } @@ -219,6 +218,13 @@ new_group: goto again; } +static u64 div_factor(u64 num, int factor) +{ + num *= factor; + do_div(num, 10); + return num; +} + struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, @@ -255,17 +261,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (shint->data == data) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < - (shint->key.offset * factor) / 10) { + div_factor(shint->key.offset, factor)) { return shint; } } } if (hint && hint->data == data) { used = btrfs_block_group_used(&hint->item); - if (used + hint->pinned < (hint->key.offset * factor) / 10) { + if (used + hint->pinned < + div_factor(hint->key.offset, factor)) { return hint; } - if (used >= (hint->key.offset * 8) / 10) { + if (used >= div_factor(hint->key.offset, 8)) { radix_tree_tag_clear(radix, hint->key.objectid + hint->key.offset - 1, @@ -297,11 +304,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, cache[i]->key.offset; used = btrfs_block_group_used(&cache[i]->item); if (used + cache[i]->pinned < - (cache[i]->key.offset * factor) / 10) { + div_factor(cache[i]->key.offset, factor)) { found_group = cache[i]; goto found; } - if (used >= (cache[i]->key.offset * 8) / 10) { + if (used >= div_factor(cache[i]->key.offset, 8)) { radix_tree_tag_clear(radix, cache[i]->key.objectid + cache[i]->key.offset - 1, @@ -348,7 +355,6 @@ again: goto again; } if (!found_group) { -printk("find block group bailing to zero data %d\n", data); ret = radix_tree_gang_lookup(radix, (void **)&found_group, 0, 1); if (ret == 0) { @@ -386,7 +392,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); if (ret != 0) { -printk("can't find block %Lu %Lu\n", blocknr, num_blocks); BUG(); } BUG_ON(ret != 0); @@ -601,8 +606,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, } } if (cache->data != data && - old_val < cache->key.offset / 2) { -printk("changing block group %Lu from %d to %d\n", cache->key.objectid, cache->data, data); + old_val < (cache->key.offset >> 1)) { cache->data = data; radix_tree_delete(cache->radix, cache->key.objectid + @@ -634,9 +638,8 @@ printk("changing block group %Lu from %d to %d\n", cache->key.objectid, cache->d blocknr + i); } } - if (old_val < cache->key.offset / 2 && - old_val + num >= cache->key.offset / 2) { -printk("group %Lu now available\n", cache->key.objectid); + if (old_val < (cache->key.offset >> 1) && + old_val + num >= (cache->key.offset >> 1)) { radix_tree_tag_set(cache->radix, cache->key.objectid + cache->key.offset - 1, @@ -1000,10 +1003,10 @@ check_failed: } if (start_found) limit = last_block + - block_group->key.offset / 2; + (block_group->key.offset >> 1); else limit = search_start + - block_group->key.offset / 2; + (block_group->key.offset >> 1); ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -1534,9 +1537,11 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_key key; struct btrfs_key found_key; struct btrfs_leaf *leaf; - u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; + u64 group_size_blocks; u64 used; + group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >> + root->fs_info->sb->s_blocksize_bits; root = info->extent_root; key.objectid = 0; key.offset = group_size_blocks; @@ -1590,7 +1595,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) (void *)cache); BUG_ON(ret); used = btrfs_block_group_used(bi); - if (used < (key.offset * 8) / 10) { + if (used < div_factor(key.offset, 8)) { radix_tree_tag_set(radix, found_key.objectid + found_key.offset - 1, BTRFS_BLOCK_GROUP_AVAIL); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 0782e924ddeb..fb8c214160c1 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -239,7 +239,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, if (isize <= key.offset) return 0; new_item_span = isize - key.offset; - blocks = (new_item_span + root->blocksize - 1) / root->blocksize; + blocks = (new_item_span + root->blocksize - 1) >> + root->fs_info->sb->s_blocksize_bits; new_item_size = blocks * BTRFS_CRC32_SIZE; if (new_item_size >= btrfs_item_size(leaf->items + slot)) return 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7f8e3035d99e..0325dc038592 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -82,7 +82,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, for (i = 0; i < num_pages; i++) { offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); /* FIXME, one block at a time */ mutex_lock(&root->fs_info->fs_mutex); @@ -395,7 +395,7 @@ static int prepare_pages(struct btrfs_root *root, cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); wait_on_page_writeback(pages[i]); offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); if (!page_has_buffers(pages[i])) { create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, @@ -567,7 +567,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, while(count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); + size_t write_bytes = min(count, + (size_t)PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a42943bd9179..0564a73bb2e2 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -102,7 +102,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_del_item(trans, root, path); } else { btrfs_set_root_refs(ri, refs - 1); -printk("ref now %u root %Lu %Lu %u\n", refs -1, key->objectid, key->offset, key->flags); +printk("ref now %u root %llu %Lu %u\n", refs -1, key->objectid, key->offset, key->flags); mark_buffer_dirty(path->nodes[0]); } out: From 5af3981c1878b0657b9babd2ef7ec98c2008cf2c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 07:50:13 -0400 Subject: [PATCH 0165/2985] Btrfs: printk fixes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 10 ++++++---- fs/btrfs/extent-tree.c | 5 ----- fs/btrfs/file-item.c | 5 ----- fs/btrfs/print-tree.c | 42 ++++++++++++++++++++++-------------------- fs/btrfs/root-tree.c | 2 +- 5 files changed, 29 insertions(+), 35 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 96bf3ef3a798..878101172546 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -20,8 +20,9 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { - printk(KERN_CRIT "bh_blocknr(buf) is %Lu, header is %Lu\n", - bh_blocknr(buf), btrfs_header_blocknr(&node->header)); + printk(KERN_CRIT "bh_blocknr(buf) is %llu, header is %llu\n", + (unsigned long long)bh_blocknr(buf), + (unsigned long long)btrfs_header_blocknr(&node->header)); return 1; } return 0; @@ -157,8 +158,9 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, return ret; if (verify) { if (memcmp(bh->b_data, result, BTRFS_CRC32_SIZE)) { - printk("checksum verify failed on %Lu\n", - bh_blocknr(bh)); + printk("btrfs: %s checksum verify failed on %llu\n", + root->fs_info->sb->s_id, + (unsigned long long)bh_blocknr(bh)); return 1; } } else { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6949eebc9dc1..fe02fbfa6d59 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -584,8 +584,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, while(total) { cache = btrfs_lookup_block_group(info, blocknr); if (!cache) { - printk(KERN_CRIT "blocknr %Lu lookup failed\n", - blocknr); return -1; } block_in_group = blocknr - cache->key.objectid; @@ -795,9 +793,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret) { - printk("failed to find %Lu\n", key.objectid); - btrfs_print_tree(extent_root, extent_root->node); - printk("failed to find %Lu\n", key.objectid); BUG(); } ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index fb8c214160c1..d5a98827e38a 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -28,10 +28,6 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(*item)); - if (ret) { -printk("failed to insert %Lu %Lu ret %d\n", objectid, pos, ret); -btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); - } BUG_ON(ret); item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); @@ -201,7 +197,6 @@ insert: ret = btrfs_insert_empty_item(trans, root, path, &file_key, BTRFS_CRC32_SIZE); if (ret != 0) { - printk("at insert for %Lu %u %Lu ret is %d\n", file_key.objectid, file_key.flags, file_key.offset, ret); WARN_ON(1); goto fail; } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 28813411de66..21791f037561 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -15,31 +15,32 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_block_group_item *bi; u32 type; - printk("leaf %Lu total ptrs %d free space %d\n", - btrfs_header_blocknr(&l->header), nr, + printk("leaf %llu total ptrs %d free space %d\n", + (unsigned long long)btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { item = l->items + i; type = btrfs_disk_key_type(&item->key); - printk("\titem %d key (%Lu %x %Lu) itemoff %d itemsize %d\n", + printk("\titem %d key (%llu %x %llu) itemoff %d itemsize %d\n", i, - btrfs_disk_key_objectid(&item->key), + (unsigned long long)btrfs_disk_key_objectid(&item->key), btrfs_disk_key_flags(&item->key), - btrfs_disk_key_offset(&item->key), + (unsigned long long)btrfs_disk_key_offset(&item->key), btrfs_item_offset(item), btrfs_item_size(item)); switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); - printk("\t\tinode generation %Lu size %Lu mode %o\n", - btrfs_inode_generation(ii), - btrfs_inode_size(ii), + printk("\t\tinode generation %llu size %llu mode %o\n", + (unsigned long long)btrfs_inode_generation(ii), + (unsigned long long)btrfs_inode_size(ii), btrfs_inode_mode(ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); - printk("\t\tdir oid %Lu flags %u type %u\n", - btrfs_disk_key_objectid(&di->location), + printk("\t\tdir oid %llu flags %u type %u\n", + (unsigned long long)btrfs_disk_key_objectid( + &di->location), btrfs_dir_flags(di), btrfs_dir_type(di)); printk("\t\tname %.*s\n", @@ -47,8 +48,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - printk("\t\troot data blocknr %Lu refs %u\n", - btrfs_root_blocknr(ri), btrfs_root_refs(ri)); + printk("\t\troot data blocknr %llu refs %u\n", + (unsigned long long)btrfs_root_blocknr(ri), + btrfs_root_refs(ri)); break; case BTRFS_EXTENT_ITEM_KEY: ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); @@ -58,8 +60,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); - printk("\t\tblock group used %Lu\n", - btrfs_block_group_used(bi)); + printk("\t\tblock group used %llu\n", + (unsigned long long)btrfs_block_group_used(bi)); break; case BTRFS_STRING_ITEM_KEY: printk("\t\titem data %.*s\n", btrfs_item_size(item), @@ -83,17 +85,17 @@ void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t) btrfs_print_leaf(root, (struct btrfs_leaf *)c); return; } - printk("node %Lu level %d total ptrs %d free spc %u\n", - btrfs_header_blocknr(&c->header), + printk("node %llu level %d total ptrs %d free spc %u\n", + (unsigned long long)btrfs_header_blocknr(&c->header), btrfs_header_level(&c->header), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { - printk("\tkey %d (%Lu %u %Lu) block %Lu\n", + printk("\tkey %d (%llu %u %llu) block %llu\n", i, - c->ptrs[i].key.objectid, + (unsigned long long)c->ptrs[i].key.objectid, c->ptrs[i].key.flags, - c->ptrs[i].key.offset, - btrfs_node_blockptr(c, i)); + (unsigned long long)c->ptrs[i].key.offset, + (unsigned long long)btrfs_node_blockptr(c, i)); } for (i = 0; i < nr; i++) { struct buffer_head *next_buf = read_tree_block(root, diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0564a73bb2e2..13c30284fd84 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -102,7 +102,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_del_item(trans, root, path); } else { btrfs_set_root_refs(ri, refs - 1); -printk("ref now %u root %llu %Lu %u\n", refs -1, key->objectid, key->offset, key->flags); + WARN_ON(1); mark_buffer_dirty(path->nodes[0]); } out: From 8a712645c3fbbf43293af8640af337b7cf1fef01 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 08:21:35 -0400 Subject: [PATCH 0166/2985] Btrfs: no slashes in subvolume names Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 62a3a778d37d..1cabc1951b0e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1831,6 +1831,8 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int namelen = strlen(vol_args.name); if (namelen > BTRFS_VOL_NAME_MAX) return -EINVAL; + if (strchr(vol_args.name, '/')) + return -EINVAL; path = btrfs_alloc_path(); if (!path) return -ENOMEM; From 6cbd55707802b98843f953d1ae6d8f5bcd9a76c0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 09:07:21 -0400 Subject: [PATCH 0167/2985] Btrfs: add GPLv2 Signed-off-by: Chris Mason --- fs/btrfs/COPYING | 356 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/TODO | 1 - fs/btrfs/bit-radix.c | 18 +++ fs/btrfs/bit-radix.h | 18 +++ fs/btrfs/btrfs_inode.h | 18 +++ fs/btrfs/ctree.c | 18 +++ fs/btrfs/ctree.h | 18 +++ fs/btrfs/dir-item.c | 18 +++ fs/btrfs/disk-io.c | 18 +++ fs/btrfs/disk-io.h | 18 +++ fs/btrfs/extent-tree.c | 18 +++ fs/btrfs/file-item.c | 18 +++ fs/btrfs/file.c | 18 +++ fs/btrfs/hash.c | 18 +++ fs/btrfs/hash.h | 18 +++ fs/btrfs/inode-item.c | 18 +++ fs/btrfs/inode-map.c | 18 +++ fs/btrfs/inode.c | 18 +++ fs/btrfs/ioctl.h | 18 +++ fs/btrfs/print-tree.c | 18 +++ fs/btrfs/print-tree.h | 18 +++ fs/btrfs/root-tree.c | 18 +++ fs/btrfs/super.c | 18 +++ fs/btrfs/sysfs.c | 18 +++ fs/btrfs/transaction.c | 18 +++ fs/btrfs/transaction.h | 18 +++ 26 files changed, 788 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/COPYING diff --git a/fs/btrfs/COPYING b/fs/btrfs/COPYING new file mode 100644 index 000000000000..ca442d313d86 --- /dev/null +++ b/fs/btrfs/COPYING @@ -0,0 +1,356 @@ + + NOTE! This copyright does *not* cover user programs that use kernel + services by normal system calls - this is merely considered normal use + of the kernel, and does *not* fall under the heading of "derived work". + Also note that the GPL below is copyrighted by the Free Software + Foundation, but the instance of code that it refers to (the Linux + kernel) is copyrighted by me and others who actually wrote it. + + Also note that the only valid version of the GPL as far as the kernel + is concerned is _this_ particular version of the license (ie v2, not + v2.2 or v3.x or whatever), unless explicitly otherwise stated. + + Linus Torvalds + +---------------------------------------- + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 4a48a2365d57..d9b6d38c603a 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,4 +1,3 @@ -* fix printk warnings * cleanup, add more error checking, get rid of BUG_ONs * Fix ENOSPC handling * Make allocator smarter diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 8f9cd4277231..a4ddf4de9797 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "bit-radix.h" diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h index 4e717e30db4f..c100f54d5c32 100644 --- a/fs/btrfs/bit-radix.h +++ b/fs/btrfs/bit-radix.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BIT_RADIX__ #define __BIT_RADIX__ #include diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6b50076b02d5..783f06b63259 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BTRFS_I__ #define __BTRFS_I__ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 20e01f95ce6e..4736155af419 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 24410d42e3ce..4bd648d68e88 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BTRFS__ #define __BTRFS__ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index b408a3d20c7b..0d179a44ebf4 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 878101172546..8c68a64c0ceb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 822ccb8aa4ae..7b76ccc48754 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __DISKIO__ #define __DISKIO__ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe02fbfa6d59..975a0eb9241b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index d5a98827e38a..37dd3801a107 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0325dc038592..566e526b60ff 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 32de1ea1b64e..e5c76903d722 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + /* * Original copy from: * linux/fs/ext3/hash.c diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index e56c4670093d..d3be02670585 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __HASH__ #define __HASH__ int btrfs_name_hash(const char *name, int len, u64 *hash_result); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index c3b990b661c2..449e9dbdf957 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 318e27a6378b..58930fc48be6 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1cabc1951b0e..46f869d6d0fb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 497fadd98c3a..23bed48c51a7 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __IOCTL_ #define __IOCTL_ #include diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 21791f037561..1fa705683008 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 396041a05cfa..205cd03601ab 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __PRINT_TREE_ #define __PRINT_TREE_ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 13c30284fd84..7a2d63c1ee80 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 94b3535a2a11..ef8ad18b275c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index ed8b7e20cfbc..ead9dc8051be 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4f3c8ddcb401..30914521cf01 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include "ctree.h" diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f25b4900db45..5fb1d322aa10 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __TRANSACTION__ #define __TRANSACTION__ #include "btrfs_inode.h" From 340887809d92a4dd6c71caa97d658cef32c098c3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 11:36:58 -0400 Subject: [PATCH 0168/2985] Btrfs: i386 fixes from axboe Signed-off-by: Chris Mason --- fs/btrfs/INSTALL | 51 ++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/disk-io.c | 2 +- fs/btrfs/file.c | 7 ++++-- fs/btrfs/inode.c | 3 +-- fs/btrfs/transaction.c | 1 + 5 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 fs/btrfs/INSTALL diff --git a/fs/btrfs/INSTALL b/fs/btrfs/INSTALL new file mode 100644 index 000000000000..e83ff6e8c8d5 --- /dev/null +++ b/fs/btrfs/INSTALL @@ -0,0 +1,51 @@ +Install Instructions + +Btrfs puts snapshots and subvolumes into the root directory of the FS. This +directory can only be changed by btrfsctl right now, and normal filesystem +operations do not work on it. The default subvolume is called 'default', +and you can create files and directories in mount_point/default + +Btrfs uses the crypto manager interface in the kernel for file and +metadata checksums. You need to compile the kernel with: + +CONFIG_CRYPTO=y +CONFIG_CRYPTO_MANAGER=m +CONFIG_CRYPTO_CRC32C=m + +cryptomanager and crc32c can be static as well. Once your kernel is +setup, typing make in the btrfs module sources will build against the +running kernel. When the build is complete: + +modprobe crc32c +modprobe cryptomgr +insmod btrfs.ko + +The Btrfs utility programs require libuuid to build. This can be found +in the e2fsprogs sources, and is usually available as libuuid or +e2fsprogs-devel from various distros. + +Building the utilities is just make ; make install. The programs go +into /usr/local/bin. The commands available are: + +mkfs.btrfs: create a filesystem + +btrfsctl: control program to create snapshots and subvolumes: + + mount /dev/sda2 /mnt + btrfsctl -s new_subvol_name /mnt + btrfsctl -s snapshot_of_default /mnt/default + btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name + btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol + ls /mnt + default snapshot_of_a_snapshot snapshot_of_new_subvol + new_subvol_name snapshot_of_default + + Snapshots and subvolumes cannot be deleted right now, but you can + rm -rf all the files and directories inside them. + +btrfsck: do a limited check of the FS extent trees. + +debug-tree: print all of the FS metadata in text form. Example: + + debug-tree /dev/sda2 >& big_output_file + diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8c68a64c0ceb..b0ebba6f2e46 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -106,7 +106,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, int err; u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); - page = grab_cache_page(mapping, index); + page = find_or_create_page(mapping, index, GFP_NOFS); if (!page) return NULL; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 566e526b60ff..83836fb3d4e9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -112,7 +112,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, if (buffer_mapped(bh) && bh->b_blocknr == 0) { struct btrfs_key key; struct btrfs_path *path; - char *ptr; + char *ptr, *kaddr; u32 datasize; /* create an inline extent, and copy the data in */ @@ -135,8 +135,11 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_set_file_extent_type(ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); + kaddr = kmap_atomic(bh->b_page, KM_USER0); btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, bh->b_data, offset + write_bytes); + ptr, kaddr + bh_offset(bh), + offset + write_bytes); + kunmap_atomic(kaddr, KM_USER0); mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); } else if (buffer_mapped(bh)) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 46f869d6d0fb..d3d5a4cdb6af 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1754,6 +1754,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) BTRFS_I(dir)->block_group, S_IFDIR | 0700); inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; + new_root->inode = inode; ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid); BUG_ON(ret); @@ -1766,8 +1767,6 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) ret = btrfs_commit_transaction(trans, new_root); BUG_ON(ret); - iput(inode); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root); return 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 30914521cf01..cd4f1d140385 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -18,6 +18,7 @@ #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" From f1ace244c8c1e16eaa5c8b3b5339849651e31ede Mon Sep 17 00:00:00 2001 From: Aneesh Date: Wed, 13 Jun 2007 16:18:26 -0400 Subject: [PATCH 0170/2985] btrfs: Code cleanup Attaching below is some of the code cleanups that i came across while reading the code. a) alloc_path already calls init_path. b) Mention that btrfs_inode is the in memory copy.Ext4 have ext4_inode_info as the in memory copy ext4_inode as the disk copy Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/ctree.c | 1 - fs/btrfs/dir-item.c | 1 - fs/btrfs/extent-tree.c | 4 ---- fs/btrfs/file-item.c | 2 -- fs/btrfs/inode-item.c | 1 - fs/btrfs/inode.c | 10 +--------- fs/btrfs/root-tree.c | 3 --- 8 files changed, 2 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 783f06b63259..6bce46be49ff 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -19,6 +19,7 @@ #ifndef __BTRFS_I__ #define __BTRFS_I__ +/* in memory btrfs inode */ struct btrfs_inode { struct btrfs_root *root; struct btrfs_block_group_cache *block_group; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4736155af419..498dea0854c1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1647,7 +1647,6 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) { ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 0d179a44ebf4..ff10cf5a8422 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -74,7 +74,6 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); path = btrfs_alloc_path(); - btrfs_init_path(path); data_size = sizeof(*dir_item) + name_len; dir_item = insert_with_overflow(trans, root, path, &key, data_size, name, name_len); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 975a0eb9241b..5f4eaba8b85f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -402,7 +402,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -437,7 +436,6 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_extent_item *item; path = btrfs_alloc_path(); - btrfs_init_path(path); key.objectid = blocknr; key.offset = num_blocks; key.flags = 0; @@ -807,7 +805,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret) { @@ -1463,7 +1460,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); level = btrfs_header_level(btrfs_buffer_header(snap)); orig_level = level; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 37dd3801a107..3c140cc1dd05 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -38,7 +38,6 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); file_key.objectid = objectid; file_key.offset = pos; file_key.flags = 0; @@ -274,7 +273,6 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 449e9dbdf957..e8b5a9f7c0c2 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -35,7 +35,6 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_insert_item(trans, root, &key, inode_item, sizeof(*inode_item)); btrfs_release_path(root, path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d3d5a4cdb6af..e639cc9e089f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -79,7 +79,6 @@ void btrfs_read_locked_inode(struct inode *inode) path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); @@ -177,7 +176,6 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, 1); if (ret) { @@ -214,7 +212,6 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, name, name_len, -1); if (IS_ERR(di)) { @@ -289,7 +286,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -360,7 +356,6 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, -1); BUG_ON(ret); @@ -687,7 +682,6 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, namelen, 0); if (!di || IS_ERR(di)) { @@ -874,7 +868,6 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; path = btrfs_alloc_path(); - btrfs_init_path(path); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; @@ -1263,7 +1256,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); if (create & BTRFS_GET_BLOCK_CREATE) { WARN_ON(1); /* this almost but not quite works */ @@ -1280,7 +1272,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, } ret = btrfs_lookup_file_extent(NULL, root, path, - inode->i_ino, + objectid, iblock << inode->i_blkbits, 0); if (ret < 0) { err = ret; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 7a2d63c1ee80..8e1b9046d5ec 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -36,7 +36,6 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) goto out; @@ -70,7 +69,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_search_slot(trans, root, key, path, 0, 1); if (ret < 0) goto out; @@ -106,7 +104,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) goto out; From 9ebefb180bad4914a31c4e1748ba187a30e1e990 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Jun 2007 13:50:00 -0400 Subject: [PATCH 0171/2985] Btrfs: patch queue: page_mkwrite Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 + fs/btrfs/disk-io.c | 4 +- fs/btrfs/file-item.c | 2 +- fs/btrfs/file.c | 176 ++++--------------------- fs/btrfs/inode.c | 303 ++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 328 insertions(+), 159 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4bd648d68e88..1e83ad720b10 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1142,6 +1142,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); +int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); void btrfs_read_locked_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, int wait); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b0ebba6f2e46..602b63dc76c3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -155,9 +155,9 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, desc.tfm = tfm; desc.flags = 0; sg_init_one(&sg, data, len); - spin_lock(&root->fs_info->hash_lock); + spin_lock_irq(&root->fs_info->hash_lock); ret = crypto_hash_digest(&desc, &sg, 1, result); - spin_unlock(&root->fs_info->hash_lock); + spin_unlock_irq(&root->fs_info->hash_lock); if (ret) { printk("digest failed\n"); } diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 3c140cc1dd05..1068993ab1c9 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -228,6 +228,7 @@ found: path->nodes[0]->b_data, root->fs_info->sb->s_blocksize); ret = btrfs_csum_data(root, data, len, &item->csum); +// printk("file %lu offset %llu csum %X\n", objectid, (unsigned long long)offset, *(int *)(&item->csum)); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); @@ -298,4 +299,3 @@ fail: mutex_unlock(&root->fs_info->fs_mutex); return ret; } - diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 83836fb3d4e9..de8d47b44e12 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -103,10 +103,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); /* FIXME, one block at a time */ - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - bh = page_buffers(pages[i]); if (buffer_mapped(bh) && bh->b_blocknr == 0) { @@ -115,6 +111,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, char *ptr, *kaddr; u32 datasize; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + /* create an inline extent, and copy the data in */ path = btrfs_alloc_path(); BUG_ON(!path); @@ -135,24 +135,19 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_set_file_extent_type(ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); + kaddr = kmap_atomic(bh->b_page, KM_USER0); btrfs_memcpy(root, path->nodes[0]->b_data, ptr, kaddr + bh_offset(bh), offset + write_bytes); kunmap_atomic(kaddr, KM_USER0); + mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); - } else if (buffer_mapped(bh)) { - /* csum the file data */ - btrfs_csum_file_block(trans, root, inode->i_ino, - pages[i]->index << PAGE_CACHE_SHIFT, - kmap(pages[i]), PAGE_CACHE_SIZE); - kunmap(pages[i]); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); } - SetPageChecked(pages[i]); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_commit_write(file, pages[i], offset, offset + this_write); @@ -503,7 +498,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if ((pos & (PAGE_CACHE_SIZE - 1))) { pinned[0] = grab_cache_page(inode->i_mapping, first_index); if (!PageUptodate(pinned[0])) { - ret = mpage_readpage(pinned[0], btrfs_get_block); + ret = btrfs_readpage(NULL, pinned[0]); BUG_ON(ret); wait_on_page_locked(pinned[0]); } else { @@ -513,7 +508,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { pinned[1] = grab_cache_page(inode->i_mapping, last_index); if (!PageUptodate(pinned[1])) { - ret = mpage_readpage(pinned[1], btrfs_get_block); + ret = btrfs_readpage(NULL, pinned[1]); BUG_ON(ret); wait_on_page_locked(pinned[1]); } else { @@ -633,138 +628,6 @@ out: return num_written ? num_written : err; } -/* - * FIXME, do this by stuffing the csum we want in the info hanging off - * page->private. For now, verify file csums on read - */ -static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, - unsigned long offset, unsigned long size) -{ - char *kaddr; - unsigned long left, count = desc->count; - struct inode *inode = page->mapping->host; - - if (size > count) - size = count; - - if (!PageChecked(page)) { - /* FIXME, do it per block */ - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - struct buffer_head *bh; - - if (page_has_buffers(page)) { - bh = page_buffers(page); - if (!buffer_mapped(bh)) { - SetPageChecked(page); - goto checked; - } - } - - ret = btrfs_csum_verify_file_block(root, - page->mapping->host->i_ino, - page->index << PAGE_CACHE_SHIFT, - kmap(page), PAGE_CACHE_SIZE); - if (ret) { - if (ret != -ENOENT) { - printk("failed to verify ino %lu page %lu ret %d\n", - page->mapping->host->i_ino, - page->index, ret); - memset(page_address(page), 1, PAGE_CACHE_SIZE); - flush_dcache_page(page); - } - } - SetPageChecked(page); - kunmap(page); - } -checked: - /* - * Faults on the destination of a read are common, so do it before - * taking the kmap. - */ - if (!fault_in_pages_writeable(desc->arg.buf, size)) { - kaddr = kmap_atomic(page, KM_USER0); - left = __copy_to_user_inatomic(desc->arg.buf, - kaddr + offset, size); - kunmap_atomic(kaddr, KM_USER0); - if (left == 0) - goto success; - } - - /* Do it the slow way */ - kaddr = kmap(page); - left = __copy_to_user(desc->arg.buf, kaddr + offset, size); - kunmap(page); - - if (left) { - size -= left; - desc->error = -EFAULT; - } -success: - desc->count = count - size; - desc->written += size; - desc->arg.buf += size; - return size; -} - -/** - * btrfs_file_aio_read - filesystem read routine, with a mod to csum verify - * @iocb: kernel I/O control block - * @iov: io vector request - * @nr_segs: number of segments in the iovec - * @pos: current file position - */ -static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct file *filp = iocb->ki_filp; - ssize_t retval; - unsigned long seg; - size_t count; - loff_t *ppos = &iocb->ki_pos; - - count = 0; - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *iv = &iov[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - count += iv->iov_len; - if (unlikely((ssize_t)(count|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len)) - continue; - if (seg == 0) - return -EFAULT; - nr_segs = seg; - count -= iv->iov_len; /* This segment is no good */ - break; - } - retval = 0; - if (count) { - for (seg = 0; seg < nr_segs; seg++) { - read_descriptor_t desc; - - desc.written = 0; - desc.arg.buf = iov[seg].iov_base; - desc.count = iov[seg].iov_len; - if (desc.count == 0) - continue; - desc.error = 0; - do_generic_file_read(filp, ppos, &desc, - btrfs_read_actor); - retval += desc.written; - if (desc.error) { - retval = retval ?: desc.error; - break; - } - } - } - return retval; -} - static int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) { @@ -789,12 +652,25 @@ out: return ret > 0 ? EIO : ret; } +static struct vm_operations_struct btrfs_file_vm_ops = { + .nopage = filemap_nopage, + .populate = filemap_populate, + .page_mkwrite = btrfs_page_mkwrite, +}; + +static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) +{ + vma->vm_ops = &btrfs_file_vm_ops; + file_accessed(filp); + return 0; +} + struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .aio_read = btrfs_file_aio_read, + .aio_read = generic_file_aio_read, .write = btrfs_file_write, - .mmap = generic_file_mmap, + .mmap = btrfs_file_mmap, .open = generic_file_open, .ioctl = btrfs_ioctl, .fsync = btrfs_sync_file, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e639cc9e089f..def33ac90d77 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -561,7 +562,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) goto out; if (!PageUptodate(page)) { - ret = mpage_readpage(page, btrfs_get_block); + ret = btrfs_readpage(NULL, page); lock_page(page); if (!PageUptodate(page)) { ret = -EIO; @@ -1257,8 +1258,10 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, path = btrfs_alloc_path(); BUG_ON(!path); if (create & BTRFS_GET_BLOCK_CREATE) { - WARN_ON(1); - /* this almost but not quite works */ + /* + * danger!, this only works if the page is properly up + * to date somehow + */ trans = btrfs_start_transaction(root, 1); if (!trans) { err = -ENOMEM; @@ -1353,7 +1356,6 @@ not_found: ins.objectid, ins.offset, ins.offset); BUG_ON(ret); - SetPageChecked(result->b_page); btrfs_map_bh_to_logical(root, result, ins.objectid); } out: @@ -1374,6 +1376,40 @@ int btrfs_get_block(struct inode *inode, sector_t iblock, return err; } +int btrfs_get_block_csum(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + int ret; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct page *page = result->b_page; + u64 offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(result); + struct btrfs_csum_item *item; + struct btrfs_path *path = NULL; + + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_get_block_lock(inode, iblock, result, create); + if (ret) + goto out; + + path = btrfs_alloc_path(); + item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, offset, 0); + if (IS_ERR(item)) { + ret = PTR_ERR(item); + /* a csum that isn't present is a preallocated region. */ + if (ret == -ENOENT || ret == -EFBIG) + ret = 0; + result->b_private = 0; + goto out; + } + memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE); +printk("get_block_sum file %lu offset %llu csum %X\n", inode->i_ino, (unsigned long long)offset, *(int *)(&item->csum)); +out: + if (path) + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { @@ -1395,9 +1431,198 @@ static int btrfs_prepare_write(struct file *file, struct page *page, return block_prepare_write(page, from, to, btrfs_get_block); } -static int btrfs_readpage(struct file *file, struct page *page) +static void buffer_io_error(struct buffer_head *bh) { - return mpage_readpage(page, btrfs_get_block); + char b[BDEVNAME_SIZE]; + + printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n", + bdevname(bh->b_bdev, b), + (unsigned long long)bh->b_blocknr); +} + +/* + * I/O completion handler for block_read_full_page() - pages + * which come unlocked at the end of I/O. + */ +static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) +{ + unsigned long flags; + struct buffer_head *first; + struct buffer_head *tmp; + struct page *page; + int page_uptodate = 1; + struct inode *inode; + int ret; + + BUG_ON(!buffer_async_read(bh)); + + page = bh->b_page; + inode = page->mapping->host; + if (uptodate) { + void *kaddr; + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + if (bh->b_private) { + char csum[BTRFS_CRC32_SIZE]; + kaddr = kmap_atomic(page, KM_IRQ0); + ret = btrfs_csum_data(root, kaddr + bh_offset(bh), + bh->b_size, csum); + BUG_ON(ret); + if (memcmp(csum, &bh->b_private, BTRFS_CRC32_SIZE)) { + u64 offset; + offset = (page->index << PAGE_CACHE_SHIFT) + + bh_offset(bh); + printk("btrfs csum failed ino %lu off %llu\n", + page->mapping->host->i_ino, + (unsigned long long)offset); + memset(kaddr + bh_offset(bh), 1, bh->b_size); + flush_dcache_page(page); +printk("bad verify file %lu offset %llu bh_private %lX csum %X\n", inode->i_ino, (unsigned long long)offset, (unsigned long)(bh->b_private), *(int *)csum); + } + kunmap_atomic(kaddr, KM_IRQ0); + } + set_buffer_uptodate(bh); + } else { + clear_buffer_uptodate(bh); + if (printk_ratelimit()) + buffer_io_error(bh); + SetPageError(page); + } + + /* + * Be _very_ careful from here on. Bad things can happen if + * two buffer heads end IO at almost the same time and both + * decide that the page is now completely done. + */ + first = page_buffers(page); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); + clear_buffer_async_read(bh); + unlock_buffer(bh); + tmp = bh; + do { + if (!buffer_uptodate(tmp)) + page_uptodate = 0; + if (buffer_async_read(tmp)) { + BUG_ON(!buffer_locked(tmp)); + goto still_busy; + } + tmp = tmp->b_this_page; + } while (tmp != bh); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); + + /* + * If none of the buffers had errors and they are all + * uptodate then we can set the page uptodate. + */ + if (page_uptodate && !PageError(page)) + SetPageUptodate(page); + unlock_page(page); + return; + +still_busy: + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); + return; +} + +/* + * Generic "read page" function for block devices that have the normal + * get_block functionality. This is most of the block device filesystems. + * Reads the page asynchronously --- the unlock_buffer() and + * set/clear_buffer_uptodate() functions propagate buffer state into the + * page struct once IO has completed. + */ +int btrfs_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + sector_t iblock, lblock; + struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; + unsigned int blocksize; + int nr, i; + int fully_mapped = 1; + + BUG_ON(!PageLocked(page)); + blocksize = 1 << inode->i_blkbits; + if (!page_has_buffers(page)) + create_empty_buffers(page, blocksize, 0); + head = page_buffers(page); + + iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits; + bh = head; + nr = 0; + i = 0; + + do { + if (buffer_uptodate(bh)) + continue; + + if (!buffer_mapped(bh)) { + int err = 0; + + fully_mapped = 0; + if (iblock < lblock) { + WARN_ON(bh->b_size != blocksize); + err = btrfs_get_block_csum(inode, iblock, + bh, 0); + if (err) + SetPageError(page); + } + if (!buffer_mapped(bh)) { + void *kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + i * blocksize, 0, blocksize); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + if (!err) + set_buffer_uptodate(bh); + continue; + } + /* + * get_block() might have updated the buffer + * synchronously + */ + if (buffer_uptodate(bh)) + continue; + } + arr[nr++] = bh; + } while (i++, iblock++, (bh = bh->b_this_page) != head); + + if (fully_mapped) + SetPageMappedToDisk(page); + + if (!nr) { + /* + * All buffers are uptodate - we can set the page uptodate + * as well. But not if get_block() returned an error. + */ + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); + return 0; + } + + /* Stage two: lock the buffers */ + for (i = 0; i < nr; i++) { + bh = arr[i]; + lock_buffer(bh); + bh->b_end_io = btrfs_end_buffer_async_read; + set_buffer_async_read(bh); + } + + /* + * Stage 3: start the IO. Check for uptodateness + * inside the buffer lock in case another process reading + * the underlying blockdev brought it uptodate (the sct fix). + */ + for (i = 0; i < nr; i++) { + bh = arr[i]; + if (buffer_uptodate(bh)) + btrfs_end_buffer_async_read(bh, 1); + else + submit_bh(READ, bh); + } + return 0; } /* @@ -1424,6 +1649,7 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; const unsigned blocksize = 1 << inode->i_blkbits; int nr_underway = 0; + struct btrfs_root *root = BTRFS_I(inode)->root; BUG_ON(!PageLocked(page)); @@ -1496,6 +1722,24 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, continue; } if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) { + struct btrfs_trans_handle *trans; + int ret; + u64 off = page->index << PAGE_CACHE_SHIFT; + char *kaddr; + + off += bh_offset(bh); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + kaddr = kmap(page); + ret = btrfs_csum_file_block(trans, root, inode->i_ino, + off, kaddr + bh_offset(bh), + bh->b_size); + kunmap(page); + BUG_ON(ret); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); mark_buffer_async_write(bh); } else { unlock_buffer(bh); @@ -1617,6 +1861,53 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) return __btrfs_write_full_page(inode, page, wbc); } +/* + * btrfs_page_mkwrite() is not allowed to change the file size as it gets + * called from a page fault handler when a page is first dirtied. Hence we must + * be careful to check for EOF conditions here. We set the page up correctly + * for a written page which means we get ENOSPC checking when writing into + * holes and correct delalloc and unwritten extent mapping on filesystems that + * support these features. + * + * We are not allowed to take the i_mutex here so we have to play games to + * protect against truncate races as the page could now be beyond EOF. Because + * vmtruncate() writes the inode size before removing pages, once we have the + * page lock we can determine safely if the page is beyond EOF. If it is not + * beyond EOF, then the page is guaranteed safe against truncation until we + * unlock the page. + */ +int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + unsigned long end; + loff_t size; + int ret = -EINVAL; + + lock_page(page); + wait_on_page_writeback(page); +printk("btrfs_page_mkwrite %lu %lu\n", page->mapping->host->i_ino, page->index); + size = i_size_read(inode); + if ((page->mapping != inode->i_mapping) || + ((page->index << PAGE_CACHE_SHIFT) > size)) { + /* page got truncated out from underneath us */ + goto out_unlock; + } + + /* page is wholly or partially inside EOF */ + if (((page->index + 1) << PAGE_CACHE_SHIFT) > size) + end = size & ~PAGE_CACHE_MASK; + else + end = PAGE_CACHE_SIZE; + + ret = btrfs_prepare_write(NULL, page, 0, end); + if (!ret) + ret = btrfs_commit_write(NULL, page, 0, end); + +out_unlock: + unlock_page(page); + return ret; +} + static void btrfs_truncate(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; From 8c2383c3dd2cb5bb39598ce4fa97154bc591020a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 18 Jun 2007 09:57:58 -0400 Subject: [PATCH 0172/2985] Subject: Rework btrfs_file_write to only allocate while page locks are held Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 + fs/btrfs/extent-tree.c | 1 - fs/btrfs/file-item.c | 1 - fs/btrfs/file.c | 244 ++++++++++++++++++++++------------------- fs/btrfs/inode.c | 11 +- fs/btrfs/print-tree.c | 18 +++ fs/btrfs/transaction.c | 1 + 7 files changed, 161 insertions(+), 117 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 602b63dc76c3..15cc9ec92308 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -541,6 +541,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root else ret = submit_bh(WRITE, bh); if (ret == -EOPNOTSUPP) { + get_bh(bh); + lock_buffer(bh); set_buffer_uptodate(bh); root->fs_info->do_barriers = 0; ret = submit_bh(WRITE, bh); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5f4eaba8b85f..e0738c65bc5b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1477,7 +1477,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; - btrfs_btree_balance_dirty(root); } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 1068993ab1c9..cf894f09f6c7 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -228,7 +228,6 @@ found: path->nodes[0]->b_data, root->fs_info->sb->s_blocksize); ret = btrfs_csum_data(root, data, len, &item->csum); -// printk("file %lu offset %llu csum %X\n", objectid, (unsigned long long)offset, *(int *)(&item->csum)); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index de8d47b44e12..6b455c2b3f03 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -207,6 +207,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, } path->slots[0]--; } +next_slot: keep = 0; bookend = 0; found_extent = 0; @@ -214,39 +215,48 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, extent = NULL; leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; + ret = 0; btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); if (key.offset >= end || key.objectid != inode->i_ino) { - ret = 0; goto out; } - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) { - ret = 0; + if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) { goto out; } - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(extent); - if (found_type == BTRFS_FILE_EXTENT_REG) { - extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << - inode->i_blkbits); - found_extent = 1; - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - found_inline = 1; - extent_end = key.offset + - btrfs_file_extent_inline_len(leaf->items + slot); + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); + found_extent = 1; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_inline = 1; + extent_end = key.offset + + btrfs_file_extent_inline_len(leaf->items + + slot); + } + } else { + extent_end = search_start; } /* we found nothing we can drop */ - if (!found_extent && !found_inline) { - ret = 0; - goto out; - } - - /* we found nothing inside the range */ - if (search_start >= extent_end) { - ret = 0; - goto out; + if ((!found_extent && !found_inline) || + search_start >= extent_end) { + int nextret; + u32 nritems; + nritems = btrfs_header_nritems( + btrfs_buffer_header(path->nodes[0])); + if (slot >= nritems - 1) { + nextret = btrfs_next_leaf(root, path); + if (nextret) + goto out; + } else { + path->slots[0]++; + } + goto next_slot; } /* FIXME, there's only one inline extent allowed right now */ @@ -272,7 +282,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, WARN_ON(found_inline); bookend = 1; } - /* truncate existing extent */ if (start > key.offset) { u64 new_num; @@ -337,10 +346,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, ins.offset = end; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); - btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); + + if (ret) { + btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); + printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end); + } BUG_ON(ret); extent = btrfs_item_ptr( btrfs_buffer_leaf(path->nodes[0]), @@ -387,8 +400,7 @@ static int prepare_pages(struct btrfs_root *root, loff_t pos, unsigned long first_index, unsigned long last_index, - size_t write_bytes, - u64 alloc_extent_start) + size_t write_bytes) { int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; @@ -399,6 +411,16 @@ static int prepare_pages(struct btrfs_root *root, struct buffer_head *bh; struct buffer_head *head; loff_t isize = i_size_read(inode); + struct btrfs_trans_handle *trans; + u64 hint_block; + u64 num_blocks; + u64 alloc_extent_start; + u64 start_pos; + struct btrfs_key ins; + + start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); + num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); @@ -408,6 +430,72 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; goto failed_release; } + } + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + mutex_unlock(&root->fs_info->fs_mutex); + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); + /* FIXME blocksize != 4096 */ + inode->i_blocks += num_blocks << 3; + hint_block = 0; + + /* FIXME...EIEIO, ENOSPC and more */ + + /* step one, delete the existing extents in this range */ + /* FIXME blocksize != pagesize */ + if (start_pos < inode->i_size) { + err = btrfs_drop_extents(trans, root, inode, + start_pos, (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + BUG_ON(err); + } + + /* insert any holes we need to create */ + if (inode->i_size < start_pos) { + u64 last_pos_in_file; + u64 hole_size; + u64 mask = root->blocksize - 1; + last_pos_in_file = (isize + mask) & ~mask; + hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + err = btrfs_insert_file_extent(trans, root, + inode->i_ino, + last_pos_in_file, + 0, 0, hole_size); + } + BUG_ON(err); + } + + /* + * either allocate an extent for the new bytes or setup the key + * to show we are doing inline data in the extent + */ + if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || + pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + err = btrfs_alloc_extent(trans, root, inode->i_ino, + num_blocks, hint_block, (u64)-1, + &ins, 1); + BUG_ON(err); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, + start_pos, ins.objectid, ins.offset, + ins.offset); + BUG_ON(err); + } else { + ins.offset = 0; + ins.objectid = 0; + } + BUG_ON(err); + alloc_extent_start = ins.objectid; + err = btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + for (i = 0; i < num_pages; i++) { cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); wait_on_page_writeback(pages[i]); offset = pos & (PAGE_CACHE_SIZE -1); @@ -444,6 +532,11 @@ failed_truncate: if (pos > isize) vmtruncate(inode, isize); return err; + +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); + goto failed_release; + } static ssize_t btrfs_file_write(struct file *file, const char __user *buf, @@ -455,16 +548,14 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, int ret = 0; struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct page *pages[8]; + struct page **pages = NULL; + int nrptrs; struct page *pinned[2]; unsigned long first_index; unsigned long last_index; - u64 start_pos; - u64 num_blocks; - u64 alloc_extent_start; - u64 hint_block; - struct btrfs_trans_handle *trans; - struct btrfs_key ins; + + nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, + PAGE_CACHE_SIZE / (sizeof(struct page *))); pinned[0] = NULL; pinned[1] = NULL; if (file->f_flags & O_DIRECT) @@ -482,9 +573,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out; file_update_time(file); - start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); - num_blocks = (count + pos - start_pos + root->blocksize - 1) >> - inode->i_blkbits; + pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; @@ -516,87 +605,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } } - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - mutex_unlock(&root->fs_info->fs_mutex); - goto out_unlock; - } - btrfs_set_trans_block_group(trans, inode); - /* FIXME blocksize != 4096 */ - inode->i_blocks += num_blocks << 3; - hint_block = 0; - - /* FIXME...EIEIO, ENOSPC and more */ - - /* step one, delete the existing extents in this range */ - if (start_pos < inode->i_size) { - /* FIXME blocksize != pagesize */ - ret = btrfs_drop_extents(trans, root, inode, - start_pos, - (pos + count + root->blocksize -1) & - ~((u64)root->blocksize - 1), - &hint_block); - BUG_ON(ret); - } - - /* insert any holes we need to create */ - if (inode->i_size < start_pos) { - u64 last_pos_in_file; - u64 hole_size; - u64 mask = root->blocksize - 1; - last_pos_in_file = (inode->i_size + mask) & ~mask; - hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - hole_size >>= inode->i_blkbits; - if (last_pos_in_file < start_pos) { - ret = btrfs_insert_file_extent(trans, root, - inode->i_ino, - last_pos_in_file, - 0, 0, hole_size); - } - BUG_ON(ret); - } - - /* - * either allocate an extent for the new bytes or setup the key - * to show we are doing inline data in the extent - */ - if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || - pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - ret = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, hint_block, (u64)-1, - &ins, 1); - BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset, - ins.offset); - BUG_ON(ret); - } else { - ins.offset = 0; - ins.objectid = 0; - } - BUG_ON(ret); - alloc_extent_start = ins.objectid; - ret = btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - while(count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, - (size_t)PAGE_CACHE_SIZE - offset); + size_t write_bytes = min(count, nrptrs * PAGE_CACHE_SIZE - + offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(pages)); ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, - write_bytes, alloc_extent_start); + write_bytes); BUG_ON(ret); - /* FIXME blocks != pagesize */ - if (alloc_extent_start) - alloc_extent_start += num_pages; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); BUG_ON(ret); @@ -611,13 +633,13 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pos += write_bytes; num_written += write_bytes; - balance_dirty_pages_ratelimited(inode->i_mapping); + balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); btrfs_btree_balance_dirty(root); cond_resched(); } -out_unlock: mutex_unlock(&inode->i_mutex); out: + kfree(pages); if (pinned[0]) page_cache_release(pinned[0]); if (pinned[1]) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index def33ac90d77..94f1c28c25b9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -962,7 +962,6 @@ void btrfs_dirty_inode(struct inode *inode) btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); } static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, @@ -1402,7 +1401,6 @@ int btrfs_get_block_csum(struct inode *inode, sector_t iblock, goto out; } memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE); -printk("get_block_sum file %lu offset %llu csum %X\n", inode->i_ino, (unsigned long long)offset, *(int *)(&item->csum)); out: if (path) btrfs_free_path(path); @@ -1476,7 +1474,6 @@ static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) (unsigned long long)offset); memset(kaddr + bh_offset(bh), 1, bh->b_size); flush_dcache_page(page); -printk("bad verify file %lu offset %llu bh_private %lX csum %X\n", inode->i_ino, (unsigned long long)offset, (unsigned long)(bh->b_private), *(int *)csum); } kunmap_atomic(kaddr, KM_IRQ0); } @@ -1655,6 +1652,13 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; + /* no csumming allowed when from PF_MEMALLOC */ + if (current->flags & PF_MEMALLOC) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } + if (!page_has_buffers(page)) { create_empty_buffers(page, blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); @@ -1885,7 +1889,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) lock_page(page); wait_on_page_writeback(page); -printk("btrfs_page_mkwrite %lu %lu\n", page->mapping->host->i_ino, page->index); size = i_size_read(inode); if ((page->mapping != inode->i_mapping) || ((page->index << PAGE_CACHE_SHIFT) > size)) { diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 1fa705683008..e66acd4e2745 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -31,6 +31,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_dir_item *di; struct btrfs_inode_item *ii; struct btrfs_block_group_item *bi; + struct btrfs_file_extent_item *fi; u32 type; printk("leaf %llu total ptrs %d free space %d\n", @@ -75,6 +76,23 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) printk("\t\textent data refs %u\n", btrfs_extent_refs(ei)); break; + + case BTRFS_EXTENT_DATA_KEY: + fi = btrfs_item_ptr(l, i, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(fi) == + BTRFS_FILE_EXTENT_INLINE) { + printk("\t\tinline extent data size %u\n", + btrfs_file_extent_inline_len(l->items + i)); + break; + } + printk("\t\textent data disk block %llu nr %llu\n", + (unsigned long long)btrfs_file_extent_disk_blocknr(fi), + (unsigned long long)btrfs_file_extent_disk_num_blocks(fi)); + printk("\t\textent data offset %llu nr %llu\n", + (unsigned long long)btrfs_file_extent_offset(fi), + (unsigned long long)btrfs_file_extent_num_blocks(fi)); + break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cd4f1d140385..b5b99a85763f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -279,6 +279,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, BUG_ON(ret); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(tree_root); } return 0; } From 85e55b13e4e318672f669747e5e010d1b707b198 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 19 Jun 2007 15:50:51 -0400 Subject: [PATCH 0174/2985] Btrfs: cache the extent tree preallocation Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e0738c65bc5b..47e91184fff3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -741,7 +741,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct BUG_ON(ret); } extent_root->fs_info->extent_tree_insert_nr = 0; - extent_root->fs_info->extent_tree_prealloc_nr = 0; return 0; } @@ -951,6 +950,18 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } + if (fill_prealloc) { + u64 first; + int nr = info->extent_tree_prealloc_nr; + first = info->extent_tree_prealloc[nr - 1]; + if (info->extent_tree_prealloc_nr >= total_needed && + first >= search_start) { + ins->objectid = info->extent_tree_prealloc[0]; + ins->offset = 1; + return 0; + } + info->extent_tree_prealloc_nr = 0; + } if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(info->disk_super); if (hint_block) { @@ -1107,7 +1118,6 @@ check_pending: if (ins->objectid + num_blocks > first && ins->objectid <= info->extent_tree_prealloc[0]) { search_start = info->extent_tree_prealloc[0] + 1; - WARN_ON(!full_scan); goto new_group; } } From e011599b0f375683499bf1a9954703f4959a8d00 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 19 Jun 2007 16:23:05 -0400 Subject: [PATCH 0175/2985] Btrfs: reada while dropping snapshots Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 47e91184fff3..b38c3e92f0c8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -940,7 +940,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int wrapped = 0; u64 limit; - path = btrfs_alloc_path(); ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); @@ -974,6 +973,8 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root data, 1); } + path = btrfs_alloc_path(); + check_failed: if (!block_group->data) search_start = find_search_start(root, &block_group, @@ -1355,6 +1356,28 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } +static void reada_walk_down(struct btrfs_root *root, + struct btrfs_node *node) +{ + int i; + u32 nritems; + u64 blocknr; + int ret; + u32 refs; + + nritems = btrfs_header_nritems(&node->header); + for (i = 0; i < nritems; i++) { + blocknr = btrfs_node_blockptr(node, i); + ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs); + BUG_ON(ret); + if (refs != 1) + continue; + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} + /* * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. @@ -1375,6 +1398,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); if (refs > 1) goto out; + /* * walk down to the last node level and free all the leaves */ @@ -1382,8 +1406,13 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; + + if (*level > 0 && path->slots[*level] == 0) + reada_walk_down(root, btrfs_buffer_node(cur)); + if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) WARN_ON(1); + if (path->slots[*level] >= btrfs_header_nritems(btrfs_buffer_header(cur))) break; From 11bd143fc8243cf48c934dc1c4479a5aacf58ce3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 22 Jun 2007 14:16:24 -0400 Subject: [PATCH 0176/2985] Btrfs: Switch to libcrc32c to avoid problems with cryptomgr on highmem machines Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ---- fs/btrfs/disk-io.c | 30 ++++++------------------------ fs/btrfs/file.c | 3 ++- 3 files changed, 8 insertions(+), 29 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1e83ad720b10..19a1d998fca7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -286,10 +286,7 @@ struct btrfs_block_group_cache { int cached; }; -struct crypto_hash; - struct btrfs_fs_info { - spinlock_t hash_lock; struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; @@ -314,7 +311,6 @@ struct btrfs_fs_info { struct mutex fs_mutex; struct list_head trans_list; struct list_head dead_roots; - struct crypto_hash *hash_tfm; struct delayed_work trans_work; int do_barriers; int closing; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 15cc9ec92308..3712e946f9c8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -147,22 +147,12 @@ static int btree_get_block(struct inode *inode, sector_t iblock, int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result) { - struct scatterlist sg; - struct crypto_hash *tfm = root->fs_info->hash_tfm; - struct hash_desc desc; - int ret; - - desc.tfm = tfm; - desc.flags = 0; - sg_init_one(&sg, data, len); - spin_lock_irq(&root->fs_info->hash_lock); - ret = crypto_hash_digest(&desc, &sg, 1, result); - spin_unlock_irq(&root->fs_info->hash_lock); - if (ret) { - printk("digest failed\n"); - } - return ret; + u32 crc; + crc = crc32c(0, data, len); + memcpy(result, &crc, BTRFS_CRC32_SIZE); + return 0; } + static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, int verify) { @@ -454,14 +444,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) sizeof(struct btrfs_key)); insert_inode_hash(fs_info->btree_inode); mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); - fs_info->hash_tfm = crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC); - spin_lock_init(&fs_info->hash_lock); - if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { - printk("btrfs: failed hash setup, modprobe cryptomgr?\n"); - err = -ENOMEM; - goto fail_iput; - } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); @@ -613,7 +596,6 @@ int close_ctree(struct btrfs_root *root) btrfs_block_release(fs_info->tree_root, fs_info->tree_root->node); btrfs_block_release(root, fs_info->sb_buffer); - crypto_free_hash(fs_info->hash_tfm); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6b455c2b3f03..23f029359688 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -607,7 +607,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, while(count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, nrptrs * PAGE_CACHE_SIZE - + size_t write_bytes = min(count, nrptrs * + (size_t)PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; From 54aa1f4dfdacd60a19c4471220b24e581be6f774 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 22 Jun 2007 14:16:25 -0400 Subject: [PATCH 0177/2985] Btrfs: Audit callers and return codes to make sure -ENOSPC gets up the stack Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 106 ++++++++++++++++++++------ fs/btrfs/ctree.h | 3 - fs/btrfs/dir-item.c | 6 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 143 ++++++++++++++++++++++++++++------- fs/btrfs/file-item.c | 46 ++---------- fs/btrfs/file.c | 131 +++++++++++++++++++------------- fs/btrfs/inode.c | 165 ++++++++++++++++++++++++++--------------- fs/btrfs/root-tree.c | 1 - fs/btrfs/super.c | 3 +- fs/btrfs/transaction.c | 21 ++++-- 11 files changed, 411 insertions(+), 216 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 498dea0854c1..606a19b5916d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -73,6 +73,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root { struct buffer_head *cow; struct btrfs_node *cow_node; + int ret; if (btrfs_header_generation(btrfs_buffer_header(buf)) == trans->transid) { @@ -80,6 +81,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr); + if (IS_ERR(cow)) + return PTR_ERR(cow); cow_node = btrfs_buffer_node(cow); if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); @@ -87,7 +90,9 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); btrfs_set_header_generation(&cow_node->header, trans->transid); btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); - btrfs_inc_ref(trans, root, buf); + ret = btrfs_inc_ref(trans, root, buf); + if (ret) + return ret; if (buf == root->node) { root->node = cow; get_bh(cow); @@ -320,6 +325,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root int wret; int pslot; int orig_slot = path->slots[level]; + int err_on_enospc = 0; u64 orig_ptr; if (level == 0) @@ -363,29 +369,43 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; + if (btrfs_header_nritems(&mid->header) < 2) + err_on_enospc = 1; + left_buf = read_node_slot(root, parent_buf, pslot - 1); right_buf = read_node_slot(root, parent_buf, pslot + 1); /* first, try to make some room in the middle buffer */ if (left_buf) { - btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, - &left_buf); + wret = btrfs_cow_block(trans, root, left_buf, + parent_buf, pslot - 1, &left_buf); + if (wret) { + ret = wret; + goto enospc; + } left = btrfs_buffer_node(left_buf); orig_slot += btrfs_header_nritems(&left->header); wret = push_node_left(trans, root, left_buf, mid_buf); if (wret < 0) ret = wret; + if (btrfs_header_nritems(&mid->header) < 2) + err_on_enospc = 1; } /* * then try to empty the right most buffer into the middle */ if (right_buf) { - btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1, - &right_buf); + wret = btrfs_cow_block(trans, root, right_buf, + parent_buf, pslot + 1, &right_buf); + if (wret) { + ret = wret; + goto enospc; + } + right = btrfs_buffer_node(right_buf); wret = push_node_left(trans, root, mid_buf, right_buf); - if (wret < 0) + if (wret < 0 && wret != -ENOSPC) ret = wret; if (btrfs_header_nritems(&right->header) == 0) { u64 blocknr = bh_blocknr(right_buf); @@ -421,8 +441,10 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root */ BUG_ON(!left_buf); wret = balance_node_right(trans, root, mid_buf, left_buf); - if (wret < 0) + if (wret < 0) { ret = wret; + goto enospc; + } BUG_ON(wret == 1); } if (btrfs_header_nritems(&mid->header) == 0) { @@ -467,7 +489,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), path->slots[level])) BUG(); - +enospc: if (right_buf) btrfs_block_release(root, right_buf); if (left_buf) @@ -519,10 +541,15 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - btrfs_cow_block(trans, root, left_buf, parent_buf, - pslot - 1, &left_buf); - left = btrfs_buffer_node(left_buf); - wret = push_node_left(trans, root, left_buf, mid_buf); + ret = btrfs_cow_block(trans, root, left_buf, parent_buf, + pslot - 1, &left_buf); + if (ret) + wret = 1; + else { + left = btrfs_buffer_node(left_buf); + wret = push_node_left(trans, root, + left_buf, mid_buf); + } } if (wret < 0) ret = wret; @@ -561,11 +588,16 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, &right_buf); - right = btrfs_buffer_node(right_buf); - wret = balance_node_right(trans, root, - right_buf, mid_buf); + ret = btrfs_cow_block(trans, root, right_buf, + parent_buf, pslot + 1, + &right_buf); + if (ret) + wret = 1; + else { + right = btrfs_buffer_node(right_buf); + wret = balance_node_right(trans, root, + right_buf, mid_buf); + } } if (wret < 0) ret = wret; @@ -631,6 +663,10 @@ again: p->nodes[level + 1], p->slots[level + 1], &cow_buf); + if (wret) { + btrfs_block_release(root, cow_buf); + return wret; + } b = cow_buf; c = btrfs_buffer_node(b); } @@ -737,6 +773,7 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root src_nritems = btrfs_header_nritems(&src->header); dst_nritems = btrfs_header_nritems(&dst->header); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; + if (push_items <= 0) { return 1; } @@ -827,6 +864,8 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level-1] != root->node); t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); + if (IS_ERR(t)) + return PTR_ERR(t); c = btrfs_buffer_node(t); memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); @@ -929,10 +968,15 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_header_nritems(&c->header) < BTRFS_NODEPTRS_PER_BLOCK(root) - 1) return 0; + if (ret < 0) + return ret; } c_nritems = btrfs_header_nritems(&c->header); split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); + if (IS_ERR(split_buffer)) + return PTR_ERR(split_buffer); + split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); @@ -1022,6 +1066,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_item *item; u32 left_nritems; u32 right_nritems; + int ret; slot = path->slots[1]; if (!path->nodes[1]) { @@ -1041,7 +1086,12 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } /* cow and double check */ - btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf); + ret = btrfs_cow_block(trans, root, right_buf, upper, + slot + 1, &right_buf); + if (ret) { + btrfs_block_release(root, right_buf); + return 1; + } right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -1162,7 +1212,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root } /* cow and double check */ - btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); + ret = btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); + if (ret) { + /* we hit -ENOSPC, but it isn't fatal here */ + return 1; + } left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -1309,8 +1363,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root slot = path->slots[0]; nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); - BUG_ON(!right_buffer); + if (IS_ERR(right_buffer)) + return PTR_ERR(right_buffer); + right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); @@ -1407,7 +1464,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (!double_split) return ret; right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); - BUG_ON(!right_buffer); + if (IS_ERR(right_buffer)) + return PTR_ERR(right_buffer); + right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); @@ -1655,7 +1714,6 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root ptr, data, data_size); btrfs_mark_buffer_dirty(path->nodes[0]); } - btrfs_release_path(root, path); btrfs_free_path(path); return ret; } @@ -1775,12 +1833,12 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, slot = path->slots[1]; get_bh(leaf_buf); wret = push_leaf_left(trans, root, path, 1); - if (wret < 0) + if (wret < 0 && wret != -ENOSPC) ret = wret; if (path->nodes[0] == leaf_buf && btrfs_header_nritems(&leaf->header)) { wret = push_leaf_right(trans, root, path, 1); - if (wret < 0) + if (wret < 0 && wret != -ENOSPC) ret = wret; } if (btrfs_header_nritems(&leaf->header) == 0) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 19a1d998fca7..77071f273977 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1126,9 +1126,6 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, char *data, size_t len); -int btrfs_csum_verify_file_block(struct btrfs_root *root, - u64 objectid, u64 offset, - char *data, size_t len); struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index ff10cf5a8422..d5db8bdea3d8 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -47,6 +47,8 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle if (ret) return ERR_PTR(ret); } + if (ret < 0) + return ERR_PTR(ret); WARN_ON(ret > 0); leaf = btrfs_buffer_leaf(path->nodes[0]); item = leaf->items + path->slots[0]; @@ -225,14 +227,13 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_leaf *leaf; u32 sub_item_len; u32 item_len; - int ret; + int ret = 0; leaf = btrfs_buffer_leaf(path->nodes[0]); sub_item_len = sizeof(*di) + btrfs_dir_name_len(di); item_len = btrfs_item_size(leaf->items + path->slots[0]); if (sub_item_len == btrfs_item_size(leaf->items + path->slots[0])) { ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); } else { char *ptr = (char *)di; char *start = btrfs_item_ptr(leaf, path->slots[0], char); @@ -240,7 +241,6 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, item_len - (ptr + sub_item_len - start)); ret = btrfs_truncate_item(trans, root, path, item_len - sub_item_len); - BUG_ON(ret); } return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3712e946f9c8..865a284aa06c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -580,7 +580,7 @@ int close_ctree(struct btrfs_root *root) btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b38c3e92f0c8..8025e9f8ef19 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -100,6 +100,8 @@ static int cache_block_group(struct btrfs_root *root, if (slot >= btrfs_header_nritems(&leaf->header)) { reada_extent_leaves(root, path, limit); ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto err; if (ret == 0) { continue; } else { @@ -148,6 +150,7 @@ static int cache_block_group(struct btrfs_root *root, } block_group->cached = 1; +err: btrfs_free_path(path); return 0; } @@ -201,7 +204,9 @@ static u64 find_search_start(struct btrfs_root *root, last = max(last, cache->last_prealloc); } again: - cache_block_group(root, cache); + ret = cache_block_group(root, cache); + if (ret) + goto out; while(1) { ret = find_first_radix_bit(&root->fs_info->extent_map_radix, gang, last, ARRAY_SIZE(gang)); @@ -398,16 +403,23 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_key ins; u32 refs; - find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, 0, - &ins, 0); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; + ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, + (u64)-1, 0, &ins, 0); + if (ret) { + btrfs_free_path(path); + return ret; + } key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); + if (ret < 0) + return ret; if (ret != 0) { BUG(); } @@ -442,12 +454,14 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); + if (ret < 0) + goto out; if (ret != 0) BUG(); l = btrfs_buffer_leaf(path->nodes[0]); item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); - btrfs_release_path(root->fs_info->extent_root, path); +out: btrfs_free_path(path); return 0; } @@ -469,6 +483,8 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int i; int leaf; int ret; + int faili; + int err; if (!root->ref_cows) return 0; @@ -491,14 +507,45 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, continue; ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, btrfs_file_extent_disk_num_blocks(fi)); - BUG_ON(ret); + if (ret) { + faili = i; + goto fail; + } } else { blocknr = btrfs_node_blockptr(buf_node, i); ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); - BUG_ON(ret); + if (ret) { + faili = i; + goto fail; + } } } return 0; +fail: + for (i =0; i < faili; i++) { + if (leaf) { + u64 disk_blocknr; + key = &buf_leaf->items[i].key; + if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf_leaf, i, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + if (disk_blocknr == 0) + continue; + err = btrfs_free_extent(trans, root, disk_blocknr, + btrfs_file_extent_disk_num_blocks(fi), 0); + BUG_ON(err); + } else { + blocknr = btrfs_node_blockptr(buf_node, i); + err = btrfs_free_extent(trans, root, blocknr, 1, 0); + BUG_ON(err); + } + } + return ret; } static int write_one_cache_group(struct btrfs_trans_handle *trans, @@ -512,15 +559,20 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0); + ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0); + /* FIXME, set bit to recalc cache groups on next mount */ + if (ret) + return ret; ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); + if (ret < 0) + goto fail; BUG_ON(ret); bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_block_group_item); memcpy(bi, &cache->item, sizeof(*bi)); mark_buffer_dirty(path->nodes[0]); btrfs_release_path(extent_root, path); - +fail: finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); if (ret) @@ -543,6 +595,7 @@ static int write_dirty_block_radix(struct btrfs_trans_handle *trans, int werr = 0; int i; struct btrfs_path *path; + unsigned long off = 0; path = btrfs_alloc_path(); if (!path) @@ -550,18 +603,28 @@ static int write_dirty_block_radix(struct btrfs_trans_handle *trans, while(1) { ret = radix_tree_gang_lookup_tag(radix, (void **)cache, - 0, ARRAY_SIZE(cache), + off, ARRAY_SIZE(cache), BTRFS_BLOCK_GROUP_DIRTY); if (!ret) break; for (i = 0; i < ret; i++) { + err = write_one_cache_group(trans, root, + path, cache[i]); + /* + * if we fail to write the cache group, we want + * to keep it marked dirty in hopes that a later + * write will work + */ + if (err) { + werr = err; + off = cache[i]->key.objectid + + cache[i]->key.offset; + continue; + } + radix_tree_tag_clear(radix, cache[i]->key.objectid + cache[i]->key.offset - 1, BTRFS_BLOCK_GROUP_DIRTY); - err = write_one_cache_group(trans, root, - path, cache[i]); - if (err) - werr = err; } } btrfs_free_path(path); @@ -801,14 +864,20 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; - find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; + + ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0); + if (ret) { + btrfs_free_path(path); + return ret; + } ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); - if (ret) { - BUG(); - } + if (ret < 0) + return ret; + BUG_ON(ret); ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_extent_item); BUG_ON(ei->refs == 0); @@ -827,8 +896,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); - if (ret) - BUG(); + if (ret) { + return ret; + } ret = update_block_group(trans, root, blocknr, num_blocks, 0, mark_free, 0); BUG_ON(ret); @@ -1075,7 +1145,6 @@ next: path->slots[0]++; cond_resched(); } - // FIXME -ENOSPC check_pending: /* we have to make sure we didn't find an extent that has already * been allocated by the map tree or the original allocation @@ -1246,7 +1315,15 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ret = find_free_extent(trans, root, num_blocks, search_start, search_end, hint_block, ins, data); if (ret) { - return ret; + if (search_start == 0) + return ret; + search_end = search_start - 1; + search_start = 0; + hint_block = search_start; + ret = find_free_extent(trans, root, num_blocks, search_start, + search_end, hint_block, ins, data); + if (ret) + return ret; } /* @@ -1271,7 +1348,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_end, hint_block, &prealloc_key, 0); if (ret) { - return ret; + if (search_start == 0) + return ret; + search_end = search_start - 1; + search_start = 0; + hint_block = search_start; + ret = find_free_extent(trans, root, 0, search_start, + search_end, hint_block, + &prealloc_key, 0); + if (ret) + return ret; } } @@ -1309,11 +1395,14 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, 1, hint, (unsigned long)-1, &ins, 0); if (ret) { - BUG(); - return NULL; + BUG_ON(ret > 0); + return ERR_PTR(ret); } - BUG_ON(ret); buf = btrfs_find_create_tree_block(root, ins.objectid); + if (!buf) { + btrfs_free_extent(trans, root, ins.objectid, 1, 0); + return ERR_PTR(-ENOMEM); + } set_buffer_uptodate(buf); set_buffer_checked(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index cf894f09f6c7..68859934ae25 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -45,6 +45,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(*item)); + if (ret < 0) + goto out; BUG_ON(ret); item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); @@ -55,10 +57,9 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_generation(item, trans->transid); btrfs_set_file_extent_type(item, BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(path->nodes[0]); - - btrfs_release_path(root, path); +out: btrfs_free_path(path); - return 0; + return ret; } struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, @@ -213,6 +214,8 @@ insert: csum_offset = 0; ret = btrfs_insert_empty_item(trans, root, path, &file_key, BTRFS_CRC32_SIZE); + if (ret < 0) + goto fail; if (ret != 0) { WARN_ON(1); goto fail; @@ -261,40 +264,3 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, return ret; } -int btrfs_csum_verify_file_block(struct btrfs_root *root, - u64 objectid, u64 offset, - char *data, size_t len) -{ - int ret; - struct btrfs_key file_key; - struct btrfs_path *path; - struct btrfs_csum_item *item; - char result[BTRFS_CRC32_SIZE]; - - path = btrfs_alloc_path(); - BUG_ON(!path); - file_key.objectid = objectid; - file_key.offset = offset; - file_key.flags = 0; - btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); - mutex_lock(&root->fs_info->fs_mutex); - - item = btrfs_lookup_csum(NULL, root, path, objectid, offset, 0); - if (IS_ERR(item)) { - ret = PTR_ERR(item); - /* a csum that isn't present is a preallocated region. */ - if (ret == -ENOENT || ret == -EFBIG) - ret = -ENOENT; - goto fail; - } - - ret = btrfs_csum_data(root, data, len, result); - WARN_ON(ret); - if (memcmp(result, &item->csum, BTRFS_CRC32_SIZE)) - ret = 1; -fail: - btrfs_release_path(root, path); - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - return ret; -} diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 23f029359688..fef7ba1e707f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -81,6 +81,62 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) } } +static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, + u64 offset, ssize_t size, + struct buffer_head *bh) +{ + struct btrfs_key key; + struct btrfs_path *path; + char *ptr, *kaddr; + struct btrfs_trans_handle *trans; + struct btrfs_file_extent_item *ei; + u32 datasize; + int err = 0; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + key.objectid = inode->i_ino; + key.offset = offset; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + BUG_ON(size >= PAGE_CACHE_SIZE); + datasize = btrfs_file_extent_calc_inline_size(size); + + ret = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + if (ret) { + err = ret; + goto fail; + } + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = btrfs_file_extent_inline_start(ei); + + kaddr = kmap_atomic(bh->b_page, KM_USER0); + btrfs_memcpy(root, path->nodes[0]->b_data, + ptr, kaddr + bh_offset(bh), + size); + kunmap_atomic(kaddr, KM_USER0); + mark_buffer_dirty(path->nodes[0]); +fail: + btrfs_free_path(path); + ret = btrfs_end_transaction(trans, root); + if (ret && !err) + err = ret; + mutex_unlock(&root->fs_info->fs_mutex); + return err; +} + static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, @@ -96,57 +152,22 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, int this_write; struct inode *inode = file->f_path.dentry->d_inode; struct buffer_head *bh; - struct btrfs_file_extent_item *ei; for (i = 0; i < num_pages; i++) { offset = pos & (PAGE_CACHE_SIZE -1); this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); - /* FIXME, one block at a time */ + /* FIXME, one block at a time */ bh = page_buffers(pages[i]); if (buffer_mapped(bh) && bh->b_blocknr == 0) { - struct btrfs_key key; - struct btrfs_path *path; - char *ptr, *kaddr; - u32 datasize; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - - /* create an inline extent, and copy the data in */ - path = btrfs_alloc_path(); - BUG_ON(!path); - key.objectid = inode->i_ino; - key.offset = pages[i]->index << PAGE_CACHE_SHIFT; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - BUG_ON(write_bytes >= PAGE_CACHE_SIZE); - datasize = offset + - btrfs_file_extent_calc_inline_size(write_bytes); - - ret = btrfs_insert_empty_item(trans, root, path, &key, - datasize); - BUG_ON(ret); - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); - ptr = btrfs_file_extent_inline_start(ei); - - kaddr = kmap_atomic(bh->b_page, KM_USER0); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, kaddr + bh_offset(bh), - offset + write_bytes); - kunmap_atomic(kaddr, KM_USER0); - - mark_buffer_dirty(path->nodes[0]); - btrfs_free_path(path); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); + ret = insert_inline_extent(root, inode, + pages[i]->index << PAGE_CACHE_SHIFT, + offset + this_write, bh); + if (ret) { + err = ret; + goto failed; + } } ret = btrfs_commit_write(file, pages[i], offset, @@ -321,6 +342,7 @@ next_slot: btrfs_file_extent_disk_blocknr(extent); } ret = btrfs_del_item(trans, root, path); + /* TODO update progress marker and return */ BUG_ON(ret); btrfs_release_path(root, path); extent = NULL; @@ -452,7 +474,8 @@ static int prepare_pages(struct btrfs_root *root, err = btrfs_drop_extents(trans, root, inode, start_pos, (pos + write_bytes + root->blocksize -1) & ~((u64)root->blocksize - 1), &hint_block); - BUG_ON(err); + if (err) + goto failed_release; } /* insert any holes we need to create */ @@ -469,7 +492,8 @@ static int prepare_pages(struct btrfs_root *root, last_pos_in_file, 0, 0, hole_size); } - BUG_ON(err); + if (err) + goto failed_release; } /* @@ -481,11 +505,13 @@ static int prepare_pages(struct btrfs_root *root, err = btrfs_alloc_extent(trans, root, inode->i_ino, num_blocks, hint_block, (u64)-1, &ins, 1); - BUG_ON(err); + if (err) + goto failed_truncate; err = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset, ins.offset); - BUG_ON(err); + if (err) + goto failed_truncate; } else { ins.offset = 0; ins.objectid = 0; @@ -618,16 +644,21 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, write_bytes); - BUG_ON(ret); + if (ret) + goto out; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); - BUG_ON(ret); + if (ret) { + btrfs_drop_pages(pages, num_pages); + goto out; + } ret = dirty_and_release_pages(NULL, root, file, pages, num_pages, pos, write_bytes); - BUG_ON(ret); btrfs_drop_pages(pages, num_pages); + if (ret) + goto out; buf += write_bytes; count -= write_bytes; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 94f1c28c25b9..6d031daa7779 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -212,7 +212,11 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + ret = -ENOMEM; + goto err; + } + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, name, name_len, -1); if (IS_ERR(di)) { @@ -225,7 +229,8 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, } objectid = btrfs_disk_key_objectid(&di->location); ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); + if (ret) + goto err; btrfs_release_path(root, path); di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, @@ -239,7 +244,6 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, goto err; } ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); dentry->d_inode->i_ctime = dir->i_ctime; err: @@ -248,7 +252,7 @@ err: dir->i_size -= name_len * 2; btrfs_update_inode(trans, root, dir); drop_nlink(dentry->d_inode); - btrfs_update_inode(trans, root, dentry->d_inode); + ret = btrfs_update_inode(trans, root, dentry->d_inode); dir->i_sb->s_dirt = 1; } return ret; @@ -359,9 +363,10 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, BUG_ON(!path); ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, -1); - BUG_ON(ret); - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); + if (ret > 0) + ret = -ENOENT; + if (!ret) + ret = btrfs_del_item(trans, root, path); btrfs_free_path(path); return ret; } @@ -516,7 +521,8 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, } if (del_item) { ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); + if (ret) + goto error; } else { break; } @@ -577,19 +583,22 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) page->index << PAGE_CACHE_SHIFT, (page->index + 1) << PAGE_CACHE_SHIFT, &alloc_hint); - BUG_ON(ret); + if (ret) + goto out; ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, alloc_hint, (u64)-1, &ins, 1); - BUG_ON(ret); + if (ret) + goto out; ret = btrfs_insert_file_extent(trans, root, inode->i_ino, page->index << PAGE_CACHE_SHIFT, ins.objectid, 1, 1); - BUG_ON(ret); + if (ret) + goto out; SetPageChecked(page); kaddr = kmap(page); memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); flush_dcache_page(page); - btrfs_csum_file_block(trans, root, inode->i_ino, + ret = btrfs_csum_file_block(trans, root, inode->i_ino, page->index << PAGE_CACHE_SHIFT, kaddr, PAGE_CACHE_SIZE); kunmap(page); @@ -633,9 +642,10 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_set_trans_block_group(trans, inode); err = btrfs_insert_file_extent(trans, root, inode->i_ino, pos, 0, 0, hole_size); - BUG_ON(err); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + if (err) + return err; } out: err = inode_setattr(inode, attr); @@ -657,12 +667,20 @@ void btrfs_delete_inode(struct inode *inode) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); - BUG_ON(ret); - btrfs_free_inode(trans, root, inode); + if (ret) + goto no_delete_lock; + ret = btrfs_free_inode(trans, root, inode); + if (ret) + goto no_delete_lock; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root); return; + +no_delete_lock: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); no_delete: clear_inode(inode); } @@ -946,7 +964,7 @@ int btrfs_write_inode(struct inode *inode, int wait) } /* - * This is somewhat expense, updating the tree every time the + * This is somewhat expensive, updating the tree every time the * inode changes. But, it is most likely to find the inode in cache. * FIXME, needs more benchmarking...there are no reasons other than performance * to keep or drop this code. @@ -1002,8 +1020,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_inode(trans, root, objectid, &inode_item); - BUG_ON(ret); - + if (ret) + return ERR_PTR(ret); insert_inode_hash(inode); return inode; } @@ -1121,7 +1139,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, dir); - btrfs_update_inode(trans, root, inode); + err = btrfs_update_inode(trans, root, inode); + if (err) + drop_inode = 1; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -1349,17 +1369,26 @@ not_found: ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, alloc_hint, (u64)-1, &ins, 1); - BUG_ON(ret); + if (ret) { + err = ret; + goto out; + } ret = btrfs_insert_file_extent(trans, root, inode->i_ino, iblock << inode->i_blkbits, ins.objectid, ins.offset, ins.offset); - BUG_ON(ret); + if (ret) { + err = ret; + goto out; + } btrfs_map_bh_to_logical(root, result, ins.objectid); } out: - if (trans) - err = btrfs_end_transaction(trans, root); + if (trans) { + ret = btrfs_end_transaction(trans, root); + if (!err) + err = ret; + } btrfs_free_path(path); return err; } @@ -1375,8 +1404,8 @@ int btrfs_get_block(struct inode *inode, sector_t iblock, return err; } -int btrfs_get_block_csum(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) +static int btrfs_get_block_csum(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) { int ret; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1397,7 +1426,7 @@ int btrfs_get_block_csum(struct inode *inode, sector_t iblock, /* a csum that isn't present is a preallocated region. */ if (ret == -ENOENT || ret == -EFBIG) ret = 0; - result->b_private = 0; + result->b_private = NULL; goto out; } memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE); @@ -1736,11 +1765,10 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); kaddr = kmap(page); - ret = btrfs_csum_file_block(trans, root, inode->i_ino, + btrfs_csum_file_block(trans, root, inode->i_ino, off, kaddr + bh_offset(bh), bh->b_size); kunmap(page); - BUG_ON(ret); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1930,7 +1958,6 @@ static void btrfs_truncate(struct inode *inode) /* FIXME, add redo link to tree so we don't leak on crash */ ret = btrfs_truncate_in_trans(trans, root, inode); - BUG_ON(ret); btrfs_update_inode(trans, root, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); @@ -1970,6 +1997,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) struct inode *inode; struct inode *dir; int ret; + int err; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; @@ -1978,8 +2006,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) BUG_ON(!trans); subvol = btrfs_alloc_free_block(trans, root, 0); - if (subvol == NULL) - return -ENOSPC; + if (IS_ERR(subvol)) + return PTR_ERR(subvol); leaf = btrfs_buffer_leaf(subvol); btrfs_set_header_nritems(&leaf->header, 0); btrfs_set_header_level(&leaf->header, 0); @@ -2005,7 +2033,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); - BUG_ON(ret); + if (ret) + goto fail; btrfs_set_root_dirid(&root_item, new_dirid); @@ -2015,7 +2044,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &root_item); - BUG_ON(ret); + if (ret) + goto fail; /* * insert the directory item @@ -2025,10 +2055,12 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, name, namelen, dir->i_ino, &key, BTRFS_FT_DIR); - BUG_ON(ret); + if (ret) + goto fail; ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); + if (ret) + goto fail_commit; new_root = btrfs_read_fs_root(root->fs_info, &key); BUG_ON(!new_root); @@ -2038,24 +2070,29 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) inode = btrfs_new_inode(trans, new_root, new_dirid, BTRFS_I(dir)->block_group, S_IFDIR | 0700); + if (IS_ERR(inode)) + goto fail; inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; new_root->inode = inode; ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid); - BUG_ON(ret); + if (ret) + goto fail; inode->i_nlink = 1; inode->i_size = 6; ret = btrfs_update_inode(trans, new_root, inode); - BUG_ON(ret); - - ret = btrfs_commit_transaction(trans, new_root); - BUG_ON(ret); - + if (ret) + goto fail; +fail: + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; +fail_commit: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root); - return 0; + return ret; } static int create_snapshot(struct btrfs_root *root, char *name, int namelen) @@ -2064,6 +2101,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) struct btrfs_key key; struct btrfs_root_item new_root_item; int ret; + int err; u64 objectid; if (!root->ref_cows) @@ -2074,11 +2112,13 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) BUG_ON(!trans); ret = btrfs_update_inode(trans, root, root->inode); - BUG_ON(ret); + if (ret) + goto fail; ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); - BUG_ON(ret); + if (ret) + goto fail; memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); @@ -2091,7 +2131,8 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); - BUG_ON(ret); + if (ret) + goto fail; /* * insert the directory item @@ -2102,16 +2143,20 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) root->fs_info->sb->s_root->d_inode->i_ino, &key, BTRFS_FT_DIR); - BUG_ON(ret); + if (ret) + goto fail; ret = btrfs_inc_root_ref(trans, root); - BUG_ON(ret); + if (ret) + goto fail; - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); +fail: + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root); - return 0; + return ret; } int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int @@ -2148,12 +2193,13 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int btrfs_free_path(path); if (di && !IS_ERR(di)) return -EEXIST; + if (IS_ERR(di)) + return PTR_ERR(di); if (root == root->fs_info->tree_root) ret = create_subvol(root, vol_args.name, namelen); else ret = create_snapshot(root, vol_args.name, namelen); - WARN_ON(ret); break; default: return -ENOTTY; @@ -2316,7 +2362,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, old_parent_oid = btrfs_disk_key_objectid(&di->location); ret = btrfs_del_item(trans, root, path); if (ret) { - ret = -EIO; goto out_fail; } btrfs_release_path(root, path); @@ -2335,7 +2380,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, } ret = btrfs_del_item(trans, root, path); if (ret) { - ret = -EIO; goto out_fail; } btrfs_release_path(root, path); @@ -2361,7 +2405,9 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, clear_nlink(new_inode); else drop_nlink(new_inode); - btrfs_update_inode(trans, root, new_inode); + ret = btrfs_update_inode(trans, root, new_inode); + if (ret) + goto out_fail; } ret = btrfs_add_link(trans, new_dentry, old_inode); if (ret) @@ -2433,7 +2479,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, datasize = btrfs_file_extent_calc_inline_size(name_len); err = btrfs_insert_empty_item(trans, root, path, &key, datasize); - BUG_ON(err); + if (err) { + drop_inode = 1; + goto out_unlock; + } ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_generation(ei, trans->transid); @@ -2447,13 +2496,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_size = name_len - 1; - btrfs_update_inode(trans, root, inode); - err = 0; + err = btrfs_update_inode(trans, root, inode); + if (err) + drop_inode = 1; out_unlock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - if (drop_inode) { inode_dec_link_count(inode); iput(inode); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 8e1b9046d5ec..ac0fae7780ff 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -90,7 +90,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root { int ret; ret = btrfs_insert_item(trans, root, key, item, sizeof(*item)); - BUG_ON(ret); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ef8ad18b275c..c11ecf500202 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -125,9 +125,8 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; - BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - return 0; + return ret; } static void btrfs_write_super(struct super_block *sb) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b5b99a85763f..321f8852755b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -219,7 +219,8 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root; int i; int ret; - int err; + int err = 0; + while(1) { ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, ARRAY_SIZE(gang), @@ -251,11 +252,12 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); - BUG_ON(err); + if (err) + break; list_add(&dirty->list, list); } } - return 0; + return err; } static int drop_dirty_roots(struct btrfs_root *tree_root, @@ -263,7 +265,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, { struct dirty_root *dirty; struct btrfs_trans_handle *trans; - int ret; + int ret = 0; while(!list_empty(list)) { mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); @@ -274,14 +276,15 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, BUG_ON(ret); ret = btrfs_del_root(trans, tree_root, &dirty->snap_key); - BUG_ON(ret); + if (ret) + break; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root); } - return 0; + return ret; } int btrfs_commit_transaction(struct btrfs_trans_handle *trans, @@ -321,9 +324,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } finish_wait(&trans->transaction->writer_wait, &wait); WARN_ON(cur_trans != trans->transaction); - add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); + ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, + &dirty_fs_roots); + BUG_ON(ret); + ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); + cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; if (cur_trans->list.prev != &root->fs_info->trans_list) { From 5eda7b5e9b0bed864dd18284c7df9b3c8207dad7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 22 Jun 2007 14:16:25 -0400 Subject: [PATCH 0178/2985] Btrfs: Add the ability to find and remove dead roots after a crash. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 17 ++++++++-- fs/btrfs/disk-io.c | 44 +++++++++++++++++-------- fs/btrfs/disk-io.h | 3 ++ fs/btrfs/inode.c | 2 ++ fs/btrfs/root-tree.c | 75 +++++++++++++++++++++++++++++++++++++----- fs/btrfs/transaction.c | 54 ++++++++++++++++++++++++++---- fs/btrfs/transaction.h | 1 + 7 files changed, 164 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 77071f273977..fb6fffb71dd0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -122,12 +122,12 @@ struct btrfs_super_block { u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* this block number */ __le64 magic; - __le32 blocksize; __le64 generation; __le64 root; __le64 total_blocks; __le64 blocks_used; __le64 root_dir_objectid; + __le32 blocksize; } __attribute__ ((__packed__)); /* @@ -226,10 +226,12 @@ struct btrfs_root_item { struct btrfs_inode_item inode; __le64 root_dirid; __le64 blocknr; - __le32 flags; __le64 block_limit; __le64 blocks_used; + __le32 flags; __le32 refs; + struct btrfs_disk_key drop_progress; + u8 drop_level; } __attribute__ ((__packed__)); #define BTRFS_FILE_EXTENT_REG 0 @@ -800,6 +802,16 @@ static inline void btrfs_set_root_refs(struct btrfs_root_item *item, u32 val) item->refs = cpu_to_le32(val); } +static inline u32 btrfs_root_flags(struct btrfs_root_item *item) +{ + return le32_to_cpu(item->flags); +} + +static inline void btrfs_set_root_flags(struct btrfs_root_item *item, u32 val) +{ + item->flags = cpu_to_le32(val); +} + static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s) { return le64_to_cpu(s->blocknr); @@ -1076,6 +1088,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *item); int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); +int btrfs_find_dead_roots(struct btrfs_root *root); /* dir-item.c */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 865a284aa06c..d7615e1578cc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -326,8 +326,8 @@ static int find_and_setup_root(int blocksize, return 0; } -struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) +struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) { struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; @@ -336,11 +336,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, u64 highest_inode; int ret = 0; - root = radix_tree_lookup(&fs_info->fs_roots_radix, - (unsigned long)location->objectid); - if (root) - return root; - root = kmalloc(sizeof(*root), GFP_NOFS); + root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); if (location->offset == (u64)-1) { @@ -383,6 +379,28 @@ out: BUG_ON(!root->node); insert: root->ref_cows = 1; + ret = btrfs_find_highest_inode(root, &highest_inode); + if (ret == 0) { + root->highest_inode = highest_inode; + root->last_inode_alloc = highest_inode; + } + return root; +} + +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) +{ + struct btrfs_root *root; + int ret; + + root = radix_tree_lookup(&fs_info->fs_roots_radix, + (unsigned long)location->objectid); + if (root) + return root; + + root = btrfs_read_fs_root_no_radix(fs_info, location); + if (IS_ERR(root)) + return root; ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, root); @@ -391,11 +409,6 @@ insert: kfree(root); return ERR_PTR(ret); } - ret = btrfs_find_highest_inode(root, &highest_inode); - if (ret == 0) { - root->highest_inode = highest_inode; - root->last_inode_alloc = highest_inode; - } return root; } @@ -489,6 +502,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; + ret = btrfs_find_dead_roots(tree_root); + if (ret) + goto fail_tree_root; mutex_unlock(&fs_info->fs_mutex); return tree_root; @@ -538,7 +554,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } -static int free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) +int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); @@ -565,7 +581,7 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) if (!ret) break; for (i = 0; i < ret; i++) - free_fs_root(fs_info, gang[i]); + btrfs_free_fs_root(fs_info, gang[i]); } return 0; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 7b76ccc48754..c4a695ac44f6 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -65,6 +65,8 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location); +struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, + struct btrfs_key *location); u64 bh_blocknr(struct buffer_head *bh); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, @@ -75,4 +77,5 @@ int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical); int btrfs_releasepage(struct page *page, gfp_t flags); void btrfs_btree_balance_dirty(struct btrfs_root *root); +int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6d031daa7779..9d2a0a3f674d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2028,6 +2028,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); + root_item.drop_level = 0; brelse(subvol); subvol = NULL; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index ac0fae7780ff..737e5a38d17e 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -18,6 +18,7 @@ #include #include "ctree.h" +#include "transaction.h" #include "disk-io.h" #include "print-tree.h" @@ -32,7 +33,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, search_key.objectid = objectid; search_key.flags = (u32)-1; - search_key.offset = (u32)-1; + search_key.offset = (u64)-1; path = btrfs_alloc_path(); BUG_ON(!path); @@ -50,6 +51,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), sizeof(*item)); btrfs_disk_key_to_cpu(key, &l->items[slot].key); +printk("find last finds key %Lu %u %Lu slot %d search for obj %Lu\n", key->objectid, key->flags, key->offset, slot, objectid); ret = 0; out: btrfs_release_path(root, path); @@ -93,6 +95,67 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +int btrfs_find_dead_roots(struct btrfs_root *root) +{ + struct btrfs_root *dead_root; + struct btrfs_item *item; + struct btrfs_root_item *ri; + struct btrfs_key key; + struct btrfs_path *path; + int ret; + u32 nritems; + struct btrfs_leaf *leaf; + int slot; + + key.objectid = 0; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + key.offset = 0; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto err; + while(1) { + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path->slots[0]; + if (slot >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + break; + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path->slots[0]; + } + item = leaf->items + slot; + btrfs_disk_key_to_cpu(&key, &item->key); + if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) + goto next; + ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item); + if (btrfs_root_refs(ri) != 0) + goto next; + dead_root = btrfs_read_fs_root_no_radix(root->fs_info, &key); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto err; + } +printk("found dead root %Lu %u %Lu\n", key.objectid, key.flags, key.offset); + ret = btrfs_add_dead_root(dead_root, + &root->fs_info->dead_roots); + if (ret) + goto err; +next: + slot++; + path->slots[0]++; + } + ret = 0; +err: + btrfs_free_path(path); + return ret; +} + int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key) { @@ -111,14 +174,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, path->slots[0], struct btrfs_root_item); refs = btrfs_root_refs(ri); - BUG_ON(refs == 0); - if (refs == 1) { - ret = btrfs_del_item(trans, root, path); - } else { - btrfs_set_root_refs(ri, refs - 1); - WARN_ON(1); - mark_buffer_dirty(path->nodes[0]); - } + BUG_ON(refs != 0); + ret = btrfs_del_item(trans, root, path); out: btrfs_release_path(root, path); btrfs_free_path(path); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 321f8852755b..85a2a5e27148 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -85,11 +85,15 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, if (root != root->fs_info->tree_root && root->last_trans < running_trans_id) { - radix_tree_tag_set(&root->fs_info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_TRANS_TAG); - root->commit_root = root->node; - get_bh(root->node); + if (root->root_item.refs != 0) { + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + root->commit_root = root->node; + get_bh(root->node); + } else { + WARN_ON(1); + } } root->last_trans = running_trans_id; h->transid = running_trans_id; @@ -208,8 +212,24 @@ struct dirty_root { struct btrfs_key snap_key; struct buffer_head *commit_root; struct btrfs_root *root; + int free_on_drop; }; +int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list) +{ + struct dirty_root *dirty; + + dirty = kmalloc(sizeof(*dirty), GFP_NOFS); + if (!dirty) + return -ENOMEM; + memcpy(&dirty->snap_key, &root->root_key, sizeof(root->root_key)); + dirty->commit_root = root->node; + dirty->root = root; + dirty->free_on_drop = 1; + list_add(&dirty->list, dead_list); + return 0; +} + static int add_dirty_roots(struct btrfs_trans_handle *trans, struct radix_tree_root *radix, struct list_head *list) @@ -217,9 +237,11 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, struct dirty_root *dirty; struct btrfs_root *gang[8]; struct btrfs_root *root; + struct btrfs_root_item tmp_item; int i; int ret; int err = 0; + u32 refs; while(1) { ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, @@ -246,6 +268,9 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, dirty->commit_root = root->commit_root; root->commit_root = NULL; dirty->root = root; + dirty->free_on_drop = 0; + memcpy(&tmp_item, &root->root_item, sizeof(tmp_item)); + root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, bh_blocknr(root->node)); @@ -254,7 +279,18 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, &root->root_item); if (err) break; - list_add(&dirty->list, list); + + refs = btrfs_root_refs(&tmp_item); + btrfs_set_root_refs(&tmp_item, refs - 1); + err = btrfs_update_root(trans, root->fs_info->tree_root, + &dirty->snap_key, + &tmp_item); + + BUG_ON(err); + if (refs == 1) + list_add(&dirty->list, list); + else + kfree(dirty); } } return err; @@ -270,16 +306,20 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); + trans = btrfs_start_transaction(tree_root, 1); +printk("deleting root %Lu %u %Lu\n", dirty->snap_key.objectid, dirty->snap_key.flags, dirty->snap_key.offset); ret = btrfs_drop_snapshot(trans, dirty->root, dirty->commit_root); BUG_ON(ret); - ret = btrfs_del_root(trans, tree_root, &dirty->snap_key); if (ret) break; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); + + if (dirty->free_on_drop) + kfree(dirty->root); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 5fb1d322aa10..ebf44f3e1118 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -69,5 +69,6 @@ void btrfs_transaction_flush_work(struct btrfs_root *root); void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); void btrfs_init_transaction_sys(void); void btrfs_exit_transaction_sys(void); +int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); #endif From 22bb92f376b38154dc0c3688a01a16525b4caab0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 22 Jun 2007 14:49:31 -0400 Subject: [PATCH 0179/2985] Btrfs: Documentation update Signed-off-by: Chris Mason --- fs/btrfs/INSTALL | 17 +++++++---------- fs/btrfs/root-tree.c | 2 -- fs/btrfs/transaction.c | 1 - 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/INSTALL b/fs/btrfs/INSTALL index e83ff6e8c8d5..16b45a56878d 100644 --- a/fs/btrfs/INSTALL +++ b/fs/btrfs/INSTALL @@ -5,19 +5,16 @@ directory can only be changed by btrfsctl right now, and normal filesystem operations do not work on it. The default subvolume is called 'default', and you can create files and directories in mount_point/default -Btrfs uses the crypto manager interface in the kernel for file and -metadata checksums. You need to compile the kernel with: +Btrfs uses libcrc32c in the kernel for file and metadata checksums. You need +to compile the kernel with: -CONFIG_CRYPTO=y -CONFIG_CRYPTO_MANAGER=m -CONFIG_CRYPTO_CRC32C=m +CONFIG_LIBCRC32C=m -cryptomanager and crc32c can be static as well. Once your kernel is -setup, typing make in the btrfs module sources will build against the -running kernel. When the build is complete: +libcrc32c can be static as well. Once your kernel is setup, typing make in the +btrfs module sources will build against the running kernel. When the build is +complete: -modprobe crc32c -modprobe cryptomgr +modprobe libcrc32c insmod btrfs.ko The Btrfs utility programs require libuuid to build. This can be found diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 737e5a38d17e..8b0eca042ea4 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -51,7 +51,6 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), sizeof(*item)); btrfs_disk_key_to_cpu(key, &l->items[slot].key); -printk("find last finds key %Lu %u %Lu slot %d search for obj %Lu\n", key->objectid, key->flags, key->offset, slot, objectid); ret = 0; out: btrfs_release_path(root, path); @@ -141,7 +140,6 @@ int btrfs_find_dead_roots(struct btrfs_root *root) ret = PTR_ERR(root); goto err; } -printk("found dead root %Lu %u %Lu\n", key.objectid, key.flags, key.offset); ret = btrfs_add_dead_root(dead_root, &root->fs_info->dead_roots); if (ret) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 85a2a5e27148..a5a63d471e43 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -308,7 +308,6 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, list_del_init(&dirty->list); trans = btrfs_start_transaction(tree_root, 1); -printk("deleting root %Lu %u %Lu\n", dirty->snap_key.objectid, dirty->snap_key.flags, dirty->snap_key.offset); ret = btrfs_drop_snapshot(trans, dirty->root, dirty->commit_root); BUG_ON(ret); From 79c44584ea346024e929ac0f2a3577aac33b7b3f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 25 Jun 2007 10:09:33 -0400 Subject: [PATCH 0181/2985] Btrfs: Fix mtime and ctime updates on parent dirs Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9d2a0a3f674d..eba06e7cf414 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -250,6 +250,7 @@ err: btrfs_free_path(path); if (!ret) { dir->i_size -= name_len * 2; + dir->i_mtime = dir->i_ctime = CURRENT_TIME; btrfs_update_inode(trans, root, dir); drop_nlink(dentry->d_inode); ret = btrfs_update_inode(trans, root, dentry->d_inode); @@ -1037,6 +1038,7 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, int ret; struct btrfs_key key; struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; + struct inode *parent_inode; key.objectid = inode->i_ino; key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); @@ -1047,7 +1049,9 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_parent->d_inode->i_ino, &key, btrfs_inode_type(inode)); if (ret == 0) { - dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; + parent_inode = dentry->d_parent->d_inode; + parent_inode->i_size += dentry->d_name.len * 2; + parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, dentry->d_parent->d_inode); } From 4b52dff6d371b9b93bc99f64c32831ea9a8ec3ac Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 26 Jun 2007 10:06:50 -0400 Subject: [PATCH 0182/2985] Btrfs: Fix super block updates during transaction commit The super block written during commit was not consistent with the state of the trees. This change adds an in-memory copy of the super so that we can make sure to write out consistent data during a commit. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 5 ++--- fs/btrfs/extent-tree.c | 16 ++++++++-------- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.c | 8 ++++++-- 5 files changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fb6fffb71dd0..1998f86df08a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -306,6 +306,7 @@ struct btrfs_fs_info { u64 generation; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; + struct btrfs_super_block super_copy; struct buffer_head *sb_buffer; struct super_block *sb; struct inode *btree_inode; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d7615e1578cc..7081729d5b16 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -471,6 +471,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!fs_info->sb_buffer) goto fail_iput; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + fs_info->disk_super = disk_super; + memcpy(&fs_info->super_copy, disk_super, sizeof(fs_info->super_copy)); if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; @@ -479,7 +481,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_super_total_blocks(disk_super) << fs_info->btree_inode->i_blkbits); - fs_info->disk_super = disk_super; if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { @@ -527,8 +528,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root int ret; struct buffer_head *bh = root->fs_info->sb_buffer; - btrfs_set_super_root(root->fs_info->disk_super, - bh_blocknr(root->fs_info->tree_root->node)); lock_buffer(bh); WARN_ON(atomic_read(&bh->b_count) < 1); clear_buffer_dirty(bh); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8025e9f8ef19..7e550343aee7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -796,8 +796,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) { ins.objectid = extent_root->fs_info->extent_tree_insert[i]; - super_blocks_used = btrfs_super_blocks_used(info->disk_super); - btrfs_set_super_blocks_used(info->disk_super, + super_blocks_used = btrfs_super_blocks_used(&info->super_copy); + btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + 1); ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, sizeof(extent_item)); @@ -892,8 +892,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); } - super_blocks_used = btrfs_super_blocks_used(info->disk_super); - btrfs_set_super_blocks_used(info->disk_super, + super_blocks_used = btrfs_super_blocks_used(&info->super_copy); + btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); if (ret) { @@ -1032,7 +1032,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root info->extent_tree_prealloc_nr = 0; } if (search_end == (u64)-1) - search_end = btrfs_super_total_blocks(info->disk_super); + search_end = btrfs_super_total_blocks(&info->super_copy); if (hint_block) { block_group = btrfs_lookup_block_group(info, hint_block); block_group = btrfs_find_block_group(root, block_group, @@ -1361,8 +1361,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } } - super_blocks_used = btrfs_super_blocks_used(info->disk_super); - btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + + super_blocks_used = btrfs_super_blocks_used(&info->super_copy); + btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + num_blocks); ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, sizeof(extent_item)); @@ -1737,7 +1737,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) BTRFS_BLOCK_GROUP_AVAIL); } if (key.objectid >= - btrfs_super_total_blocks(info->disk_super)) + btrfs_super_total_blocks(&info->super_copy)) break; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c11ecf500202..2e797d5fb281 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -144,7 +144,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root *root = btrfs_sb(dentry->d_sb); - struct btrfs_super_block *disk_super = root->fs_info->disk_super; + struct btrfs_super_block *disk_super = &root->fs_info->super_copy; buf->f_namelen = BTRFS_NAME_LEN; buf->f_blocks = btrfs_super_total_blocks(disk_super); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a5a63d471e43..3b2face593e9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -380,6 +380,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, else prev_trans->use_count++; } + btrfs_set_super_generation(&root->fs_info->super_copy, + cur_trans->transid); + btrfs_set_super_root(&root->fs_info->super_copy, + bh_blocknr(root->fs_info->tree_root->node)); + memcpy(root->fs_info->disk_super, &root->fs_info->super_copy, + sizeof(root->fs_info->super_copy)); mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); @@ -389,8 +395,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, put_transaction(prev_trans); mutex_unlock(&root->fs_info->trans_mutex); } - btrfs_set_super_generation(root->fs_info->disk_super, - cur_trans->transid); BUG_ON(ret); write_ctree_super(trans, root); From f2654de42a759127cb1f1e8a626ec94178732e20 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 26 Jun 2007 12:20:46 -0400 Subject: [PATCH 0183/2985] Btrfs: Allow find_free_extent callers to pass in an exclusion range Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 81 ++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 50 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7e550343aee7..01dc30579287 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -25,7 +25,8 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 search_end, u64 hint_block, - struct btrfs_key *ins, int data); + struct btrfs_key *ins, u64 exclude_start, + u64 exclude_nr, int data); static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -407,7 +408,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, - (u64)-1, 0, &ins, 0); + (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); return ret; @@ -559,7 +560,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0); + ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, + 0, 0, 0); /* FIXME, set bit to recalc cache groups on next mount */ if (ret) return ret; @@ -868,7 +870,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0); + ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); return ret; @@ -987,7 +989,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 search_end, u64 hint_block, - struct btrfs_key *ins, int data) + struct btrfs_key *ins, u64 exclude_start, + u64 exclude_nr, int data) { struct btrfs_path *path; struct btrfs_key key; @@ -1191,6 +1194,11 @@ check_pending: goto new_group; } } + if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start && + ins->objectid < exclude_start + exclude_nr)) { + search_start = exclude_start + exclude_nr; + goto new_group; + } if (fill_prealloc) { int nr; test_block = ins->objectid; @@ -1267,6 +1275,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, int pending_ret; u64 super_blocks_used; u64 search_start = 0; + u64 exclude_start = 0; + u64 exclude_nr = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; @@ -1298,34 +1308,20 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, */ if (data) { ret = find_free_extent(trans, root, 0, 0, - search_end, 0, &prealloc_key, 0); - if (ret) { - return ret; - } - if (prealloc_key.objectid + prealloc_key.offset >= search_end) { - int nr = info->extent_tree_prealloc_nr; - search_end = info->extent_tree_prealloc[nr - 1] - 1; - } else { - search_start = info->extent_tree_prealloc[0] + 1; - } - } - if (hint_block < search_start) - hint_block = search_start; - /* do the real allocation */ - ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, hint_block, ins, data); - if (ret) { - if (search_start == 0) - return ret; - search_end = search_start - 1; - search_start = 0; - hint_block = search_start; - ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, hint_block, ins, data); + search_end, 0, &prealloc_key, 0, 0, 0); if (ret) return ret; + exclude_nr = info->extent_tree_prealloc_nr; + exclude_start = info->extent_tree_prealloc[exclude_nr - 1]; } + /* do the real allocation */ + ret = find_free_extent(trans, root, num_blocks, search_start, + search_end, hint_block, ins, + exclude_start, exclude_nr, data); + if (ret) + return ret; + /* * if we're doing a metadata allocation, preallocate space in the * extent tree second. This way, we don't create a tiny hole @@ -1336,29 +1332,14 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * The unused prealloc will get reused the next time around. */ if (!data) { - if (ins->objectid + ins->offset >= search_end) - search_end = ins->objectid - 1; - else - search_start = ins->objectid + ins->offset; - - if (hint_block < search_start) - hint_block = search_start; - + exclude_start = ins->objectid; + exclude_nr = ins->offset; ret = find_free_extent(trans, root, 0, search_start, search_end, hint_block, - &prealloc_key, 0); - if (ret) { - if (search_start == 0) - return ret; - search_end = search_start - 1; - search_start = 0; - hint_block = search_start; - ret = find_free_extent(trans, root, 0, search_start, - search_end, hint_block, - &prealloc_key, 0); - if (ret) - return ret; - } + &prealloc_key, exclude_start, + exclude_nr, 0); + if (ret) + return ret; } super_blocks_used = btrfs_super_blocks_used(&info->super_copy); From ccd467d60e81b48cdbecae93532b66bcdedca91d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 28 Jun 2007 15:57:36 -0400 Subject: [PATCH 0184/2985] Btrfs: crash recovery fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 13 +++++++++- fs/btrfs/ctree.h | 12 ++++------ fs/btrfs/disk-io.c | 22 ++++++++++------- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent-tree.c | 37 +++++++++++++++++++++++++---- fs/btrfs/file.c | 15 ++++++++---- fs/btrfs/inode.c | 6 ++--- fs/btrfs/transaction.c | 54 ++++++++++++++++++++++++++---------------- 8 files changed, 112 insertions(+), 48 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 606a19b5916d..9eb646529edc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -75,6 +75,17 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_node *cow_node; int ret; + WARN_ON(!buffer_uptodate(buf)); + if (trans->transaction != root->fs_info->running_transaction) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->running_transaction->transid); + WARN_ON(1); + } + if (trans->transid != root->fs_info->generation) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->generation); + WARN_ON(1); + } if (btrfs_header_generation(btrfs_buffer_header(buf)) == trans->transid) { *cow_ret = buf; @@ -107,7 +118,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); } btrfs_block_release(root, buf); - mark_buffer_dirty(cow); + btrfs_mark_buffer_dirty(cow); *cow_ret = cow; return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1998f86df08a..0287bd51d87c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1013,18 +1013,13 @@ static inline void btrfs_memmove(struct btrfs_root *root, memmove(dst, src, nr); } -static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) -{ - WARN_ON(!atomic_read(&bh->b_count)); - mark_buffer_dirty(bh); -} - /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 blocknr); @@ -1044,8 +1039,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); -int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct - btrfs_root *root); +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct radix_tree_root *unpin_radix); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7081729d5b16..d1bf5bc1bc14 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -270,14 +270,6 @@ fail: return NULL; } -int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) -{ - WARN_ON(atomic_read(&buf->b_count) == 0); - mark_buffer_dirty(buf); - return 0; -} - int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { @@ -621,6 +613,20 @@ int close_ctree(struct btrfs_root *root) return 0; } +void btrfs_mark_buffer_dirty(struct buffer_head *bh) +{ + struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; + u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + WARN_ON(!atomic_read(&bh->b_count)); + if (transid != root->fs_info->generation) { + printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", + (unsigned long long)bh->b_blocknr, + transid, root->fs_info->generation); + WARN_ON(1); + } + mark_buffer_dirty(bh); +} + void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { brelse(buf); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c4a695ac44f6..9e2c261b41ae 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -78,4 +78,5 @@ int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, int btrfs_releasepage(struct page *page, gfp_t flags); void btrfs_btree_balance_dirty(struct btrfs_root *root); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); +void btrfs_mark_buffer_dirty(struct buffer_head *bh); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 01dc30579287..14b93268920e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -523,6 +523,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } return 0; fail: + WARN_ON(1); for (i =0; i < faili; i++) { if (leaf) { u64 disk_blocknr; @@ -572,7 +573,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_block_group_item); memcpy(bi, &cache->item, sizeof(*bi)); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(extent_root, path); fail: finish_current_insert(trans, extent_root); @@ -739,8 +740,30 @@ static int try_remove_page(struct address_space *mapping, unsigned long index) return ret; } -int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct - btrfs_root *root) +int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy) +{ + unsigned long gang[8]; + u64 last = 0; + struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; + int ret; + int i; + + while(1) { + ret = find_first_radix_bit(pinned_radix, gang, last, + ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0 ; i < ret; i++) { + set_radix_bit(copy, gang[i]); + last = gang[i] + 1; + } + } + return 0; +} + +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct radix_tree_root *unpin_radix) { unsigned long gang[8]; struct inode *btree_inode = root->fs_info->btree_inode; @@ -752,7 +775,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix; while(1) { - ret = find_first_radix_bit(pinned_radix, gang, 0, + ret = find_first_radix_bit(unpin_radix, gang, 0, ARRAY_SIZE(gang)); if (!ret) break; @@ -760,6 +783,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); + clear_radix_bit(unpin_radix, gang[i]); block_group = btrfs_lookup_block_group(root->fs_info, gang[i]); if (block_group) { @@ -1309,6 +1333,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (data) { ret = find_free_extent(trans, root, 0, 0, search_end, 0, &prealloc_key, 0, 0, 0); + BUG_ON(ret); if (ret) return ret; exclude_nr = info->extent_tree_prealloc_nr; @@ -1319,6 +1344,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ret = find_free_extent(trans, root, num_blocks, search_start, search_end, hint_block, ins, exclude_start, exclude_nr, data); + BUG_ON(ret); if (ret) return ret; @@ -1334,10 +1360,12 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (!data) { exclude_start = ins->objectid; exclude_nr = ins->offset; + hint_block = exclude_start + exclude_nr; ret = find_free_extent(trans, root, 0, search_start, search_end, hint_block, &prealloc_key, exclude_start, exclude_nr, 0); + BUG_ON(ret); if (ret) return ret; } @@ -1348,6 +1376,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, sizeof(extent_item)); + BUG_ON(ret); finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); if (ret) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fef7ba1e707f..2456cc3e1cfd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -127,7 +127,7 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, ptr, kaddr + bh_offset(bh), size); kunmap_atomic(kaddr, KM_USER0); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_free_path(path); ret = btrfs_end_transaction(trans, root); @@ -211,11 +211,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, int found_type; int found_extent; int found_inline; + int recow; path = btrfs_alloc_path(); if (!path) return -ENOMEM; while(1) { + recow = 0; btrfs_release_path(root, path); ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, search_start, -1); @@ -244,6 +246,10 @@ next_slot: if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) { goto out; } + if (recow) { + search_start = key.offset; + continue; + } if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); @@ -274,6 +280,7 @@ next_slot: nextret = btrfs_next_leaf(root, path); if (nextret) goto out; + recow = 1; } else { path->slots[0]++; } @@ -321,7 +328,7 @@ next_slot: } btrfs_set_file_extent_num_blocks(extent, new_num); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); } else { WARN_ON(1); } @@ -452,6 +459,8 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; goto failed_release; } + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); + wait_on_page_writeback(pages[i]); } mutex_lock(&root->fs_info->fs_mutex); @@ -522,8 +531,6 @@ static int prepare_pages(struct btrfs_root *root, mutex_unlock(&root->fs_info->fs_mutex); for (i = 0; i < num_pages; i++) { - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); - wait_on_page_writeback(pages[i]); offset = pos & (PAGE_CACHE_SIZE -1); this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); if (!page_has_buffers(pages[i])) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index eba06e7cf414..4fc0367d54f2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -506,7 +506,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, extent_num_blocks); inode->i_blocks -= (orig_num_blocks - extent_num_blocks) << 3; - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); } else { extent_start = btrfs_file_extent_disk_blocknr(fi); @@ -2020,7 +2020,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_header_owner(&leaf->header, root->root_key.objectid); memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, sizeof(leaf->header.fsid)); - mark_buffer_dirty(subvol); + btrfs_mark_buffer_dirty(subvol); inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); @@ -2497,7 +2497,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, ptr = btrfs_file_extent_inline_start(ei); btrfs_memcpy(root, path->nodes[0]->b_data, ptr, symname, name_len); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3b2face593e9..bec38ae8aa11 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -85,6 +85,8 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, if (root != root->fs_info->tree_root && root->last_trans < running_trans_id) { + WARN_ON(root == root->fs_info->extent_root); + WARN_ON(root->ref_cows != 1); if (root->root_item.refs != 0) { radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, @@ -113,10 +115,11 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; + WARN_ON(cur_trans != trans->transaction); WARN_ON(cur_trans->num_writers < 1); + cur_trans->num_writers--; if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); - cur_trans->num_writers--; put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); memset(trans, 0, sizeof(*trans)); @@ -194,6 +197,7 @@ static int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { DEFINE_WAIT(wait); + mutex_lock(&root->fs_info->trans_mutex); while(!commit->commit_done) { prepare_to_wait(&commit->commit_wait, &wait, TASK_UNINTERRUPTIBLE); @@ -203,6 +207,7 @@ static int wait_for_commit(struct btrfs_root *root, schedule(); mutex_lock(&root->fs_info->trans_mutex); } + mutex_unlock(&root->fs_info->trans_mutex); finish_wait(&commit->commit_wait, &wait); return 0; } @@ -279,7 +284,6 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, &root->root_item); if (err) break; - refs = btrfs_root_refs(&tmp_item); btrfs_set_root_refs(&tmp_item, refs - 1); err = btrfs_update_root(trans, root->fs_info->tree_root, @@ -333,31 +337,53 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; + struct radix_tree_root pinned_copy; DEFINE_WAIT(wait); + init_bit_radix(&pinned_copy); INIT_LIST_HEAD(&dirty_fs_roots); mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { cur_trans = trans->transaction; trans->transaction->use_count++; + mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); + + mutex_unlock(&root->fs_info->fs_mutex); ret = wait_for_commit(root, cur_trans); BUG_ON(ret); put_transaction(cur_trans); - mutex_unlock(&root->fs_info->trans_mutex); + mutex_lock(&root->fs_info->fs_mutex); return 0; } - cur_trans = trans->transaction; trans->transaction->in_commit = 1; + cur_trans = trans->transaction; + if (cur_trans->list.prev != &root->fs_info->trans_list) { + prev_trans = list_entry(cur_trans->list.prev, + struct btrfs_transaction, list); + if (!prev_trans->commit_done) { + prev_trans->use_count++; + mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->trans_mutex); + + wait_for_commit(root, prev_trans); + put_transaction(prev_trans); + + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->trans_mutex); + } + } while (trans->transaction->num_writers > 1) { WARN_ON(cur_trans != trans->transaction); prepare_to_wait(&trans->transaction->writer_wait, &wait, TASK_UNINTERRUPTIBLE); if (trans->transaction->num_writers <= 1) break; + mutex_unlock(&root->fs_info->fs_mutex); mutex_unlock(&root->fs_info->trans_mutex); schedule(); + mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); finish_wait(&trans->transaction->writer_wait, &wait); } @@ -372,34 +398,22 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; - if (cur_trans->list.prev != &root->fs_info->trans_list) { - prev_trans = list_entry(cur_trans->list.prev, - struct btrfs_transaction, list); - if (prev_trans->commit_done) - prev_trans = NULL; - else - prev_trans->use_count++; - } btrfs_set_super_generation(&root->fs_info->super_copy, cur_trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, bh_blocknr(root->fs_info->tree_root->node)); memcpy(root->fs_info->disk_super, &root->fs_info->super_copy, sizeof(root->fs_info->super_copy)); + + btrfs_copy_pinned(root, &pinned_copy); + mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); - if (prev_trans) { - mutex_lock(&root->fs_info->trans_mutex); - wait_for_commit(root, prev_trans); - put_transaction(prev_trans); - mutex_unlock(&root->fs_info->trans_mutex); - } BUG_ON(ret); write_ctree_super(trans, root); - mutex_lock(&root->fs_info->fs_mutex); - btrfs_finish_extent_commit(trans, root); + btrfs_finish_extent_commit(trans, root, &pinned_copy); mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; wake_up(&cur_trans->commit_wait); From ec6b910fb330f29e169c9f502c3ac209515af8d1 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Wed, 11 Jul 2007 10:00:37 -0400 Subject: [PATCH 0186/2985] Btrfs: trivial include fixups Almost none of the files including module.h need to do so, remove them. Include sched.h in extent-tree.c to silence a warning about cond_resched() being undeclared. Signed-off-by: Zach Brown Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 1 - fs/btrfs/ctree.c | 1 - fs/btrfs/dir-item.c | 1 - fs/btrfs/disk-io.c | 1 - fs/btrfs/extent-tree.c | 2 +- fs/btrfs/file-item.c | 1 - fs/btrfs/file.c | 1 - fs/btrfs/inode-item.c | 1 - fs/btrfs/inode-map.c | 1 - fs/btrfs/inode.c | 1 - fs/btrfs/print-tree.c | 1 - fs/btrfs/root-tree.c | 1 - fs/btrfs/sysfs.c | 1 - fs/btrfs/transaction.c | 1 - 14 files changed, 1 insertion(+), 14 deletions(-) diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index a4ddf4de9797..e8bf876db393 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "bit-radix.h" #define BIT_ARRAY_BYTES 256 diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9eb646529edc..eace2ee76448 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index d5db8bdea3d8..49db5fa7ced3 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "hash.h" diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d1bf5bc1bc14..60db85bc4ba8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 14b93268920e..5ace2c33f1aa 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -16,7 +16,7 @@ * Boston, MA 021110-1307, USA. */ -#include +#include #include "ctree.h" #include "disk-io.h" #include "print-tree.h" diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 68859934ae25..ea4dd4c5fce3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2456cc3e1cfd..f327525f541d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index e8b5a9f7c0c2..b78346177539 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 58930fc48be6..405470866254 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4fc0367d54f2..d75ca5cf59b1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index e66acd4e2745..bafa78603bd2 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "print-tree.h" diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 8b0eca042ea4..271dd2b5c321 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "transaction.h" #include "disk-io.h" diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index ead9dc8051be..db8917e5b256 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bec38ae8aa11..0f494fe365cb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include "ctree.h" From a1f396304fb7e5f18e4ea81c294415375f1c814c Mon Sep 17 00:00:00 2001 From: Aneesh Date: Wed, 11 Jul 2007 10:03:27 -0400 Subject: [PATCH 0187/2985] Btrfs: Some code cleanups Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 8 ++++++-- fs/btrfs/root-tree.c | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index eace2ee76448..19a30b7c3a28 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -172,11 +172,13 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, if (path->nodes[level + 1]) parent = btrfs_buffer_node(path->nodes[level + 1]); - parent_slot = path->slots[level + 1]; + slot = path->slots[level]; BUG_ON(nritems == 0); if (parent) { struct btrfs_disk_key *parent_key; + + parent_slot = path->slots[level + 1]; parent_key = &parent->ptrs[parent_slot].key; BUG_ON(memcmp(parent_key, &node->ptrs[0].key, sizeof(struct btrfs_disk_key))); @@ -208,7 +210,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, if (path->nodes[level + 1]) parent = btrfs_buffer_node(path->nodes[level + 1]); - parent_slot = path->slots[level + 1]; + BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); if (nritems == 0) @@ -216,6 +218,8 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, if (parent) { struct btrfs_disk_key *parent_key; + + parent_slot = path->slots[level + 1]; parent_key = &parent->ptrs[parent_slot].key; BUG_ON(memcmp(parent_key, &leaf->items[0].key, sizeof(struct btrfs_disk_key))); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 271dd2b5c321..402f67821c14 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -135,8 +135,8 @@ int btrfs_find_dead_roots(struct btrfs_root *root) if (btrfs_root_refs(ri) != 0) goto next; dead_root = btrfs_read_fs_root_no_radix(root->fs_info, &key); - if (IS_ERR(root)) { - ret = PTR_ERR(root); + if (IS_ERR(dead_root)) { + ret = PTR_ERR(dead_root); goto err; } ret = btrfs_add_dead_root(dead_root, From 618e21d5957c88c5ea346dfd7d1dbac96410fc5b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 11 Jul 2007 10:18:17 -0400 Subject: [PATCH 0188/2985] Btrfs: Implement mknod Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 72 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d75ca5cf59b1..f95cf5d4a69d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -46,6 +46,7 @@ struct btrfs_iget_args { static struct inode_operations btrfs_dir_inode_operations; static struct inode_operations btrfs_symlink_inode_operations; static struct inode_operations btrfs_dir_ro_inode_operations; +static struct inode_operations btrfs_special_inode_operations; static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct address_space_operations btrfs_symlink_aops; @@ -75,6 +76,7 @@ void btrfs_read_locked_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; u64 alloc_group_block; + u32 rdev; int ret; path = btrfs_alloc_path(); @@ -104,6 +106,8 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); + inode->i_rdev = 0; + rdev = btrfs_inode_rdev(inode_item); alloc_group_block = btrfs_inode_block_group(inode_item); BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, alloc_group_block); @@ -114,12 +118,6 @@ void btrfs_read_locked_inode(struct inode *inode) mutex_unlock(&root->fs_info->fs_mutex); switch (inode->i_mode & S_IFMT) { -#if 0 - default: - init_special_inode(inode, inode->i_mode, - btrfs_inode_rdev(inode_item)); - break; -#endif case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; @@ -136,6 +134,9 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; break; + default: + init_special_inode(inode, inode->i_mode, rdev); + break; } return; @@ -162,6 +163,7 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(item, inode->i_blocks); btrfs_set_inode_generation(item, inode->i_generation); + btrfs_set_inode_rdev(item, inode->i_rdev); btrfs_set_inode_block_group(item, BTRFS_I(inode)->block_group->key.objectid); } @@ -1070,6 +1072,58 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, return err; } +static int btrfs_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t rdev) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct inode *inode; + int err; + int drop_inode = 0; + u64 objectid; + + if (!new_valid_dev(rdev)) + return -EINVAL; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_unlock; + + btrfs_set_trans_block_group(trans, inode); + err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; + else { + inode->i_op = &btrfs_special_inode_operations; + init_special_inode(inode, inode->i_mode, rdev); + } + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); +out_unlock: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } + btrfs_btree_balance_dirty(root); + return err; +} + static int btrfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { @@ -2526,6 +2580,7 @@ static struct inode_operations btrfs_dir_inode_operations = { .rename = btrfs_rename, .symlink = btrfs_symlink, .setattr = btrfs_setattr, + .mknod = btrfs_mknod, }; static struct inode_operations btrfs_dir_ro_inode_operations = { @@ -2562,6 +2617,11 @@ static struct inode_operations btrfs_file_inode_operations = { .setattr = btrfs_setattr, }; +static struct inode_operations btrfs_special_inode_operations = { + .getattr = btrfs_getattr, + .setattr = btrfs_setattr, +}; + static struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, From 92fee66d49b812ea57eae53c5baaf8942ac9af06 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jul 2007 12:31:35 -0400 Subject: [PATCH 0189/2985] Btrfs: deal with api changes in 2.6.23-rc1 Signed-off-by: Chris Mason --- fs/btrfs/file.c | 9 +++++++-- fs/btrfs/inode.c | 50 +++++++++++++++++++++++++----------------------- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f327525f541d..1fe38fe84150 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -713,8 +714,12 @@ out: } static struct vm_operations_struct btrfs_file_vm_ops = { - .nopage = filemap_nopage, - .populate = filemap_populate, +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + .nopage = filemap_nopage, + .populate = filemap_populate, +#else + .fault = filemap_fault, +#endif .page_mkwrite = btrfs_page_mkwrite, }; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f95cf5d4a69d..fa9c531c86b7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -2323,42 +2324,43 @@ void btrfs_destroy_cachep(void) kmem_cache_destroy(btrfs_path_cachep); } +static struct kmem_cache *cache_create(const char *name, size_t size, + unsigned long extra_flags, + void (*ctor)(void *, struct kmem_cache *, + unsigned long)) +{ + return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT | + SLAB_MEM_SPREAD | extra_flags), ctor +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + ,NULL +#endif + ); +} + int btrfs_init_cachep(void) { - btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", - sizeof(struct btrfs_inode), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - init_once, NULL); + btrfs_inode_cachep = cache_create("btrfs_inode_cache", + sizeof(struct btrfs_inode), + 0, init_once); if (!btrfs_inode_cachep) goto fail; - btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", + btrfs_trans_handle_cachep = cache_create("btrfs_trans_handle_cache", sizeof(struct btrfs_trans_handle), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - NULL, NULL); + 0, NULL); if (!btrfs_trans_handle_cachep) goto fail; - btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", + btrfs_transaction_cachep = cache_create("btrfs_transaction_cache", sizeof(struct btrfs_transaction), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - NULL, NULL); + 0, NULL); if (!btrfs_transaction_cachep) goto fail; - btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", - sizeof(struct btrfs_transaction), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - NULL, NULL); + btrfs_path_cachep = cache_create("btrfs_path_cache", + sizeof(struct btrfs_transaction), + 0, NULL); if (!btrfs_path_cachep) goto fail; - btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix", - 256, - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD | - SLAB_DESTROY_BY_RCU), - NULL, NULL); + btrfs_bit_radix_cachep = cache_create("btrfs_radix", 256, + SLAB_DESTROY_BY_RCU, NULL); if (!btrfs_bit_radix_cachep) goto fail; return 0; From 8578f0f1fda41f8429a1037022b720275102ca65 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 25 Jul 2007 14:04:51 -0400 Subject: [PATCH 0190/2985] Btrfs: cleaner make clean Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 5af2426116d3..a4e2df66bdbb 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -18,5 +18,5 @@ KERNELDIR := /lib/modules/`uname -r`/build all:: $(MAKE) -C $(KERNELDIR) M=`pwd` modules clean:: - rm *.o btrfs.ko + $(MAKE) -C $(KERNELDIR) M=`pwd` clean endif From 9f3a742736cecda5a8778be70faa2f779458839f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 7 Aug 2007 15:52:19 -0400 Subject: [PATCH 0191/2985] Btrfs: Do snapshot deletion in smaller chunks. Before, snapshot deletion was a single atomic unit. This caused considerable lock contention and required an unbounded amount of space. Now, the drop_progress field in the root item is used to indicate how far along snapshot deletion is, and to resume where it left off. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 10 +++++++ fs/btrfs/ctree.h | 4 +-- fs/btrfs/extent-tree.c | 45 +++++++++++++++++++++++++++--- fs/btrfs/transaction.c | 63 +++++++++++++++++++++++++----------------- 4 files changed, 91 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 19a30b7c3a28..aa824e2c521f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -659,9 +659,16 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *b; struct buffer_head *cow_buf; struct btrfs_node *c; + struct btrfs_root_item *root_item = &root->root_item; int slot; int ret; int level; + u8 lowest_level = 0; + + if (btrfs_root_refs(root_item) == 0 && root->ref_cows) { + lowest_level = root_item->drop_level; + WARN_ON(ins_len || cow); + } WARN_ON(p->nodes[0] != NULL); WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); @@ -718,6 +725,9 @@ again: slot = p->slots[level]; BUG_ON(btrfs_header_nritems(&c->header) == 1); } + /* this is only true while dropping a snapshot */ + if (level == lowest_level) + break; b = read_tree_block(root, btrfs_node_blockptr(c, slot)); } else { struct btrfs_leaf *l = (struct btrfs_leaf *)c; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0287bd51d87c..73c2e75a136d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -333,10 +333,10 @@ struct btrfs_root { u64 objectid; u64 last_trans; u32 blocksize; - int ref_cows; u32 type; u64 highest_inode; u64 last_inode_alloc; + int ref_cows; }; /* the lower bits in the key flags defines the item type */ @@ -1073,7 +1073,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *snap); + *root); /* root-item.c */ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5ace2c33f1aa..9455974dabea 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1561,12 +1561,21 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root int i; int slot; int ret; + struct btrfs_root_item *root_item = &root->root_item; + for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; if (slot < btrfs_header_nritems( btrfs_buffer_header(path->nodes[i])) - 1) { + struct btrfs_node *node; + node = btrfs_buffer_node(path->nodes[i]); path->slots[i]++; *level = i; + WARN_ON(*level == 0); + memcpy(&root_item->drop_progress, + &node->ptrs[path->slots[i]].key, + sizeof(root_item->drop_progress)); + root_item->drop_level = i; return 0; } else { ret = btrfs_free_extent(trans, root, @@ -1587,7 +1596,7 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root * decremented. */ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *snap) + *root) { int ret = 0; int wret; @@ -1595,14 +1604,33 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_path *path; int i; int orig_level; + int num_walks = 0; + struct btrfs_root_item *root_item = &root->root_item; path = btrfs_alloc_path(); BUG_ON(!path); - level = btrfs_header_level(btrfs_buffer_header(snap)); + level = btrfs_header_level(btrfs_buffer_header(root->node)); orig_level = level; - path->nodes[level] = snap; - path->slots[level] = 0; + if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { + path->nodes[level] = root->node; + path->slots[level] = 0; + } else { + struct btrfs_key key; + struct btrfs_disk_key *found_key; + struct btrfs_node *node; + btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); + wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + ret = wret; + goto out; + } + level = root_item->drop_level; + node = btrfs_buffer_node(path->nodes[level]); + found_key = &node->ptrs[path->slots[level]].key; + WARN_ON(memcmp(found_key, &root_item->drop_progress, + sizeof(*found_key))); + } while(1) { wret = walk_down_tree(trans, root, path, &level); if (wret > 0) @@ -1615,12 +1643,21 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; + num_walks++; + if (num_walks > 10) { + struct btrfs_key key; + btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); + ret = -EAGAIN; + get_bh(root->node); + break; + } } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { btrfs_block_release(root, path->nodes[i]); } } +out: btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0f494fe365cb..498626470a04 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -213,10 +213,7 @@ static int wait_for_commit(struct btrfs_root *root, struct dirty_root { struct list_head list; - struct btrfs_key snap_key; - struct buffer_head *commit_root; struct btrfs_root *root; - int free_on_drop; }; int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list) @@ -226,10 +223,7 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list) dirty = kmalloc(sizeof(*dirty), GFP_NOFS); if (!dirty) return -ENOMEM; - memcpy(&dirty->snap_key, &root->root_key, sizeof(root->root_key)); - dirty->commit_root = root->node; dirty->root = root; - dirty->free_on_drop = 1; list_add(&dirty->list, dead_list); return 0; } @@ -241,7 +235,6 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, struct dirty_root *dirty; struct btrfs_root *gang[8]; struct btrfs_root *root; - struct btrfs_root_item tmp_item; int i; int ret; int err = 0; @@ -267,13 +260,16 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, } dirty = kmalloc(sizeof(*dirty), GFP_NOFS); BUG_ON(!dirty); - memcpy(&dirty->snap_key, &root->root_key, - sizeof(root->root_key)); - dirty->commit_root = root->commit_root; + dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); + BUG_ON(!dirty->root); + + memset(&root->root_item.drop_progress, 0, + sizeof(struct btrfs_disk_key)); + root->root_item.drop_level = 0; + + memcpy(dirty->root, root, sizeof(*root)); + dirty->root->node = root->commit_root; root->commit_root = NULL; - dirty->root = root; - dirty->free_on_drop = 0; - memcpy(&tmp_item, &root->root_item, sizeof(tmp_item)); root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, @@ -283,17 +279,21 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, &root->root_item); if (err) break; - refs = btrfs_root_refs(&tmp_item); - btrfs_set_root_refs(&tmp_item, refs - 1); + + refs = btrfs_root_refs(&dirty->root->root_item); + btrfs_set_root_refs(&dirty->root->root_item, refs - 1); err = btrfs_update_root(trans, root->fs_info->tree_root, - &dirty->snap_key, - &tmp_item); + &dirty->root->root_key, + &dirty->root->root_item); BUG_ON(err); - if (refs == 1) + if (refs == 1) { list_add(&dirty->list, list); - else + } else { + WARN_ON(1); + kfree(dirty->root); kfree(dirty); + } } } return err; @@ -305,23 +305,36 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, struct dirty_root *dirty; struct btrfs_trans_handle *trans; int ret = 0; + int err; + while(!list_empty(list)) { mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); - trans = btrfs_start_transaction(tree_root, 1); - ret = btrfs_drop_snapshot(trans, dirty->root, - dirty->commit_root); + while(1) { + trans = btrfs_start_transaction(tree_root, 1); + ret = btrfs_drop_snapshot(trans, dirty->root); + if (ret != -EAGAIN) { + break; + } + err = btrfs_update_root(trans, + tree_root, + &dirty->root->root_key, + &dirty->root->root_item); + if (err) + ret = err; + ret = btrfs_end_transaction(trans, tree_root); + BUG_ON(ret); + } BUG_ON(ret); - ret = btrfs_del_root(trans, tree_root, &dirty->snap_key); + ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); if (ret) break; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); - if (dirty->free_on_drop) - kfree(dirty->root); + kfree(dirty->root); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root); From 3c69faecb8d83cb2ef085a98b196a3fecea67725 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 7 Aug 2007 15:52:22 -0400 Subject: [PATCH 0192/2985] Btrfs: Fold some btree readahead routines into something more generic. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 77 ++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 30 +--------------- fs/btrfs/inode.c | 69 ++----------------------------------- 4 files changed, 81 insertions(+), 96 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index aa824e2c521f..7a08491e208e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -639,6 +639,73 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, return 1; } +/* + * readahead one full node of leaves + */ +static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, + int slot) +{ + struct btrfs_node *node; + int i; + u32 nritems; + u64 item_objectid; + u64 blocknr; + u64 search; + u64 cluster_start; + int ret; + int nread = 0; + int direction = path->reada; + struct radix_tree_root found; + unsigned long gang[8]; + struct buffer_head *bh; + + if (!path->nodes[1]) + return; + + node = btrfs_buffer_node(path->nodes[1]); + search = btrfs_node_blockptr(node, slot); + bh = btrfs_find_tree_block(root, search); + if (bh) { + brelse(bh); + return; + } + + init_bit_radix(&found); + nritems = btrfs_header_nritems(&node->header); + for (i = slot; i < nritems; i++) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + blocknr = btrfs_node_blockptr(node, i); + set_radix_bit(&found, blocknr); + } + if (direction > 0) { + cluster_start = search - 4; + if (cluster_start > search) + cluster_start = 0; + } else + cluster_start = search + 4; + while(1) { + ret = find_first_radix_bit(&found, gang, 0, ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + blocknr = gang[i]; + clear_radix_bit(&found, blocknr); + if (nread > 64) + continue; + if (direction > 0 && cluster_start <= blocknr && + cluster_start + 8 > blocknr) { + cluster_start = blocknr; + readahead_tree_block(root, blocknr); + nread++; + } else if (direction < 0 && cluster_start >= blocknr && + blocknr + 8 > cluster_start) { + cluster_start = blocknr; + readahead_tree_block(root, blocknr); + nread++; + } + } + } +} /* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -660,9 +727,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *cow_buf; struct btrfs_node *c; struct btrfs_root_item *root_item = &root->root_item; + u64 blocknr; int slot; int ret; int level; + int should_reada = p->reada; u8 lowest_level = 0; if (btrfs_root_refs(root_item) == 0 && root->ref_cows) { @@ -728,7 +797,11 @@ again: /* this is only true while dropping a snapshot */ if (level == lowest_level) break; + blocknr = btrfs_node_blockptr(c, slot); + if (level == 1 && should_reada) + reada_for_search(root, p, slot); b = read_tree_block(root, btrfs_node_blockptr(c, slot)); + } else { struct btrfs_leaf *l = (struct btrfs_leaf *)c; p->slots[level] = slot; @@ -1915,6 +1988,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) blocknr = btrfs_node_blockptr(c_node, slot); if (next) btrfs_block_release(root, next); + if (level == 1 && path->reada) + reada_for_search(root, path, slot); next = read_tree_block(root, blocknr); break; } @@ -1927,6 +2002,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) path->slots[level] = 0; if (!level) break; + if (level == 1 && path->reada) + reada_for_search(root, path, slot); next = read_tree_block(root, btrfs_node_blockptr(btrfs_buffer_node(next), 0)); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 73c2e75a136d..c5a18d5d7f7c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -177,6 +177,7 @@ struct btrfs_node { struct btrfs_path { struct buffer_head *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; + int reada; }; /* diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9455974dabea..5d4d5d8db8ef 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -32,33 +32,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static void reada_extent_leaves(struct btrfs_root *root, - struct btrfs_path *path, u64 limit) -{ - struct btrfs_node *node; - int i; - int nritems; - u64 item_objectid; - u64 blocknr; - int slot; - int ret; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1] + 1; - nritems = btrfs_header_nritems(&node->header); - for (i = slot; i < nritems && i < slot + 8; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid > limit) - break; - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); - if (ret) - break; - } -} - static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) { @@ -84,6 +57,7 @@ static int cache_block_group(struct btrfs_root *root, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; key.objectid = block_group->key.objectid; key.flags = 0; key.offset = 0; @@ -94,12 +68,10 @@ static int cache_block_group(struct btrfs_root *root, if (ret && path->slots[0] > 0) path->slots[0]--; limit = block_group->key.objectid + block_group->key.offset; - reada_extent_leaves(root, path, limit); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&leaf->header)) { - reada_extent_leaves(root, path, limit); ret = btrfs_next_leaf(root, path); if (ret < 0) goto err; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fa9c531c86b7..3889032fc449 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -374,40 +374,6 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, return ret; } -/* - * truncates go from a high offset to a low offset. So, walk - * from hi to lo in the node and issue readas. Stop when you find - * keys from a different objectid - */ -static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path, - u64 objectid) -{ - struct btrfs_node *node; - int i; - int nritems; - u64 item_objectid; - u64 blocknr; - int slot; - int ret; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1]; - if (slot == 0) - return; - nritems = btrfs_header_nritems(&node->header); - for (i = slot - 1; i >= 0; i--) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid != objectid) - break; - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); - if (ret) - break; - } -} - /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find @@ -434,6 +400,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int del_item; path = btrfs_alloc_path(); + path->reada = -1; BUG_ON(!path); /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; @@ -450,7 +417,6 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(path->slots[0] == 0); path->slots[0]--; } - reada_truncate(root, path, inode->i_ino); leaf = btrfs_buffer_leaf(path->nodes[0]); found_key = &leaf->items[path->slots[0]].key; found_type = btrfs_disk_key_type(found_key); @@ -827,36 +793,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } -/* - * readahead one full node of leaves as long as their keys include - * the objectid supplied - */ -static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path, - u64 objectid) -{ - struct btrfs_node *node; - int i; - u32 nritems; - u64 item_objectid; - u64 blocknr; - int slot; - int ret; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1]; - nritems = btrfs_header_nritems(&node->header); - for (i = slot + 1; i < nritems; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid != objectid) - break; - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); - if (ret) - break; - } -} static unsigned char btrfs_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK }; @@ -890,18 +826,17 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; path = btrfs_alloc_path(); + path->reada = 1; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; advance = 0; - reada_leaves(root, path, inode->i_ino); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { - reada_leaves(root, path, inode->i_ino); ret = btrfs_next_leaf(root, path); if (ret) break; From 6702ed490ca0bb44e17131818a5a18b773957c5a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 7 Aug 2007 16:15:09 -0400 Subject: [PATCH 0193/2985] Btrfs: Add run time btree defrag, and an ioctl to force btree defrag This adds two types of btree defrag, a run time form that tries to defrag recently allocated blocks in the btree when they are still in ram, and an ioctl that forces defrag of all btree blocks. File data blocks are not defragged yet, but this can make a huge difference in sequential btree reads. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 221 ++++++++++++++++++++++++++++++---------- fs/btrfs/ctree.h | 19 +++- fs/btrfs/disk-io.c | 8 ++ fs/btrfs/extent-tree.c | 53 ++++++---- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 33 +++++- fs/btrfs/ioctl.h | 2 +- fs/btrfs/transaction.c | 105 +++++++++++++++---- fs/btrfs/transaction.h | 1 + fs/btrfs/tree-defrag.c | 222 +++++++++++++++++++++++++++++++++++++++++ 11 files changed, 566 insertions(+), 102 deletions(-) create mode 100644 fs/btrfs/tree-defrag.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a4e2df66bdbb..9321438c5eef 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ - transaction.o bit-radix.o inode.o file.o + transaction.o bit-radix.o inode.o file.o tree-defrag.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7a08491e208e..c7e47e77723f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -65,16 +65,71 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } -static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root +static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct buffer_head + **cow_ret, u64 search_start, u64 empty_size) +{ + struct buffer_head *cow; + struct btrfs_node *cow_node; + int ret = 0; + int different_trans = 0; + + WARN_ON(root->ref_cows && trans->transid != root->last_trans); + WARN_ON(!buffer_uptodate(buf)); + cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); + if (IS_ERR(cow)) + return PTR_ERR(cow); + + cow_node = btrfs_buffer_node(cow); + if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) + WARN_ON(1); + + memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); + btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); + btrfs_set_header_generation(&cow_node->header, trans->transid); + btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); + + WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) > + trans->transid); + if (btrfs_header_generation(btrfs_buffer_header(buf)) != + trans->transid) { + different_trans = 1; + ret = btrfs_inc_ref(trans, root, buf); + if (ret) + return ret; + } else { + WARN_ON(!root->ref_cows); + clean_tree_block(trans, root, buf); + } + + if (buf == root->node) { + root->node = cow; + get_bh(cow); + if (buf != root->commit_root) { + btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); + } + btrfs_block_release(root, buf); + } else { + btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, + bh_blocknr(cow)); + btrfs_mark_buffer_dirty(parent); + WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) != + trans->transid); + btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); + } + btrfs_block_release(root, buf); + btrfs_mark_buffer_dirty(cow); + *cow_ret = cow; + return 0; +} + +int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf, struct buffer_head *parent, int parent_slot, struct buffer_head **cow_ret) { - struct buffer_head *cow; - struct btrfs_node *cow_node; - int ret; - - WARN_ON(!buffer_uptodate(buf)); + u64 search_start; if (trans->transaction != root->fs_info->running_transaction) { printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, root->fs_info->running_transaction->transid); @@ -90,38 +145,94 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *cow_ret = buf; return 0; } - cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr); - if (IS_ERR(cow)) - return PTR_ERR(cow); - cow_node = btrfs_buffer_node(cow); - if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) - WARN_ON(1); - memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); - btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); - btrfs_set_header_generation(&cow_node->header, trans->transid); - btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); - ret = btrfs_inc_ref(trans, root, buf); - if (ret) - return ret; - if (buf == root->node) { - root->node = cow; - get_bh(cow); - if (buf != root->commit_root) { - btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); - } - btrfs_block_release(root, buf); - } else { - btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, - bh_blocknr(cow)); - btrfs_mark_buffer_dirty(parent); - btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); - } - btrfs_block_release(root, buf); - btrfs_mark_buffer_dirty(cow); - *cow_ret = cow; + + search_start = bh_blocknr(buf) & ~((u64)65535); + return __btrfs_cow_block(trans, root, buf, parent, + parent_slot, cow_ret, search_start, 0); +} + +static int close_blocks(u64 blocknr, u64 other) +{ + if (blocknr < other && other - blocknr < 8) + return 1; + if (blocknr > other && blocknr - other < 8) + return 1; return 0; } +int btrfs_realloc_node(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *parent, + int cache_only) +{ + struct btrfs_node *parent_node; + struct buffer_head *cur_bh; + struct buffer_head *tmp_bh; + u64 blocknr; + u64 search_start = 0; + u64 other; + u32 parent_nritems; + int start_slot; + int end_slot; + int i; + int err = 0; + + if (trans->transaction != root->fs_info->running_transaction) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->running_transaction->transid); + WARN_ON(1); + } + if (trans->transid != root->fs_info->generation) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->generation); + WARN_ON(1); + } + parent_node = btrfs_buffer_node(parent); + parent_nritems = btrfs_header_nritems(&parent_node->header); + + start_slot = 0; + end_slot = parent_nritems; + + if (parent_nritems == 1) + return 0; + + for (i = start_slot; i < end_slot; i++) { + int close = 1; + blocknr = btrfs_node_blockptr(parent_node, i); + if (i > 0) { + other = btrfs_node_blockptr(parent_node, i - 1); + close = close_blocks(blocknr, other); + } + if (close && i < end_slot - 1) { + other = btrfs_node_blockptr(parent_node, i + 1); + close = close_blocks(blocknr, other); + } + if (close) + continue; + + cur_bh = btrfs_find_tree_block(root, blocknr); + if (!cur_bh || !buffer_uptodate(cur_bh) || + buffer_locked(cur_bh)) { + if (cache_only) { + brelse(cur_bh); + continue; + } + brelse(cur_bh); + cur_bh = read_tree_block(root, blocknr); + } + if (search_start == 0) { + search_start = bh_blocknr(cur_bh) & ~((u64)65535); + } + err = __btrfs_cow_block(trans, root, cur_bh, parent, i, + &tmp_bh, search_start, + min(8, end_slot - i)); + if (err) + break; + search_start = bh_blocknr(tmp_bh); + brelse(tmp_bh); + } + return err; +} + /* * The leaf data grows from end-to-front in the node. * this returns the address of the start of the last item, @@ -221,6 +332,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, parent_slot = path->slots[level + 1]; parent_key = &parent->ptrs[parent_slot].key; + BUG_ON(memcmp(parent_key, &leaf->items[0].key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != @@ -643,7 +755,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, * readahead one full node of leaves */ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, - int slot) + int level, int slot) { struct btrfs_node *node; int i; @@ -659,10 +771,13 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, unsigned long gang[8]; struct buffer_head *bh; - if (!path->nodes[1]) + if (level == 0) return; - node = btrfs_buffer_node(path->nodes[1]); + if (!path->nodes[level]) + return; + + node = btrfs_buffer_node(path->nodes[level]); search = btrfs_node_blockptr(node, slot); bh = btrfs_find_tree_block(root, search); if (bh) { @@ -690,7 +805,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, for (i = 0; i < ret; i++) { blocknr = gang[i]; clear_radix_bit(&found, blocknr); - if (nread > 64) + if (nread > 32) continue; if (direction > 0 && cluster_start <= blocknr && cluster_start + 8 > blocknr) { @@ -726,7 +841,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *b; struct buffer_head *cow_buf; struct btrfs_node *c; - struct btrfs_root_item *root_item = &root->root_item; u64 blocknr; int slot; int ret; @@ -734,11 +848,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int should_reada = p->reada; u8 lowest_level = 0; - if (btrfs_root_refs(root_item) == 0 && root->ref_cows) { - lowest_level = root_item->drop_level; - WARN_ON(ins_len || cow); - } - + lowest_level = p->lowest_level; + WARN_ON(lowest_level && ins_len); WARN_ON(p->nodes[0] != NULL); WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); again: @@ -798,8 +909,8 @@ again: if (level == lowest_level) break; blocknr = btrfs_node_blockptr(c, slot); - if (level == 1 && should_reada) - reada_for_search(root, p, slot); + if (should_reada) + reada_for_search(root, p, level, slot); b = read_tree_block(root, btrfs_node_blockptr(c, slot)); } else { @@ -960,7 +1071,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); + t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0); if (IS_ERR(t)) return PTR_ERR(t); c = btrfs_buffer_node(t); @@ -1070,7 +1181,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); + split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0); if (IS_ERR(split_buffer)) return PTR_ERR(split_buffer); @@ -1461,7 +1572,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); if (IS_ERR(right_buffer)) return PTR_ERR(right_buffer); @@ -1560,7 +1671,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (!double_split) return ret; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); if (IS_ERR(right_buffer)) return PTR_ERR(right_buffer); @@ -1988,8 +2099,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) blocknr = btrfs_node_blockptr(c_node, slot); if (next) btrfs_block_release(root, next); - if (level == 1 && path->reada) - reada_for_search(root, path, slot); + if (path->reada) + reada_for_search(root, path, level, slot); next = read_tree_block(root, blocknr); break; } @@ -2002,8 +2113,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) path->slots[level] = 0; if (!level) break; - if (level == 1 && path->reada) - reada_for_search(root, path, slot); + if (path->reada) + reada_for_search(root, path, level, slot); next = read_tree_block(root, btrfs_node_blockptr(btrfs_buffer_node(next), 0)); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c5a18d5d7f7c..42aa20301bc9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -178,6 +178,7 @@ struct btrfs_path { struct buffer_head *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; int reada; + int lowest_level; }; /* @@ -338,6 +339,9 @@ struct btrfs_root { u64 highest_inode; u64 last_inode_alloc; int ref_cows; + struct btrfs_key defrag_progress; + int defrag_running; + int defrag_level; }; /* the lower bits in the key flags defines the item type */ @@ -1031,10 +1035,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint); + struct btrfs_root *root, u64 hint, + u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 search_start, + u64 num_blocks, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); @@ -1051,6 +1056,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); /* ctree.c */ +int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct buffer_head + **cow_ret); int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); int btrfs_truncate_item(struct btrfs_trans_handle *trans, @@ -1060,6 +1069,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); +int btrfs_realloc_node(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *parent, + int cache_only); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); @@ -1171,4 +1183,7 @@ extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 *hint_block); +/* tree-defrag.c */ +int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int cache_only); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60db85bc4ba8..c948416fea3a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -273,7 +273,9 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { WARN_ON(atomic_read(&buf->b_count) == 0); + lock_buffer(buf); clear_buffer_dirty(buf); + unlock_buffer(buf); return 0; } @@ -294,6 +296,9 @@ static int __setup_root(int blocksize, root->last_inode_alloc = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); + memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + root->defrag_running = 0; + root->defrag_level = 0; root->root_key.objectid = objectid; return 0; } @@ -585,6 +590,7 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -616,7 +622,9 @@ void btrfs_mark_buffer_dirty(struct buffer_head *bh) { struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + WARN_ON(!atomic_read(&bh->b_count)); + if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", (unsigned long long)bh->b_blocknr, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5d4d5d8db8ef..26b8d3406491 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -23,7 +23,8 @@ #include "transaction.h" static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 search_start, + *orig_root, u64 num_blocks, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_block, struct btrfs_key *ins, u64 exclude_start, u64 exclude_nr, int data); @@ -379,7 +380,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, + ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); @@ -533,7 +534,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, + ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); /* FIXME, set bit to recalc cache groups on next mount */ if (ret) @@ -708,6 +709,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, static int try_remove_page(struct address_space *mapping, unsigned long index) { int ret; + return 0; ret = invalidate_mapping_pages(mapping, index, index); return ret; } @@ -866,7 +868,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); + ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); if (ret) { btrfs_free_path(path); return ret; @@ -983,8 +985,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root * Any available blocks before search_start are skipped. */ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, u64 hint_block, + *orig_root, u64 num_blocks, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_block, struct btrfs_key *ins, u64 exclude_start, u64 exclude_nr, int data) { @@ -1042,6 +1044,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root data, 1); } + total_needed += empty_size; path = btrfs_alloc_path(); check_failed: @@ -1157,9 +1160,11 @@ check_pending: goto error; } search_start = orig_search_start; - if (wrapped) + if (wrapped) { + if (!full_scan) + total_needed -= empty_size; full_scan = 1; - else + } else wrapped = 1; goto new_group; } @@ -1238,9 +1243,11 @@ new_group: ret = -ENOSPC; goto error; } - if (wrapped) + if (wrapped) { + if (!full_scan) + total_needed -= empty_size; full_scan = 1; - else + } else wrapped = 1; } block_group = btrfs_lookup_block_group(info, search_start); @@ -1264,7 +1271,7 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 hint_block, + u64 num_blocks, u64 empty_size, u64 hint_block, u64 search_end, struct btrfs_key *ins, int data) { int ret; @@ -1303,7 +1310,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * in the correct block group. */ if (data) { - ret = find_free_extent(trans, root, 0, 0, + ret = find_free_extent(trans, root, 0, 0, 0, search_end, 0, &prealloc_key, 0, 0, 0); BUG_ON(ret); if (ret) @@ -1313,8 +1320,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } /* do the real allocation */ - ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, hint_block, ins, + ret = find_free_extent(trans, root, num_blocks, empty_size, + search_start, search_end, hint_block, ins, exclude_start, exclude_nr, data); BUG_ON(ret); if (ret) @@ -1333,7 +1340,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, exclude_start = ins->objectid; exclude_nr = ins->offset; hint_block = exclude_start + exclude_nr; - ret = find_free_extent(trans, root, 0, search_start, + ret = find_free_extent(trans, root, 0, 0, search_start, search_end, hint_block, &prealloc_key, exclude_start, exclude_nr, 0); @@ -1368,14 +1375,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * returns the tree buffer or NULL. */ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint) + struct btrfs_root *root, u64 hint, + u64 empty_size) { struct btrfs_key ins; int ret; struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, hint, (unsigned long)-1, &ins, 0); + 1, empty_size, hint, + (unsigned long)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); return ERR_PTR(ret); @@ -1385,6 +1394,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_free_extent(trans, root, ins.objectid, 1, 0); return ERR_PTR(-ENOMEM); } + WARN_ON(buffer_dirty(buf)); set_buffer_uptodate(buf); set_buffer_checked(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); @@ -1591,13 +1601,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key key; struct btrfs_disk_key *found_key; struct btrfs_node *node; + btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); + level = root_item->drop_level; + path->lowest_level = level; wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { + if (wret < 0) { ret = wret; goto out; } - level = root_item->drop_level; node = btrfs_buffer_node(path->nodes[level]); found_key = &node->ptrs[path->slots[level]].key; WARN_ON(memcmp(found_key, &root_item->drop_progress, @@ -1617,8 +1629,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; num_walks++; if (num_walks > 10) { - struct btrfs_key key; - btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); ret = -EAGAIN; get_bh(root->node); break; @@ -1627,6 +1637,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { btrfs_block_release(root, path->nodes[i]); + path->nodes[i] = 0; } } out: diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1fe38fe84150..00b118a2db69 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -512,7 +512,7 @@ static int prepare_pages(struct btrfs_root *root, if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, hint_block, (u64)-1, + num_blocks, 0, hint_block, (u64)-1, &ins, 1); if (err) goto failed_truncate; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3889032fc449..12aa043b9f6f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -554,7 +554,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) &alloc_hint); if (ret) goto out; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, + ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) goto out; @@ -1360,7 +1360,7 @@ not_found: if (create & BTRFS_GET_BLOCK_CREATE) { struct btrfs_key ins; ret = btrfs_alloc_extent(trans, root, inode->i_ino, - 1, alloc_hint, (u64)-1, + 1, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { err = ret; @@ -1998,7 +1998,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root, 0); + subvol = btrfs_alloc_free_block(trans, root, 0, 0); if (IS_ERR(subvol)) return PTR_ERR(subvol); leaf = btrfs_buffer_leaf(subvol); @@ -2159,7 +2159,9 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; + struct btrfs_trans_handle *trans; int ret = 0; + int err; struct btrfs_dir_item *di; int namelen; struct btrfs_path *path; @@ -2196,6 +2198,31 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int else ret = create_snapshot(root, vol_args.name, namelen); break; + + case BTRFS_IOC_DEFRAG: + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + memset(&root->defrag_progress, 0, + sizeof(root->defrag_progress)); + while (1) { + root->defrag_running = 1; + err = btrfs_defrag_leaves(trans, root, 0); + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + btrfs_btree_balance_dirty(root); + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (err != -EAGAIN) + break; + } + root->defrag_running = 0; + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + ret = 0; + break; default: return -ENOTTY; } diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 23bed48c51a7..8bc47dec2865 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -28,6 +28,6 @@ struct btrfs_ioctl_vol_args { #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_ADD_DISK _IOW(BTRFS_IOCTL_MAGIC, 2, \ +#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 498626470a04..338a7199363b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -29,6 +29,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; static struct workqueue_struct *trans_wq; #define BTRFS_ROOT_TRANS_TAG 0 +#define BTRFS_ROOT_DEFRAG_TAG 1 static void put_transaction(struct btrfs_transaction *transaction) { @@ -69,35 +70,41 @@ static int join_transaction(struct btrfs_root *root) return 0; } +static int record_root_in_trans(struct btrfs_root *root) +{ + u64 running_trans_id = root->fs_info->running_transaction->transid; + if (root->ref_cows && root->last_trans < running_trans_id) { + WARN_ON(root == root->fs_info->extent_root); + if (root->root_item.refs != 0) { + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + root->commit_root = root->node; + get_bh(root->node); + } else { + WARN_ON(1); + } + root->last_trans = running_trans_id; + } + return 0; +} + struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { struct btrfs_trans_handle *h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; - u64 running_trans_id; mutex_lock(&root->fs_info->trans_mutex); ret = join_transaction(root); BUG_ON(ret); - running_trans_id = root->fs_info->running_transaction->transid; - if (root != root->fs_info->tree_root && root->last_trans < - running_trans_id) { - WARN_ON(root == root->fs_info->extent_root); - WARN_ON(root->ref_cows != 1); - if (root->root_item.refs != 0) { - radix_tree_tag_set(&root->fs_info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_TRANS_TAG); - root->commit_root = root->node; - get_bh(root->node); - } else { - WARN_ON(1); - } - } - root->last_trans = running_trans_id; - h->transid = running_trans_id; + record_root_in_trans(root); + h->transid = root->fs_info->running_transaction->transid; h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; h->blocks_used = 0; @@ -155,6 +162,15 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, gang[i]); if (!page) continue; + if (PageWriteback(page)) { + if (PageDirty(page)) + wait_on_page_writeback(page); + else { + unlock_page(page); + page_cache_release(page); + continue; + } + } err = write_one_page(page, 0); if (err) werr = err; @@ -299,6 +315,58 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, return err; } +int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) +{ + struct btrfs_root *gang[1]; + struct btrfs_root *root; + struct btrfs_root *tree_root = info->tree_root; + struct btrfs_trans_handle *trans; + int i; + int ret; + int err = 0; + u64 last = 0; + + trans = btrfs_start_transaction(tree_root, 1); + while(1) { + ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix, + (void **)gang, last, + ARRAY_SIZE(gang), + BTRFS_ROOT_DEFRAG_TAG); + if (ret == 0) + break; + for (i = 0; i < ret; i++) { + root = gang[i]; + last = root->root_key.objectid + 1; + radix_tree_tag_clear(&info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + if (root->defrag_running) + continue; + + while (1) { + mutex_lock(&root->fs_info->trans_mutex); + record_root_in_trans(root); + mutex_unlock(&root->fs_info->trans_mutex); + + root->defrag_running = 1; + err = btrfs_defrag_leaves(trans, root, 1); + btrfs_end_transaction(trans, tree_root); + mutex_unlock(&info->fs_mutex); + + btrfs_btree_balance_dirty(root); + + mutex_lock(&info->fs_mutex); + trans = btrfs_start_transaction(tree_root, 1); + if (err != -EAGAIN) + break; + } + root->defrag_running = 0; + } + } + btrfs_end_transaction(trans, tree_root); + return err; +} + static int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { @@ -475,6 +543,7 @@ void btrfs_transaction_cleaner(struct work_struct *work) goto out; } mutex_unlock(&root->fs_info->trans_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); out: diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ebf44f3e1118..8b2714e65562 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -70,5 +70,6 @@ void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); void btrfs_init_transaction_sys(void); void btrfs_exit_transaction_sys(void); int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); +int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c new file mode 100644 index 000000000000..15d0a486fb59 --- /dev/null +++ b/fs/btrfs/tree-defrag.c @@ -0,0 +1,222 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include "ctree.h" +#include "disk-io.h" +#include "print-tree.h" +#include "transaction.h" + +static void reada_defrag(struct btrfs_root *root, + struct btrfs_node *node) +{ + int i; + u32 nritems; + u64 blocknr; + int ret; + + nritems = btrfs_header_nritems(&node->header); + for (i = 0; i < nritems; i++) { + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} + +static int defrag_walk_down(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + int cache_only) +{ + struct buffer_head *next; + struct buffer_head *cur; + u64 blocknr; + int ret = 0; + + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + + while(*level > 0) { + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + cur = path->nodes[*level]; + + if (!cache_only && *level > 1 && path->slots[*level] == 0) + reada_defrag(root, btrfs_buffer_node(cur)); + + if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + WARN_ON(1); + + if (path->slots[*level] >= + btrfs_header_nritems(btrfs_buffer_header(cur))) + break; + + if (*level == 1) { + ret = btrfs_realloc_node(trans, root, + path->nodes[*level], + cache_only); + break; + } + blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), + path->slots[*level]); + + if (cache_only) { + next = btrfs_find_tree_block(root, blocknr); + if (!next || !buffer_uptodate(next) || + buffer_locked(next)) { + brelse(next); + path->slots[*level]++; + continue; + } + } else { + next = read_tree_block(root, blocknr); + } + ret = btrfs_cow_block(trans, root, next, path->nodes[*level], + path->slots[*level], &next); + BUG_ON(ret); + ret = btrfs_realloc_node(trans, root, next, cache_only); + BUG_ON(ret); + WARN_ON(*level <= 0); + if (path->nodes[*level-1]) + btrfs_block_release(root, path->nodes[*level-1]); + path->nodes[*level-1] = next; + *level = btrfs_header_level(btrfs_buffer_header(next)); + path->slots[*level] = 0; + } + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + btrfs_block_release(root, path->nodes[*level]); + path->nodes[*level] = NULL; + *level += 1; + WARN_ON(ret); + return 0; +} + +static int defrag_walk_up(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + int cache_only) +{ + int i; + int slot; + struct btrfs_node *node; + + for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { + slot = path->slots[i]; + if (slot < btrfs_header_nritems( + btrfs_buffer_header(path->nodes[i])) - 1) { + path->slots[i]++; + *level = i; + node = btrfs_buffer_node(path->nodes[i]); + WARN_ON(i == 0); + btrfs_disk_key_to_cpu(&root->defrag_progress, + &node->ptrs[path->slots[i]].key); + root->defrag_level = i; + return 0; + } else { + btrfs_block_release(root, path->nodes[*level]); + path->nodes[*level] = NULL; + *level = i + 1; + } + } + return 1; +} + +int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int cache_only) +{ + struct btrfs_path *path = NULL; + struct buffer_head *tmp; + int ret = 0; + int wret; + int level; + int orig_level; + int i; + int num_runs = 0; + + if (root->ref_cows == 0) { + goto out; + } + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + level = btrfs_header_level(btrfs_buffer_header(root->node)); + orig_level = level; + if (level == 0) { + goto out; + } + if (root->defrag_progress.objectid == 0) { + get_bh(root->node); + ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); + BUG_ON(ret); + ret = btrfs_realloc_node(trans, root, root->node, cache_only); + BUG_ON(ret); + path->nodes[level] = root->node; + path->slots[level] = 0; + } else { + level = root->defrag_level; + path->lowest_level = level; + wret = btrfs_search_slot(trans, root, &root->defrag_progress, + path, 0, 1); + + if (wret < 0) { + ret = wret; + goto out; + } + while(level > 0 && !path->nodes[level]) + level--; + if (!path->nodes[level]) { + ret = 0; + goto out; + } + } + + while(1) { + wret = defrag_walk_down(trans, root, path, &level, cache_only); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + + wret = defrag_walk_up(trans, root, path, &level, cache_only); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + if (num_runs++ > 8) { + ret = -EAGAIN; + break; + } + } + for (i = 0; i <= orig_level; i++) { + if (path->nodes[i]) { + btrfs_block_release(root, path->nodes[i]); + path->nodes[i] = 0; + } + } +out: + if (path) + btrfs_free_path(path); + if (ret != -EAGAIN) { + memset(&root->defrag_progress, 0, + sizeof(root->defrag_progress)); + } + return ret; +} From f4468e94c86c2031f447788c4bfe7dfd2fcdc93a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 8 Aug 2007 10:08:58 -0400 Subject: [PATCH 0195/2985] Btrfs: Let some locks go during defrag and snapshot dropping Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/extent-tree.c | 6 ++++-- fs/btrfs/transaction.c | 8 ++++++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 42aa20301bc9..8b7d01597f4f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -300,9 +300,9 @@ struct btrfs_fs_info { struct radix_tree_root block_group_data_radix; struct radix_tree_root extent_map_radix; - u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; + u64 extent_tree_insert[BTRFS_MAX_LEVEL * 6]; int extent_tree_insert_nr; - u64 extent_tree_prealloc[BTRFS_MAX_LEVEL * 3]; + u64 extent_tree_prealloc[BTRFS_MAX_LEVEL * 6]; int extent_tree_prealloc_nr; u64 generation; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 26b8d3406491..f7d76d34fc37 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1018,7 +1018,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (num_blocks == 0) { fill_prealloc = 1; num_blocks = 1; - total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; + total_needed = (min(level + 1, BTRFS_MAX_LEVEL)) * 6; } if (fill_prealloc) { u64 first; @@ -1300,6 +1300,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ins->objectid; ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, 0); + WARN_ON(info->extent_tree_insert_nr > + ARRAY_SIZE(info->extent_tree_insert)); BUG_ON(ret); return 0; } @@ -1628,7 +1630,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; num_walks++; - if (num_walks > 10) { + if (num_walks > 2) { ret = -EAGAIN; get_bh(root->node); break; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 338a7199363b..88b0752550b6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -354,6 +354,7 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) mutex_unlock(&info->fs_mutex); btrfs_btree_balance_dirty(root); + cond_resched(); mutex_lock(&info->fs_mutex); trans = btrfs_start_transaction(tree_root, 1); @@ -394,6 +395,12 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, ret = err; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); + mutex_unlock(&tree_root->fs_info->fs_mutex); + + btrfs_btree_balance_dirty(tree_root); + schedule(); + + mutex_lock(&tree_root->fs_info->fs_mutex); } BUG_ON(ret); ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); @@ -406,6 +413,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root); + schedule(); } return ret; } From 26b8003f10569a9155b7539ef5a7379ee0c6b050 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 8 Aug 2007 20:17:12 -0400 Subject: [PATCH 0196/2985] Btrfs: Replace extent tree preallocation code with some bit radix magic. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 7 +- fs/btrfs/disk-io.c | 3 +- fs/btrfs/extent-tree.c | 230 ++++++++--------------------------------- fs/btrfs/transaction.c | 2 + fs/btrfs/transaction.h | 2 + 5 files changed, 49 insertions(+), 195 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8b7d01597f4f..59e09e37ab93 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -299,12 +299,7 @@ struct btrfs_fs_info { struct radix_tree_root block_group_radix; struct radix_tree_root block_group_data_radix; struct radix_tree_root extent_map_radix; - - u64 extent_tree_insert[BTRFS_MAX_LEVEL * 6]; - int extent_tree_insert_nr; - u64 extent_tree_prealloc[BTRFS_MAX_LEVEL * 6]; - int extent_tree_prealloc_nr; - + struct radix_tree_root extent_ins_radix; u64 generation; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c948416fea3a..192211274ce5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -427,6 +427,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_map_radix); + init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); @@ -443,8 +444,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; fs_info->do_barriers = 1; - fs_info->extent_tree_insert_nr = 0; - fs_info->extent_tree_prealloc_nr = 0; fs_info->closing = 0; INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f7d76d34fc37..ba50bd7b9a78 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -22,12 +22,6 @@ #include "print-tree.h" #include "transaction.h" -static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 empty_size, - u64 search_start, - u64 search_end, u64 hint_block, - struct btrfs_key *ins, u64 exclude_start, - u64 exclude_nr, int data); static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -174,9 +168,6 @@ static u64 find_search_start(struct btrfs_root *root, if (cache->data) goto out; - if (num > 1) { - last = max(last, cache->last_prealloc); - } again: ret = cache_block_group(root, cache); if (ret) @@ -374,18 +365,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_leaf *l; struct btrfs_extent_item *item; - struct btrfs_key ins; u32 refs; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0, - (u64)-1, 0, &ins, 0, 0, 0); - if (ret) { - btrfs_free_path(path); - return ret; - } + key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -532,13 +517,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, int pending_ret; struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_block_group_item *bi; - struct btrfs_key ins; - ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins, - 0, 0, 0); - /* FIXME, set bit to recalc cache groups on next mount */ - if (ret) - return ret; ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); if (ret < 0) goto fail; @@ -706,14 +685,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } -static int try_remove_page(struct address_space *mapping, unsigned long index) -{ - int ret; - return 0; - ret = invalidate_mapping_pages(mapping, index, index); - return ret; -} - int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy) { unsigned long gang[8]; @@ -732,6 +703,9 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy) last = gang[i] + 1; } } + ret = find_first_radix_bit(&root->fs_info->extent_ins_radix, gang, 0, + ARRAY_SIZE(gang)); + WARN_ON(ret); return 0; } @@ -740,7 +714,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct radix_tree_root *unpin_radix) { unsigned long gang[8]; - struct inode *btree_inode = root->fs_info->btree_inode; struct btrfs_block_group_cache *block_group; u64 first = 0; int ret; @@ -765,14 +738,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, block_group->pinned--; if (gang[i] < block_group->last_alloc) block_group->last_alloc = gang[i]; - if (gang[i] < block_group->last_prealloc) - block_group->last_prealloc = gang[i]; if (!block_group->data) set_radix_bit(extent_radix, gang[i]); } - try_remove_page(btree_inode->i_mapping, - gang[i] << (PAGE_CACHE_SHIFT - - btree_inode->i_blkbits)); } } return 0; @@ -785,7 +753,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct struct btrfs_extent_item extent_item; int i; int ret; - u64 super_blocks_used; + int err; + unsigned long gang[8]; struct btrfs_fs_info *info = extent_root->fs_info; btrfs_set_extent_refs(&extent_item, 1); @@ -794,16 +763,21 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); - for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) { - ins.objectid = extent_root->fs_info->extent_tree_insert[i]; - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); - btrfs_set_super_blocks_used(&info->super_copy, - super_blocks_used + 1); - ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, - sizeof(extent_item)); - BUG_ON(ret); + while(1) { + ret = find_first_radix_bit(&info->extent_ins_radix, gang, 0, + ARRAY_SIZE(gang)); + if (!ret) + break; + + for (i = 0; i < ret; i++) { + ins.objectid = gang[i]; + err = btrfs_insert_item(trans, extent_root, &ins, + &extent_item, + sizeof(extent_item)); + clear_radix_bit(&info->extent_ins_radix, gang[i]); + WARN_ON(err); + } } - extent_root->fs_info->extent_tree_insert_nr = 0; return 0; } @@ -856,7 +830,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_root *extent_root = info->extent_root; int ret; struct btrfs_extent_item *ei; - struct btrfs_key ins; u32 refs; key.objectid = blocknr; @@ -868,12 +841,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); - if (ret) { - btrfs_free_path(path); - return ret; - } - ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret < 0) return ret; @@ -1003,35 +970,17 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; int total_needed = num_blocks; - int total_found = 0; - int fill_prealloc = 0; int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; u64 limit; + WARN_ON(num_blocks < 1); ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); - if (num_blocks == 0) { - fill_prealloc = 1; - num_blocks = 1; - total_needed = (min(level + 1, BTRFS_MAX_LEVEL)) * 6; - } - if (fill_prealloc) { - u64 first; - int nr = info->extent_tree_prealloc_nr; - first = info->extent_tree_prealloc[nr - 1]; - if (info->extent_tree_prealloc_nr >= total_needed && - first >= search_start) { - ins->objectid = info->extent_tree_prealloc[0]; - ins->offset = 1; - return 0; - } - info->extent_tree_prealloc_nr = 0; - } if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(&info->super_copy); if (hint_block) { @@ -1091,10 +1040,6 @@ check_failed: l = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - if (fill_prealloc) { - info->extent_tree_prealloc_nr = 0; - total_found = 0; - } if (start_found) limit = last_block + (block_group->key.offset >> 1); @@ -1170,67 +1115,21 @@ check_pending: } for (test_block = ins->objectid; test_block < ins->objectid + num_blocks; test_block++) { - if (test_radix_bit(&info->pinned_radix, test_block)) { + if (test_radix_bit(&info->pinned_radix, test_block) || + test_radix_bit(&info->extent_ins_radix, test_block)) { search_start = test_block + 1; goto new_group; } } - if (!fill_prealloc && info->extent_tree_insert_nr) { - u64 last = - info->extent_tree_insert[info->extent_tree_insert_nr - 1]; - if (ins->objectid + num_blocks > - info->extent_tree_insert[0] && - ins->objectid <= last) { - search_start = last + 1; - WARN_ON(!full_scan); - goto new_group; - } - } - if (!fill_prealloc && info->extent_tree_prealloc_nr) { - u64 first = - info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1]; - if (ins->objectid + num_blocks > first && - ins->objectid <= info->extent_tree_prealloc[0]) { - search_start = info->extent_tree_prealloc[0] + 1; - goto new_group; - } - } if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; goto new_group; } - if (fill_prealloc) { - int nr; - test_block = ins->objectid; - if (test_block - info->extent_tree_prealloc[total_needed - 1] >= - leaf_range(root)) { - total_found = 0; - info->extent_tree_prealloc_nr = total_found; - } - while(test_block < ins->objectid + ins->offset && - total_found < total_needed) { - nr = total_needed - total_found - 1; - BUG_ON(nr < 0); - info->extent_tree_prealloc[nr] = test_block; - total_found++; - test_block++; - } - if (total_found < total_needed) { - search_start = test_block; - goto new_group; - } - info->extent_tree_prealloc_nr = total_found; - } if (!data) { block_group = btrfs_lookup_block_group(info, ins->objectid); - if (block_group) { - if (fill_prealloc) - block_group->last_prealloc = - info->extent_tree_prealloc[total_needed-1]; - else - trans->block_group = block_group; - } + if (block_group) + trans->block_group = block_group; } ins->offset = num_blocks; btrfs_free_path(path); @@ -1278,85 +1177,41 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, int pending_ret; u64 super_blocks_used; u64 search_start = 0; - u64 exclude_start = 0; - u64 exclude_nr = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; - struct btrfs_key prealloc_key; btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, owner); - if (root == extent_root) { - int nr; - BUG_ON(info->extent_tree_prealloc_nr == 0); - BUG_ON(num_blocks != 1); - ins->offset = 1; - info->extent_tree_prealloc_nr--; - nr = info->extent_tree_prealloc_nr; - ins->objectid = info->extent_tree_prealloc[nr]; - info->extent_tree_insert[info->extent_tree_insert_nr++] = - ins->objectid; - ret = update_block_group(trans, root, - ins->objectid, ins->offset, 1, 0, 0); - WARN_ON(info->extent_tree_insert_nr > - ARRAY_SIZE(info->extent_tree_insert)); - BUG_ON(ret); - return 0; - } - - /* - * if we're doing a data allocation, preallocate room in the - * extent tree first. This way the extent tree blocks end up - * in the correct block group. - */ - if (data) { - ret = find_free_extent(trans, root, 0, 0, 0, - search_end, 0, &prealloc_key, 0, 0, 0); - BUG_ON(ret); - if (ret) - return ret; - exclude_nr = info->extent_tree_prealloc_nr; - exclude_start = info->extent_tree_prealloc[exclude_nr - 1]; - } - - /* do the real allocation */ + WARN_ON(num_blocks < 1); ret = find_free_extent(trans, root, num_blocks, empty_size, search_start, search_end, hint_block, ins, - exclude_start, exclude_nr, data); + trans->alloc_exclude_start, + trans->alloc_exclude_nr, data); BUG_ON(ret); if (ret) return ret; - /* - * if we're doing a metadata allocation, preallocate space in the - * extent tree second. This way, we don't create a tiny hole - * in the allocation map between any unused preallocation blocks - * and the metadata block we're actually allocating. On disk, - * it'll go: - * [block we've allocated], [used prealloc 1], [ unused prealloc ] - * The unused prealloc will get reused the next time around. - */ - if (!data) { - exclude_start = ins->objectid; - exclude_nr = ins->offset; - hint_block = exclude_start + exclude_nr; - ret = find_free_extent(trans, root, 0, 0, search_start, - search_end, hint_block, - &prealloc_key, exclude_start, - exclude_nr, 0); - BUG_ON(ret); - if (ret) - return ret; - } - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + num_blocks); + + if (root == extent_root) { + BUG_ON(num_blocks != 1); + set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid); + goto update_block; + } + + WARN_ON(trans->alloc_exclude_nr); + trans->alloc_exclude_start = ins->objectid; + trans->alloc_exclude_nr = ins->offset; ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, sizeof(extent_item)); + trans->alloc_exclude_start = 0; + trans->alloc_exclude_nr = 0; + BUG_ON(ret); finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); @@ -1366,6 +1221,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (pending_ret) { return pending_ret; } + +update_block: ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, data); BUG_ON(ret); @@ -1750,7 +1607,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; - cache->last_prealloc = cache->key.objectid; cache->pinned = 0; cache->cached = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 88b0752550b6..e64ecec3f52e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -109,6 +109,8 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, h->blocks_reserved = num_blocks; h->blocks_used = 0; h->block_group = NULL; + h->alloc_exclude_nr = 0; + h->alloc_exclude_start = 0; root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); return h; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 8b2714e65562..65a395eeca91 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -39,6 +39,8 @@ struct btrfs_trans_handle { unsigned long blocks_used; struct btrfs_transaction *transaction; struct btrfs_block_group_cache *block_group; + u64 alloc_exclude_start; + u64 alloc_exclude_nr; }; From 409eb95d7f6632d5af32b795244ce68a29e49319 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 8 Aug 2007 20:17:12 -0400 Subject: [PATCH 0197/2985] Btrfs: Further reduce the concurrency penalty of defrag and drop_snapshot Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 13 ++++++------- fs/btrfs/transaction.c | 3 +++ fs/btrfs/tree-defrag.c | 7 ++----- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ba50bd7b9a78..120c448f58f8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1312,7 +1312,10 @@ static void reada_walk_down(struct btrfs_root *root, BUG_ON(ret); if (refs != 1) continue; + mutex_unlock(&root->fs_info->fs_mutex); ret = readahead_tree_block(root, blocknr); + cond_resched(); + mutex_lock(&root->fs_info->fs_mutex); if (ret) break; } @@ -1445,7 +1448,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_path *path; int i; int orig_level; - int num_walks = 0; struct btrfs_root_item *root_item = &root->root_item; path = btrfs_alloc_path(); @@ -1486,12 +1488,9 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; - num_walks++; - if (num_walks > 2) { - ret = -EAGAIN; - get_bh(root->node); - break; - } + ret = -EAGAIN; + get_bh(root->node); + break; } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e64ecec3f52e..204337c5ca0f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -364,6 +364,9 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) break; } root->defrag_running = 0; + radix_tree_tag_clear(&info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); } } btrfs_end_transaction(trans, tree_root); diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 15d0a486fb59..7ea66b4aa5c2 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -148,7 +148,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int level; int orig_level; int i; - int num_runs = 0; if (root->ref_cows == 0) { goto out; @@ -200,10 +199,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, break; if (wret < 0) ret = wret; - if (num_runs++ > 8) { - ret = -EAGAIN; - break; - } + ret = -EAGAIN; + break; } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { From e9d0b13b5bbb58c9b840e407a8d181442f799966 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 10 Aug 2007 14:06:19 -0400 Subject: [PATCH 0198/2985] Btrfs: Btree defrag on the extent-mapping tree as well Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 18 +++++---- fs/btrfs/ctree.h | 4 +- fs/btrfs/extent-tree.c | 28 ++++++++++++- fs/btrfs/inode.c | 23 +---------- fs/btrfs/super.c | 2 + fs/btrfs/transaction.c | 91 +++++++++++++++++++++++------------------- fs/btrfs/transaction.h | 2 + fs/btrfs/tree-defrag.c | 36 +++++++++++++---- 8 files changed, 125 insertions(+), 79 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c7e47e77723f..ee1ae00d2827 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -99,7 +99,6 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) return ret; } else { - WARN_ON(!root->ref_cows); clean_tree_block(trans, root, buf); } @@ -162,13 +161,14 @@ static int close_blocks(u64 blocknr, u64 other) int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *parent, - int cache_only) + int cache_only, u64 *last_ret) { struct btrfs_node *parent_node; struct buffer_head *cur_bh; struct buffer_head *tmp_bh; u64 blocknr; - u64 search_start = 0; + u64 search_start = *last_ret; + u64 last_block = 0; u64 other; u32 parent_nritems; int start_slot; @@ -198,6 +198,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, for (i = start_slot; i < end_slot; i++) { int close = 1; blocknr = btrfs_node_blockptr(parent_node, i); + if (last_block == 0) + last_block = blocknr; if (i > 0) { other = btrfs_node_blockptr(parent_node, i - 1); close = close_blocks(blocknr, other); @@ -206,8 +208,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, other = btrfs_node_blockptr(parent_node, i + 1); close = close_blocks(blocknr, other); } - if (close) + if (close) { + last_block = blocknr; continue; + } cur_bh = btrfs_find_tree_block(root, blocknr); if (!cur_bh || !buffer_uptodate(cur_bh) || @@ -219,9 +223,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, brelse(cur_bh); cur_bh = read_tree_block(root, blocknr); } - if (search_start == 0) { - search_start = bh_blocknr(cur_bh) & ~((u64)65535); - } + if (search_start == 0) + search_start = last_block & ~((u64)65535); + err = __btrfs_cow_block(trans, root, cur_bh, parent, i, &tmp_bh, search_start, min(8, end_slot - i)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 59e09e37ab93..d3cd564b3b3f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1019,6 +1019,8 @@ static inline void btrfs_memmove(struct btrfs_root *root, btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +int btrfs_extent_post_op(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, @@ -1066,7 +1068,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow); int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *parent, - int cache_only); + int cache_only, u64 *last_ret); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 120c448f58f8..3418bb62b996 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -396,6 +396,14 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return 0; } +int btrfs_extent_post_op(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + finish_current_insert(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); + return 0; +} + static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, u32 *refs) @@ -1374,7 +1382,25 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); continue; } - next = read_tree_block(root, blocknr); + next = btrfs_find_tree_block(root, blocknr); + if (!next || !buffer_uptodate(next)) { + brelse(next); + mutex_unlock(&root->fs_info->fs_mutex); + next = read_tree_block(root, blocknr); + mutex_lock(&root->fs_info->fs_mutex); + + /* we dropped the lock, check one more time */ + ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); + BUG_ON(ret); + if (refs != 1) { + path->slots[*level]++; + brelse(next); + ret = btrfs_free_extent(trans, root, + blocknr, 1, 1); + BUG_ON(ret); + continue; + } + } WARN_ON(*level <= 0); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 12aa043b9f6f..5c05ecbc5726 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2159,9 +2159,7 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; - struct btrfs_trans_handle *trans; int ret = 0; - int err; struct btrfs_dir_item *di; int namelen; struct btrfs_path *path; @@ -2201,25 +2199,8 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int case BTRFS_IOC_DEFRAG: mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - memset(&root->defrag_progress, 0, - sizeof(root->defrag_progress)); - while (1) { - root->defrag_running = 1; - err = btrfs_defrag_leaves(trans, root, 0); - - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - btrfs_btree_balance_dirty(root); - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (err != -EAGAIN) - break; - } - root->defrag_running = 0; - btrfs_end_transaction(trans, root); + btrfs_defrag_root(root, 0); + btrfs_defrag_root(root->fs_info->extent_root, 0); mutex_unlock(&root->fs_info->fs_mutex); ret = 0; break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2e797d5fb281..74f3de47423c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -121,7 +121,9 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } + btrfs_clean_old_snapshots(root); mutex_lock(&root->fs_info->fs_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 204337c5ca0f..c9d52dc83e48 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -317,18 +317,47 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, return err; } +int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) +{ + struct btrfs_fs_info *info = root->fs_info; + int ret; + struct btrfs_trans_handle *trans; + + if (root->defrag_running) + return 0; + + trans = btrfs_start_transaction(root, 1); + while (1) { + root->defrag_running = 1; + ret = btrfs_defrag_leaves(trans, root, cacheonly); + btrfs_end_transaction(trans, root); + mutex_unlock(&info->fs_mutex); + + btrfs_btree_balance_dirty(root); + cond_resched(); + + mutex_lock(&info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (ret != -EAGAIN) + break; + } + root->defrag_running = 0; + radix_tree_tag_clear(&info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + btrfs_end_transaction(trans, root); + return 0; +} + int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) { struct btrfs_root *gang[1]; struct btrfs_root *root; - struct btrfs_root *tree_root = info->tree_root; - struct btrfs_trans_handle *trans; int i; int ret; int err = 0; u64 last = 0; - trans = btrfs_start_transaction(tree_root, 1); while(1) { ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix, (void **)gang, last, @@ -339,37 +368,10 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) for (i = 0; i < ret; i++) { root = gang[i]; last = root->root_key.objectid + 1; - radix_tree_tag_clear(&info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); - if (root->defrag_running) - continue; - - while (1) { - mutex_lock(&root->fs_info->trans_mutex); - record_root_in_trans(root); - mutex_unlock(&root->fs_info->trans_mutex); - - root->defrag_running = 1; - err = btrfs_defrag_leaves(trans, root, 1); - btrfs_end_transaction(trans, tree_root); - mutex_unlock(&info->fs_mutex); - - btrfs_btree_balance_dirty(root); - cond_resched(); - - mutex_lock(&info->fs_mutex); - trans = btrfs_start_transaction(tree_root, 1); - if (err != -EAGAIN) - break; - } - root->defrag_running = 0; - radix_tree_tag_clear(&info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); + btrfs_defrag_root(root, 1); } } - btrfs_end_transaction(trans, tree_root); + btrfs_defrag_root(info->extent_root, 1); return err; } @@ -527,6 +529,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } +int btrfs_clean_old_snapshots(struct btrfs_root *root) +{ + struct list_head dirty_roots; + INIT_LIST_HEAD(&dirty_roots); + + mutex_lock(&root->fs_info->trans_mutex); + list_splice_init(&root->fs_info->dead_roots, &dirty_roots); + mutex_unlock(&root->fs_info->trans_mutex); + + if (!list_empty(&dirty_roots)) { + drop_dirty_roots(root, &dirty_roots); + } + return 0; +} void btrfs_transaction_cleaner(struct work_struct *work) { struct btrfs_fs_info *fs_info = container_of(work, @@ -536,12 +552,10 @@ void btrfs_transaction_cleaner(struct work_struct *work) struct btrfs_root *root = fs_info->tree_root; struct btrfs_transaction *cur; struct btrfs_trans_handle *trans; - struct list_head dirty_roots; unsigned long now; unsigned long delay = HZ * 30; int ret; - INIT_LIST_HEAD(&dirty_roots); mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; @@ -561,14 +575,7 @@ void btrfs_transaction_cleaner(struct work_struct *work) ret = btrfs_commit_transaction(trans, root); out: mutex_unlock(&root->fs_info->fs_mutex); - - mutex_lock(&root->fs_info->trans_mutex); - list_splice_init(&root->fs_info->dead_roots, &dirty_roots); - mutex_unlock(&root->fs_info->trans_mutex); - - if (!list_empty(&dirty_roots)) { - drop_dirty_roots(root, &dirty_roots); - } + btrfs_clean_old_snapshots(root); btrfs_transaction_queue_work(root, delay); } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 65a395eeca91..d5f491d3757e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -73,5 +73,7 @@ void btrfs_init_transaction_sys(void); void btrfs_exit_transaction_sys(void); int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); +int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); +int btrfs_clean_old_snapshots(struct btrfs_root *root); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 7ea66b4aa5c2..a09064a9a41c 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -42,16 +42,20 @@ static void reada_defrag(struct btrfs_root *root, static int defrag_walk_down(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level, - int cache_only) + int cache_only, u64 *last_ret) { struct buffer_head *next; struct buffer_head *cur; u64 blocknr; int ret = 0; + int is_extent = 0; WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); + if (root->fs_info->extent_root == root) + is_extent = 1; + while(*level > 0) { WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); @@ -70,7 +74,10 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, if (*level == 1) { ret = btrfs_realloc_node(trans, root, path->nodes[*level], - cache_only); + cache_only, last_ret); + if (is_extent) + btrfs_extent_post_op(trans, root); + break; } blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), @@ -90,8 +97,13 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); BUG_ON(ret); - ret = btrfs_realloc_node(trans, root, next, cache_only); + ret = btrfs_realloc_node(trans, root, next, cache_only, + last_ret); BUG_ON(ret); + + if (is_extent) + btrfs_extent_post_op(trans, root); + WARN_ON(*level <= 0); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); @@ -148,10 +160,14 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int level; int orig_level; int i; + int is_extent = 0; + u64 last_ret = 0; - if (root->ref_cows == 0) { + if (root->fs_info->extent_root == root) + is_extent = 1; + + if (root->ref_cows == 0 && !is_extent) goto out; - } path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -165,16 +181,21 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, get_bh(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); BUG_ON(ret); - ret = btrfs_realloc_node(trans, root, root->node, cache_only); + ret = btrfs_realloc_node(trans, root, root->node, cache_only, + &last_ret); BUG_ON(ret); path->nodes[level] = root->node; path->slots[level] = 0; + if (is_extent) + btrfs_extent_post_op(trans, root); } else { level = root->defrag_level; path->lowest_level = level; wret = btrfs_search_slot(trans, root, &root->defrag_progress, path, 0, 1); + if (is_extent) + btrfs_extent_post_op(trans, root); if (wret < 0) { ret = wret; goto out; @@ -188,7 +209,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, } while(1) { - wret = defrag_walk_down(trans, root, path, &level, cache_only); + wret = defrag_walk_down(trans, root, path, &level, cache_only, + &last_ret); if (wret > 0) break; if (wret < 0) From f2183bde1a918d338337955c8e8ba29bd8f5e7b1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 10 Aug 2007 14:42:37 -0400 Subject: [PATCH 0199/2985] Btrfs: Add BH_Defrag to mark buffers that are in need of defragging This allows the tree walking code to defrag only the newly allocated buffers, it seems to be a good balance between perfect defragging and the performance hit of repeatedly reallocating blocks. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 22 ++++++++++++---------- fs/btrfs/disk-io.h | 2 ++ fs/btrfs/extent-tree.c | 2 ++ fs/btrfs/tree-defrag.c | 3 ++- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ee1ae00d2827..7cf43da5e78e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -175,6 +175,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, int end_slot; int i; int err = 0; + int parent_level; if (trans->transaction != root->fs_info->running_transaction) { printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, @@ -188,6 +189,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, } parent_node = btrfs_buffer_node(parent); parent_nritems = btrfs_header_nritems(&parent_node->header); + parent_level = btrfs_header_level(&parent_node->header); start_slot = 0; end_slot = parent_nritems; @@ -215,13 +217,16 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, cur_bh = btrfs_find_tree_block(root, blocknr); if (!cur_bh || !buffer_uptodate(cur_bh) || - buffer_locked(cur_bh)) { + buffer_locked(cur_bh) || !buffer_defrag(cur_bh)) { if (cache_only) { brelse(cur_bh); continue; } - brelse(cur_bh); - cur_bh = read_tree_block(root, blocknr); + if (!cur_bh || !buffer_uptodate(cur_bh) || + buffer_locked(cur_bh)) { + brelse(cur_bh); + cur_bh = read_tree_block(root, blocknr); + } } if (search_start == 0) search_start = last_block & ~((u64)65535); @@ -232,6 +237,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (err) break; search_start = bh_blocknr(tmp_bh); + *last_ret = search_start; + if (parent_level == 1) + clear_buffer_defrag(tmp_bh); brelse(tmp_bh); } return err; @@ -811,16 +819,10 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, clear_radix_bit(&found, blocknr); if (nread > 32) continue; - if (direction > 0 && cluster_start <= blocknr && - cluster_start + 8 > blocknr) { - cluster_start = blocknr; + if (close_blocks(cluster_start, blocknr)) { readahead_tree_block(root, blocknr); nread++; - } else if (direction < 0 && cluster_start >= blocknr && - blocknr + 8 > cluster_start) { cluster_start = blocknr; - readahead_tree_block(root, blocknr); - nread++; } } } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 9e2c261b41ae..81fd18cbd824 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -25,8 +25,10 @@ enum btrfs_bh_state_bits { BH_Checked = BH_PrivateStart, + BH_Defrag, }; BUFFER_FNS(Checked, checked); +BUFFER_FNS(Defrag, defrag); static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3418bb62b996..b3641234473f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1015,6 +1015,7 @@ check_failed: ins->objectid = search_start; ins->offset = 0; start_found = 0; + path->reada = 1; ret = btrfs_search_slot(trans, root, ins, path, 0, 0); if (ret < 0) @@ -1264,6 +1265,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, WARN_ON(buffer_dirty(buf)); set_buffer_uptodate(buf); set_buffer_checked(buf); + set_buffer_defrag(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); return buf; } diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index a09064a9a41c..35fd20d24645 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -86,7 +86,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, if (cache_only) { next = btrfs_find_tree_block(root, blocknr); if (!next || !buffer_uptodate(next) || - buffer_locked(next)) { + buffer_locked(next) || !buffer_defrag(next)) { brelse(next); path->slots[*level]++; continue; @@ -142,6 +142,7 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, root->defrag_level = i; return 0; } else { + clear_buffer_defrag(path->nodes[*level]); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; From 15ee9bc7ed171248d1405df5854da5fa91bfdc39 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 10 Aug 2007 16:22:09 -0400 Subject: [PATCH 0200/2985] Btrfs: delay commits during fsync to allow more writers Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 5 +++++ fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/file.c | 22 +++++++++++++++++---- fs/btrfs/inode.c | 2 ++ fs/btrfs/transaction.c | 44 ++++++++++++++++++++++++++++++------------ fs/btrfs/transaction.h | 7 +++++++ 7 files changed, 66 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6bce46be49ff..f446f161f736 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -25,6 +25,11 @@ struct btrfs_inode { struct btrfs_block_group_cache *block_group; struct btrfs_key location; struct inode vfs_inode; + + /* + * transid of the trans_handle that last modified this inode + */ + u64 last_trans; }; static inline struct btrfs_inode *BTRFS_I(struct inode *inode) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d3cd564b3b3f..c7f5161271d3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -301,6 +301,7 @@ struct btrfs_fs_info { struct radix_tree_root extent_map_radix; struct radix_tree_root extent_ins_radix; u64 generation; + u64 last_trans_committed; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; struct btrfs_super_block super_copy; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 192211274ce5..b2f79878d51a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -435,6 +435,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_LIST_HEAD(&fs_info->dead_roots); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; + fs_info->last_trans_committed = 0; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->sb = sb; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 00b118a2db69..6933ab11a5cd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -694,22 +694,36 @@ static int btrfs_sync_file(struct file *file, { struct inode *inode = dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; + int ret = 0; struct btrfs_trans_handle *trans; /* - * FIXME, use inode generation number to check if we can skip the - * commit + * check the transaction that last modified this inode + * and see if its already been committed */ mutex_lock(&root->fs_info->fs_mutex); + if (!BTRFS_I(inode)->last_trans) + goto out; + mutex_lock(&root->fs_info->trans_mutex); + if (BTRFS_I(inode)->last_trans <= + root->fs_info->last_trans_committed) { + BTRFS_I(inode)->last_trans = 0; + mutex_unlock(&root->fs_info->trans_mutex); + goto out; + } + mutex_unlock(&root->fs_info->trans_mutex); + + /* + * ok we haven't committed the transaction yet, lets do a commit + */ trans = btrfs_start_transaction(root, 1); if (!trans) { ret = -ENOMEM; goto out; } ret = btrfs_commit_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); out: + mutex_unlock(&root->fs_info->fs_mutex); return ret > 0 ? EIO : ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5c05ecbc5726..398484179d82 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -193,6 +193,7 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, fill_inode_item(inode_item, inode); btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_set_inode_last_trans(trans, inode); ret = 0; failed: btrfs_release_path(root, path); @@ -2234,6 +2235,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); if (!ei) return NULL; + ei->last_trans = 0; return &ei->vfs_inode; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c9d52dc83e48..18abea802794 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -55,7 +55,8 @@ static int join_transaction(struct btrfs_root *root) BUG_ON(!cur_trans); root->fs_info->generation++; root->fs_info->running_transaction = cur_trans; - cur_trans->num_writers = 0; + cur_trans->num_writers = 1; + cur_trans->num_joined = 0; cur_trans->transid = root->fs_info->generation; init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); @@ -65,8 +66,11 @@ static int join_transaction(struct btrfs_root *root) cur_trans->start_time = get_seconds(); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); init_bit_radix(&cur_trans->dirty_pages); + } else { + cur_trans->num_writers++; + cur_trans->num_joined++; } - cur_trans->num_writers++; + return 0; } @@ -428,12 +432,14 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - int ret = 0; + unsigned long joined = 0; + unsigned long timeout = 1; struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; struct radix_tree_root pinned_copy; DEFINE_WAIT(wait); + int ret; init_bit_radix(&pinned_copy); INIT_LIST_HEAD(&dirty_fs_roots); @@ -448,7 +454,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->fs_mutex); ret = wait_for_commit(root, cur_trans); BUG_ON(ret); + + mutex_lock(&root->fs_info->trans_mutex); put_transaction(cur_trans); + mutex_unlock(&root->fs_info->trans_mutex); + mutex_lock(&root->fs_info->fs_mutex); return 0; } @@ -463,26 +473,35 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); wait_for_commit(root, prev_trans); - put_transaction(prev_trans); mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); + put_transaction(prev_trans); } } - while (trans->transaction->num_writers > 1) { + + do { + joined = cur_trans->num_joined; WARN_ON(cur_trans != trans->transaction); - prepare_to_wait(&trans->transaction->writer_wait, &wait, + prepare_to_wait(&cur_trans->writer_wait, &wait, TASK_UNINTERRUPTIBLE); - if (trans->transaction->num_writers <= 1) - break; + + if (cur_trans->num_writers > 1) + timeout = MAX_SCHEDULE_TIMEOUT; + else + timeout = 1; + mutex_unlock(&root->fs_info->fs_mutex); mutex_unlock(&root->fs_info->trans_mutex); - schedule(); + + schedule_timeout(timeout); + mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); - finish_wait(&trans->transaction->writer_wait, &wait); - } - finish_wait(&trans->transaction->writer_wait, &wait); + finish_wait(&cur_trans->writer_wait, &wait); + } while (cur_trans->num_writers > 1 || + (cur_trans->num_joined != joined)); + WARN_ON(cur_trans != trans->transaction); ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); @@ -511,6 +530,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_finish_extent_commit(trans, root, &pinned_copy); mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; + root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); put_transaction(cur_trans); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index d5f491d3757e..e451783a1a4d 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -23,6 +23,7 @@ struct btrfs_transaction { u64 transid; unsigned long num_writers; + unsigned long num_joined; int in_commit; int use_count; int commit_done; @@ -57,6 +58,12 @@ static inline void btrfs_update_inode_block_group(struct BTRFS_I(inode)->block_group = trans->block_group; } +static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, + struct inode *inode) +{ + BTRFS_I(inode)->last_trans = trans->transaction->transid; +} + int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, From 320206112895c72f98e57570ae194689dcd7fe56 Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 27 Aug 2007 15:17:54 -0400 Subject: [PATCH 0202/2985] fix block readahead in btrfs_next_leaf Send the correct slot down to reada_for_search Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7cf43da5e78e..51eea5ccb645 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2120,7 +2120,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (!level) break; if (path->reada) - reada_for_search(root, path, level, slot); + reada_for_search(root, path, level, 0); next = read_tree_block(root, btrfs_node_blockptr(btrfs_buffer_node(next), 0)); } From 2cc58cf24f69be8632a3b29d653c318bf3bd8c84 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Aug 2007 16:49:44 -0400 Subject: [PATCH 0203/2985] Btrfs: Do more extensive readahead during tree searches Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 62 +++++++++++++++++++++++++++++++++--------- fs/btrfs/extent-tree.c | 4 +-- fs/btrfs/inode.c | 2 +- 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 51eea5ccb645..c0782a5b04c5 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -43,8 +43,10 @@ struct btrfs_path *btrfs_alloc_path(void) { struct btrfs_path *path; path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); - if (path) + if (path) { btrfs_init_path(path); + path->reada = 1; + } return path; } @@ -159,6 +161,34 @@ static int close_blocks(u64 blocknr, u64 other) return 0; } +static int should_defrag_leaf(struct buffer_head *bh) +{ + struct btrfs_leaf *leaf = btrfs_buffer_leaf(bh); + struct btrfs_disk_key *key; + u32 nritems; + + if (buffer_defrag(bh)) + return 1; + + nritems = btrfs_header_nritems(&leaf->header); + if (nritems == 0) + return 0; + + key = &leaf->items[0].key; + if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) + return 1; + + key = &leaf->items[nritems-1].key; + if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) + return 1; + if (nritems > 4) { + key = &leaf->items[nritems/2].key; + if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) + return 1; + } + return 0; +} + int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *parent, int cache_only, u64 *last_ret) @@ -217,7 +247,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, cur_bh = btrfs_find_tree_block(root, blocknr); if (!cur_bh || !buffer_uptodate(cur_bh) || - buffer_locked(cur_bh) || !buffer_defrag(cur_bh)) { + buffer_locked(cur_bh) || + (parent_level != 1 && !buffer_defrag(cur_bh)) || + (parent_level == 1 && !should_defrag_leaf(cur_bh))) { if (cache_only) { brelse(cur_bh); continue; @@ -297,6 +329,7 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, parent = btrfs_buffer_node(path->nodes[level + 1]); slot = path->slots[level]; + BUG_ON(!buffer_uptodate(path->nodes[level])); BUG_ON(nritems == 0); if (parent) { struct btrfs_disk_key *parent_key; @@ -511,9 +544,6 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root err_on_enospc = 1; left_buf = read_node_slot(root, parent_buf, pslot - 1); - right_buf = read_node_slot(root, parent_buf, pslot + 1); - - /* first, try to make some room in the middle buffer */ if (left_buf) { wret = btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, &left_buf); @@ -521,6 +551,19 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; goto enospc; } + } + right_buf = read_node_slot(root, parent_buf, pslot + 1); + if (right_buf) { + wret = btrfs_cow_block(trans, root, right_buf, + parent_buf, pslot + 1, &right_buf); + if (wret) { + ret = wret; + goto enospc; + } + } + + /* first, try to make some room in the middle buffer */ + if (left_buf) { left = btrfs_buffer_node(left_buf); orig_slot += btrfs_header_nritems(&left->header); wret = push_node_left(trans, root, left_buf, mid_buf); @@ -534,13 +577,6 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root * then try to empty the right most buffer into the middle */ if (right_buf) { - wret = btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, &right_buf); - if (wret) { - ret = wret; - goto enospc; - } - right = btrfs_buffer_node(right_buf); wret = push_node_left(trans, root, mid_buf, right_buf); if (wret < 0 && wret != -ENOSPC) @@ -817,7 +853,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, for (i = 0; i < ret; i++) { blocknr = gang[i]; clear_radix_bit(&found, blocknr); - if (nread > 32) + if (path->reada == 1 && nread > 16) continue; if (close_blocks(cluster_start, blocknr)) { readahead_tree_block(root, blocknr); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b3641234473f..4049aadbeda9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -52,7 +52,7 @@ static int cache_block_group(struct btrfs_root *root, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->reada = 1; + path->reada = 2; key.objectid = block_group->key.objectid; key.flags = 0; key.offset = 0; @@ -1015,7 +1015,7 @@ check_failed: ins->objectid = search_start; ins->offset = 0; start_found = 0; - path->reada = 1; + path->reada = 2; ret = btrfs_search_slot(trans, root, ins, path, 0, 0); if (ret < 0) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 398484179d82..7e4cf62ada5a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -827,7 +827,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; path = btrfs_alloc_path(); - path->reada = 1; + path->reada = 2; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; From 83df7c1d8b885e216c68c3f0469bec9b93666b74 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Aug 2007 16:49:44 -0400 Subject: [PATCH 0204/2985] Btrfs: Make sure to cow the root during a snapshot Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7e4cf62ada5a..504d37dfa6c9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2096,6 +2096,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) struct btrfs_trans_handle *trans; struct btrfs_key key; struct btrfs_root_item new_root_item; + struct buffer_head *tmp; int ret; int err; u64 objectid; @@ -2123,6 +2124,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) key.offset = 1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, From a52d9a8033c454cd9b4697cfafb467509fc1693f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Aug 2007 16:49:44 -0400 Subject: [PATCH 0205/2985] Btrfs: Extent based page cache code. This uses an rbtree of extents and tests instead of buffer heads. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/btrfs_inode.h | 3 + fs/btrfs/ctree.h | 7 + fs/btrfs/disk-io.h | 1 - fs/btrfs/extent_map.c | 1656 ++++++++++++++++++++++++++++++++++++++++ fs/btrfs/extent_map.h | 89 +++ fs/btrfs/file.c | 295 ++++--- fs/btrfs/inode.c | 836 ++++++++------------ fs/btrfs/super.c | 2 + 9 files changed, 2213 insertions(+), 679 deletions(-) create mode 100644 fs/btrfs/extent_map.c create mode 100644 fs/btrfs/extent_map.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9321438c5eef..ea7a22870350 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -4,7 +4,8 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ - transaction.o bit-radix.o inode.o file.o tree-defrag.o + transaction.o bit-radix.o inode.o file.o tree-defrag.o \ + extent_map.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index f446f161f736..d1d5af471c3c 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -19,11 +19,14 @@ #ifndef __BTRFS_I__ #define __BTRFS_I__ +#include "extent_map.h" + /* in memory btrfs inode */ struct btrfs_inode { struct btrfs_root *root; struct btrfs_block_group_cache *block_group; struct btrfs_key location; + struct extent_map_tree extent_tree; struct inode vfs_inode; /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c7f5161271d3..947f061ed118 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1176,7 +1176,14 @@ int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to); int btrfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *result, int create); +struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, + size_t page_offset, u64 start, u64 end, + int create); +int btrfs_update_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode); /* file.c */ +int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end); extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 81fd18cbd824..5261733b8735 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -77,7 +77,6 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, u64 num_blocks); int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical); -int btrfs_releasepage(struct page *page, gfp_t flags); void btrfs_btree_balance_dirty(struct btrfs_root *root); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct buffer_head *bh); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c new file mode 100644 index 000000000000..d378edf0964e --- /dev/null +++ b/fs/btrfs/extent_map.c @@ -0,0 +1,1656 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "extent_map.h" + +static struct kmem_cache *extent_map_cache; +static struct kmem_cache *extent_state_cache; + +struct tree_entry { + u64 start; + u64 end; + int in_tree; + struct rb_node rb_node; +}; + +/* bits for the extent state */ +#define EXTENT_DIRTY 1 +#define EXTENT_WRITEBACK (1 << 1) +#define EXTENT_UPTODATE (1 << 2) +#define EXTENT_LOCKED (1 << 3) +#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) + +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + +static LIST_HEAD(all_states); +spinlock_t state_lock = SPIN_LOCK_UNLOCKED; + +void __init extent_map_init(void) +{ + extent_map_cache = kmem_cache_create("extent_map", + sizeof(struct extent_map), 0, + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU, + NULL); + extent_state_cache = kmem_cache_create("extent_state", + sizeof(struct extent_state), 0, + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU, + NULL); +} + +void __exit extent_map_exit(void) +{ + while(!list_empty(&all_states)) { + struct extent_state *state; + struct list_head *cur = all_states.next; + state = list_entry(cur, struct extent_state, list); + printk("found leaked state %Lu %Lu state %d in_tree %d\n", + state->start, state->end, state->state, state->in_tree); + list_del(&state->list); + kfree(state); + } + if (extent_map_cache) + kmem_cache_destroy(extent_map_cache); + if (extent_state_cache) + kmem_cache_destroy(extent_state_cache); +} + +void extent_map_tree_init(struct extent_map_tree *tree, + struct address_space *mapping, gfp_t mask) +{ + tree->map.rb_node = NULL; + tree->state.rb_node = NULL; + rwlock_init(&tree->lock); + tree->mapping = mapping; +} +EXPORT_SYMBOL(extent_map_tree_init); + +struct extent_map *alloc_extent_map(gfp_t mask) +{ + struct extent_map *em; + em = kmem_cache_alloc(extent_map_cache, mask); + if (!em || IS_ERR(em)) + return em; + em->in_tree = 0; + atomic_set(&em->refs, 1); + return em; +} +EXPORT_SYMBOL(alloc_extent_map); + +void free_extent_map(struct extent_map *em) +{ + if (atomic_dec_and_test(&em->refs)) { + WARN_ON(em->in_tree); + kmem_cache_free(extent_map_cache, em); + } +} +EXPORT_SYMBOL(free_extent_map); + + +struct extent_state *alloc_extent_state(gfp_t mask) +{ + struct extent_state *state; + state = kmem_cache_alloc(extent_state_cache, mask); + if (!state || IS_ERR(state)) + return state; + state->state = 0; + state->in_tree = 0; + atomic_set(&state->refs, 1); + init_waitqueue_head(&state->wq); + spin_lock_irq(&state_lock); + list_add(&state->list, &all_states); + spin_unlock_irq(&state_lock); + return state; +} +EXPORT_SYMBOL(alloc_extent_state); + +void free_extent_state(struct extent_state *state) +{ + if (atomic_dec_and_test(&state->refs)) { + WARN_ON(state->in_tree); + spin_lock_irq(&state_lock); + list_del_init(&state->list); + spin_unlock_irq(&state_lock); + kmem_cache_free(extent_state_cache, state); + } +} +EXPORT_SYMBOL(free_extent_state); + +static struct rb_node *tree_insert(struct rb_root *root, u64 offset, + struct rb_node *node) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct tree_entry *entry; + + while(*p) { + parent = *p; + entry = rb_entry(parent, struct tree_entry, rb_node); + + if (offset < entry->start) + p = &(*p)->rb_left; + else if (offset > entry->end) + p = &(*p)->rb_right; + else + return parent; + } + + entry = rb_entry(node, struct tree_entry, rb_node); + entry->in_tree = 1; + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *__tree_search(struct rb_root *root, u64 offset, + struct rb_node **prev_ret) +{ + struct rb_node * n = root->rb_node; + struct rb_node *prev = NULL; + struct tree_entry *entry; + struct tree_entry *prev_entry = NULL; + + while(n) { + entry = rb_entry(n, struct tree_entry, rb_node); + prev = n; + prev_entry = entry; + + if (offset < entry->start) + n = n->rb_left; + else if (offset > entry->end) + n = n->rb_right; + else + return n; + } + if (!prev_ret) + return NULL; + while(prev && offset > prev_entry->end) { + prev = rb_next(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *prev_ret = prev; + return NULL; +} + +static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) +{ + struct rb_node *prev; + struct rb_node *ret; + ret = __tree_search(root, offset, &prev); + if (!ret) + return prev; + return ret; +} + +static int tree_delete(struct rb_root *root, u64 offset) +{ + struct rb_node *node; + struct tree_entry *entry; + + node = __tree_search(root, offset, NULL); + if (!node) + return -ENOENT; + entry = rb_entry(node, struct tree_entry, rb_node); + entry->in_tree = 0; + rb_erase(node, root); + return 0; +} + +/* + * add_extent_mapping tries a simple backward merge with existing + * mappings. The extent_map struct passed in will be inserted into + * the tree directly (no copies made, just a reference taken). + */ +int add_extent_mapping(struct extent_map_tree *tree, + struct extent_map *em) +{ + int ret = 0; + struct extent_map *prev = NULL; + struct rb_node *rb; + + write_lock_irq(&tree->lock); + rb = tree_insert(&tree->map, em->end, &em->rb_node); + if (rb) { + prev = rb_entry(rb, struct extent_map, rb_node); + printk("found extent map %Lu %Lu on insert of %Lu %Lu\n", prev->start, prev->end, em->start, em->end); + ret = -EEXIST; + goto out; + } + atomic_inc(&em->refs); + if (em->start != 0) { + rb = rb_prev(&em->rb_node); + if (rb) + prev = rb_entry(rb, struct extent_map, rb_node); + if (prev && prev->end + 1 == em->start && + ((em->block_start == 0 && prev->block_start == 0) || + (em->block_start == prev->block_end + 1))) { + em->start = prev->start; + em->block_start = prev->block_start; + rb_erase(&prev->rb_node, &tree->map); + prev->in_tree = 0; + free_extent_map(prev); + } + } +out: + write_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(add_extent_mapping); + +/* + * lookup_extent_mapping returns the first extent_map struct in the + * tree that intersects the [start, end] (inclusive) range. There may + * be additional objects in the tree that intersect, so check the object + * returned carefully to make sure you don't need additional lookups. + */ +struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 end) +{ + struct extent_map *em; + struct rb_node *rb_node; + + read_lock_irq(&tree->lock); + rb_node = tree_search(&tree->map, start); + if (!rb_node) { + em = NULL; + goto out; + } + if (IS_ERR(rb_node)) { + em = ERR_PTR(PTR_ERR(rb_node)); + goto out; + } + em = rb_entry(rb_node, struct extent_map, rb_node); + if (em->end < start || em->start > end) { + em = NULL; + goto out; + } + atomic_inc(&em->refs); +out: + read_unlock_irq(&tree->lock); + return em; +} +EXPORT_SYMBOL(lookup_extent_mapping); + +/* + * removes an extent_map struct from the tree. No reference counts are + * dropped, and no checks are done to see if the range is in use + */ +int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) +{ + int ret; + + write_lock_irq(&tree->lock); + ret = tree_delete(&tree->map, em->end); + write_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(remove_extent_mapping); + +/* + * utility function to look for merge candidates inside a given range. + * Any extents with matching state are merged together into a single + * extent in the tree. Extents with EXTENT_IO in their state field + * are not merged because the end_io handlers need to be able to do + * operations on them without sleeping (or doing allocations/splits). + * + * This should be called with the tree lock held. + */ +static int merge_state(struct extent_map_tree *tree, + struct extent_state *state) +{ + struct extent_state *other; + struct rb_node *other_node; + + if (state->state & EXTENT_IOBITS) + return 0; + + other_node = rb_prev(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->end == state->start - 1 && + other->state == state->state) { + state->start = other->start; + other->in_tree = 0; + rb_erase(&other->rb_node, &tree->state); + free_extent_state(other); + } + } + other_node = rb_next(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->start == state->end + 1 && + other->state == state->state) { + other->start = state->start; + state->in_tree = 0; + rb_erase(&state->rb_node, &tree->state); + free_extent_state(state); + } + } + return 0; +} + +/* + * insert an extent_state struct into the tree. 'bits' are set on the + * struct before it is inserted. + * + * This may return -EEXIST if the extent is already there, in which case the + * state struct is freed. + * + * The tree lock is not taken internally. This is a utility function and + * probably isn't what you want to call (see set/clear_extent_bit). + */ +static int insert_state(struct extent_map_tree *tree, + struct extent_state *state, u64 start, u64 end, + int bits) +{ + struct rb_node *node; + + if (end < start) { + printk("end < start %Lu %Lu\n", end, start); + WARN_ON(1); + } + state->state |= bits; + state->start = start; + state->end = end; + if ((end & 4095) == 0) { + printk("insert state %Lu %Lu strange end\n", start, end); + WARN_ON(1); + } + node = tree_insert(&tree->state, end, &state->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); +printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); + free_extent_state(state); + return -EEXIST; + } + merge_state(tree, state); + return 0; +} + +/* + * split a given extent state struct in two, inserting the preallocated + * struct 'prealloc' as the newly created second half. 'split' indicates an + * offset inside 'orig' where it should be split. + * + * Before calling, + * the tree has 'orig' at [orig->start, orig->end]. After calling, there + * are two extent state structs in the tree: + * prealloc: [orig->start, split - 1] + * orig: [ split, orig->end ] + * + * The tree locks are not taken by this function. They need to be held + * by the caller. + */ +static int split_state(struct extent_map_tree *tree, struct extent_state *orig, + struct extent_state *prealloc, u64 split) +{ + struct rb_node *node; + prealloc->start = orig->start; + prealloc->end = split - 1; + prealloc->state = orig->state; + orig->start = split; + if ((prealloc->end & 4095) == 0) { + printk("insert state %Lu %Lu strange end\n", prealloc->start, + prealloc->end); + WARN_ON(1); + } + node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); +printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); + free_extent_state(prealloc); + return -EEXIST; + } + return 0; +} + +/* + * utility function to clear some bits in an extent state struct. + * it will optionally wake up any one waiting on this state (wake == 1), or + * forcibly remove the state from the tree (delete == 1). + * + * If no bits are set on the state struct after clearing things, the + * struct is freed and removed from the tree + */ +static int clear_state_bit(struct extent_map_tree *tree, + struct extent_state *state, int bits, int wake, + int delete) +{ + int ret = state->state & bits; + state->state &= ~bits; + if (wake) + wake_up(&state->wq); + if (delete || state->state == 0) { + if (state->in_tree) { + rb_erase(&state->rb_node, &tree->state); + state->in_tree = 0; + free_extent_state(state); + } else { + WARN_ON(1); + } + } else { + merge_state(tree, state); + } + return ret; +} + +/* + * clear some bits on a range in the tree. This may require splitting + * or inserting elements in the tree, so the gfp mask is used to + * indicate which allocations or sleeping are allowed. + * + * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove + * the given range from the tree regardless of state (ie for truncate). + * + * the range [start, end] is inclusive. + * + * This takes the tree lock, and returns < 0 on error, > 0 if any of the + * bits were already set, or zero if none of the bits were already set. + */ +int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, + int bits, int wake, int delete, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + int err; + int set = 0; + +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irq(&tree->lock); + /* + * this search will find the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + goto out; + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > end) + goto out; + WARN_ON(state->end < start); + + /* + * | ---- desired range ---- | + * | state | or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip + * bits on second half. + * + * If the extent we found extends past our range, we + * just split and search again. It'll get split again + * the next time though. + * + * If the extent we found is inside our range, we clear + * the desired bit on it. + */ + + if (state->start < start) { + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, + wake, delete); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and clear the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + if (wake) + wake_up(&state->wq); + set |= clear_state_bit(tree, prealloc, bits, + wake, delete); + prealloc = NULL; + goto out; + } + + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, wake, delete); + goto search_again; + +out: + write_unlock_irq(&tree->lock); + if (prealloc) + free_extent_state(prealloc); + + return set; + +search_again: + if (start >= end) + goto out; + write_unlock_irq(&tree->lock); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(clear_extent_bit); + +static int wait_on_state(struct extent_map_tree *tree, + struct extent_state *state) +{ + DEFINE_WAIT(wait); + prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); + read_unlock_irq(&tree->lock); + schedule(); + read_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + return 0; +} + +/* + * waits for one or more bits to clear on a range in the state tree. + * The range [start, end] is inclusive. + * The tree lock is taken by this function + */ +int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits) +{ + struct extent_state *state; + struct rb_node *node; + + read_lock_irq(&tree->lock); +again: + while (1) { + /* + * this search will find all the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + break; + + state = rb_entry(node, struct extent_state, rb_node); + + if (state->start > end) + goto out; + + if (state->state & bits) { + start = state->start; + atomic_inc(&state->refs); + wait_on_state(tree, state); + free_extent_state(state); + goto again; + } + start = state->end + 1; + + if (start > end) + break; + + if (need_resched()) { + read_unlock_irq(&tree->lock); + cond_resched(); + read_lock_irq(&tree->lock); + } + } +out: + read_unlock_irq(&tree->lock); + return 0; +} +EXPORT_SYMBOL(wait_extent_bit); + +/* + * set some bits on a range in the tree. This may require allocations + * or sleeping, so the gfp mask is used to indicate what is allowed. + * + * If 'exclusive' == 1, this will fail with -EEXIST if some part of the + * range already has the desired bits set. The start of the existing + * range is returned in failed_start in this case. + * + * [start, end] is inclusive + * This takes the tree lock. + */ +int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, + int exclusive, u64 *failed_start, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + int err = 0; + int set; + u64 last_start; + u64 last_end; +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node) { + err = insert_state(tree, prealloc, start, end, bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + goto out; + } + + state = rb_entry(node, struct extent_state, rb_node); + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going + */ + if (state->start == start && state->end <= end) { + set = state->state & bits; + if (set && exclusive) { + *failed_start = state->start; + err = -EEXIST; + goto out; + } + state->state |= bits; + start = state->end + 1; + merge_state(tree, state); + goto search_again; + } + + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on + * second half. + * + * If the extent we found extends past our + * range, we just split and search again. It'll get split + * again the next time though. + * + * If the extent we found is inside our range, we set the + * desired bit on it. + */ + if (state->start < start) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + state->state |= bits; + start = state->end + 1; + merge_state(tree, state); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and set the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + prealloc->state |= bits; + merge_state(tree, prealloc); + prealloc = NULL; + goto out; + } + + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and + * ignore the extent we found. + */ + if (state->start > start) { + u64 this_end; + if (end < last_start) + this_end = end; + else + this_end = last_start -1; + err = insert_state(tree, prealloc, start, this_end, + bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + if (err) + goto out; + start = this_end + 1; + goto search_again; + } + goto search_again; + +out: + write_unlock_irq(&tree->lock); + if (prealloc) + free_extent_state(prealloc); + + return err; + +search_again: + if (start > end) + goto out; + write_unlock_irq(&tree->lock); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(set_extent_bit); + +/* wrappers around set/clear extent bit */ +int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_dirty); + +int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_dirty); + +int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_new); + +int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_new); + +int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_uptodate); + +int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_uptodate); + +int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, + 0, NULL, mask); +} +EXPORT_SYMBOL(set_extent_writeback); + +int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); +} +EXPORT_SYMBOL(clear_extent_writeback); + +int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end) +{ + return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); +} +EXPORT_SYMBOL(wait_on_extent_writeback); + +/* + * locks a range in ascending order, waiting for any locked regions + * it hits on the way. [start,end] are inclusive, and this will sleep. + */ +int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) +{ + int err; + u64 failed_start; + while (1) { + err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, + &failed_start, mask); + if (err == -EEXIST && (mask & __GFP_WAIT)) { + wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); + start = failed_start; + } else { + break; + } + WARN_ON(start > end); + } + return err; +} +EXPORT_SYMBOL(lock_extent); + +int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); +} +EXPORT_SYMBOL(unlock_extent); + +/* + * helper function to set pages and extents in the tree dirty + */ +int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + __set_page_dirty_nobuffers(page); + page_cache_release(page); + index++; + } + set_extent_dirty(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_dirty); + +/* + * helper function to set both pages and extents in the tree writeback + */ +int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + set_page_writeback(page); + page_cache_release(page); + index++; + } + set_extent_writeback(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_writeback); + +/* + * helper function to lock both pages and extents in the tree. + * pages must be locked first. + */ +int lock_range(struct extent_map_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + int err; + + while (index <= end_index) { + page = grab_cache_page(tree->mapping, index); + if (!page) { + err = -ENOMEM; + goto failed; + } + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto failed; + } + index++; + } + lock_extent(tree, start, end, GFP_NOFS); + return 0; + +failed: + /* + * we failed above in getting the page at 'index', so we undo here + * up to but not including the page at 'index' + */ + end_index = index; + index = start >> PAGE_CACHE_SHIFT; + while (index < end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + return err; +} +EXPORT_SYMBOL(lock_range); + +/* + * helper function to unlock both pages and extents in the tree. + */ +int unlock_range(struct extent_map_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + unlock_extent(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(unlock_range); + +/* + * searches a range in the state tree for a given mask. + * If 'filled' == 1, this returns 1 only if ever extent in the tree + * has the bits set. Otherwise, 1 is returned if any bit in the + * range is found set. + */ +static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, + int bits, int filled) +{ + struct extent_state *state = NULL; + struct rb_node *node; + int bitset = 0; + + read_lock_irq(&tree->lock); + node = tree_search(&tree->state, start); + while (node && start <= end) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > end) + break; + + if (filled && state->start > start) { + bitset = 0; + break; + } + if (state->state & bits) { + bitset = 1; + if (!filled) + break; + } else if (filled) { + bitset = 0; + break; + } + start = state->end + 1; + if (start > end) + break; + node = rb_next(node); + } + read_unlock_irq(&tree->lock); + return bitset; +} + +/* + * helper function to set a given page up to date if all the + * extents in the tree for that page are up to date + */ +static int check_page_uptodate(struct extent_map_tree *tree, + struct page *page) +{ + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) + SetPageUptodate(page); + return 0; +} + +/* + * helper function to unlock a page if all the extents in the tree + * for that page are unlocked + */ +static int check_page_locked(struct extent_map_tree *tree, + struct page *page) +{ + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) + unlock_page(page); + return 0; +} + +/* + * helper function to end page writeback if all the extents + * in the tree for that page are done with writeback + */ +static int check_page_writeback(struct extent_map_tree *tree, + struct page *page) +{ + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) + end_page_writeback(page); + return 0; +} + +/* lots and lots of room for performance fixes in the end_bio funcs */ + +/* + * after a writepage IO is done, we need to: + * clear the uptodate bits on error + * clear the writeback bits in the extent tree for this IO + * end_page_writeback if the page has no more pending IO + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. + */ +static int end_bio_extent_writepage(struct bio *bio, + unsigned int bytes_done, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_map_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + + if (bio->bi_size) + return 1; + + do { + struct page *page = bvec->bv_page; + start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (!uptodate) { + clear_extent_uptodate(tree, start, end, GFP_ATOMIC); + ClearPageUptodate(page); + SetPageError(page); + } + clear_extent_writeback(tree, start, end, GFP_ATOMIC); + + if (whole_page) + end_page_writeback(page); + else + check_page_writeback(tree, page); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); + return 0; +} + +/* + * after a readpage IO is done, we need to: + * clear the uptodate bits on error + * set the uptodate bits if things worked + * set the page up to date if all extents in the tree are uptodate + * clear the lock bit in the extent tree + * unlock the page if there are no other extents locked for it + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. + */ +static int end_bio_extent_readpage(struct bio *bio, + unsigned int bytes_done, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_map_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + + if (bio->bi_size) + return 1; + + do { + struct page *page = bvec->bv_page; + start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + if (whole_page) + SetPageUptodate(page); + else + check_page_uptodate(tree, page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + if (whole_page) + unlock_page(page); + else + check_page_locked(tree, page); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); + return 0; +} + +/* + * IO done from prepare_write is pretty simple, we just unlock + * the structs in the extent tree when done, and set the uptodate bits + * as appropriate. + */ +static int end_bio_extent_preparewrite(struct bio *bio, + unsigned int bytes_done, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_map_tree *tree = bio->bi_private; + u64 start; + u64 end; + + if (bio->bi_size) + return 1; + + do { + struct page *page = bvec->bv_page; + start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); + return 0; +} + +static int submit_extent_page(int rw, struct extent_map_tree *tree, + struct page *page, sector_t sector, + size_t size, unsigned long offset, + struct block_device *bdev, + bio_end_io_t end_io_func) +{ + struct bio *bio; + int ret = 0; + + bio = bio_alloc(GFP_NOIO, 1); + + bio->bi_sector = sector; + bio->bi_bdev = bdev; + bio->bi_io_vec[0].bv_page = page; + bio->bi_io_vec[0].bv_len = size; + bio->bi_io_vec[0].bv_offset = offset; + + bio->bi_vcnt = 1; + bio->bi_idx = 0; + bio->bi_size = size; + + bio->bi_end_io = end_io_func; + bio->bi_private = tree; + + bio_get(bio); + submit_bio(rw, bio); + + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + + bio_put(bio); + return ret; +} + +/* + * basic readpage implementation. Locked extent state structs are inserted + * into the tree that are removed when the IO is done (by the end_io + * handlers) + */ +int extent_read_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent) +{ + struct inode *inode = page->mapping->host; + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + u64 cur_end; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t iosize; + size_t blocksize = inode->i_sb->s_blocksize; + + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); + } + + end = page_end; + lock_extent(tree, start, end, GFP_NOFS); + + while (cur <= end) { + if (cur >= last_byte) { + iosize = PAGE_CACHE_SIZE - page_offset; + zero_user_page(page, page_offset, iosize, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + break; + } + em = get_extent(inode, page, page_offset, cur, end, 0); + if (IS_ERR(em) || !em) { + SetPageError(page); + unlock_extent(tree, cur, end, GFP_NOFS); + break; + } + + extent_offset = cur - em->start; + BUG_ON(em->end < cur); + BUG_ON(end < cur); + + iosize = min(em->end - cur, end - cur) + 1; + cur_end = min(em->end, end); + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + /* we've found a hole, just zero and go on */ + if (block_start == 0) { + zero_user_page(page, page_offset, iosize, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + /* the get_extent function already copied into the page */ + if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, bdev, + end_bio_extent_readpage); + if (ret) + SetPageError(page); + cur = cur + iosize; + page_offset += iosize; + nr++; + } + if (!nr) { + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); + } + return 0; +} +EXPORT_SYMBOL(extent_read_full_page); + +/* + * the writepage semantics are similar to regular writepage. extent + * records are inserted to lock ranges in the tree, and as dirty areas + * are found, they are marked writeback. Then the lock bits are removed + * and the end_io handler clears the writeback ranges + */ +int extent_write_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t iosize; + size_t blocksize; + loff_t i_size = i_size_read(inode); + unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; + + if (page->index > end_index) { + clear_extent_dirty(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; + } + + if (page->index == end_index) { + size_t offset = i_size & (PAGE_CACHE_SIZE - 1); + zero_user_page(page, offset, + PAGE_CACHE_SIZE - offset, KM_USER0); + } + + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); + } + + end = page_end; + lock_extent(tree, start, page_end, GFP_NOFS); + + if (last_byte <= start) { + clear_extent_dirty(tree, start, page_end, GFP_NOFS); + goto done; + } + + set_extent_uptodate(tree, start, page_end, GFP_NOFS); + blocksize = inode->i_sb->s_blocksize; + + while (cur <= end) { + if (cur >= last_byte) { + clear_extent_dirty(tree, cur, page_end, GFP_NOFS); + break; + } + em = get_extent(inode, page, page_offset, cur, end, 1); + if (IS_ERR(em) || !em) { + SetPageError(page); + break; + } + + extent_offset = cur - em->start; + BUG_ON(em->end < cur); + BUG_ON(end < cur); + iosize = min(em->end - cur, end - cur) + 1; + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + if (block_start == 0 || block_start == EXTENT_MAP_INLINE) { + clear_extent_dirty(tree, cur, + cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + /* leave this out until we have a page_mkwrite call */ + if (0 && !test_range_bit(tree, cur, cur + iosize - 1, + EXTENT_DIRTY, 0)) { + cur = cur + iosize; + page_offset += iosize; + continue; + } + clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); + set_range_writeback(tree, cur, cur + iosize - 1); + ret = submit_extent_page(WRITE, tree, page, + sector, iosize, page_offset, bdev, + end_bio_extent_writepage); + if (ret) + SetPageError(page); + cur = cur + iosize; + page_offset += iosize; + nr++; + } +done: + WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0)); + unlock_extent(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; +} +EXPORT_SYMBOL(extent_write_full_page); + +/* + * basic invalidatepage code, this waits on any locked or writeback + * ranges corresponding to the page, and then deletes any extent state + * records from the tree + */ +int extent_invalidatepage(struct extent_map_tree *tree, + struct page *page, unsigned long offset) +{ + u64 start = (page->index << PAGE_CACHE_SHIFT); + u64 end = start + PAGE_CACHE_SIZE - 1; + size_t blocksize = page->mapping->host->i_sb->s_blocksize; + + start += (offset + blocksize -1) & ~(blocksize - 1); + if (start > end) + return 0; + + lock_extent(tree, start, end, GFP_NOFS); + wait_on_extent_writeback(tree, start, end); + clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY, + 1, 1, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(extent_invalidatepage); + +/* + * simple commit_write call, set_range_dirty is used to mark both + * the pages and the extent records as dirty + */ +int extent_commit_write(struct extent_map_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to) +{ + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); + } + + set_page_dirty(page); + + if (pos > inode->i_size) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } + return 0; +} +EXPORT_SYMBOL(extent_commit_write); + +int extent_prepare_write(struct extent_map_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to, get_extent_t *get_extent) +{ + u64 page_start = page->index << PAGE_CACHE_SHIFT; + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + u64 block_start; + u64 orig_block_start; + u64 block_end; + u64 cur_end; + struct extent_map *em; + unsigned blocksize = 1 << inode->i_blkbits; + size_t page_offset = 0; + size_t block_off_start; + size_t block_off_end; + int err = 0; + int iocount = 0; + int ret = 0; + int isnew; + + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); + } + block_start = (page_start + from) & ~((u64)blocksize - 1); + block_end = (page_start + to - 1) | (blocksize - 1); + orig_block_start = block_start; + + lock_extent(tree, page_start, page_end, GFP_NOFS); + while(block_start <= block_end) { + em = get_extent(inode, page, page_offset, block_start, + block_end, 1); + if (IS_ERR(em) || !em) { + goto err; + } + cur_end = min(block_end, em->end); + block_off_start = block_start & (PAGE_CACHE_SIZE - 1); + block_off_end = block_off_start + blocksize; + isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); + + if (!PageUptodate(page) && isnew && + (block_off_end > to || block_off_start < from)) { + void *kaddr; + + kaddr = kmap_atomic(page, KM_USER0); + if (block_off_end > to) + memset(kaddr + to, 0, block_off_end - to); + if (block_off_start < from) + memset(kaddr + block_off_start, 0, + from - block_off_start); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + } + if (!isnew && !PageUptodate(page) && + (block_off_end > to || block_off_start < from) && + !test_range_bit(tree, block_start, cur_end, + EXTENT_UPTODATE, 1)) { + u64 sector; + u64 extent_offset = block_start - em->start; + size_t iosize; + sector = (em->block_start + extent_offset) >> 9; + iosize = (cur_end - block_start + blocksize - 1) & + ~((u64)blocksize - 1); + /* + * we've already got the extent locked, but we + * need to split the state such that our end_bio + * handler can clear the lock. + */ + set_extent_bit(tree, block_start, + block_start + iosize - 1, + EXTENT_LOCKED, 0, NULL, GFP_NOFS); + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, em->bdev, + end_bio_extent_preparewrite); + iocount++; + block_start = block_start + iosize; + } else { + set_extent_uptodate(tree, block_start, cur_end, + GFP_NOFS); + unlock_extent(tree, block_start, cur_end, GFP_NOFS); + block_start = cur_end + 1; + } + page_offset = block_start & (PAGE_CACHE_SIZE - 1); + free_extent_map(em); + } + if (iocount) { + wait_extent_bit(tree, orig_block_start, + block_end, EXTENT_LOCKED); + } + check_page_uptodate(tree, page); +err: + /* FIXME, zero out newly allocated blocks on error */ + return err; +} +EXPORT_SYMBOL(extent_prepare_write); + +/* + * a helper for releasepage. As long as there are no locked extents + * in the range corresponding to the page, both state records and extent + * map records are removed + */ +int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) +{ + struct extent_map *em; + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + u64 orig_start = start; + + while (start <= end) { + em = lookup_extent_mapping(tree, start, end); + if (!em || IS_ERR(em)) + break; + if (test_range_bit(tree, em->start, em->end, + EXTENT_LOCKED, 0)) { + free_extent_map(em); + start = em->end + 1; +printk("range still locked %Lu %Lu\n", em->start, em->end); + break; + } + remove_extent_mapping(tree, em); + start = em->end + 1; + /* once for the rb tree */ + free_extent_map(em); + /* once for us */ + free_extent_map(em); + } + WARN_ON(test_range_bit(tree, orig_start, end, EXTENT_WRITEBACK, 0)); + clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, + 1, 1, GFP_NOFS); + return 1; +} +EXPORT_SYMBOL(try_release_extent_mapping); + diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h new file mode 100644 index 000000000000..108944aab4bd --- /dev/null +++ b/fs/btrfs/extent_map.h @@ -0,0 +1,89 @@ +#ifndef __EXTENTMAP__ +#define __EXTENTMAP__ + +#include + +#define EXTENT_MAP_INLINE (u64)-2 +#define EXTENT_MAP_DELALLOC (u64)-1 + +struct extent_map_tree { + struct rb_root map; + struct rb_root state; + struct address_space *mapping; + rwlock_t lock; +}; + +/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ +struct extent_map { + u64 start; + u64 end; /* inclusive */ + int in_tree; + struct rb_node rb_node; + /* block_start and block_end are in bytes */ + u64 block_start; + u64 block_end; /* inclusive */ + struct block_device *bdev; + atomic_t refs; +}; + +/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ +struct extent_state { + u64 start; + u64 end; /* inclusive */ + int in_tree; + struct rb_node rb_node; + wait_queue_head_t wq; + atomic_t refs; + unsigned long state; + struct list_head list; +}; + +struct extent_buffer { + u64 start; + u64 end; /* inclusive */ + char *addr; + struct page *pages[]; +}; + +typedef struct extent_map *(get_extent_t)(struct inode *inode, + struct page *page, + size_t page_offset, + u64 start, u64 end, + int create); + +void extent_map_tree_init(struct extent_map_tree *tree, + struct address_space *mapping, gfp_t mask); +struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 end); +int add_extent_mapping(struct extent_map_tree *tree, + struct extent_map *em); +int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); +int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page); +int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +struct extent_map *alloc_extent_map(gfp_t mask); +void free_extent_map(struct extent_map *em); +int extent_read_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent); +void __init extent_map_init(void); +void __exit extent_map_exit(void); +int extent_clean_all_trees(struct extent_map_tree *tree); +int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); +int extent_invalidatepage(struct extent_map_tree *tree, + struct page *page, unsigned long offset); +int extent_write_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc); +int extent_prepare_write(struct extent_map_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to, get_extent_t *get_extent); +int extent_commit_write(struct extent_map_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to); +int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); +#endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6933ab11a5cd..71a481894ab6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -81,14 +81,14 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) } } -static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, +static int insert_inline_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, u64 offset, ssize_t size, - struct buffer_head *bh) + struct page *page, size_t page_offset) { struct btrfs_key key; struct btrfs_path *path; char *ptr, *kaddr; - struct btrfs_trans_handle *trans; struct btrfs_file_extent_item *ei; u32 datasize; int err = 0; @@ -98,8 +98,6 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, if (!path) return -ENOMEM; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); key.objectid = inode->i_ino; @@ -122,18 +120,13 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(page, KM_USER0); btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, kaddr + bh_offset(bh), - size); + ptr, kaddr + page_offset, size); kunmap_atomic(kaddr, KM_USER0); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_free_path(path); - ret = btrfs_end_transaction(trans, root); - if (ret && !err) - err = ret; - mutex_unlock(&root->fs_info->fs_mutex); return err; } @@ -145,45 +138,143 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, loff_t pos, size_t write_bytes) { - int i; - int offset; int err = 0; - int ret; - int this_write; + int i; struct inode *inode = file->f_path.dentry->d_inode; - struct buffer_head *bh; + struct extent_map *em; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct btrfs_key ins; + u64 hint_block; + u64 num_blocks; + u64 start_pos; + u64 end_of_last_block; + u64 end_pos = pos + write_bytes; + loff_t isize = i_size_read(inode); - for (i = 0; i < num_pages; i++) { - offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); + em = alloc_extent_map(GFP_NOFS); + if (!em) + return -ENOMEM; - /* FIXME, one block at a time */ - bh = page_buffers(pages[i]); + em->bdev = inode->i_sb->s_bdev; - if (buffer_mapped(bh) && bh->b_blocknr == 0) { - ret = insert_inline_extent(root, inode, - pages[i]->index << PAGE_CACHE_SHIFT, - offset + this_write, bh); - if (ret) { - err = ret; - goto failed; - } - } + start_pos = pos & ~((u64)root->blocksize - 1); + num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + inode->i_blkbits; - ret = btrfs_commit_write(file, pages[i], offset, - offset + this_write); - pos += this_write; - if (ret) { - err = ret; + end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); + inode->i_blocks += num_blocks << 3; + hint_block = 0; + + if ((end_of_last_block & 4095) == 0) { + printk("strange end of last %Lu %lu %Lu\n", start_pos, write_bytes, end_of_last_block); + } + set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS); + + /* FIXME...EIEIO, ENOSPC and more */ + + /* step one, delete the existing extents in this range */ + /* FIXME blocksize != pagesize */ + if (start_pos < inode->i_size) { + err = btrfs_drop_extents(trans, root, inode, + start_pos, (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + if (err) goto failed; + } + + /* insert any holes we need to create */ + if (inode->i_size < start_pos) { + u64 last_pos_in_file; + u64 hole_size; + u64 mask = root->blocksize - 1; + last_pos_in_file = (isize + mask) & ~mask; + hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + err = btrfs_insert_file_extent(trans, root, + inode->i_ino, + last_pos_in_file, + 0, 0, hole_size); } - WARN_ON(this_write > write_bytes); - write_bytes -= this_write; + if (err) + goto failed; + } + + /* + * either allocate an extent for the new bytes or setup the key + * to show we are doing inline data in the extent + */ + if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || + pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + err = btrfs_alloc_extent(trans, root, inode->i_ino, + num_blocks, 0, hint_block, (u64)-1, + &ins, 1); + BUG_ON(err); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, + start_pos, ins.objectid, ins.offset, + ins.offset); + BUG_ON(err); + em->start = start_pos; + em->end = end_of_last_block; + em->block_start = ins.objectid << inode->i_blkbits; + em->block_end = em->block_start + + (ins.offset << inode->i_blkbits) - 1; + set_extent_dirty(em_tree, start_pos, end_of_last_block, + GFP_NOFS); + err = add_extent_mapping(em_tree, em); + for (i = 0; i < num_pages; i++) { + struct page *p = pages[i]; + SetPageUptodate(p); + __set_page_dirty_nobuffers(p); + } + } else { + struct page *p = pages[0]; + err = insert_inline_extent(trans, root, inode, start_pos, + end_pos - start_pos, p, 0); + BUG_ON(err); + em->start = start_pos; + em->end = end_pos; + em->block_start = EXTENT_MAP_INLINE; + em->block_end = EXTENT_MAP_INLINE; + add_extent_mapping(em_tree, em); + } + if (end_pos > isize) { + i_size_write(inode, end_pos); + btrfs_update_inode(trans, root, inode); } failed: + err = btrfs_end_transaction(trans, root); +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); + free_extent_map(em); return err; } +int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) +{ + struct extent_map *em; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + + while(1) { + em = lookup_extent_mapping(em_tree, start, end); + if (!em) + break; + remove_extent_mapping(em_tree, em); + /* once for us */ + free_extent_map(em); + /* once for the tree*/ + free_extent_map(em); + } + return 0; +} + /* * this is very complex, but the basic idea is to drop all extents * in the range start - end. hint_block is filled in with a block number @@ -213,6 +304,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, int found_inline; int recow; + btrfs_drop_extent_cache(inode, start, end - 1); + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -434,18 +527,9 @@ static int prepare_pages(struct btrfs_root *root, int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; struct inode *inode = file->f_path.dentry->d_inode; - int offset; int err = 0; - int this_write; - struct buffer_head *bh; - struct buffer_head *head; - loff_t isize = i_size_read(inode); - struct btrfs_trans_handle *trans; - u64 hint_block; u64 num_blocks; - u64 alloc_extent_start; u64 start_pos; - struct btrfs_key ins; start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> @@ -457,119 +541,17 @@ static int prepare_pages(struct btrfs_root *root, pages[i] = grab_cache_page(inode->i_mapping, index + i); if (!pages[i]) { err = -ENOMEM; - goto failed_release; + BUG_ON(1); } cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); wait_on_page_writeback(pages[i]); - } - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - mutex_unlock(&root->fs_info->fs_mutex); - goto out_unlock; - } - btrfs_set_trans_block_group(trans, inode); - /* FIXME blocksize != 4096 */ - inode->i_blocks += num_blocks << 3; - hint_block = 0; - - /* FIXME...EIEIO, ENOSPC and more */ - - /* step one, delete the existing extents in this range */ - /* FIXME blocksize != pagesize */ - if (start_pos < inode->i_size) { - err = btrfs_drop_extents(trans, root, inode, - start_pos, (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); - if (err) - goto failed_release; - } - - /* insert any holes we need to create */ - if (inode->i_size < start_pos) { - u64 last_pos_in_file; - u64 hole_size; - u64 mask = root->blocksize - 1; - last_pos_in_file = (isize + mask) & ~mask; - hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - hole_size >>= inode->i_blkbits; - if (last_pos_in_file < start_pos) { - err = btrfs_insert_file_extent(trans, root, - inode->i_ino, - last_pos_in_file, - 0, 0, hole_size); + if (!PagePrivate(pages[i])) { + SetPagePrivate(pages[i]); + set_page_private(pages[i], 1); + page_cache_get(pages[i]); } - if (err) - goto failed_release; - } - - /* - * either allocate an extent for the new bytes or setup the key - * to show we are doing inline data in the extent - */ - if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || - pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 0, hint_block, (u64)-1, - &ins, 1); - if (err) - goto failed_truncate; - err = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset, - ins.offset); - if (err) - goto failed_truncate; - } else { - ins.offset = 0; - ins.objectid = 0; - } - BUG_ON(err); - alloc_extent_start = ins.objectid; - err = btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - for (i = 0; i < num_pages; i++) { - offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); - if (!page_has_buffers(pages[i])) { - create_empty_buffers(pages[i], - root->fs_info->sb->s_blocksize, - (1 << BH_Uptodate)); - } - head = page_buffers(pages[i]); - bh = head; - do { - err = btrfs_map_bh_to_logical(root, bh, - alloc_extent_start); - BUG_ON(err); - if (err) - goto failed_truncate; - bh = bh->b_this_page; - if (alloc_extent_start) - alloc_extent_start++; - } while (bh != head); - pos += this_write; - WARN_ON(this_write > write_bytes); - write_bytes -= this_write; } return 0; - -failed_release: - btrfs_drop_pages(pages, num_pages); - return err; - -failed_truncate: - btrfs_drop_pages(pages, num_pages); - if (pos > isize) - vmtruncate(inode, isize); - return err; - -out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); - goto failed_release; - } static ssize_t btrfs_file_write(struct file *file, const char __user *buf, @@ -685,7 +667,6 @@ out: page_cache_release(pinned[1]); *ppos = pos; current->backing_dev_info = NULL; - mark_inode_dirty(inode); return num_written ? num_written : err; } @@ -714,8 +695,8 @@ static int btrfs_sync_file(struct file *file, mutex_unlock(&root->fs_info->trans_mutex); /* - * ok we haven't committed the transaction yet, lets do a commit - */ + * ok we haven't committed the transaction yet, lets do a commit + */ trans = btrfs_start_transaction(root, 1); if (!trans) { ret = -ENOMEM; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 504d37dfa6c9..fd4cb65fcf1f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -121,6 +121,8 @@ void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; break; @@ -169,7 +171,7 @@ static void fill_inode_item(struct btrfs_inode_item *item, BTRFS_I(inode)->block_group->key.objectid); } -static int btrfs_update_inode(struct btrfs_trans_handle *trans, +int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { @@ -400,6 +402,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int found_extent; int del_item; + btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1); path = btrfs_alloc_path(); path->reada = -1; BUG_ON(!path); @@ -511,47 +514,27 @@ error: return ret; } -/* - * taken from block_truncate_page, but does cow as it zeros out - * any bytes left in the last page in the file. - */ -static int btrfs_truncate_page(struct address_space *mapping, loff_t from) +static int btrfs_cow_one_page(struct btrfs_trans_handle *trans, + struct inode *inode, struct page *page, + size_t zero_start) { - struct inode *inode = mapping->host; - unsigned blocksize = 1 << inode->i_blkbits; - pgoff_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); - struct page *page; char *kaddr; int ret = 0; struct btrfs_root *root = BTRFS_I(inode)->root; u64 alloc_hint = 0; + u64 page_start = page->index << PAGE_CACHE_SHIFT; struct btrfs_key ins; - struct btrfs_trans_handle *trans; - if ((offset & (blocksize - 1)) == 0) - goto out; - - ret = -ENOMEM; - page = grab_cache_page(mapping, index); - if (!page) - goto out; - - if (!PageUptodate(page)) { - ret = btrfs_readpage(NULL, page); - lock_page(page); - if (!PageUptodate(page)) { - ret = -EIO; - goto out; - } + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); } - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); ret = btrfs_drop_extents(trans, root, inode, - page->index << PAGE_CACHE_SHIFT, - (page->index + 1) << PAGE_CACHE_SHIFT, + page_start, page_start + PAGE_CACHE_SIZE, &alloc_hint); if (ret) goto out; @@ -560,22 +543,70 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if (ret) goto out; ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - page->index << PAGE_CACHE_SHIFT, - ins.objectid, 1, 1); + page_start, ins.objectid, 1, 1); if (ret) goto out; SetPageChecked(page); kaddr = kmap(page); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - ret = btrfs_csum_file_block(trans, root, inode->i_ino, - page->index << PAGE_CACHE_SHIFT, - kaddr, PAGE_CACHE_SIZE); + if (zero_start != PAGE_CACHE_SIZE) { + memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); + flush_dcache_page(page); + } kunmap(page); + +out: + return ret; +} + +/* + * taken from block_truncate_page, but does cow as it zeros out + * any bytes left in the last page in the file. + */ +static int btrfs_truncate_page(struct address_space *mapping, loff_t from) +{ + struct inode *inode = mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + unsigned blocksize = 1 << inode->i_blkbits; + pgoff_t index = from >> PAGE_CACHE_SHIFT; + unsigned offset = from & (PAGE_CACHE_SIZE-1); + struct page *page; + int ret = 0; + u64 page_start; + + if ((offset & (blocksize - 1)) == 0) + goto out; + + ret = -ENOMEM; + page = grab_cache_page(mapping, index); + if (!page) + goto out; + if (!PageUptodate(page)) { + ret = btrfs_readpage(NULL, page); + lock_page(page); + if (!PageUptodate(page)) { + ret = -EIO; + goto out; + } + } + page_start = page->index << PAGE_CACHE_SHIFT; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_cow_one_page(trans, inode, page, offset); + if (!ret) { + char *kaddr = kmap(page); + ret = btrfs_csum_file_block(trans, root, inode->i_ino, + page_start, kaddr, PAGE_CACHE_SIZE); + kunmap(page); + } + set_extent_dirty(&BTRFS_I(inode)->extent_tree, + page_start, page_start + PAGE_CACHE_SIZE - 1, + GFP_NOFS); + set_page_dirty(page); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - set_page_dirty(page); unlock_page(page); page_cache_release(page); out: @@ -1095,6 +1126,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -1238,6 +1271,182 @@ out_unlock: return err; } +struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, + size_t page_offset, u64 start, u64 end, + int create) +{ + int ret; + int err = 0; + u64 blocknr; + u64 extent_start = 0; + u64 extent_end = 0; + u64 objectid = inode->i_ino; + u32 found_type; + int failed_insert = 0; + struct btrfs_path *path; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_file_extent_item *item; + struct btrfs_leaf *leaf; + struct btrfs_disk_key *found_key; + struct extent_map *em = NULL; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct btrfs_trans_handle *trans = NULL; + + path = btrfs_alloc_path(); + BUG_ON(!path); + mutex_lock(&root->fs_info->fs_mutex); + +again: + em = lookup_extent_mapping(em_tree, start, end); + if (em) { + goto out; + } + if (!em) { + em = alloc_extent_map(GFP_NOFS); + if (!em) { + err = -ENOMEM; + goto out; + } + em->start = 0; + em->end = 0; + } + em->bdev = inode->i_sb->s_bdev; + ret = btrfs_lookup_file_extent(NULL, root, path, + objectid, start, 0); + if (ret < 0) { + err = ret; + goto out; + } + + if (ret != 0) { + if (path->slots[0] == 0) + goto not_found; + path->slots[0]--; + } + + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + struct btrfs_file_extent_item); + leaf = btrfs_buffer_leaf(path->nodes[0]); + blocknr = btrfs_file_extent_disk_blocknr(item); + blocknr += btrfs_file_extent_offset(item); + + /* are we inside the extent that was found? */ + found_key = &leaf->items[path->slots[0]].key; + found_type = btrfs_disk_key_type(found_key); + if (btrfs_disk_key_objectid(found_key) != objectid || + found_type != BTRFS_EXTENT_DATA_KEY) { + goto not_found; + } + + found_type = btrfs_file_extent_type(item); + extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = extent_start + + (btrfs_file_extent_num_blocks(item) << inode->i_blkbits); + err = 0; + if (start < extent_start || start > extent_end) { + em->start = start; + if (start < extent_start) { + em->end = extent_end - 1; + } else { + em->end = end; + } + goto not_found_em; + } + if (btrfs_file_extent_disk_blocknr(item) == 0) { + em->start = extent_start; + em->end = extent_end - 1; + em->block_start = 0; + em->block_end = 0; + goto insert; + } + em->block_start = blocknr << inode->i_blkbits; + em->block_end = em->block_start + + (btrfs_file_extent_num_blocks(item) << + inode->i_blkbits) - 1; + em->start = extent_start; + em->end = extent_end - 1; + goto insert; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + char *ptr; + char *map; + u32 size; + + size = btrfs_file_extent_inline_len(leaf->items + + path->slots[0]); + extent_end = extent_start + size; + if (start < extent_start || start > extent_end) { + em->start = start; + if (start < extent_start) { + em->end = extent_end - 1; + } else { + em->end = end; + } + goto not_found_em; + } + em->block_start = EXTENT_MAP_INLINE; + em->block_end = EXTENT_MAP_INLINE; + em->start = extent_start; + em->end = extent_end - 1; + if (!page) { + goto insert; + } + ptr = btrfs_file_extent_inline_start(item); + map = kmap(page); + memcpy(map + page_offset, ptr, size); + flush_dcache_page(result->b_page); + kunmap(page); + set_extent_uptodate(em_tree, extent_start, + extent_end, GFP_NOFS); + goto insert; + } else { + printk("unkknown found_type %d\n", found_type); + WARN_ON(1); + } +not_found: + em->start = start; + em->end = end; +not_found_em: + em->block_start = 0; + em->block_end = 0; +insert: + btrfs_release_path(root, path); + if (em->start > start || em->end < start) { + printk("bad extent! %Lu %Lu start %Lu end %Lu\n", em->start, em->end, start, end); + WARN_ON(1); + err = -EIO; + goto out; + } + ret = add_extent_mapping(em_tree, em); + if (ret == -EEXIST) { + free_extent_map(em); + failed_insert++; + if (failed_insert > 5) { + printk("failing to insert %Lu %Lu\n", start, end); + err = -EIO; + goto out; + } + em = NULL; + goto again; + } + err = 0; +out: + btrfs_free_path(path); + if (trans) { + ret = btrfs_end_transaction(trans, root); + if (!err) + err = ret; + } + mutex_unlock(&root->fs_info->fs_mutex); + if (err) { + free_extent_map(em); + WARN_ON(1); + return ERR_PTR(err); + } + return em; +} + + /* * FIBMAP and others want to pass in a fake buffer head. They need to * use BTRFS_GET_BLOCK_NO_DIRECT to make sure we don't try to memcpy @@ -1398,46 +1607,22 @@ int btrfs_get_block(struct inode *inode, sector_t iblock, return err; } -static int btrfs_get_block_csum(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - int ret; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct page *page = result->b_page; - u64 offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(result); - struct btrfs_csum_item *item; - struct btrfs_path *path = NULL; - - mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_get_block_lock(inode, iblock, result, create); - if (ret) - goto out; - - path = btrfs_alloc_path(); - item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, offset, 0); - if (IS_ERR(item)) { - ret = PTR_ERR(item); - /* a csum that isn't present is a preallocated region. */ - if (ret == -ENOENT || ret == -EFBIG) - ret = 0; - result->b_private = NULL; - goto out; - } - memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE); -out: - if (path) - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - return ret; -} - static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { struct btrfs_root *root = BTRFS_I(inode)->root; - mutex_lock(&root->fs_info->fs_mutex); - btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT); - mutex_unlock(&root->fs_info->fs_mutex); + u64 start = iblock << inode->i_blkbits; + u64 end = start + root->blocksize -1; + struct extent_map *em; + + em = btrfs_get_extent(inode, NULL, 0, start, end, 0); + if (em && !IS_ERR(em) && em->block_start != EXTENT_MAP_INLINE && + em->block_start != 0) { + u64 offset; + offset = start - em->start; + start = (em->block_start + offset) >> inode->i_blkbits; + btrfs_map_bh_to_logical(root, result, start); + } return 0; } @@ -1449,442 +1634,50 @@ static sector_t btrfs_bmap(struct address_space *as, sector_t block) static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { - return block_prepare_write(page, from, to, btrfs_get_block); + return extent_prepare_write(&BTRFS_I(page->mapping->host)->extent_tree, + page->mapping->host, page, from, to, + btrfs_get_extent); } -static void buffer_io_error(struct buffer_head *bh) -{ - char b[BDEVNAME_SIZE]; - - printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n", - bdevname(bh->b_bdev, b), - (unsigned long long)bh->b_blocknr); -} - -/* - * I/O completion handler for block_read_full_page() - pages - * which come unlocked at the end of I/O. - */ -static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) -{ - unsigned long flags; - struct buffer_head *first; - struct buffer_head *tmp; - struct page *page; - int page_uptodate = 1; - struct inode *inode; - int ret; - - BUG_ON(!buffer_async_read(bh)); - - page = bh->b_page; - inode = page->mapping->host; - if (uptodate) { - void *kaddr; - struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; - if (bh->b_private) { - char csum[BTRFS_CRC32_SIZE]; - kaddr = kmap_atomic(page, KM_IRQ0); - ret = btrfs_csum_data(root, kaddr + bh_offset(bh), - bh->b_size, csum); - BUG_ON(ret); - if (memcmp(csum, &bh->b_private, BTRFS_CRC32_SIZE)) { - u64 offset; - offset = (page->index << PAGE_CACHE_SHIFT) + - bh_offset(bh); - printk("btrfs csum failed ino %lu off %llu\n", - page->mapping->host->i_ino, - (unsigned long long)offset); - memset(kaddr + bh_offset(bh), 1, bh->b_size); - flush_dcache_page(page); - } - kunmap_atomic(kaddr, KM_IRQ0); - } - set_buffer_uptodate(bh); - } else { - clear_buffer_uptodate(bh); - if (printk_ratelimit()) - buffer_io_error(bh); - SetPageError(page); - } - - /* - * Be _very_ careful from here on. Bad things can happen if - * two buffer heads end IO at almost the same time and both - * decide that the page is now completely done. - */ - first = page_buffers(page); - local_irq_save(flags); - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); - clear_buffer_async_read(bh); - unlock_buffer(bh); - tmp = bh; - do { - if (!buffer_uptodate(tmp)) - page_uptodate = 0; - if (buffer_async_read(tmp)) { - BUG_ON(!buffer_locked(tmp)); - goto still_busy; - } - tmp = tmp->b_this_page; - } while (tmp != bh); - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); - - /* - * If none of the buffers had errors and they are all - * uptodate then we can set the page uptodate. - */ - if (page_uptodate && !PageError(page)) - SetPageUptodate(page); - unlock_page(page); - return; - -still_busy: - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); - return; -} - -/* - * Generic "read page" function for block devices that have the normal - * get_block functionality. This is most of the block device filesystems. - * Reads the page asynchronously --- the unlock_buffer() and - * set/clear_buffer_uptodate() functions propagate buffer state into the - * page struct once IO has completed. - */ int btrfs_readpage(struct file *file, struct page *page) { - struct inode *inode = page->mapping->host; - sector_t iblock, lblock; - struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; - unsigned int blocksize; - int nr, i; - int fully_mapped = 1; - - BUG_ON(!PageLocked(page)); - blocksize = 1 << inode->i_blkbits; - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); - head = page_buffers(page); - - iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits; - bh = head; - nr = 0; - i = 0; - - do { - if (buffer_uptodate(bh)) - continue; - - if (!buffer_mapped(bh)) { - int err = 0; - - fully_mapped = 0; - if (iblock < lblock) { - WARN_ON(bh->b_size != blocksize); - err = btrfs_get_block_csum(inode, iblock, - bh, 0); - if (err) - SetPageError(page); - } - if (!buffer_mapped(bh)) { - void *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + i * blocksize, 0, blocksize); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - if (!err) - set_buffer_uptodate(bh); - continue; - } - /* - * get_block() might have updated the buffer - * synchronously - */ - if (buffer_uptodate(bh)) - continue; - } - arr[nr++] = bh; - } while (i++, iblock++, (bh = bh->b_this_page) != head); - - if (fully_mapped) - SetPageMappedToDisk(page); - - if (!nr) { - /* - * All buffers are uptodate - we can set the page uptodate - * as well. But not if get_block() returned an error. - */ - if (!PageError(page)) - SetPageUptodate(page); - unlock_page(page); - return 0; - } - - /* Stage two: lock the buffers */ - for (i = 0; i < nr; i++) { - bh = arr[i]; - lock_buffer(bh); - bh->b_end_io = btrfs_end_buffer_async_read; - set_buffer_async_read(bh); - } - - /* - * Stage 3: start the IO. Check for uptodateness - * inside the buffer lock in case another process reading - * the underlying blockdev brought it uptodate (the sct fix). - */ - for (i = 0; i < nr; i++) { - bh = arr[i]; - if (buffer_uptodate(bh)) - btrfs_end_buffer_async_read(bh, 1); - else - submit_bh(READ, bh); - } - return 0; + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_read_full_page(tree, page, btrfs_get_extent); } - -/* - * Aside from a tiny bit of packed file data handling, this is the - * same as the generic code. - * - * While block_write_full_page is writing back the dirty buffers under - * the page lock, whoever dirtied the buffers may decide to clean them - * again at any time. We handle that by only looking at the buffer - * state inside lock_buffer(). - * - * If block_write_full_page() is called for regular writeback - * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a - * locked buffer. This only can happen if someone has written the buffer - * directly, with submit_bh(). At the address_space level PageWriteback - * prevents this contention from occurring. - */ -static int __btrfs_write_full_page(struct inode *inode, struct page *page, - struct writeback_control *wbc) -{ - int err; - sector_t block; - sector_t last_block; - struct buffer_head *bh, *head; - const unsigned blocksize = 1 << inode->i_blkbits; - int nr_underway = 0; - struct btrfs_root *root = BTRFS_I(inode)->root; - - BUG_ON(!PageLocked(page)); - - last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; - - /* no csumming allowed when from PF_MEMALLOC */ - if (current->flags & PF_MEMALLOC) { - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; - } - - if (!page_has_buffers(page)) { - create_empty_buffers(page, blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - - /* - * Be very careful. We have no exclusion from __set_page_dirty_buffers - * here, and the (potentially unmapped) buffers may become dirty at - * any time. If a buffer becomes dirty here after we've inspected it - * then we just miss that fact, and the page stays dirty. - * - * Buffers outside i_size may be dirtied by __set_page_dirty_buffers; - * handle that here by just cleaning them. - */ - - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - head = page_buffers(page); - bh = head; - - /* - * Get all the dirty buffers mapped to disk addresses and - * handle any aliases from the underlying blockdev's mapping. - */ - do { - if (block > last_block) { - /* - * mapped buffers outside i_size will occur, because - * this page can be outside i_size when there is a - * truncate in progress. - */ - /* - * The buffer was zeroed by block_write_full_page() - */ - clear_buffer_dirty(bh); - set_buffer_uptodate(bh); - } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { - WARN_ON(bh->b_size != blocksize); - err = btrfs_get_block(inode, block, bh, 0); - if (err) { - goto recover; - } - if (buffer_new(bh)) { - /* blockdev mappings never come here */ - clear_buffer_new(bh); - } - } - bh = bh->b_this_page; - block++; - } while (bh != head); - - do { - if (!buffer_mapped(bh)) - continue; - /* - * If it's a fully non-blocking write attempt and we cannot - * lock the buffer then redirty the page. Note that this can - * potentially cause a busy-wait loop from pdflush and kswapd - * activity, but those code paths have their own higher-level - * throttling. - */ - if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { - lock_buffer(bh); - } else if (test_set_buffer_locked(bh)) { - redirty_page_for_writepage(wbc, page); - continue; - } - if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) { - struct btrfs_trans_handle *trans; - int ret; - u64 off = page->index << PAGE_CACHE_SHIFT; - char *kaddr; - - off += bh_offset(bh); - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - kaddr = kmap(page); - btrfs_csum_file_block(trans, root, inode->i_ino, - off, kaddr + bh_offset(bh), - bh->b_size); - kunmap(page); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); - mark_buffer_async_write(bh); - } else { - unlock_buffer(bh); - } - } while ((bh = bh->b_this_page) != head); - - /* - * The page and its buffers are protected by PageWriteback(), so we can - * drop the bh refcounts early. - */ - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - submit_bh(WRITE, bh); - nr_underway++; - } - bh = next; - } while (bh != head); - unlock_page(page); - - err = 0; -done: - if (nr_underway == 0) { - /* - * The page was marked dirty, but the buffers were - * clean. Someone wrote them back by hand with - * ll_rw_block/submit_bh. A rare case. - */ - int uptodate = 1; - do { - if (!buffer_uptodate(bh)) { - uptodate = 0; - break; - } - bh = bh->b_this_page; - } while (bh != head); - if (uptodate) - SetPageUptodate(page); - end_page_writeback(page); - } - return err; - -recover: - /* - * ENOSPC, or some other error. We may already have added some - * blocks to the file, so we need to write these out to avoid - * exposing stale data. - * The page is currently locked and not marked for writeback - */ - bh = head; - /* Recovery: lock and submit the mapped buffers */ - do { - if (buffer_mapped(bh) && buffer_dirty(bh)) { - lock_buffer(bh); - mark_buffer_async_write(bh); - } else { - /* - * The buffer may have been set dirty during - * attachment to a dirty page. - */ - clear_buffer_dirty(bh); - } - } while ((bh = bh->b_this_page) != head); - SetPageError(page); - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - clear_buffer_dirty(bh); - submit_bh(WRITE, bh); - nr_underway++; - } - bh = next; - } while (bh != head); - unlock_page(page); - goto done; -} - static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { - struct inode * const inode = page->mapping->host; - loff_t i_size = i_size_read(inode); - const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; - unsigned offset; - void *kaddr; + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_write_full_page(tree, page, btrfs_get_extent, wbc); +} - /* Is the page fully inside i_size? */ - if (page->index < end_index) - return __btrfs_write_full_page(inode, page, wbc); +static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) +{ + struct extent_map_tree *tree; + int ret; - /* Is the page fully outside i_size? (truncate in progress) */ - offset = i_size & (PAGE_CACHE_SIZE-1); - if (page->index >= end_index+1 || !offset) { - /* - * The page may have dirty, unmapped buffers. For example, - * they may have been added in ext3_writepage(). Make them - * freeable here, so the page does not leak. - */ - block_invalidatepage(page, 0); - unlock_page(page); - return 0; /* don't care */ + if (page->private != 1) { + WARN_ON(1); + return try_to_free_buffers(page); } + tree = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(tree, page); + if (ret == 1) { + ClearPagePrivate(page); + set_page_private(page, 0); + page_cache_release(page); + } + return ret; +} - /* - * The page straddles i_size. It must be zeroed out on each and every - * writepage invokation because it may be mmapped. "A file is mapped - * in multiples of the page size. For a file that is not a multiple of - * the page size, the remaining memory is zeroed when mapped, and - * writes to that region are not written out to the file." - */ - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - return __btrfs_write_full_page(inode, page, wbc); +static void btrfs_invalidatepage(struct page *page, unsigned long offset) +{ + struct extent_map_tree *tree; + + tree = &BTRFS_I(page->mapping->host)->extent_tree; + extent_invalidatepage(tree, page, offset); + btrfs_releasepage(page, GFP_NOFS); } /* @@ -1905,28 +1698,39 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; unsigned long end; loff_t size; int ret = -EINVAL; + u64 page_start; lock_page(page); wait_on_page_writeback(page); size = i_size_read(inode); + page_start = page->index << PAGE_CACHE_SHIFT; + if ((page->mapping != inode->i_mapping) || - ((page->index << PAGE_CACHE_SHIFT) > size)) { + (page_start > size)) { /* page got truncated out from underneath us */ goto out_unlock; } /* page is wholly or partially inside EOF */ - if (((page->index + 1) << PAGE_CACHE_SHIFT) > size) + if (page_start + PAGE_CACHE_SIZE > size) end = size & ~PAGE_CACHE_MASK; else end = PAGE_CACHE_SIZE; - ret = btrfs_prepare_write(NULL, page, 0, end); - if (!ret) - ret = btrfs_commit_write(NULL, page, 0, end); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_cow_one_page(trans, inode, page, end); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + set_extent_dirty(&BTRFS_I(inode)->extent_tree, + page_start, page_start + PAGE_CACHE_SIZE - 1, + GFP_NOFS); + set_page_dirty(page); out_unlock: unlock_page(page); @@ -1962,21 +1766,8 @@ static void btrfs_truncate(struct inode *inode) int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { - struct inode *inode = page->mapping->host; - struct buffer_head *bh; - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - - SetPageUptodate(page); - bh = page_buffers(page); - set_buffer_uptodate(bh); - if (buffer_mapped(bh) && bh->b_blocknr != 0) { - set_page_dirty(page); - } - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; + return extent_commit_write(&BTRFS_I(page->mapping->host)->extent_tree, + page->mapping->host, page, from, to); } static int create_subvol(struct btrfs_root *root, char *name, int namelen) @@ -2471,6 +2262,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -2553,6 +2346,9 @@ static struct address_space_operations btrfs_aops = { .prepare_write = btrfs_prepare_write, .commit_write = btrfs_commit_write, .bmap = btrfs_bmap, + .invalidatepage = btrfs_invalidatepage, + .releasepage = btrfs_releasepage, + .set_page_dirty = __set_page_dirty_nobuffers, }; static struct address_space_operations btrfs_symlink_aops = { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 74f3de47423c..7b7c32c73a2c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -185,6 +185,7 @@ static int __init init_btrfs_fs(void) err = btrfs_init_cachep(); if (err) return err; + extent_map_init(); return register_filesystem(&btrfs_fs_type); } @@ -192,6 +193,7 @@ static void __exit exit_btrfs_fs(void) { btrfs_exit_transaction_sys(); btrfs_destroy_cachep(); + extent_map_exit(); unregister_filesystem(&btrfs_fs_type); } From b888db2bd7b67f190b32934e6a86181f262ac3ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Aug 2007 16:49:44 -0400 Subject: [PATCH 0206/2985] Btrfs: Add delayed allocation to the extent based page tree code Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 9 --- fs/btrfs/extent_map.c | 147 +++++++++++++++++++++++++++++++---------- fs/btrfs/extent_map.h | 3 + fs/btrfs/file.c | 50 ++++++-------- fs/btrfs/inode.c | 141 ++++++++++++++++++++++----------------- fs/btrfs/super.c | 1 + 6 files changed, 216 insertions(+), 135 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4049aadbeda9..c31e84d42653 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -39,7 +39,6 @@ static int cache_block_group(struct btrfs_root *root, u64 i; u64 last = 0; u64 hole_size; - u64 limit; int found = 0; root = root->fs_info->extent_root; @@ -62,7 +61,6 @@ static int cache_block_group(struct btrfs_root *root, return ret; if (ret && path->slots[0] > 0) path->slots[0]--; - limit = block_group->key.objectid + block_group->key.offset; while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; @@ -982,7 +980,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; - u64 limit; WARN_ON(num_blocks < 1); ins->flags = 0; @@ -1049,12 +1046,6 @@ check_failed: l = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - if (start_found) - limit = last_block + - (block_group->key.offset >> 1); - else - limit = search_start + - (block_group->key.offset >> 1); ret = btrfs_next_leaf(root, path); if (ret == 0) continue; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index d378edf0964e..a9c7419615b9 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -30,9 +30,6 @@ struct tree_entry { #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) -static LIST_HEAD(all_states); -spinlock_t state_lock = SPIN_LOCK_UNLOCKED; - void __init extent_map_init(void) { extent_map_cache = kmem_cache_create("extent_map", @@ -49,15 +46,6 @@ void __init extent_map_init(void) void __exit extent_map_exit(void) { - while(!list_empty(&all_states)) { - struct extent_state *state; - struct list_head *cur = all_states.next; - state = list_entry(cur, struct extent_state, list); - printk("found leaked state %Lu %Lu state %d in_tree %d\n", - state->start, state->end, state->state, state->in_tree); - list_del(&state->list); - kfree(state); - } if (extent_map_cache) kmem_cache_destroy(extent_map_cache); if (extent_state_cache) @@ -69,6 +57,7 @@ void extent_map_tree_init(struct extent_map_tree *tree, { tree->map.rb_node = NULL; tree->state.rb_node = NULL; + tree->fill_delalloc = NULL; rwlock_init(&tree->lock); tree->mapping = mapping; } @@ -106,9 +95,6 @@ struct extent_state *alloc_extent_state(gfp_t mask) state->in_tree = 0; atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); - spin_lock_irq(&state_lock); - list_add(&state->list, &all_states); - spin_unlock_irq(&state_lock); return state; } EXPORT_SYMBOL(alloc_extent_state); @@ -117,9 +103,6 @@ void free_extent_state(struct extent_state *state) { if (atomic_dec_and_test(&state->refs)) { WARN_ON(state->in_tree); - spin_lock_irq(&state_lock); - list_del_init(&state->list); - spin_unlock_irq(&state_lock); kmem_cache_free(extent_state_cache, state); } } @@ -369,7 +352,7 @@ static int insert_state(struct extent_map_tree *tree, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); -printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); free_extent_state(state); return -EEXIST; } @@ -408,7 +391,7 @@ static int split_state(struct extent_map_tree *tree, struct extent_state *orig, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); -printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); free_extent_state(prealloc); return -EEXIST; } @@ -792,10 +775,20 @@ int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_dirty); +int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, + EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_delalloc); + int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) { - return clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, mask); + return clear_extent_bit(tree, start, end, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); } EXPORT_SYMBOL(clear_extent_dirty); @@ -922,6 +915,62 @@ int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(set_range_writeback); +u64 find_lock_delalloc_range(struct extent_map_tree *tree, + u64 start, u64 lock_start, u64 *end, u64 max_bytes) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = start; + u64 found = 0; + u64 total_bytes = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ +search_again: + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != cur_start) { + goto out; + } + if (!(state->state & EXTENT_DELALLOC)) { + goto out; + } + if (state->start >= lock_start) { + if (state->state & EXTENT_LOCKED) { + DEFINE_WAIT(wait); + atomic_inc(&state->refs); + write_unlock_irq(&tree->lock); + schedule(); + write_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + free_extent_state(state); + goto search_again; + } + state->state |= EXTENT_LOCKED; + } + found++; + *end = state->end; + cur_start = state->end + 1; + node = rb_next(node); + if (!node) + break; + total_bytes = state->end - state->start + 1; + if (total_bytes >= max_bytes) + break; + } +out: + write_unlock_irq(&tree->lock); + return found; +} + /* * helper function to lock both pages and extents in the tree. * pages must be locked first. @@ -1285,6 +1334,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } @@ -1384,7 +1434,10 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, size_t blocksize; loff_t i_size = i_size_read(inode); unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; + u64 nr_delalloc; + u64 delalloc_end; + WARN_ON(!PageLocked(page)); if (page->index > end_index) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); unlock_page(page); @@ -1400,11 +1453,34 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } - end = page_end; lock_extent(tree, start, page_end, GFP_NOFS); + nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1, + &delalloc_end, + 128 * 1024 * 1024); + if (nr_delalloc) { + tree->fill_delalloc(inode, start, delalloc_end); + if (delalloc_end >= page_end + 1) { + clear_extent_bit(tree, page_end + 1, delalloc_end, + EXTENT_LOCKED | EXTENT_DELALLOC, + 1, 0, GFP_NOFS); + } + clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC, + 0, 0, GFP_NOFS); + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after clear extent_bit\n"); + } + } else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after find_delalloc_range returns 0\n"); + } + + end = page_end; + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after lock_extent\n"); + } if (last_byte <= start) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); @@ -1419,7 +1495,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, clear_extent_dirty(tree, cur, page_end, GFP_NOFS); break; } - em = get_extent(inode, page, page_offset, cur, end, 1); + em = get_extent(inode, page, page_offset, cur, end, 0); if (IS_ERR(em) || !em) { SetPageError(page); break; @@ -1507,6 +1583,7 @@ int extent_commit_write(struct extent_map_tree *tree, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } @@ -1543,6 +1620,7 @@ int extent_prepare_write(struct extent_map_tree *tree, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } block_start = (page_start + from) & ~((u64)blocksize - 1); @@ -1628,29 +1706,28 @@ int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) u64 start = page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; u64 orig_start = start; + int ret = 1; while (start <= end) { em = lookup_extent_mapping(tree, start, end); if (!em || IS_ERR(em)) break; - if (test_range_bit(tree, em->start, em->end, - EXTENT_LOCKED, 0)) { + if (!test_range_bit(tree, em->start, em->end, + EXTENT_LOCKED, 0)) { + remove_extent_mapping(tree, em); + /* once for the rb tree */ free_extent_map(em); - start = em->end + 1; -printk("range still locked %Lu %Lu\n", em->start, em->end); - break; } - remove_extent_mapping(tree, em); start = em->end + 1; - /* once for the rb tree */ - free_extent_map(em); /* once for us */ free_extent_map(em); } - WARN_ON(test_range_bit(tree, orig_start, end, EXTENT_WRITEBACK, 0)); - clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, - 1, 1, GFP_NOFS); - return 1; + if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) + ret = 0; + else + clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, + 1, 1, GFP_NOFS); + return ret; } EXPORT_SYMBOL(try_release_extent_mapping); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 108944aab4bd..e91a2e9619ee 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -11,6 +11,7 @@ struct extent_map_tree { struct rb_root state; struct address_space *mapping; rwlock_t lock; + int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); }; /* note, this must start with the same fields as fs/extent_map.c:tree_entry */ @@ -74,6 +75,8 @@ int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); int extent_invalidatepage(struct extent_map_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_map_tree *tree, struct page *page, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 71a481894ab6..d3d39e4a2797 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -143,7 +143,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct inode *inode = file->f_path.dentry->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct btrfs_key ins; u64 hint_block; u64 num_blocks; u64 start_pos; @@ -162,6 +161,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_blkbits; end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; + lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -179,16 +179,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, /* FIXME...EIEIO, ENOSPC and more */ - /* step one, delete the existing extents in this range */ - /* FIXME blocksize != pagesize */ - if (start_pos < inode->i_size) { - err = btrfs_drop_extents(trans, root, inode, - start_pos, (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); - if (err) - goto failed; - } - /* insert any holes we need to create */ if (inode->i_size < start_pos) { u64 last_pos_in_file; @@ -213,29 +203,28 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 0, hint_block, (u64)-1, - &ins, 1); - BUG_ON(err); - err = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset, - ins.offset); - BUG_ON(err); - em->start = start_pos; - em->end = end_of_last_block; - em->block_start = ins.objectid << inode->i_blkbits; - em->block_end = em->block_start + - (ins.offset << inode->i_blkbits) - 1; - set_extent_dirty(em_tree, start_pos, end_of_last_block, - GFP_NOFS); - err = add_extent_mapping(em_tree, em); + u64 last_end; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); - __set_page_dirty_nobuffers(p); + set_page_dirty(p); } + last_end = pages[num_pages -1]->index << PAGE_CACHE_SHIFT; + last_end += PAGE_CACHE_SIZE - 1; + set_extent_delalloc(em_tree, start_pos, end_of_last_block, + GFP_NOFS); } else { struct page *p = pages[0]; + /* step one, delete the existing extents in this range */ + /* FIXME blocksize != pagesize */ + if (start_pos < inode->i_size) { + err = btrfs_drop_extents(trans, root, inode, start_pos, + (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + if (err) + goto failed; + } + err = insert_inline_extent(trans, root, inode, start_pos, end_pos - start_pos, p, 0); BUG_ON(err); @@ -253,6 +242,7 @@ failed: err = btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); + unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); free_extent_map(em); return err; } @@ -531,7 +521,7 @@ static int prepare_pages(struct btrfs_root *root, u64 num_blocks; u64 start_pos; - start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); + start_pos = pos & ~((u64)root->blocksize - 1); num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> inode->i_blkbits; @@ -548,8 +538,10 @@ static int prepare_pages(struct btrfs_root *root, if (!PagePrivate(pages[i])) { SetPagePrivate(pages[i]); set_page_private(pages[i], 1); + WARN_ON(!pages[i]->mapping->a_ops->invalidatepage); page_cache_get(pages[i]); } + WARN_ON(!PageLocked(pages[i])); } return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fd4cb65fcf1f..dc181089aa74 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -70,6 +70,40 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; +static int run_delalloc_range(struct inode *inode, u64 start, u64 end) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + struct btrfs_key ins; + u64 alloc_hint = 0; + u64 num_blocks; + int ret; + u64 blocksize = 1 << inode->i_blkbits; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + BUG_ON(!trans); + num_blocks = (end - start + blocksize) & ~(blocksize - 1); + ret = btrfs_drop_extents(trans, root, inode, + start, start + num_blocks, &alloc_hint); + num_blocks = num_blocks >> inode->i_blkbits; + ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_blocks, 0, + alloc_hint, (u64)-1, &ins, 1); + if (ret) { + WARN_ON(1); + goto out; + } + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + start, ins.objectid, ins.offset, + ins.offset); +out: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + + void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -121,8 +155,7 @@ void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, - inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; break; @@ -450,11 +483,17 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(ret); } if (item_end < inode->i_size) { - if (found_type) { - btrfs_set_key_type(&key, found_type - 1); - continue; + if (found_type == BTRFS_DIR_ITEM_KEY) { + found_type = BTRFS_INODE_ITEM_KEY; + } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { + found_type = BTRFS_CSUM_ITEM_KEY; + } else if (found_type) { + found_type--; + } else { + break; } - break; + btrfs_set_key_type(&key, found_type - 1); + continue; } if (btrfs_disk_key_offset(found_key) >= inode->i_size) del_item = 1; @@ -514,47 +553,34 @@ error: return ret; } -static int btrfs_cow_one_page(struct btrfs_trans_handle *trans, - struct inode *inode, struct page *page, +static int btrfs_cow_one_page(struct inode *inode, struct page *page, size_t zero_start) { char *kaddr; int ret = 0; - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 alloc_hint = 0; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 page_start = page->index << PAGE_CACHE_SHIFT; - struct btrfs_key ins; + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } - btrfs_set_trans_block_group(trans, inode); - - ret = btrfs_drop_extents(trans, root, inode, - page_start, page_start + PAGE_CACHE_SIZE, - &alloc_hint); - if (ret) - goto out; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0, - alloc_hint, (u64)-1, &ins, 1); - if (ret) - goto out; - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - page_start, ins.objectid, 1, 1); - if (ret) - goto out; - SetPageChecked(page); - kaddr = kmap(page); + lock_extent(em_tree, page_start, page_end, GFP_NOFS); + set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start, + page_end, GFP_NOFS); if (zero_start != PAGE_CACHE_SIZE) { + kaddr = kmap(page); memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); flush_dcache_page(page); + kunmap(page); } - kunmap(page); + set_page_dirty(page); + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); -out: return ret; } @@ -565,8 +591,6 @@ out: static int btrfs_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; unsigned blocksize = 1 << inode->i_blkbits; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); @@ -591,21 +615,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) } page_start = page->index << PAGE_CACHE_SHIFT; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_cow_one_page(trans, inode, page, offset); - if (!ret) { - char *kaddr = kmap(page); - ret = btrfs_csum_file_block(trans, root, inode->i_ino, - page_start, kaddr, PAGE_CACHE_SIZE); - kunmap(page); - } - set_extent_dirty(&BTRFS_I(inode)->extent_tree, - page_start, page_start + PAGE_CACHE_SIZE - 1, - GFP_NOFS); - set_page_dirty(page); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); + ret = btrfs_cow_one_page(inode, page, offset); unlock_page(page); page_cache_release(page); @@ -757,6 +767,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) struct btrfs_iget_args *args = p; inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); return 0; } @@ -968,7 +980,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->root = root; + if (mode & S_IFDIR) owner = 0; else @@ -1128,6 +1143,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -1344,9 +1360,11 @@ again: extent_end = extent_start + (btrfs_file_extent_num_blocks(item) << inode->i_blkbits); err = 0; - if (start < extent_start || start > extent_end) { + if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { + if (end < extent_start) + goto not_found; em->end = extent_end - 1; } else { em->end = end; @@ -1375,9 +1393,11 @@ again: size = btrfs_file_extent_inline_len(leaf->items + path->slots[0]); extent_end = extent_start + size; - if (start < extent_start || start > extent_end) { + if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { + if (end < extent_start) + goto not_found; em->end = extent_end - 1; } else { em->end = end; @@ -1412,8 +1432,7 @@ not_found_em: insert: btrfs_release_path(root, path); if (em->start > start || em->end < start) { - printk("bad extent! %Lu %Lu start %Lu end %Lu\n", em->start, em->end, start, end); - WARN_ON(1); + printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end); err = -EIO; goto out; } @@ -1648,6 +1667,13 @@ int btrfs_readpage(struct file *file, struct page *page) static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { struct extent_map_tree *tree; + + + if (current->flags & PF_MEMALLOC) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } tree = &BTRFS_I(page->mapping->host)->extent_tree; return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } @@ -1698,8 +1724,6 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; unsigned long end; loff_t size; int ret = -EINVAL; @@ -1722,15 +1746,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) else end = PAGE_CACHE_SIZE; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_cow_one_page(trans, inode, page, end); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - set_extent_dirty(&BTRFS_I(inode)->extent_tree, - page_start, page_start + PAGE_CACHE_SIZE - 1, - GFP_NOFS); - set_page_dirty(page); + ret = btrfs_cow_one_page(inode, page, end); out_unlock: unlock_page(page); @@ -2264,6 +2280,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7b7c32c73a2c..7da95538745f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -83,6 +83,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) bi->location.offset = 0; bi->location.flags = 0; bi->root = tree_root; + btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); if (!inode) { From 58176a9604c5db1784d2c979aea472b3be40b6f0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 29 Aug 2007 15:47:34 -0400 Subject: [PATCH 0207/2985] Btrfs: Add per-root block accounting and sysfs entries Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.h | 54 ++++++++++ fs/btrfs/disk-io.c | 31 +++++- fs/btrfs/disk-io.h | 3 +- fs/btrfs/extent-tree.c | 17 ++- fs/btrfs/inode.c | 13 ++- fs/btrfs/super.c | 14 +++ fs/btrfs/sysfs.c | 236 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.c | 34 +++++- 9 files changed, 393 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index ea7a22870350..ee505c87ecf3 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,7 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o + extent_map.o sysfs.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 947f061ed118..ffeee546a0d7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "bit-radix.h" struct btrfs_trans_handle; @@ -313,6 +314,8 @@ struct btrfs_fs_info { struct list_head trans_list; struct list_head dead_roots; struct delayed_work trans_work; + struct kobject super_kobj; + struct completion kobj_unregister; int do_barriers; int closing; }; @@ -328,6 +331,8 @@ struct btrfs_root { struct btrfs_key root_key; struct btrfs_fs_info *fs_info; struct inode *inode; + struct kobject root_kobj; + struct completion kobj_unregister; u64 objectid; u64 last_trans; u32 blocksize; @@ -338,6 +343,7 @@ struct btrfs_root { struct btrfs_key defrag_progress; int defrag_running; int defrag_level; + char *name; }; /* the lower bits in the key flags defines the item type */ @@ -814,6 +820,28 @@ static inline void btrfs_set_root_flags(struct btrfs_root_item *item, u32 val) item->flags = cpu_to_le32(val); } +static inline void btrfs_set_root_blocks_used(struct btrfs_root_item *item, + u64 val) +{ + item->blocks_used = cpu_to_le64(val); +} + +static inline u64 btrfs_root_blocks_used(struct btrfs_root_item *item) +{ + return le64_to_cpu(item->blocks_used); +} + +static inline void btrfs_set_root_block_limit(struct btrfs_root_item *item, + u64 val) +{ + item->block_limit = cpu_to_le64(val); +} + +static inline u64 btrfs_root_block_limit(struct btrfs_root_item *item) +{ + return le64_to_cpu(item->block_limit); +} + static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s) { return le64_to_cpu(s->blocknr); @@ -1014,6 +1042,23 @@ static inline void btrfs_memmove(struct btrfs_root *root, memmove(dst, src, nr); } +static inline int btrfs_set_root_name(struct btrfs_root *root, + const char *name, int len) +{ + /* if we already have a name just free it */ + if (root->name) + kfree(root->name); + + root->name = kmalloc(len+1, GFP_KERNEL); + if (!root->name) + return -ENOMEM; + + memcpy(root->name, name, len); + root->name[len] ='\0'; + + return 0; +} + /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -1191,4 +1236,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only); + +/* sysfs.c */ +int btrfs_init_sysfs(void); +void btrfs_exit_sysfs(void); +int btrfs_sysfs_add_super(struct btrfs_fs_info *fs); +int btrfs_sysfs_add_root(struct btrfs_root *root); +void btrfs_sysfs_del_root(struct btrfs_root *root); +void btrfs_sysfs_del_super(struct btrfs_fs_info *root); + #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b2f79878d51a..c25ef0a68f18 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -294,9 +294,12 @@ static int __setup_root(int blocksize, root->last_trans = 0; root->highest_inode = 0; root->last_inode_alloc = 0; + root->name = NULL; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + memset(&root->root_kobj, 0, sizeof(root->root_kobj)); + init_completion(&root->kobj_unregister); root->defrag_running = 0; root->defrag_level = 0; root->root_key.objectid = objectid; @@ -384,7 +387,8 @@ insert: } struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) + struct btrfs_key *location, + const char *name, int namelen) { struct btrfs_root *root; int ret; @@ -405,6 +409,22 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } + + ret = btrfs_set_root_name(root, name, namelen); + if (ret) { + brelse(root->node); + kfree(root); + return ERR_PTR(ret); + } + + ret = btrfs_sysfs_add_root(root); + if (ret) { + brelse(root->node); + kfree(root->name); + kfree(root); + return ERR_PTR(ret); + } + return root; } @@ -433,6 +453,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); + memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); + init_completion(&fs_info->kobj_unregister); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->last_trans_committed = 0; @@ -500,8 +522,10 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->generation = btrfs_super_generation(disk_super) + 1; ret = btrfs_find_dead_roots(tree_root); - if (ret) + if (ret) { + mutex_unlock(&fs_info->fs_mutex); goto fail_tree_root; + } mutex_unlock(&fs_info->fs_mutex); return tree_root; @@ -553,12 +577,15 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); + btrfs_sysfs_del_root(root); if (root->inode) iput(root->inode); if (root->node) brelse(root->node); if (root->commit_root) brelse(root->commit_root); + if (root->name) + kfree(root->name); kfree(root); return 0; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 5261733b8735..da6bb72750f2 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -66,7 +66,8 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location); + struct btrfs_key *location, + const char *name, int namelen); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_key *location); u64 bh_blocknr(struct buffer_head *bh); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c31e84d42653..ff3f7c2be605 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -858,16 +858,23 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_extent_refs(ei, refs); btrfs_mark_buffer_dirty(path->nodes[0]); if (refs == 0) { - u64 super_blocks_used; + u64 super_blocks_used, root_blocks_used; if (pin) { ret = pin_down_block(root, blocknr, 0); BUG_ON(ret); } + /* block accounting for super block */ super_blocks_used = btrfs_super_blocks_used(&info->super_copy); btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used - num_blocks); + + /* block accounting for root item */ + root_blocks_used = btrfs_root_blocks_used(&root->root_item); + btrfs_set_root_blocks_used(&root->root_item, + root_blocks_used - num_blocks); + ret = btrfs_del_item(trans, extent_root, path); if (ret) { return ret; @@ -1175,7 +1182,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, { int ret; int pending_ret; - u64 super_blocks_used; + u64 super_blocks_used, root_blocks_used; u64 search_start = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -1193,10 +1200,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (ret) return ret; + /* block accounting for super block */ super_blocks_used = btrfs_super_blocks_used(&info->super_copy); btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + num_blocks); + /* block accounting for root item */ + root_blocks_used = btrfs_root_blocks_used(&root->root_item); + btrfs_set_root_blocks_used(&root->root_item, root_blocks_used + + num_blocks); + if (root == extent_root) { BUG_ON(num_blocks != 1); set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dc181089aa74..2e3918e6049e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -733,7 +733,8 @@ out: */ static int fixup_tree_root_location(struct btrfs_root *root, struct btrfs_key *location, - struct btrfs_root **sub_root) + struct btrfs_root **sub_root, + struct dentry *dentry) { struct btrfs_path *path; struct btrfs_root_item *ri; @@ -747,7 +748,9 @@ static int fixup_tree_root_location(struct btrfs_root *root, BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); - *sub_root = btrfs_read_fs_root(root->fs_info, location); + *sub_root = btrfs_read_fs_root(root->fs_info, location, + dentry->d_name.name, + dentry->d_name.len); if (IS_ERR(*sub_root)) return PTR_ERR(*sub_root); @@ -812,7 +815,8 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(ret); inode = NULL; if (location.objectid) { - ret = fixup_tree_root_location(root, &location, &sub_root); + ret = fixup_tree_root_location(root, &location, &sub_root, + dentry); if (ret < 0) return ERR_PTR(ret); if (ret > 0) @@ -1829,6 +1833,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); + btrfs_set_root_blocks_used(&root_item, 0); memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); root_item.drop_level = 0; brelse(subvol); @@ -1865,7 +1870,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail_commit; - new_root = btrfs_read_fs_root(root->fs_info, &key); + new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen); BUG_ON(!new_root); trans = btrfs_start_transaction(new_root, 1); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7da95538745f..a68101ae1a3b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -45,12 +45,14 @@ static struct super_operations btrfs_super_ops; static void btrfs_put_super (struct super_block * sb) { struct btrfs_root *root = btrfs_sb(sb); + struct btrfs_fs_info *fs = root->fs_info; int ret; ret = close_ctree(root); if (ret) { printk("close ctree returns %d\n", ret); } + btrfs_sysfs_del_super(fs); sb->s_fs_info = NULL; } @@ -101,6 +103,12 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) err = -ENOMEM; goto fail_close; } + + /* this does the super kobj at the same time */ + err = btrfs_sysfs_add_super(tree_root->fs_info); + if (err) + goto fail_close; + sb->s_root = root_dentry; btrfs_transaction_queue_work(tree_root, HZ * 30); return 0; @@ -182,6 +190,11 @@ static struct super_operations btrfs_super_ops = { static int __init init_btrfs_fs(void) { int err; + + err = btrfs_init_sysfs(); + if (err) + return err; + btrfs_init_transaction_sys(); err = btrfs_init_cachep(); if (err) @@ -196,6 +209,7 @@ static void __exit exit_btrfs_fs(void) btrfs_destroy_cachep(); extent_map_exit(); unregister_filesystem(&btrfs_fs_type); + btrfs_exit_sysfs(); } module_init(init_btrfs_fs) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index db8917e5b256..2058783373eb 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -16,6 +16,242 @@ * Boston, MA 021110-1307, USA. */ +#include +#include +#include +#include +#include +#include +#include + #include "ctree.h" #include "disk-io.h" #include "transaction.h" + +static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_root_blocks_used(&root->root_item)); +} + +static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_root_block_limit(&root->root_item)); +} + +static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_super_blocks_used(fs->disk_super)); +} + +static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_super_total_blocks(fs->disk_super)); +} + +static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_super_blocksize(fs->disk_super)); +} + +/* this is for root attrs (subvols/snapshots) */ +struct btrfs_root_attr { + struct attribute attr; + ssize_t (*show)(struct btrfs_root *, char *); + ssize_t (*store)(struct btrfs_root *, const char *, size_t); +}; + +#define ROOT_ATTR(name, mode, show, store) \ +static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, show, store) + +ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL); +ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL); + +static struct attribute *btrfs_root_attrs[] = { + &btrfs_root_attr_blocks_used.attr, + &btrfs_root_attr_block_limit.attr, + NULL, +}; + +/* this is for super attrs (actual full fs) */ +struct btrfs_super_attr { + struct attribute attr; + ssize_t (*show)(struct btrfs_fs_info *, char *); + ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t); +}; + +#define SUPER_ATTR(name, mode, show, store) \ +static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, show, store) + +SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL); +SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL); +SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL); + +static struct attribute *btrfs_super_attrs[] = { + &btrfs_super_attr_blocks_used.attr, + &btrfs_super_attr_total_blocks.attr, + &btrfs_super_attr_blocksize.attr, + NULL, +}; + +static ssize_t btrfs_super_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, + super_kobj); + struct btrfs_super_attr *a = container_of(attr, + struct btrfs_super_attr, + attr); + + return a->show ? a->show(fs, buf) : 0; +} + +static ssize_t btrfs_super_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, + super_kobj); + struct btrfs_super_attr *a = container_of(attr, + struct btrfs_super_attr, + attr); + + return a->store ? a->store(fs, buf, len) : 0; +} + +static ssize_t btrfs_root_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct btrfs_root *root = container_of(kobj, struct btrfs_root, + root_kobj); + struct btrfs_root_attr *a = container_of(attr, + struct btrfs_root_attr, + attr); + + return a->show ? a->show(root, buf) : 0; +} + +static ssize_t btrfs_root_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct btrfs_root *root = container_of(kobj, struct btrfs_root, + root_kobj); + struct btrfs_root_attr *a = container_of(attr, + struct btrfs_root_attr, + attr); + return a->store ? a->store(root, buf, len) : 0; +} + +static void btrfs_super_release(struct kobject *kobj) +{ + struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, + super_kobj); + complete(&fs->kobj_unregister); +} + +static void btrfs_root_release(struct kobject *kobj) +{ + struct btrfs_root *root = container_of(kobj, struct btrfs_root, + root_kobj); + complete(&root->kobj_unregister); +} + +static struct sysfs_ops btrfs_super_attr_ops = { + .show = btrfs_super_attr_show, + .store = btrfs_super_attr_store, +}; + +static struct sysfs_ops btrfs_root_attr_ops = { + .show = btrfs_root_attr_show, + .store = btrfs_root_attr_store, +}; + +static struct kobj_type btrfs_root_ktype = { + .default_attrs = btrfs_root_attrs, + .sysfs_ops = &btrfs_root_attr_ops, + .release = btrfs_root_release, +}; + +static struct kobj_type btrfs_super_ktype = { + .default_attrs = btrfs_super_attrs, + .sysfs_ops = &btrfs_super_attr_ops, + .release = btrfs_super_release, +}; + +static struct kset btrfs_kset = { + .kobj = {.name = "btrfs"}, +}; + +int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) +{ + int error; + + fs->super_kobj.kset = &btrfs_kset; + fs->super_kobj.ktype = &btrfs_super_ktype; + + error = kobject_set_name(&fs->super_kobj, "%s", + fs->sb->s_id); + if (error) + goto fail; + + error = kobject_register(&fs->super_kobj); + if (error) + goto fail; + + return 0; + +fail: + printk(KERN_ERR "btrfs: sysfs creation for super failed\n"); + return error; +} + +int btrfs_sysfs_add_root(struct btrfs_root *root) +{ + int error; + + root->root_kobj.ktype = &btrfs_root_ktype; + root->root_kobj.parent = &root->fs_info->super_kobj; + + error = kobject_set_name(&root->root_kobj, "%s", root->name); + if (error) { + goto fail; + } + + error = kobject_register(&root->root_kobj); + if (error) + goto fail; + + return 0; + +fail: + printk(KERN_ERR "btrfs: sysfs creation for root failed\n"); + return error; +} + +void btrfs_sysfs_del_root(struct btrfs_root *root) +{ + kobject_unregister(&root->root_kobj); + wait_for_completion(&root->kobj_unregister); +} + +void btrfs_sysfs_del_super(struct btrfs_fs_info *fs) +{ + kobject_unregister(&fs->super_kobj); + wait_for_completion(&fs->kobj_unregister); +} + +int btrfs_init_sysfs() +{ + kobj_set_kset_s(&btrfs_kset, fs_subsys); + return kset_register(&btrfs_kset); +} + +void btrfs_exit_sysfs() +{ + kset_unregister(&btrfs_kset); +} diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 18abea802794..2b15daa3a9f2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -236,6 +236,7 @@ static int wait_for_commit(struct btrfs_root *root, struct dirty_root { struct list_head list; struct btrfs_root *root; + struct btrfs_root *latest_root; }; int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list) @@ -278,6 +279,15 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, btrfs_root_blocknr(&root->root_item)); brelse(root->commit_root); root->commit_root = NULL; + + /* make sure to update the root on disk + * so we get any updates to the block used + * counts + */ + err = btrfs_update_root(trans, + root->fs_info->tree_root, + &root->root_key, + &root->root_item); continue; } dirty = kmalloc(sizeof(*dirty), GFP_NOFS); @@ -291,6 +301,7 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, memcpy(dirty->root, root, sizeof(*root)); dirty->root->node = root->commit_root; + dirty->latest_root = root; root->commit_root = NULL; root->root_key.offset = root->fs_info->generation; @@ -384,20 +395,29 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, { struct dirty_root *dirty; struct btrfs_trans_handle *trans; + u64 num_blocks; + u64 blocks_used; int ret = 0; int err; while(!list_empty(list)) { + struct btrfs_root *root; + mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); + num_blocks = btrfs_root_blocks_used(&dirty->root->root_item); + root = dirty->latest_root; + while(1) { trans = btrfs_start_transaction(tree_root, 1); + ret = btrfs_drop_snapshot(trans, dirty->root); if (ret != -EAGAIN) { break; } + err = btrfs_update_root(trans, tree_root, &dirty->root->root_key, @@ -414,9 +434,19 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, mutex_lock(&tree_root->fs_info->fs_mutex); } BUG_ON(ret); + + num_blocks -= btrfs_root_blocks_used(&dirty->root->root_item); + blocks_used = btrfs_root_blocks_used(&root->root_item); + if (num_blocks) { + record_root_in_trans(root); + btrfs_set_root_blocks_used(&root->root_item, + blocks_used - num_blocks); + } ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); - if (ret) + if (ret) { + BUG(); break; + } ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); @@ -534,10 +564,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); put_transaction(cur_trans); + if (root->fs_info->closing) list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots); else list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); + mutex_unlock(&root->fs_info->trans_mutex); kmem_cache_free(btrfs_trans_handle_cachep, trans); From 252c38f0697b6889c03542fa3c96143d3db4d9d8 Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 29 Aug 2007 09:11:44 -0400 Subject: [PATCH 0208/2985] Btrfs: ctree.c cleanups Fixup a few buffer_head release errors, and fix an off by one in balance_node_right. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c0782a5b04c5..89e282c35ce2 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -266,8 +266,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, err = __btrfs_cow_block(trans, root, cur_bh, parent, i, &tmp_bh, search_start, min(8, end_slot - i)); - if (err) + if (err) { + brelse(cur_bh); break; + } search_start = bh_blocknr(tmp_bh); *last_ret = search_start; if (parent_level == 1) @@ -881,7 +883,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow) { struct buffer_head *b; - struct buffer_head *cow_buf; struct btrfs_node *c; u64 blocknr; int slot; @@ -905,12 +906,11 @@ again: wret = btrfs_cow_block(trans, root, b, p->nodes[level + 1], p->slots[level + 1], - &cow_buf); + &b); if (wret) { - btrfs_block_release(root, cow_buf); + btrfs_block_release(root, b); return wret; } - b = cow_buf; c = btrfs_buffer_node(b); } BUG_ON(!cow && ins_len); @@ -1075,8 +1075,9 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct max_push = src_nritems / 2 + 1; /* don't try to empty the node */ - if (max_push > src_nritems) + if (max_push >= src_nritems) return 1; + if (max_push < push_items) push_items = max_push; @@ -1465,6 +1466,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); if (ret) { /* we hit -ENOSPC, but it isn't fatal here */ + btrfs_block_release(root, t); return 1; } left = btrfs_buffer_leaf(t); From 1b4ab1bb4bbd3045cd041259c30b826654599e96 Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 29 Aug 2007 09:11:44 -0400 Subject: [PATCH 0209/2985] Btrfs: Fix mknod to properly send rdev info back to disk Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2e3918e6049e..483eea69b0a6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1095,6 +1095,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, else { inode->i_op = &btrfs_special_inode_operations; init_special_inode(inode, inode->i_mode, rdev); + btrfs_update_inode(trans, root, inode); } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); From 4b82d6e4a592016ac75e756ca519923e0fc18f00 Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 29 Aug 2007 09:11:44 -0400 Subject: [PATCH 0210/2985] Btrfs: Add mount into directory support Modified form of original patch from Christoph Hellwig to make btrfs mount into the default subvolume by default. mount /dev/somedevice:subvolumename to get other subvolumes or mount /dev/somedevice:. to get the root Signed-off-by: Chris Mason --- fs/btrfs/super.c | 125 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 121 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a68101ae1a3b..8a94be1a773b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -145,11 +147,126 @@ static void btrfs_write_super(struct super_block *sb) sb->s_dirt = 0; } -static int btrfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +/* + * This is almost a copy of get_sb_bdev in fs/super.c. + * We need the local copy to allow direct mounting of + * subvolumes, but this could be easily integrated back + * into the generic version. --hch + */ + +/* start copy & paste */ +static int set_bdev_super(struct super_block *s, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, - btrfs_fill_super, mnt); + s->s_bdev = data; + s->s_dev = s->s_bdev->bd_dev; + return 0; +} + +static int test_bdev_super(struct super_block *s, void *data) +{ + return (void *)s->s_bdev == data; +} + +int btrfs_get_sb_bdev(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt, const char *subvol) +{ + struct block_device *bdev = NULL; + struct super_block *s; + struct dentry *root; + int error = 0; + + bdev = open_bdev_excl(dev_name, flags, fs_type); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + + /* + * once the super is inserted into the list by sget, s_umount + * will protect the lockfs code from trying to start a snapshot + * while we are mounting + */ + down(&bdev->bd_mount_sem); + s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); + up(&bdev->bd_mount_sem); + if (IS_ERR(s)) + goto error_s; + + if (s->s_root) { + if ((flags ^ s->s_flags) & MS_RDONLY) { + up_write(&s->s_umount); + deactivate_super(s); + error = -EBUSY; + goto error_bdev; + } + + close_bdev_excl(bdev); + } else { + char b[BDEVNAME_SIZE]; + + s->s_flags = flags; + strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); + sb_set_blocksize(s, block_size(bdev)); + error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); + if (error) { + up_write(&s->s_umount); + deactivate_super(s); + goto error; + } + + s->s_flags |= MS_ACTIVE; + } + + if (subvol) { + root = lookup_one_len(subvol, s->s_root, strlen(subvol)); + if (IS_ERR(root)) { + up_write(&s->s_umount); + deactivate_super(s); + error = PTR_ERR(root); + goto error; + } + if (!root->d_inode) { + dput(root); + up_write(&s->s_umount); + deactivate_super(s); + error = -ENXIO; + goto error; + } + } else { + root = dget(s->s_root); + } + + mnt->mnt_sb = s; + mnt->mnt_root = root; + return 0; + +error_s: + error = PTR_ERR(s); +error_bdev: + close_bdev_excl(bdev); +error: + return error; +} +/* end copy & paste */ + +static int btrfs_get_sb(struct file_system_type *fs_type, + int flags, const char *identifier, void *data, struct vfsmount *mnt) +{ + int ret; + char *_identifier = kstrdup(identifier, GFP_KERNEL); + char *subvol_name; + const char *dev_name; + + subvol_name = _identifier; + dev_name = strsep(&subvol_name, ":"); + if (!dev_name) + return -ENOMEM; + + ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, + btrfs_fill_super, mnt, + subvol_name ? subvol_name : "default"); + kfree(_identifier); + return ret; } static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) From e9fe395e47bef9b948657b788c65cbfbd2433cc4 Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 29 Aug 2007 09:11:44 -0400 Subject: [PATCH 0211/2985] Btrfs: Fix oopsen in extent_tree.c during enospc Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ff3f7c2be605..b074ad1416dd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -244,7 +244,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (search_start) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_block_group(info, search_start); - if (shint->data == data) { + if (shint && shint->data == data) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < div_factor(shint->key.offset, factor)) { @@ -1255,8 +1255,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, empty_size, hint, - (unsigned long)-1, &ins, 0); + 1, empty_size, hint, (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); return ERR_PTR(ret); From 95e0528919cb0c7635b37496e0ad1c41e4c0dd7c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 29 Aug 2007 09:11:44 -0400 Subject: [PATCH 0212/2985] Btrfs: Use mount -o subvol to select the subvol directory instead of dev: Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 ++++++ fs/btrfs/super.c | 49 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ffeee546a0d7..cd75c906048f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1064,6 +1064,12 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) +/* mount option defines and helpers */ +#define BTRFS_MOUNT_SUBVOL 0x000001 +#define btrfs_clear_opt(o, opt) o &= ~BTRFS_MOUNT_##opt +#define btrfs_set_opt(o, opt) o |= BTRFS_MOUNT_##opt +#define btrfs_test_opt(sb, opt) (BTRFS_SB(sb)->s_mount_opt & \ + BTRFS_MOUNT_##opt) /* extent-tree.c */ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8a94be1a773b..66a01cbbbea1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -58,6 +59,41 @@ static void btrfs_put_super (struct super_block * sb) sb->s_fs_info = NULL; } +enum { + Opt_subvol, Opt_err, +}; + +static match_table_t tokens = { + {Opt_subvol, "subvol=%s"}, + {Opt_err, NULL} +}; + +static int parse_options (char * options, + struct btrfs_root *root, + char **subvol_name) +{ + char * p; + substring_t args[MAX_OPT_ARGS]; + if (!options) + return 1; + + while ((p = strsep (&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_subvol: + *subvol_name = match_strdup(&args[0]); + break; + default: + return 0; + } + } + return 1; +} + static int btrfs_fill_super(struct super_block * sb, void * data, int silent) { struct inode * inode; @@ -250,22 +286,15 @@ error: /* end copy & paste */ static int btrfs_get_sb(struct file_system_type *fs_type, - int flags, const char *identifier, void *data, struct vfsmount *mnt) + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { int ret; - char *_identifier = kstrdup(identifier, GFP_KERNEL); - char *subvol_name; - const char *dev_name; - - subvol_name = _identifier; - dev_name = strsep(&subvol_name, ":"); - if (!dev_name) - return -ENOMEM; + char *subvol_name = NULL; + parse_options((char *)data, NULL, &subvol_name); ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, btrfs_fill_super, mnt, subvol_name ? subvol_name : "default"); - kfree(_identifier); return ret; } From 07157aacb1ecd394a5494910b5f80d7e7d407d37 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 30 Aug 2007 08:50:51 -0400 Subject: [PATCH 0213/2985] Btrfs: Add file data csums back in via hooks in the extent map code Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 94 ++++++++++++++++++++++++++++++++++++----- fs/btrfs/extent_map.h | 15 ++++++- fs/btrfs/inode.c | 98 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 193 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index a9c7419615b9..6053f9db0739 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -57,7 +57,7 @@ void extent_map_tree_init(struct extent_map_tree *tree, { tree->map.rb_node = NULL; tree->state.rb_node = NULL; - tree->fill_delalloc = NULL; + tree->ops = NULL; rwlock_init(&tree->lock); tree->mapping = mapping; } @@ -93,6 +93,7 @@ struct extent_state *alloc_extent_state(gfp_t mask) return state; state->state = 0; state->in_tree = 0; + state->private = 0; atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); return state; @@ -1034,6 +1035,61 @@ int unlock_range(struct extent_map_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(unlock_range); +int set_state_private(struct extent_map_tree *tree, u64 start, u64 private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + state->private = private; +out: + write_unlock_irq(&tree->lock); + return ret; + +} + +int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + read_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + *private = state->private; +out: + read_unlock_irq(&tree->lock); + return ret; +} + /* * searches a range in the state tree for a given mask. * If 'filled' == 1, this returns 1 only if ever extent in the tree @@ -1185,12 +1241,13 @@ static int end_bio_extent_writepage(struct bio *bio, static int end_bio_extent_readpage(struct bio *bio, unsigned int bytes_done, int err) { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct extent_map_tree *tree = bio->bi_private; u64 start; u64 end; int whole_page; + int ret; if (bio->bi_size) return 1; @@ -1208,6 +1265,11 @@ static int end_bio_extent_readpage(struct bio *bio, if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); + if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { + ret = tree->ops->readpage_end_io_hook(page, start, end); + if (ret) + uptodate = 0; + } if (uptodate) { set_extent_uptodate(tree, start, end, GFP_ATOMIC); if (whole_page) @@ -1388,9 +1450,16 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, continue; } - ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, bdev, - end_bio_extent_readpage); + ret = 0; + if (tree->ops && tree->ops->readpage_io_hook) { + ret = tree->ops->readpage_io_hook(page, cur, + cur + iosize - 1); + } + if (!ret) { + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, + bdev, end_bio_extent_readpage); + } if (ret) SetPageError(page); cur = cur + iosize; @@ -1462,7 +1531,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, &delalloc_end, 128 * 1024 * 1024); if (nr_delalloc) { - tree->fill_delalloc(inode, start, delalloc_end); + tree->ops->fill_delalloc(inode, start, delalloc_end); if (delalloc_end >= page_end + 1) { clear_extent_bit(tree, page_end + 1, delalloc_end, EXTENT_LOCKED | EXTENT_DELALLOC, @@ -1528,12 +1597,17 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, continue; } clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); - set_range_writeback(tree, cur, cur + iosize - 1); - ret = submit_extent_page(WRITE, tree, page, - sector, iosize, page_offset, bdev, - end_bio_extent_writepage); + ret = tree->ops->writepage_io_hook(page, cur, cur + iosize - 1); if (ret) SetPageError(page); + else { + set_range_writeback(tree, cur, cur + iosize - 1); + ret = submit_extent_page(WRITE, tree, page, sector, + iosize, page_offset, bdev, + end_bio_extent_writepage); + if (ret) + SetPageError(page); + } cur = cur + iosize; page_offset += iosize; nr++; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index e91a2e9619ee..0eae6c420acb 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -6,12 +6,19 @@ #define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_DELALLOC (u64)-1 +struct extent_map_ops { + int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); + int (*writepage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); +}; + struct extent_map_tree { struct rb_root map; struct rb_root state; struct address_space *mapping; rwlock_t lock; - int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); + struct extent_map_ops *ops; }; /* note, this must start with the same fields as fs/extent_map.c:tree_entry */ @@ -36,6 +43,10 @@ struct extent_state { wait_queue_head_t wq; atomic_t refs; unsigned long state; + + /* for use by the FS */ + u64 private; + struct list_head list; }; @@ -89,4 +100,6 @@ int extent_commit_write(struct extent_map_tree *tree, struct inode *inode, struct page *page, unsigned from, unsigned to); int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); +int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); +int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 483eea69b0a6..3ee6b2fadf58 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -52,6 +52,7 @@ static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct address_space_operations btrfs_symlink_aops; static struct file_operations btrfs_dir_file_operations; +static struct extent_map_ops btrfs_extent_map_ops; static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; @@ -103,6 +104,90 @@ out: return ret; } +int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) +{ + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + char *kaddr; + int ret; + u64 page_start = page->index << PAGE_CACHE_SHIFT; + size_t offset = start - page_start; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + kaddr = kmap(page); + btrfs_csum_file_block(trans, root, inode->i_ino, + start, kaddr + offset, end - start + 1); + kunmap(page); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) +{ + int ret = 0; + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct btrfs_csum_item *item; + struct btrfs_path *path = NULL; + u64 private; + + mutex_lock(&root->fs_info->fs_mutex); + path = btrfs_alloc_path(); + item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); + if (IS_ERR(item)) { + ret = PTR_ERR(item); + /* a csum that isn't present is a preallocated region. */ + if (ret == -ENOENT || ret == -EFBIG) + ret = 0; + private = 0; + goto out; + } + memcpy((char *)&private, &item->csum, BTRFS_CRC32_SIZE); + set_state_private(em_tree, start, private); +out: + if (path) + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) +{ + char csum[BTRFS_CRC32_SIZE]; + size_t offset = start - (page->index << PAGE_CACHE_SHIFT); + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + char *kaddr; + u64 private; + int ret; + + ret = get_state_private(em_tree, start, &private); + kaddr = kmap_atomic(page, KM_IRQ0); + if (ret) { + goto zeroit; + } + ret = btrfs_csum_data(root, kaddr + offset, end - start + 1, csum); + BUG_ON(ret); + if (memcmp(csum, &private, BTRFS_CRC32_SIZE)) { + goto zeroit; + } + kunmap_atomic(kaddr, KM_IRQ0); + return 0; + +zeroit: + printk("btrfs csum failed ino %lu off %llu\n", + page->mapping->host->i_ino, (unsigned long long)start); + memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); + kunmap_atomic(kaddr, KM_IRQ0); + return 0; +} void btrfs_read_locked_inode(struct inode *inode) { @@ -155,7 +240,7 @@ void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; - BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; + BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; break; @@ -1148,7 +1233,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; + BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -2286,7 +2371,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; + BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -2362,6 +2447,13 @@ static struct file_operations btrfs_dir_file_operations = { #endif }; +static struct extent_map_ops btrfs_extent_map_ops = { + .fill_delalloc = run_delalloc_range, + .writepage_io_hook = btrfs_writepage_io_hook, + .readpage_io_hook = btrfs_readpage_io_hook, + .readpage_end_io_hook = btrfs_readpage_end_io_hook, +}; + static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, From 2bf5a725a3b82efeaf7b292c085e69a9388a89ea Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 30 Aug 2007 11:54:02 -0400 Subject: [PATCH 0214/2985] Btrfs: fsx delalloc fixes Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 9 ++- fs/btrfs/file.c | 26 ++++--- fs/btrfs/inode.c | 177 ++++-------------------------------------- 3 files changed, 38 insertions(+), 174 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 6053f9db0739..8ad6f8efc5a0 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -77,6 +77,8 @@ EXPORT_SYMBOL(alloc_extent_map); void free_extent_map(struct extent_map *em) { + if (!em) + return; if (atomic_dec_and_test(&em->refs)) { WARN_ON(em->in_tree); kmem_cache_free(extent_map_cache, em); @@ -102,6 +104,8 @@ EXPORT_SYMBOL(alloc_extent_state); void free_extent_state(struct extent_state *state) { + if (!state) + return; if (atomic_dec_and_test(&state->refs)) { WARN_ON(state->in_tree); kmem_cache_free(extent_state_cache, state); @@ -1395,8 +1399,8 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, if (!PagePrivate(page)) { SetPagePrivate(page); - set_page_private(page, 1); WARN_ON(!page->mapping->a_ops->invalidatepage); + set_page_private(page, 1); page_cache_get(page); } @@ -1638,7 +1642,8 @@ int extent_invalidatepage(struct extent_map_tree *tree, lock_extent(tree, start, end, GFP_NOFS); wait_on_extent_writeback(tree, start, end); - clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY, + clear_extent_bit(tree, start, end, + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, 1, 1, GFP_NOFS); return 0; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d3d39e4a2797..07b121d4bd93 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -186,8 +186,16 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 mask = root->blocksize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + err = btrfs_drop_extents(trans, root, inode, + last_pos_in_file, + last_pos_in_file + hole_size, + &hint_block); + if (err) + goto failed; + + hole_size >>= inode->i_blkbits; err = btrfs_insert_file_extent(trans, root, inode->i_ino, last_pos_in_file, @@ -217,13 +225,11 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct page *p = pages[0]; /* step one, delete the existing extents in this range */ /* FIXME blocksize != pagesize */ - if (start_pos < inode->i_size) { - err = btrfs_drop_extents(trans, root, inode, start_pos, - (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); - if (err) - goto failed; - } + err = btrfs_drop_extents(trans, root, inode, start_pos, + (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + if (err) + goto failed; err = insert_inline_extent(trans, root, inode, start_pos, end_pos - start_pos, p, 0); @@ -400,6 +406,8 @@ next_slot: keep = 1; WARN_ON(start & (root->blocksize - 1)); if (found_extent) { + btrfs_drop_extent_cache(inode, key.offset, + start - 1 ); new_num = (start - key.offset) >> inode->i_blkbits; old_num = btrfs_file_extent_num_blocks(extent); @@ -464,7 +472,7 @@ next_slot: if (ret) { btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); - printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end); + printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end, keep); } BUG_ON(ret); extent = btrfs_item_ptr( diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3ee6b2fadf58..64710fa77d01 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -721,25 +721,35 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + u64 mask = root->blocksize - 1; u64 pos = (inode->i_size + mask) & ~mask; + u64 block_end = attr->ia_size | mask; u64 hole_size; + u64 alloc_hint; if (attr->ia_size <= pos) goto out; btrfs_truncate_page(inode->i_mapping, inode->i_size); + lock_extent(em_tree, pos, block_end, GFP_NOFS); hole_size = (attr->ia_size - pos + mask) & ~mask; - hole_size >>= inode->i_blkbits; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); + err = btrfs_drop_extents(trans, root, inode, + pos, pos + hole_size, &alloc_hint); + + hole_size >>= inode->i_blkbits; + err = btrfs_insert_file_extent(trans, root, inode->i_ino, pos, 0, 0, hole_size); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + unlock_extent(em_tree, pos, block_end, GFP_NOFS); if (err) return err; } @@ -1529,13 +1539,13 @@ insert: ret = add_extent_mapping(em_tree, em); if (ret == -EEXIST) { free_extent_map(em); + em = NULL; failed_insert++; if (failed_insert > 5) { printk("failing to insert %Lu %Lu\n", start, end); err = -EIO; goto out; } - em = NULL; goto again; } err = 0; @@ -1555,167 +1565,6 @@ out: return em; } - -/* - * FIBMAP and others want to pass in a fake buffer head. They need to - * use BTRFS_GET_BLOCK_NO_DIRECT to make sure we don't try to memcpy - * any packed file data into the fake bh - */ -#define BTRFS_GET_BLOCK_NO_CREATE 0 -#define BTRFS_GET_BLOCK_CREATE 1 -#define BTRFS_GET_BLOCK_NO_DIRECT 2 - -/* - * FIXME create==1 doe not work. - */ -static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - int ret; - int err = 0; - u64 blocknr; - u64 extent_start = 0; - u64 extent_end = 0; - u64 objectid = inode->i_ino; - u32 found_type; - u64 alloc_hint = 0; - struct btrfs_path *path; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_file_extent_item *item; - struct btrfs_leaf *leaf; - struct btrfs_disk_key *found_key; - struct btrfs_trans_handle *trans = NULL; - - path = btrfs_alloc_path(); - BUG_ON(!path); - if (create & BTRFS_GET_BLOCK_CREATE) { - /* - * danger!, this only works if the page is properly up - * to date somehow - */ - trans = btrfs_start_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - goto out; - } - ret = btrfs_drop_extents(trans, root, inode, - iblock << inode->i_blkbits, - (iblock + 1) << inode->i_blkbits, - &alloc_hint); - BUG_ON(ret); - } - - ret = btrfs_lookup_file_extent(NULL, root, path, - objectid, - iblock << inode->i_blkbits, 0); - if (ret < 0) { - err = ret; - goto out; - } - - if (ret != 0) { - if (path->slots[0] == 0) { - btrfs_release_path(root, path); - goto not_found; - } - path->slots[0]--; - } - - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_file_extent_item); - leaf = btrfs_buffer_leaf(path->nodes[0]); - blocknr = btrfs_file_extent_disk_blocknr(item); - blocknr += btrfs_file_extent_offset(item); - - /* are we inside the extent that was found? */ - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); - if (btrfs_disk_key_objectid(found_key) != objectid || - found_type != BTRFS_EXTENT_DATA_KEY) { - extent_end = 0; - extent_start = 0; - goto not_found; - } - found_type = btrfs_file_extent_type(item); - extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); - if (found_type == BTRFS_FILE_EXTENT_REG) { - extent_start = extent_start >> inode->i_blkbits; - extent_end = extent_start + btrfs_file_extent_num_blocks(item); - err = 0; - if (btrfs_file_extent_disk_blocknr(item) == 0) - goto out; - if (iblock >= extent_start && iblock < extent_end) { - btrfs_map_bh_to_logical(root, result, blocknr + - iblock - extent_start); - goto out; - } - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - char *ptr; - char *map; - u32 size; - - if (create & BTRFS_GET_BLOCK_NO_DIRECT) { - err = -EINVAL; - goto out; - } - size = btrfs_file_extent_inline_len(leaf->items + - path->slots[0]); - extent_end = (extent_start + size) >> inode->i_blkbits; - extent_start >>= inode->i_blkbits; - if (iblock < extent_start || iblock > extent_end) { - goto not_found; - } - ptr = btrfs_file_extent_inline_start(item); - map = kmap(result->b_page); - memcpy(map, ptr, size); - memset(map + size, 0, PAGE_CACHE_SIZE - size); - flush_dcache_page(result->b_page); - kunmap(result->b_page); - set_buffer_uptodate(result); - SetPageChecked(result->b_page); - btrfs_map_bh_to_logical(root, result, 0); - } -not_found: - if (create & BTRFS_GET_BLOCK_CREATE) { - struct btrfs_key ins; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, - 1, 0, alloc_hint, (u64)-1, - &ins, 1); - if (ret) { - err = ret; - goto out; - } - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - iblock << inode->i_blkbits, - ins.objectid, ins.offset, - ins.offset); - if (ret) { - err = ret; - goto out; - } - btrfs_map_bh_to_logical(root, result, ins.objectid); - } -out: - if (trans) { - ret = btrfs_end_transaction(trans, root); - if (!err) - err = ret; - } - btrfs_free_path(path); - return err; -} - -int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - int err; - struct btrfs_root *root = BTRFS_I(inode)->root; - mutex_lock(&root->fs_info->fs_mutex); - err = btrfs_get_block_lock(inode, iblock, result, create); - mutex_unlock(&root->fs_info->fs_mutex); - return err; -} - static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { @@ -2469,6 +2318,8 @@ static struct address_space_operations btrfs_aops = { static struct address_space_operations btrfs_symlink_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, + .invalidatepage = btrfs_invalidatepage, + .releasepage = btrfs_releasepage, }; static struct inode_operations btrfs_file_inode_operations = { From 8e21528f87854314792aaef4d279bc9e5a9be997 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 30 Aug 2007 12:16:51 -0400 Subject: [PATCH 0215/2985] Btrfs: remove extra drop_extent_cache call Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 07b121d4bd93..acef8cb7fb79 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -406,8 +406,6 @@ next_slot: keep = 1; WARN_ON(start & (root->blocksize - 1)); if (found_extent) { - btrfs_drop_extent_cache(inode, key.offset, - start - 1 ); new_num = (start - key.offset) >> inode->i_blkbits; old_num = btrfs_file_extent_num_blocks(extent); From 86479a04eef8f304a13aeb8b64bcc8e506a68268 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 10 Sep 2007 19:58:16 -0400 Subject: [PATCH 0216/2985] Add support for defragging files via btrfsctl -d. Avoid OOM on extent tree defrag. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 4 ++ fs/btrfs/disk-io.h | 2 + fs/btrfs/extent_map.c | 16 +++++--- fs/btrfs/inode.c | 93 ++++++++++++++++++++++++++++++++++++------ fs/btrfs/tree-defrag.c | 3 ++ 5 files changed, 100 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 89e282c35ce2..b41f48ade419 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -217,6 +217,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, root->fs_info->generation); WARN_ON(1); } + if (buffer_defrag_done(parent)) + return 0; + parent_node = btrfs_buffer_node(parent); parent_nritems = btrfs_header_nritems(&parent_node->header); parent_level = btrfs_header_level(&parent_node->header); @@ -274,6 +277,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, *last_ret = search_start; if (parent_level == 1) clear_buffer_defrag(tmp_bh); + set_buffer_defrag_done(tmp_bh); brelse(tmp_bh); } return err; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index da6bb72750f2..714fa877438b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -26,9 +26,11 @@ enum btrfs_bh_state_bits { BH_Checked = BH_PrivateStart, BH_Defrag, + BH_DefragDone, }; BUFFER_FNS(Checked, checked); BUFFER_FNS(Defrag, defrag); +BUFFER_FNS(DefragDone, defrag_done); static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) { diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 8ad6f8efc5a0..33f7a18dddf7 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -10,6 +10,12 @@ #include #include "extent_map.h" +/* temporary define until extent_map moves out of btrfs */ +struct kmem_cache *btrfs_cache_create(const char *name, size_t size, + unsigned long extra_flags, + void (*ctor)(void *, struct kmem_cache *, + unsigned long)); + static struct kmem_cache *extent_map_cache; static struct kmem_cache *extent_state_cache; @@ -32,14 +38,12 @@ struct tree_entry { void __init extent_map_init(void) { - extent_map_cache = kmem_cache_create("extent_map", - sizeof(struct extent_map), 0, - SLAB_RECLAIM_ACCOUNT | + extent_map_cache = btrfs_cache_create("extent_map", + sizeof(struct extent_map), SLAB_DESTROY_BY_RCU, NULL); - extent_state_cache = kmem_cache_create("extent_state", - sizeof(struct extent_state), 0, - SLAB_RECLAIM_ACCOUNT | + extent_state_cache = btrfs_cache_create("extent_state", + sizeof(struct extent_state), SLAB_DESTROY_BY_RCU, NULL); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 64710fa77d01..6b3e4404dc6a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1904,6 +1904,70 @@ fail: return ret; } +static unsigned long force_ra(struct address_space *mapping, + struct file_ra_state *ra, struct file *file, + pgoff_t offset, pgoff_t last_index) +{ + pgoff_t req_size; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + req_size = last_index - offset + 1; + offset = page_cache_readahead(mapping, ra, file, offset, req_size); + return offset; +#else + req_size = min(last_index - offset + 1, (pgoff_t)128); + page_cache_sync_readahead(mapping, ra, file, offset, req_size); + return offset + req_size; +#endif +} + +int btrfs_defrag_file(struct file *file) { + struct inode *inode = file->f_path.dentry->d_inode; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct page *page; + unsigned long last_index; + unsigned long ra_index = 0; + u64 page_start; + u64 page_end; + unsigned long i; + + mutex_lock(&inode->i_mutex); + last_index = inode->i_size >> PAGE_CACHE_SHIFT; + for (i = 0; i <= last_index; i++) { + if (i == ra_index) { + ra_index = force_ra(inode->i_mapping, &file->f_ra, + file, ra_index, last_index); + } + page = grab_cache_page(inode->i_mapping, i); + if (!page) + goto out_unlock; + if (!PageUptodate(page)) { + btrfs_readpage(NULL, page); + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + goto out_unlock; + } + } + page_start = page->index << PAGE_CACHE_SHIFT; + page_end = page_start + PAGE_CACHE_SIZE - 1; + + lock_extent(em_tree, page_start, page_end, GFP_NOFS); + set_extent_delalloc(em_tree, page_start, + page_end, GFP_NOFS); + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); + } + +out_unlock: + mutex_unlock(&inode->i_mutex); + return 0; +} + int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { @@ -1948,10 +2012,14 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int break; case BTRFS_IOC_DEFRAG: - mutex_lock(&root->fs_info->fs_mutex); - btrfs_defrag_root(root, 0); - btrfs_defrag_root(root->fs_info->extent_root, 0); - mutex_unlock(&root->fs_info->fs_mutex); + if (S_ISDIR(inode->i_mode)) { + mutex_lock(&root->fs_info->fs_mutex); + btrfs_defrag_root(root, 0); + btrfs_defrag_root(root->fs_info->extent_root, 0); + mutex_unlock(&root->fs_info->fs_mutex); + } else if (S_ISREG(inode->i_mode)) { + btrfs_defrag_file(filp); + } ret = 0; break; default: @@ -2018,7 +2086,7 @@ void btrfs_destroy_cachep(void) kmem_cache_destroy(btrfs_path_cachep); } -static struct kmem_cache *cache_create(const char *name, size_t size, +struct kmem_cache *btrfs_cache_create(const char *name, size_t size, unsigned long extra_flags, void (*ctor)(void *, struct kmem_cache *, unsigned long)) @@ -2033,27 +2101,28 @@ static struct kmem_cache *cache_create(const char *name, size_t size, int btrfs_init_cachep(void) { - btrfs_inode_cachep = cache_create("btrfs_inode_cache", + btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache", sizeof(struct btrfs_inode), 0, init_once); if (!btrfs_inode_cachep) goto fail; - btrfs_trans_handle_cachep = cache_create("btrfs_trans_handle_cache", - sizeof(struct btrfs_trans_handle), - 0, NULL); + btrfs_trans_handle_cachep = + btrfs_cache_create("btrfs_trans_handle_cache", + sizeof(struct btrfs_trans_handle), + 0, NULL); if (!btrfs_trans_handle_cachep) goto fail; - btrfs_transaction_cachep = cache_create("btrfs_transaction_cache", + btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache", sizeof(struct btrfs_transaction), 0, NULL); if (!btrfs_transaction_cachep) goto fail; - btrfs_path_cachep = cache_create("btrfs_path_cache", + btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache", sizeof(struct btrfs_transaction), 0, NULL); if (!btrfs_path_cachep) goto fail; - btrfs_bit_radix_cachep = cache_create("btrfs_radix", 256, + btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256, SLAB_DESTROY_BY_RCU, NULL); if (!btrfs_bit_radix_cachep) goto fail; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 35fd20d24645..420597127ed1 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -113,6 +113,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, } WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); + clear_buffer_defrag(path->nodes[*level]); + clear_buffer_defrag_done(path->nodes[*level]); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -143,6 +145,7 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, return 0; } else { clear_buffer_defrag(path->nodes[*level]); + clear_buffer_defrag_done(path->nodes[*level]); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; From 011410bd859a481a335d7db1fb559542c5663fd5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 10 Sep 2007 19:58:36 -0400 Subject: [PATCH 0217/2985] Btrfs: Add more synchronization before creating a snapshot File data checksums are only done during writepage, so we have to make sure all pages are written when the snapshot is taken. This also adds some locking so that new writes don't race in and add new dirty pages. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/file.c | 2 ++ fs/btrfs/inode.c | 10 +++++++++- 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cd75c906048f..03e1c4ad341b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -333,6 +333,7 @@ struct btrfs_root { struct inode *inode; struct kobject root_kobj; struct completion kobj_unregister; + struct rw_semaphore snap_sem; u64 objectid; u64 last_trans; u32 blocksize; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c25ef0a68f18..6c953a0e0aa3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -300,6 +300,7 @@ static int __setup_root(int blocksize, memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); init_completion(&root->kobj_unregister); + init_rwsem(&root->snap_sem); root->defrag_running = 0; root->defrag_level = 0; root->root_key.objectid = objectid; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index acef8cb7fb79..77f1950b0aa0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -160,6 +160,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> inode->i_blkbits; + down_read(&BTRFS_I(inode)->root->snap_sem); end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); @@ -250,6 +251,7 @@ out_unlock: mutex_unlock(&root->fs_info->fs_mutex); unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); free_extent_map(em); + up_read(&BTRFS_I(inode)->root->snap_sem); return err; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6b3e4404dc6a..1ace6d11e097 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -686,6 +686,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if ((offset & (blocksize - 1)) == 0) goto out; + down_read(&BTRFS_I(inode)->root->snap_sem); ret = -ENOMEM; page = grab_cache_page(mapping, index); if (!page) @@ -704,6 +705,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) unlock_page(page); page_cache_release(page); + up_read(&BTRFS_I(inode)->root->snap_sem); out: return ret; } @@ -1668,6 +1670,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) int ret = -EINVAL; u64 page_start; + down_read(&BTRFS_I(inode)->root->snap_sem); lock_page(page); wait_on_page_writeback(page); size = i_size_read(inode); @@ -1688,6 +1691,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) ret = btrfs_cow_one_page(inode, page, end); out_unlock: + up_read(&BTRFS_I(inode)->root->snap_sem); unlock_page(page); return ret; } @@ -1851,6 +1855,10 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (!root->ref_cows) return -EINVAL; + down_write(&root->snap_sem); + freeze_bdev(root->fs_info->sb->s_bdev); + thaw_bdev(root->fs_info->sb->s_bdev, root->fs_info->sb); + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); @@ -1894,12 +1902,12 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_inc_root_ref(trans, root); if (ret) goto fail; - fail: err = btrfs_commit_transaction(trans, root); if (err && !ret) ret = err; mutex_unlock(&root->fs_info->fs_mutex); + up_write(&root->snap_sem); btrfs_btree_balance_dirty(root); return ret; } From a8c450b211c010ac55190da23ceb6b39b393f411 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 10 Sep 2007 20:00:27 -0400 Subject: [PATCH 0218/2985] Btrfs: Reorder tests in set_extent_bit to properly find holes Yan Zheng noticed that set_extent_bit was exiting too early when there was a hole in the map. The fix is to reorder the tests to check for the hole first. Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 44 +++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 33f7a18dddf7..370ed97e52f5 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -712,28 +712,6 @@ again: } goto search_again; } - /* - * | ---- desired range ---- | - * | state | - * We need to split the extent, and set the bit - * on the first half - */ - if (state->start <= end && state->end > end) { - set = state->state & bits; - if (exclusive && set) { - *failed_start = start; - err = -EEXIST; - goto out; - } - err = split_state(tree, state, prealloc, end + 1); - BUG_ON(err == -EEXIST); - - prealloc->state |= bits; - merge_state(tree, prealloc); - prealloc = NULL; - goto out; - } - /* * | ---- desired range ---- | * | state | or | state | @@ -756,6 +734,28 @@ again: start = this_end + 1; goto search_again; } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and set the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + prealloc->state |= bits; + merge_state(tree, prealloc); + prealloc = NULL; + goto out; + } + goto search_again; out: From 9433063bcb4b3c0c9e5ef3c24a811b63084dedb4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Sep 2007 20:02:22 -0400 Subject: [PATCH 0219/2985] [PATCH] btrfs: fix printk format warning Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 77f1950b0aa0..c26ca54ed4de 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -174,7 +174,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, hint_block = 0; if ((end_of_last_block & 4095) == 0) { - printk("strange end of last %Lu %lu %Lu\n", start_pos, write_bytes, end_of_last_block); + printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); } set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS); From 90f1c19a9fd2a943adc69d2b9b8c83bcc4bba6f9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Sep 2007 20:02:27 -0400 Subject: [PATCH 0220/2985] Btrfs: [PATCH] extent_map: fix locking for bio completion The bio completion handlers can be run in any context, e.g. when using the old ide driver they run in hardirq context with irqs disabled so lockdep rightfully warns about using write_lock_irq useage in these handlers. This patch switches clear_extent_bit and set_extent_bit to write_lock_irqsave to fix this problem. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 370ed97e52f5..3021564ac62c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -456,6 +456,7 @@ int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, struct extent_state *state; struct extent_state *prealloc = NULL; struct rb_node *node; + unsigned long flags; int err; int set = 0; @@ -466,7 +467,7 @@ again: return -ENOMEM; } - write_lock_irq(&tree->lock); + write_lock_irqsave(&tree->lock, flags); /* * this search will find the extents that end after * our range starts @@ -533,7 +534,7 @@ again: goto search_again; out: - write_unlock_irq(&tree->lock); + write_unlock_irqrestore(&tree->lock, flags); if (prealloc) free_extent_state(prealloc); @@ -542,7 +543,7 @@ out: search_again: if (start >= end) goto out; - write_unlock_irq(&tree->lock); + write_unlock_irqrestore(&tree->lock, flags); if (mask & __GFP_WAIT) cond_resched(); goto again; @@ -628,6 +629,7 @@ int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, struct extent_state *state; struct extent_state *prealloc = NULL; struct rb_node *node; + unsigned long flags; int err = 0; int set; u64 last_start; @@ -639,7 +641,7 @@ again: return -ENOMEM; } - write_lock_irq(&tree->lock); + write_lock_irqsave(&tree->lock, flags); /* * this search will find all the extents that end after * our range starts. @@ -759,7 +761,7 @@ again: goto search_again; out: - write_unlock_irq(&tree->lock); + write_unlock_irqrestore(&tree->lock, flags); if (prealloc) free_extent_state(prealloc); @@ -768,7 +770,7 @@ out: search_again: if (start > end) goto out; - write_unlock_irq(&tree->lock); + write_unlock_irqrestore(&tree->lock, flags); if (mask & __GFP_WAIT) cond_resched(); goto again; From d396c6f554d182b7bce4e2bb2acb47f80430c05f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Sep 2007 20:02:30 -0400 Subject: [PATCH 0221/2985] Btrfs: [PATCH] extent_map: provide generic bmap generic_bmap is completely trivial, while the extent to bh mapping in btrfs is rather complex. So provide a extent_bmap instead that takes a get_extent callback and can be used by filesystem using the extent_map code. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 19 +++++++++++++++++++ fs/btrfs/extent_map.h | 2 ++ fs/btrfs/inode.c | 23 ++--------------------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 3021564ac62c..ff2b1dd25bf9 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1816,3 +1816,22 @@ int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) } EXPORT_SYMBOL(try_release_extent_mapping); +sector_t extent_bmap(struct address_space *mapping, sector_t iblock, + get_extent_t *get_extent) +{ + struct inode *inode = mapping->host; + u64 start = iblock << inode->i_blkbits; + u64 end = start + (1 << inode->i_blkbits) - 1; + struct extent_map *em; + + em = get_extent(inode, NULL, 0, start, end, 0); + if (!em || IS_ERR(em)) + return 0; + + // XXX(hch): block 0 is valid in some cases, e.g. XFS RT device + if (em->block_start == EXTENT_MAP_INLINE || + em->block_start == 0) + return 0; + + return (em->block_start + start - em->start) >> inode->i_blkbits; +} diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 0eae6c420acb..da41aca74e0b 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -99,6 +99,8 @@ int extent_prepare_write(struct extent_map_tree *tree, int extent_commit_write(struct extent_map_tree *tree, struct inode *inode, struct page *page, unsigned from, unsigned to); +sector_t extent_bmap(struct address_space *mapping, sector_t iblock, + get_extent_t *get_extent); int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1ace6d11e097..39c1fe178f47 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1567,28 +1567,9 @@ out: return em; } -static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) +static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) { - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 start = iblock << inode->i_blkbits; - u64 end = start + root->blocksize -1; - struct extent_map *em; - - em = btrfs_get_extent(inode, NULL, 0, start, end, 0); - if (em && !IS_ERR(em) && em->block_start != EXTENT_MAP_INLINE && - em->block_start != 0) { - u64 offset; - offset = start - em->start; - start = (em->block_start + offset) >> inode->i_blkbits; - btrfs_map_bh_to_logical(root, result, start); - } - return 0; -} - -static sector_t btrfs_bmap(struct address_space *as, sector_t block) -{ - return generic_block_bmap(as, block, btrfs_get_block_bmap); + return extent_bmap(mapping, iblock, btrfs_get_extent); } static int btrfs_prepare_write(struct file *file, struct page *page, From b06355f0fe04a62d08238438654ec453e05304ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Sep 2007 20:02:32 -0400 Subject: [PATCH 0222/2985] Btrfs: [PATCH] extent_map: make the writepage_io hook optional The writepage_io is not mandatory, e.g. my port of xfs to the extent_map code does not have one for now. So handle a NULL pointer gracefully here. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index ff2b1dd25bf9..162766773a32 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1607,7 +1607,12 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, continue; } clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); - ret = tree->ops->writepage_io_hook(page, cur, cur + iosize - 1); + if (tree->ops && tree->ops->writepage_io_hook) { + ret = tree->ops->writepage_io_hook(page, cur, + cur + iosize - 1); + } else { + ret = 0; + } if (ret) SetPageError(page); else { From 0e2752a72cb37075b24899f01e9bc6a589de3b6c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Sep 2007 20:02:33 -0400 Subject: [PATCH 0223/2985] Btrfs: [PATCH] extent_map: add writepage_end_io hook XFS updates the ondisk inode size only after the data I/O has finished, so it needs a hook when the writepage end_bio handler has finished. Might not be worth applying as-is as the per-page callback is very ineffcient. What XFS really wants is a callback when writeout of a whole extent has completed. This delayed i_size updates scheme might be worthwile for btrfs aswell, btw. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 2 ++ fs/btrfs/extent_map.h | 1 + 2 files changed, 3 insertions(+) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 162766773a32..ab5bde31b687 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1231,6 +1231,8 @@ static int end_bio_extent_writepage(struct bio *bio, end_page_writeback(page); else check_page_writeback(tree, page); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, end); } while (bvec >= bio->bi_io_vec); bio_put(bio); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index da41aca74e0b..40b53ee274fb 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -11,6 +11,7 @@ struct extent_map_ops { int (*writepage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); + void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); }; struct extent_map_tree { From 5ce14bbcdd1b5d9233d26a1e89faf3a26c820c58 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 11 Sep 2007 11:15:39 -0400 Subject: [PATCH 0224/2985] Btrfs: Find and remove dead roots the first time a root is loaded. Dead roots are trees left over after a crash, and they were either in the process of being removed or were waiting to be removed when the box crashed. Before, a search of the entire tree of root pointers was done on mount looking for dead roots. Now, the search is done the first time we load a root. This makes mount faster when there are a large number of snapshots, and it enables the block accounting code to properly update the block counts on the latest root as old versions of the root are reaped after a crash. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 ++- fs/btrfs/disk-io.c | 9 ++++----- fs/btrfs/root-tree.c | 16 +++++++++++++--- fs/btrfs/transaction.c | 6 ++++-- fs/btrfs/transaction.h | 3 ++- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 03e1c4ad341b..6d6e94b63803 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1148,7 +1148,8 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *item); int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); -int btrfs_find_dead_roots(struct btrfs_root *root); +int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, + struct btrfs_root *latest_root); /* dir-item.c */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c953a0e0aa3..4296839eea1a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -426,6 +426,10 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, return ERR_PTR(ret); } + ret = btrfs_find_dead_roots(fs_info->tree_root, + root->root_key.objectid, root); + BUG_ON(ret); + return root; } @@ -522,11 +526,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; - ret = btrfs_find_dead_roots(tree_root); - if (ret) { - mutex_unlock(&fs_info->fs_mutex); - goto fail_tree_root; - } mutex_unlock(&fs_info->fs_mutex); return tree_root; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 402f67821c14..3b5926dfbeba 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -93,7 +93,8 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } -int btrfs_find_dead_roots(struct btrfs_root *root) +int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, + struct btrfs_root *latest) { struct btrfs_root *dead_root; struct btrfs_item *item; @@ -105,7 +106,7 @@ int btrfs_find_dead_roots(struct btrfs_root *root) struct btrfs_leaf *leaf; int slot; - key.objectid = 0; + key.objectid = objectid; key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); key.offset = 0; @@ -131,15 +132,24 @@ int btrfs_find_dead_roots(struct btrfs_root *root) btrfs_disk_key_to_cpu(&key, &item->key); if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) goto next; + + if (key.objectid < objectid) + goto next; + + if (key.objectid > objectid) + break; + ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item); if (btrfs_root_refs(ri) != 0) goto next; + dead_root = btrfs_read_fs_root_no_radix(root->fs_info, &key); if (IS_ERR(dead_root)) { ret = PTR_ERR(dead_root); goto err; } - ret = btrfs_add_dead_root(dead_root, + + ret = btrfs_add_dead_root(dead_root, latest, &root->fs_info->dead_roots); if (ret) goto err; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2b15daa3a9f2..29755593de65 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -239,7 +239,9 @@ struct dirty_root { struct btrfs_root *latest_root; }; -int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list) +int btrfs_add_dead_root(struct btrfs_root *root, + struct btrfs_root *latest, + struct list_head *dead_list) { struct dirty_root *dirty; @@ -247,6 +249,7 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list) if (!dirty) return -ENOMEM; dirty->root = root; + dirty->latest_root = latest; list_add(&dirty->list, dead_list); return 0; } @@ -412,7 +415,6 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, while(1) { trans = btrfs_start_transaction(tree_root, 1); - ret = btrfs_drop_snapshot(trans, dirty->root); if (ret != -EAGAIN) { break; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e451783a1a4d..4bc328cbb24c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -78,7 +78,8 @@ void btrfs_transaction_flush_work(struct btrfs_root *root); void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); void btrfs_init_transaction_sys(void); void btrfs_exit_transaction_sys(void); -int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); +int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, + struct list_head *dead_list); int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); From 93a6925ec10c2d5792ffad35af8366179c9db57c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 14 Sep 2007 09:42:31 -0400 Subject: [PATCH 0226/2985] Btrfs: Fix extra link count dec in rename Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 39c1fe178f47..cde8ddde6098 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2214,13 +2214,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry); if (ret) goto out_fail; - if (S_ISDIR(new_inode->i_mode)) - clear_nlink(new_inode); - else - drop_nlink(new_inode); - ret = btrfs_update_inode(trans, root, new_inode); - if (ret) - goto out_fail; } ret = btrfs_add_link(trans, new_dentry, old_inode); if (ret) From 84a5d5eefd2b31c3cb97dc078589870bf85c7f23 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 14 Sep 2007 09:43:53 -0400 Subject: [PATCH 0227/2985] Btrfs: add modules_install target Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index ee505c87ecf3..058a2dae65b4 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -18,6 +18,8 @@ else KERNELDIR := /lib/modules/`uname -r`/build all:: $(MAKE) -C $(KERNELDIR) M=`pwd` modules +modules_install:: + $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install clean:: $(MAKE) -C $(KERNELDIR) M=`pwd` clean endif From 432eba0882a32d6181ba44044f6e576226134784 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 14 Sep 2007 10:22:19 -0400 Subject: [PATCH 0228/2985] Btrfs: Simplify makefile Single-colons will do here. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 058a2dae65b4..6341cf7a9f91 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -16,10 +16,11 @@ else # Normal Makefile KERNELDIR := /lib/modules/`uname -r`/build -all:: +all: $(MAKE) -C $(KERNELDIR) M=`pwd` modules -modules_install:: +modules_install: $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install -clean:: +clean: $(MAKE) -C $(KERNELDIR) M=`pwd` clean + endif From 34287aa3605d52ef5a4b6e5e2cca98667166508b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Sep 2007 10:22:47 -0400 Subject: [PATCH 0229/2985] Btrfs: use unlocked_ioctl No reason to grab the BKL before calling into the btrfs ioctl code. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 5 +---- fs/btrfs/file.c | 4 ++-- fs/btrfs/inode.c | 23 +++++------------------ 3 files changed, 8 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6d6e94b63803..800a3499cc3f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1219,10 +1219,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_init_cachep(void); void btrfs_destroy_cachep(void); -int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, - unsigned long arg); -long btrfs_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg); +long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); int btrfs_commit_write(struct file *file, struct page *page, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c26ca54ed4de..ca7e5d4474e0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -732,10 +732,10 @@ struct file_operations btrfs_file_operations = { .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, - .ioctl = btrfs_ioctl, .fsync = btrfs_sync_file, + .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = btrfs_compat_ioctl, + .compat_ioctl = btrfs_ioctl, #endif }; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cde8ddde6098..3901a7fa0947 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1957,9 +1957,10 @@ out_unlock: return 0; } -int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int +long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; int ret = 0; @@ -2007,7 +2008,7 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int btrfs_defrag_root(root->fs_info->extent_root, 0); mutex_unlock(&root->fs_info->fs_mutex); } else if (S_ISREG(inode->i_mode)) { - btrfs_defrag_file(filp); + btrfs_defrag_file(file); } ret = 0; break; @@ -2017,20 +2018,6 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int return ret; } -#ifdef CONFIG_COMPAT -long btrfs_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct inode *inode = file->f_path.dentry->d_inode; - int ret; - lock_kernel(); - ret = btrfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); - return ret; - -} -#endif - /* * Called inside transaction, so use GFP_NOFS */ @@ -2341,9 +2328,9 @@ static struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = btrfs_readdir, - .ioctl = btrfs_ioctl, + .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = btrfs_compat_ioctl, + .compat_ioctl = btrfs_ioctl, #endif }; From d03581f4345e30db078ea8f81189e8177280bbb8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Sep 2007 10:22:57 -0400 Subject: [PATCH 0230/2985] split up btrfs_ioctl Add a helper per ioctl function to make the code more readable. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 114 ++++++++++++++++++++++++++--------------------- 1 file changed, 63 insertions(+), 51 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3901a7fa0947..e24b875327da 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1957,65 +1957,77 @@ out_unlock: return 0; } -long btrfs_ioctl(struct file *file, unsigned int - cmd, unsigned long arg) +static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_vol_args vol_args; + struct btrfs_dir_item *di; + struct btrfs_path *path; + int namelen; + u64 root_dirid; + + if (copy_from_user(&vol_args, arg, sizeof(vol_args))) + return -EFAULT; + + namelen = strlen(vol_args.name); + if (namelen > BTRFS_VOL_NAME_MAX) + return -EINVAL; + if (strchr(vol_args.name, '/')) + return -EINVAL; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, + mutex_lock(&root->fs_info->fs_mutex); + di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, + path, root_dirid, + vol_args.name, namelen, 0); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_free_path(path); + if (di && !IS_ERR(di)) + return -EEXIST; + if (IS_ERR(di)) + return PTR_ERR(di); + + if (root == root->fs_info->tree_root) + return create_subvol(root, vol_args.name, namelen); + return create_snapshot(root, vol_args.name, namelen); +} + +static int btrfs_ioctl_defrag(struct file *file) { struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_ioctl_vol_args vol_args; - int ret = 0; - struct btrfs_dir_item *di; - int namelen; - struct btrfs_path *path; - u64 root_dirid; + + switch (inode->i_mode & S_IFMT) { + case S_IFDIR: + mutex_lock(&root->fs_info->fs_mutex); + btrfs_defrag_root(root, 0); + btrfs_defrag_root(root->fs_info->extent_root, 0); + mutex_unlock(&root->fs_info->fs_mutex); + break; + case S_IFREG: + btrfs_defrag_file(file); + break; + } + + return 0; +} + +long btrfs_ioctl(struct file *file, unsigned int + cmd, unsigned long arg) +{ + struct btrfs_root *root = BTRFS_I(file->f_path.dentry->d_inode)->root; switch (cmd) { case BTRFS_IOC_SNAP_CREATE: - if (copy_from_user(&vol_args, - (struct btrfs_ioctl_vol_args __user *)arg, - sizeof(vol_args))) - return -EFAULT; - namelen = strlen(vol_args.name); - if (namelen > BTRFS_VOL_NAME_MAX) - return -EINVAL; - if (strchr(vol_args.name, '/')) - return -EINVAL; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, - mutex_lock(&root->fs_info->fs_mutex); - di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, - path, root_dirid, - vol_args.name, namelen, 0); - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_free_path(path); - if (di && !IS_ERR(di)) - return -EEXIST; - if (IS_ERR(di)) - return PTR_ERR(di); - - if (root == root->fs_info->tree_root) - ret = create_subvol(root, vol_args.name, namelen); - else - ret = create_snapshot(root, vol_args.name, namelen); - break; - + return btrfs_ioctl_snap_create(root, (void __user *)arg); case BTRFS_IOC_DEFRAG: - if (S_ISDIR(inode->i_mode)) { - mutex_lock(&root->fs_info->fs_mutex); - btrfs_defrag_root(root, 0); - btrfs_defrag_root(root->fs_info->extent_root, 0); - mutex_unlock(&root->fs_info->fs_mutex); - } else if (S_ISREG(inode->i_mode)) { - btrfs_defrag_file(file); - } - ret = 0; - break; - default: - return -ENOTTY; + return btrfs_ioctl_defrag(file); } - return ret; + + return -ENOTTY; } /* From 6af858b24c0c4e91ce0b40b5dd5fdfc1fe2dda70 Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 14 Sep 2007 10:23:29 -0400 Subject: [PATCH 0231/2985] Btrfs: Fix off by one error in dirty_and_release_pages Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ca7e5d4474e0..26826a543355 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -236,7 +236,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, end_pos - start_pos, p, 0); BUG_ON(err); em->start = start_pos; - em->end = end_pos; + em->end = end_pos - 1; em->block_start = EXTENT_MAP_INLINE; em->block_end = EXTENT_MAP_INLINE; add_extent_mapping(em_tree, em); From 7d7d6068bee9d8004ea7cc8a2ebda2c99a57caec Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 14 Sep 2007 16:15:28 -0400 Subject: [PATCH 0232/2985] Btrfs: Fix cache_block_group to catch holes at the start of the group Cache block group was overly complex and missed free blocks at the very start of the group. This patch simplifies things significantly. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 65 +++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b074ad1416dd..9151850266b0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -39,6 +39,7 @@ static int cache_block_group(struct btrfs_root *root, u64 i; u64 last = 0; u64 hole_size; + u64 first_free; int found = 0; root = root->fs_info->extent_root; @@ -51,16 +52,22 @@ static int cache_block_group(struct btrfs_root *root, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 2; + first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; key.flags = 0; key.offset = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) return ret; + if (ret && path->slots[0] > 0) path->slots[0]--; + while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; @@ -71,50 +78,48 @@ static int cache_block_group(struct btrfs_root *root, if (ret == 0) { continue; } else { - if (found) { - hole_size = block_group->key.objectid + - block_group->key.offset - last; - } else { - last = block_group->key.objectid; - hole_size = block_group->key.offset; - } - for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, - last + i); - } break; } } + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (key.objectid < block_group->key.objectid) { + if (key.objectid + key.offset > first_free) + first_free = key.objectid + key.offset; + goto next; + } + if (key.objectid >= block_group->key.objectid + block_group->key.offset) { - if (found) { - hole_size = block_group->key.objectid + - block_group->key.offset - last; - } else { - last = block_group->key.objectid; - hole_size = block_group->key.offset; + break; + } + + if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { + if (!found) { + last = first_free; + found = 1; } + hole_size = key.objectid - last; for (i = 0; i < hole_size; i++) { set_radix_bit(extent_radix, last + i); } - break; - } - if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { - if (!found) { - last = key.objectid + key.offset; - found = 1; - } else { - hole_size = key.objectid - last; - for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, last + i); - } - last = key.objectid + key.offset; - } + last = key.objectid + key.offset; } +next: path->slots[0]++; } + if (!found) + last = first_free; + if (block_group->key.objectid + + block_group->key.offset > last) { + hole_size = block_group->key.objectid + + block_group->key.offset - last; + for (i = 0; i < hole_size; i++) { + set_radix_bit(extent_radix, + last + i); + } + } block_group->cached = 1; err: btrfs_free_path(path); From d3c2fdcf7b79079f60ac64e61d886964d4647910 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Sep 2007 10:58:06 -0400 Subject: [PATCH 0233/2985] Btrfs: Use balance_dirty_pages_nr on btree blocks btrfs_btree_balance_dirty is changed to pass the number of pages dirtied for more accurate dirty throttling. This lets the VM make better decisions about when to force some writeback. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 +++-- fs/btrfs/disk-io.h | 2 +- fs/btrfs/extent-tree.c | 1 + fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 47 +++++++++++++++++++++++++++++++----------- fs/btrfs/transaction.c | 14 +++++++++---- 6 files changed, 51 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4296839eea1a..ef6d76f2ec20 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -666,7 +666,8 @@ void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) brelse(buf); } -void btrfs_btree_balance_dirty(struct btrfs_root *root) +void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { - balance_dirty_pages_ratelimited(root->fs_info->btree_inode->i_mapping); + balance_dirty_pages_ratelimited_nr( + root->fs_info->btree_inode->i_mapping, nr); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 714fa877438b..fc6b520c6e0b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -80,7 +80,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, u64 num_blocks); int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical); -void btrfs_btree_balance_dirty(struct btrfs_root *root); +void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct buffer_head *bh); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9151850266b0..fe772f9b06cf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1275,6 +1275,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_buffer_checked(buf); set_buffer_defrag(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); + trans->blocks_used++; return buf; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 26826a543355..698eaea612f1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -655,7 +655,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, num_written += write_bytes; balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, 1); cond_resched(); } mutex_unlock(&inode->i_mutex); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e24b875327da..bd00df33fb3f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -387,15 +387,17 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) struct btrfs_root *root; struct btrfs_trans_handle *trans; int ret; + unsigned long nr; root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); ret = btrfs_unlink_trans(trans, root, dir, dentry); + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return ret; } @@ -412,6 +414,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) int found_type; struct btrfs_leaf *leaf; char *goodnames = ".."; + unsigned long nr; path = btrfs_alloc_path(); BUG_ON(!path); @@ -467,8 +470,9 @@ out: btrfs_release_path(root, path); btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); if (ret && !err) err = ret; return err; @@ -764,6 +768,7 @@ void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; + unsigned long nr; int ret; truncate_inode_pages(&inode->i_data, 0); @@ -780,15 +785,17 @@ void btrfs_delete_inode(struct inode *inode) ret = btrfs_free_inode(trans, root, inode); if (ret) goto no_delete_lock; + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return; no_delete_lock: + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); no_delete: clear_inode(inode); } @@ -1165,6 +1172,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, int err; int drop_inode = 0; u64 objectid; + unsigned long nr; if (!new_valid_dev(rdev)) return -EINVAL; @@ -1198,6 +1206,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); out_unlock: + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -1205,7 +1214,7 @@ out_unlock: inode_dec_link_count(inode); iput(inode); } - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return err; } @@ -1217,6 +1226,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, struct inode *inode; int err; int drop_inode = 0; + unsigned long nr; u64 objectid; mutex_lock(&root->fs_info->fs_mutex); @@ -1251,6 +1261,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); out_unlock: + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -1258,7 +1269,7 @@ out_unlock: inode_dec_link_count(inode); iput(inode); } - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return err; } @@ -1268,6 +1279,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = old_dentry->d_inode; + unsigned long nr; int err; int drop_inode = 0; @@ -1288,6 +1300,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (err) drop_inode = 1; + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -1295,7 +1308,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, inode_dec_link_count(inode); iput(inode); } - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return err; } @@ -1336,6 +1349,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) int err = 0; int drop_on_err = 0; u64 objectid; + unsigned long nr = 1; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -1380,12 +1394,13 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) btrfs_update_inode_block_group(trans, dir); out_fail: + nr = trans->blocks_used; btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); if (drop_on_err) iput(inode); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return err; } @@ -1682,6 +1697,7 @@ static void btrfs_truncate(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; int ret; struct btrfs_trans_handle *trans; + unsigned long nr; if (!S_ISREG(inode->i_mode)) return; @@ -1697,10 +1713,11 @@ static void btrfs_truncate(struct inode *inode) /* FIXME, add redo link to tree so we don't leak on crash */ ret = btrfs_truncate_in_trans(trans, root, inode); btrfs_update_inode(trans, root, inode); + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); } int btrfs_commit_write(struct file *file, struct page *page, @@ -1725,6 +1742,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) int err; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; + unsigned long nr = 1; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -1814,12 +1832,13 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail; fail: + nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); if (err && !ret) ret = err; fail_commit: mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return ret; } @@ -1832,6 +1851,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) int ret; int err; u64 objectid; + unsigned long nr; if (!root->ref_cows) return -EINVAL; @@ -1884,12 +1904,13 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail; fail: + nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); if (err && !ret) ret = err; mutex_unlock(&root->fs_info->fs_mutex); up_write(&root->snap_sem); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return ret; } @@ -2240,6 +2261,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, int datasize; char *ptr; struct btrfs_file_extent_item *ei; + unsigned long nr; name_len = strlen(symname) + 1; if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) @@ -2309,13 +2331,14 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, drop_inode = 1; out_unlock: + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { inode_dec_link_count(inode); iput(inode); } - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, nr); return err; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 29755593de65..087074db0bd5 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -18,6 +18,7 @@ #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -340,6 +341,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) struct btrfs_fs_info *info = root->fs_info; int ret; struct btrfs_trans_handle *trans; + unsigned long nr; if (root->defrag_running) return 0; @@ -348,10 +350,11 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) while (1) { root->defrag_running = 1; ret = btrfs_defrag_leaves(trans, root, cacheonly); + nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&info->fs_mutex); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(info->tree_root, nr); cond_resched(); mutex_lock(&info->fs_mutex); @@ -398,6 +401,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, { struct dirty_root *dirty; struct btrfs_trans_handle *trans; + unsigned long nr; u64 num_blocks; u64 blocks_used; int ret = 0; @@ -426,11 +430,11 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, &dirty->root->root_item); if (err) ret = err; + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); mutex_unlock(&tree_root->fs_info->fs_mutex); - - btrfs_btree_balance_dirty(tree_root); + btrfs_btree_balance_dirty(tree_root, nr); schedule(); mutex_lock(&tree_root->fs_info->fs_mutex); @@ -449,13 +453,15 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, BUG(); break; } + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); kfree(dirty->root); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(tree_root); + + btrfs_btree_balance_dirty(tree_root, nr); schedule(); } return ret; From cf67582bb2ec364d71b3e4f8e49de98d8c3c832e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Sep 2007 11:00:51 -0400 Subject: [PATCH 0234/2985] Btrfs: Fix duplicate ENOSPC checks in find_free_extent find_free_extent would fail to wrap around to the start of the drive because it was doing the enospc case checking twice in some cases, causing it to return -ENOSPC early. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe772f9b06cf..f261a8326cdf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1111,20 +1111,9 @@ check_pending: btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); - if (ins->objectid + num_blocks >= search_end) { - if (full_scan) { - ret = -ENOSPC; - goto error; - } - search_start = orig_search_start; - if (wrapped) { - if (!full_scan) - total_needed -= empty_size; - full_scan = 1; - } else - wrapped = 1; - goto new_group; - } + if (ins->objectid + num_blocks >= search_end) + goto enospc; + for (test_block = ins->objectid; test_block < ins->objectid + num_blocks; test_block++) { if (test_radix_bit(&info->pinned_radix, test_block) || @@ -1149,6 +1138,7 @@ check_pending: new_group: if (search_start + num_blocks >= search_end) { +enospc: search_start = orig_search_start; if (full_scan) { ret = -ENOSPC; From a61721d5b771d1735415a6b4c75409ab1fc9c845 Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 17 Sep 2007 11:08:38 -0400 Subject: [PATCH 0235/2985] fix found_type decrement in btrfs_truncate_in_trans found_type has already been decreased by codes above the change, I think decrease it by one again doesn't make sense. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bd00df33fb3f..211ca32c65c5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -581,7 +581,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, } else { break; } - btrfs_set_key_type(&key, found_type - 1); + btrfs_set_key_type(&key, found_type); continue; } if (btrfs_disk_key_offset(found_key) >= inode->i_size) From 23223584e44cb1b0b69ba3a9ad74956f292a416b Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 17 Sep 2007 11:08:52 -0400 Subject: [PATCH 0236/2985] create btrfs_path slab with the correct size Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 211ca32c65c5..192fdda0d56e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2127,7 +2127,7 @@ int btrfs_init_cachep(void) if (!btrfs_transaction_cachep) goto fail; btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache", - sizeof(struct btrfs_transaction), + sizeof(struct btrfs_path), 0, NULL); if (!btrfs_path_cachep) goto fail; From bab9fb035faa5119991b64cfee1b14c1d6f6a31b Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 17 Sep 2007 11:13:11 -0400 Subject: [PATCH 0237/2985] Btrfs: truncate: don't update inode->i_blocks when extent is a hole I think check whether extent is a hole before update 'inode->i_blocks' is unconditional required. (original codes check it only when del_item isn't equal to 0) Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 192fdda0d56e..43bf4337859c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -595,6 +595,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_file_extent_type(fi) != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; + extent_start = btrfs_file_extent_disk_blocknr(fi); if (!del_item) { u64 orig_num_blocks = btrfs_file_extent_num_blocks(fi); @@ -604,12 +605,13 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, extent_num_blocks >>= inode->i_blkbits; btrfs_set_file_extent_num_blocks(fi, extent_num_blocks); - inode->i_blocks -= (orig_num_blocks - - extent_num_blocks) << 3; + num_dec = (orig_num_blocks - + extent_num_blocks) << 3; + if (extent_start != 0) { + inode->i_blocks -= num_dec; + } btrfs_mark_buffer_dirty(path->nodes[0]); } else { - extent_start = - btrfs_file_extent_disk_blocknr(fi); extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi); /* FIXME blocksize != 4096 */ From 8e1cd766643f040fd88b9c4221353a3f6b77db1e Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 17 Sep 2007 11:13:12 -0400 Subject: [PATCH 0238/2985] Btrfs: Fix double free and off by one in inode.c The first change removes potential double free, the second fix a off by one error. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 43bf4337859c..88b3ef20be69 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -206,7 +206,6 @@ void btrfs_read_locked_inode(struct inode *inode) memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); if (ret) { - btrfs_free_path(path); goto make_bad; } inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), @@ -1536,7 +1535,7 @@ again: flush_dcache_page(result->b_page); kunmap(page); set_extent_uptodate(em_tree, extent_start, - extent_end, GFP_NOFS); + extent_end - 1, GFP_NOFS); goto insert; } else { printk("unkknown found_type %d\n", found_type); From b3cfa35a493866cffd00952438b6c77364d42d3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Sep 2007 11:25:58 -0400 Subject: [PATCH 0239/2985] Btrfs: factor page private preparations into a helper Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 40 +++++++++++++++------------------------- fs/btrfs/extent_map.h | 1 + fs/btrfs/file.c | 7 +------ fs/btrfs/inode.c | 7 +------ 4 files changed, 18 insertions(+), 37 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index ab5bde31b687..0ab368e091f9 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1379,6 +1379,16 @@ static int submit_extent_page(int rw, struct extent_map_tree *tree, return ret; } +void set_page_extent_mapped(struct page *page) +{ + if (!PagePrivate(page)) { + SetPagePrivate(page); + WARN_ON(!page->mapping->a_ops->invalidatepage); + set_page_private(page, 1); + page_cache_get(page); + } +} + /* * basic readpage implementation. Locked extent state structs are inserted * into the tree that are removed when the IO is done (by the end_io @@ -1405,12 +1415,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, size_t iosize; size_t blocksize = inode->i_sb->s_blocksize; - if (!PagePrivate(page)) { - SetPagePrivate(page); - WARN_ON(!page->mapping->a_ops->invalidatepage); - set_page_private(page, 1); - page_cache_get(page); - } + set_page_extent_mapped(page); end = page_end; lock_extent(tree, start, end, GFP_NOFS); @@ -1531,12 +1536,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, PAGE_CACHE_SIZE - offset, KM_USER0); } - if (!PagePrivate(page)) { - SetPagePrivate(page); - set_page_private(page, 1); - WARN_ON(!page->mapping->a_ops->invalidatepage); - page_cache_get(page); - } + set_page_extent_mapped(page); lock_extent(tree, start, page_end, GFP_NOFS); nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1, @@ -1672,13 +1672,7 @@ int extent_commit_write(struct extent_map_tree *tree, { loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - if (!PagePrivate(page)) { - SetPagePrivate(page); - set_page_private(page, 1); - WARN_ON(!page->mapping->a_ops->invalidatepage); - page_cache_get(page); - } - + set_page_extent_mapped(page); set_page_dirty(page); if (pos > inode->i_size) { @@ -1709,12 +1703,8 @@ int extent_prepare_write(struct extent_map_tree *tree, int ret = 0; int isnew; - if (!PagePrivate(page)) { - SetPagePrivate(page); - set_page_private(page, 1); - WARN_ON(!page->mapping->a_ops->invalidatepage); - page_cache_get(page); - } + set_page_extent_mapped(page); + block_start = (page_start + from) & ~((u64)blocksize - 1); block_end = (page_start + to - 1) | (blocksize - 1); orig_block_start = block_start; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 40b53ee274fb..d913ce01248d 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -105,4 +105,5 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); +void set_page_extent_mapped(struct page *page); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 698eaea612f1..4cc459c943ec 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -543,12 +543,7 @@ static int prepare_pages(struct btrfs_root *root, } cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); wait_on_page_writeback(pages[i]); - if (!PagePrivate(pages[i])) { - SetPagePrivate(pages[i]); - set_page_private(pages[i], 1); - WARN_ON(!pages[i]->mapping->a_ops->invalidatepage); - page_cache_get(pages[i]); - } + set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); } return 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 88b3ef20be69..0df325f89d62 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -652,12 +652,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, u64 page_start = page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - if (!PagePrivate(page)) { - SetPagePrivate(page); - set_page_private(page, 1); - WARN_ON(!page->mapping->a_ops->invalidatepage); - page_cache_get(page); - } + set_page_extent_mapped(page); lock_extent(em_tree, page_start, page_end, GFP_NOFS); set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start, From 50b78c24d594e6985f36a9a8d8e2fb74e1664e90 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 20 Sep 2007 14:14:42 -0400 Subject: [PATCH 0240/2985] btrfs_get_extent should treat inline extents as though they hold a whole block Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0df325f89d62..b03d40a907ca 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1505,13 +1505,13 @@ again: size = btrfs_file_extent_inline_len(leaf->items + path->slots[0]); - extent_end = extent_start + size; + extent_end = extent_start | ((u64)root->blocksize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { if (end < extent_start) goto not_found; - em->end = extent_end - 1; + em->end = extent_end; } else { em->end = end; } @@ -1520,17 +1520,19 @@ again: em->block_start = EXTENT_MAP_INLINE; em->block_end = EXTENT_MAP_INLINE; em->start = extent_start; - em->end = extent_end - 1; + em->end = extent_end; if (!page) { goto insert; } ptr = btrfs_file_extent_inline_start(item); map = kmap(page); memcpy(map + page_offset, ptr, size); - flush_dcache_page(result->b_page); + memset(map + page_offset + size, 0, + root->blocksize - (page_offset + size)); + flush_dcache_page(page); kunmap(page); set_extent_uptodate(em_tree, extent_start, - extent_end - 1, GFP_NOFS); + extent_end, GFP_NOFS); goto insert; } else { printk("unkknown found_type %d\n", found_type); From 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:19 -0400 Subject: [PATCH 0241/2985] Btrfs: Create extent_buffer interface for large blocksizes Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 + fs/btrfs/ctree.c | 1500 ++++++++++++++++++++++------------------ fs/btrfs/ctree.h | 871 +++++++++-------------- fs/btrfs/dir-item.c | 102 +-- fs/btrfs/disk-io.c | 395 +++++------ fs/btrfs/disk-io.h | 55 +- fs/btrfs/extent-tree.c | 294 ++++---- fs/btrfs/extent_map.c | 672 +++++++++++++++++- fs/btrfs/extent_map.h | 75 +- fs/btrfs/file-item.c | 63 +- fs/btrfs/file.c | 127 ++-- fs/btrfs/inode-item.c | 22 +- fs/btrfs/inode-map.c | 17 +- fs/btrfs/inode.c | 435 +++++++----- fs/btrfs/print-tree.c | 96 ++- fs/btrfs/print-tree.h | 4 +- fs/btrfs/root-tree.c | 51 +- fs/btrfs/super.c | 7 +- fs/btrfs/sysfs.c | 10 +- fs/btrfs/transaction.c | 61 +- fs/btrfs/transaction.h | 5 +- fs/btrfs/tree-defrag.c | 58 +- 22 files changed, 2814 insertions(+), 2108 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 6341cf7a9f91..a3b51085d7f0 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -23,4 +23,6 @@ modules_install: clean: $(MAKE) -C $(KERNELDIR) M=`pwd` clean +tester: + $(MAKE) -C $(KERNELDIR) M=`pwd` tree-defrag.o transaction.o sysfs.o super.o root-tree.o inode-map.o inode-item.o inode.o file-item.o file.o extent_map.o disk-io.o ctree.o dir-item.o extent-tree.o endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b41f48ade419..f60920e8a0e0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,21 +16,24 @@ * Boston, MA 021110-1307, USA. */ +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "print-tree.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, struct btrfs_path *path, int data_size); -static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *dst, struct buffer_head - *src); -static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct buffer_head *dst_buf, - struct buffer_head *src_buf); +static int push_node_left(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *dst, + struct extent_buffer *src); +static int balance_node_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *dst_buf, + struct extent_buffer *src_buf); static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); @@ -62,40 +65,38 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) break; - btrfs_block_release(root, p->nodes[i]); + free_extent_buffer(p->nodes[i]); } memset(p, 0, sizeof(*p)); } -static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret, u64 search_start, u64 empty_size) +static int __btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret, + u64 search_start, u64 empty_size) { - struct buffer_head *cow; - struct btrfs_node *cow_node; + struct extent_buffer *cow; int ret = 0; int different_trans = 0; WARN_ON(root->ref_cows && trans->transid != root->last_trans); - WARN_ON(!buffer_uptodate(buf)); + cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); - cow_node = btrfs_buffer_node(cow); - if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) + if (buf->len != root->sectorsize || cow->len != root->sectorsize) WARN_ON(1); - memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); - btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); - btrfs_set_header_generation(&cow_node->header, trans->transid); - btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); + copy_extent_buffer(cow, buf, 0, 0, cow->len); + btrfs_set_header_blocknr(cow, extent_buffer_blocknr(cow)); + btrfs_set_header_generation(cow, trans->transid); + btrfs_set_header_owner(cow, root->root_key.objectid); - WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) > - trans->transid); - if (btrfs_header_generation(btrfs_buffer_header(buf)) != - trans->transid) { + WARN_ON(btrfs_header_generation(buf) > trans->transid); + if (btrfs_header_generation(buf) != trans->transid) { different_trans = 1; ret = btrfs_inc_ref(trans, root, buf); if (ret) @@ -106,29 +107,29 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root if (buf == root->node) { root->node = cow; - get_bh(cow); + extent_buffer_get(cow); if (buf != root->commit_root) { - btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); + btrfs_free_extent(trans, root, + extent_buffer_blocknr(buf), 1, 1); } - btrfs_block_release(root, buf); + free_extent_buffer(buf); } else { - btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, - bh_blocknr(cow)); + btrfs_set_node_blockptr(parent, parent_slot, + extent_buffer_blocknr(cow)); btrfs_mark_buffer_dirty(parent); - WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) != - trans->transid); - btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); + WARN_ON(btrfs_header_generation(parent) != trans->transid); + btrfs_free_extent(trans, root, extent_buffer_blocknr(buf),1,1); } - btrfs_block_release(root, buf); + free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); *cow_ret = cow; return 0; } -int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret) +int btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret) { u64 search_start; if (trans->transaction != root->fs_info->running_transaction) { @@ -141,13 +142,12 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root root->fs_info->generation); WARN_ON(1); } - if (btrfs_header_generation(btrfs_buffer_header(buf)) == - trans->transid) { + if (btrfs_header_generation(buf) == trans->transid) { *cow_ret = buf; return 0; } - search_start = bh_blocknr(buf) & ~((u64)65535); + search_start = extent_buffer_blocknr(buf) & ~((u64)65535); return __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); } @@ -161,9 +161,11 @@ static int close_blocks(u64 blocknr, u64 other) return 0; } -static int should_defrag_leaf(struct buffer_head *bh) +#if 0 +static int should_defrag_leaf(struct extent_buffer *eb) { - struct btrfs_leaf *leaf = btrfs_buffer_leaf(bh); + return 0; + struct btrfs_leaf *leaf = btrfs_buffer_leaf(eb); struct btrfs_disk_key *key; u32 nritems; @@ -188,14 +190,17 @@ static int should_defrag_leaf(struct buffer_head *bh) } return 0; } +#endif int btrfs_realloc_node(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *parent, + struct btrfs_root *root, struct extent_buffer *parent, int cache_only, u64 *last_ret) { + return 0; +#if 0 struct btrfs_node *parent_node; - struct buffer_head *cur_bh; - struct buffer_head *tmp_bh; + struct extent_buffer *cur_eb; + struct extent_buffer *tmp_eb; u64 blocknr; u64 search_start = *last_ret; u64 last_block = 0; @@ -281,6 +286,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, brelse(tmp_bh); } return err; +#endif } /* @@ -289,12 +295,12 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, * which is the stop of the leaf data stack */ static inline unsigned int leaf_data_end(struct btrfs_root *root, - struct btrfs_leaf *leaf) + struct extent_buffer *leaf) { - u32 nr = btrfs_header_nritems(&leaf->header); + u32 nr = btrfs_header_nritems(leaf); if (nr == 0) return BTRFS_LEAF_DATA_SIZE(root); - return btrfs_item_offset(leaf->items + nr - 1); + return btrfs_item_offset_nr(leaf, nr - 1); } /* @@ -310,9 +316,9 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 1; if (k1.objectid < k2->objectid) return -1; - if (k1.flags > k2->flags) + if (k1.type > k2->type) return 1; - if (k1.flags < k2->flags) + if (k1.type < k2->type) return -1; if (k1.offset > k2->offset) return 1; @@ -324,37 +330,39 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) static int check_node(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_node *parent = NULL; - struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); + struct extent_buffer *parent = NULL; + struct extent_buffer *node = path->nodes[level]; + struct btrfs_disk_key parent_key; + struct btrfs_disk_key node_key; int parent_slot; int slot; struct btrfs_key cpukey; - u32 nritems = btrfs_header_nritems(&node->header); + u32 nritems = btrfs_header_nritems(node); if (path->nodes[level + 1]) - parent = btrfs_buffer_node(path->nodes[level + 1]); + parent = path->nodes[level + 1]; slot = path->slots[level]; - BUG_ON(!buffer_uptodate(path->nodes[level])); BUG_ON(nritems == 0); if (parent) { - struct btrfs_disk_key *parent_key; - parent_slot = path->slots[level + 1]; - parent_key = &parent->ptrs[parent_slot].key; - BUG_ON(memcmp(parent_key, &node->ptrs[0].key, + btrfs_node_key(parent, &parent_key, parent_slot); + btrfs_node_key(node, &node_key, 0); + BUG_ON(memcmp(&parent_key, &node_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(&node->header)); + btrfs_header_blocknr(node)); } BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != 0) { - btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot - 1].key); - BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) <= 0); + btrfs_node_key_to_cpu(node, &cpukey, slot - 1); + btrfs_node_key(node, &node_key, slot); + BUG_ON(comp_keys(&node_key, &cpukey) <= 0); } if (slot < nritems - 1) { - btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot + 1].key); - BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) >= 0); + btrfs_node_key_to_cpu(node, &cpukey, slot + 1); + btrfs_node_key(node, &node_key, slot); + BUG_ON(comp_keys(&node_key, &cpukey) >= 0); } return 0; } @@ -362,83 +370,172 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]); - struct btrfs_node *parent = NULL; + struct extent_buffer *leaf = path->nodes[level]; + struct extent_buffer *parent = NULL; int parent_slot; - int slot = path->slots[0]; struct btrfs_key cpukey; + struct btrfs_disk_key parent_key; + struct btrfs_disk_key leaf_key; + int slot = path->slots[0]; - u32 nritems = btrfs_header_nritems(&leaf->header); + u32 nritems = btrfs_header_nritems(leaf); if (path->nodes[level + 1]) - parent = btrfs_buffer_node(path->nodes[level + 1]); - - BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); + parent = path->nodes[level + 1]; if (nritems == 0) return 0; if (parent) { - struct btrfs_disk_key *parent_key; - parent_slot = path->slots[level + 1]; - parent_key = &parent->ptrs[parent_slot].key; + btrfs_node_key(parent, &parent_key, parent_slot); + btrfs_item_key(leaf, &leaf_key, 0); - BUG_ON(memcmp(parent_key, &leaf->items[0].key, + BUG_ON(memcmp(&parent_key, &leaf_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(&leaf->header)); + btrfs_header_blocknr(leaf)); } - if (slot != 0) { - btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot - 1].key); - BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) <= 0); - BUG_ON(btrfs_item_offset(leaf->items + slot - 1) != - btrfs_item_end(leaf->items + slot)); +#if 0 + for (i = 0; nritems > 1 && i < nritems - 2; i++) { + btrfs_item_key_to_cpu(leaf, &cpukey, i + 1); + btrfs_item_key(leaf, &leaf_key, i); + if (comp_keys(&leaf_key, &cpukey) >= 0) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad key\n", i); + BUG_ON(1); + } + if (btrfs_item_offset_nr(leaf, i) != + btrfs_item_end_nr(leaf, i + 1)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", i); + BUG_ON(1); + } + if (i == 0) { + if (btrfs_item_offset_nr(leaf, i) + + btrfs_item_size_nr(leaf, i) != + BTRFS_LEAF_DATA_SIZE(root)) { + btrfs_print_leaf(root, leaf); + printk("slot %d first offset bad\n", i); + BUG_ON(1); + } + } + } + if (nritems > 0) { + if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) { + btrfs_print_leaf(root, leaf); + printk("slot %d bad size \n", nritems - 1); + BUG_ON(1); + } + } +#endif + if (slot != 0 && slot < nritems - 1) { + btrfs_item_key(leaf, &leaf_key, slot); + btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); + if (comp_keys(&leaf_key, &cpukey) <= 0) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad key\n", slot); + BUG_ON(1); + } + if (btrfs_item_offset_nr(leaf, slot - 1) != + btrfs_item_end_nr(leaf, slot)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", slot); + BUG_ON(1); + } } if (slot < nritems - 1) { - btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot + 1].key); - BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) >= 0); - BUG_ON(btrfs_item_offset(leaf->items + slot) != - btrfs_item_end(leaf->items + slot + 1)); + btrfs_item_key(leaf, &leaf_key, slot); + btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1); + BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0); + if (btrfs_item_offset_nr(leaf, slot) != + btrfs_item_end_nr(leaf, slot + 1)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", slot); + BUG_ON(1); + } } - BUG_ON(btrfs_item_offset(leaf->items) + - btrfs_item_size(leaf->items) != BTRFS_LEAF_DATA_SIZE(root)); + BUG_ON(btrfs_item_offset_nr(leaf, 0) + + btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); return 0; } static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); - if (memcmp(node->header.fsid, root->fs_info->disk_super->fsid, - sizeof(node->header.fsid))) - BUG(); + struct extent_buffer *buf = path->nodes[level]; + char fsid[BTRFS_FSID_SIZE]; + + read_extent_buffer(buf, fsid, (unsigned long)btrfs_header_fsid(buf), + BTRFS_FSID_SIZE); + + if (memcmp(fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) { + int i = 0; + printk("warning bad block %Lu\n", buf->start); + if (!btrfs_buffer_uptodate(buf)) { + WARN_ON(1); + } + for (i = 0; i < BTRFS_FSID_SIZE; i++) { + printk("%x:%x ", root->fs_info->fsid[i], fsid[i]); + } + printk("\n"); + // BUG(); + } if (level == 0) return check_leaf(root, path, level); return check_node(root, path, level); } /* - * search for key in the array p. items p are item_size apart - * and there are 'max' items in p + * search for key in the extent_buffer. The items start at offset p, + * and they are item_size apart. There are 'max' items in p. + * * the slot in the array is returned via slot, and it points to * the place where you would insert key if it is not found in * the array. * * slot may point to max if the key is bigger than all of the keys */ -static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, - int max, int *slot) +static int generic_bin_search(struct extent_buffer *eb, unsigned long p, + int item_size, struct btrfs_key *key, + int max, int *slot) { int low = 0; int high = max; int mid; int ret; struct btrfs_disk_key *tmp; + struct btrfs_disk_key unaligned; + unsigned long offset; + char *map_token = NULL; + char *kaddr = NULL; + unsigned long map_start = 0; + unsigned long map_len = 0; while(low < high) { mid = (low + high) / 2; - tmp = (struct btrfs_disk_key *)(p + mid * item_size); + offset = p + mid * item_size; + + if (!map_token || offset < map_start || + (offset + sizeof(struct btrfs_disk_key)) > + map_start + map_len) { + if (map_token) + unmap_extent_buffer(eb, map_token, KM_USER0); + map_extent_buffer(eb, offset, &map_token, &kaddr, + &map_start, &map_len, KM_USER0); + + } + if (offset + sizeof(struct btrfs_disk_key) > + map_start + map_len) { + unmap_extent_buffer(eb, map_token, KM_USER0); + read_extent_buffer(eb, &unaligned, + offset, sizeof(unaligned)); + map_token = NULL; + tmp = &unaligned; + } else { + tmp = (struct btrfs_disk_key *)(kaddr + offset - + map_start); + } ret = comp_keys(tmp, key); if (ret < 0) @@ -447,10 +544,13 @@ static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, high = mid; else { *slot = mid; + unmap_extent_buffer(eb, map_token, KM_USER0); return 0; } } *slot = low; + if (map_token) + unmap_extent_buffer(eb, map_token, KM_USER0); return 1; } @@ -458,46 +558,42 @@ static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) +static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, + int level, int *slot) { - if (btrfs_is_leaf(c)) { - struct btrfs_leaf *l = (struct btrfs_leaf *)c; - return generic_bin_search((void *)l->items, + if (level == 0) { + return generic_bin_search(eb, + offsetof(struct btrfs_leaf, items), sizeof(struct btrfs_item), - key, btrfs_header_nritems(&c->header), + key, btrfs_header_nritems(eb), slot); } else { - return generic_bin_search((void *)c->ptrs, + return generic_bin_search(eb, + offsetof(struct btrfs_node, ptrs), sizeof(struct btrfs_key_ptr), - key, btrfs_header_nritems(&c->header), + key, btrfs_header_nritems(eb), slot); } return -1; } -static struct buffer_head *read_node_slot(struct btrfs_root *root, - struct buffer_head *parent_buf, - int slot) +static struct extent_buffer *read_node_slot(struct btrfs_root *root, + struct extent_buffer *parent, int slot) { - struct btrfs_node *node = btrfs_buffer_node(parent_buf); if (slot < 0) return NULL; - if (slot >= btrfs_header_nritems(&node->header)) + if (slot >= btrfs_header_nritems(parent)) return NULL; - return read_tree_block(root, btrfs_node_blockptr(node, slot)); + return read_tree_block(root, btrfs_node_blockptr(parent, slot)); } static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct buffer_head *right_buf; - struct buffer_head *mid_buf; - struct buffer_head *left_buf; - struct buffer_head *parent_buf = NULL; - struct btrfs_node *right = NULL; - struct btrfs_node *mid; - struct btrfs_node *left = NULL; - struct btrfs_node *parent = NULL; + struct extent_buffer *right = NULL; + struct extent_buffer *mid; + struct extent_buffer *left = NULL; + struct extent_buffer *parent = NULL; int ret = 0; int wret; int pslot; @@ -508,60 +604,57 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (level == 0) return 0; - mid_buf = path->nodes[level]; - mid = btrfs_buffer_node(mid_buf); + mid = path->nodes[level]; orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) - parent_buf = path->nodes[level + 1]; + parent = path->nodes[level + 1]; pslot = path->slots[level + 1]; /* * deal with the case where there is only one pointer in the root * by promoting the node below to a root */ - if (!parent_buf) { - struct buffer_head *child; - u64 blocknr = bh_blocknr(mid_buf); + if (!parent) { + struct extent_buffer *child; + u64 blocknr = extent_buffer_blocknr(mid); - if (btrfs_header_nritems(&mid->header) != 1) + if (btrfs_header_nritems(mid) != 1) return 0; /* promote the child to a root */ - child = read_node_slot(root, mid_buf, 0); + child = read_node_slot(root, mid, 0); BUG_ON(!child); root->node = child; path->nodes[level] = NULL; - clean_tree_block(trans, root, mid_buf); - wait_on_buffer(mid_buf); + clean_tree_block(trans, root, mid); + wait_on_tree_block_writeback(root, mid); /* once for the path */ - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); /* once for the root ptr */ - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); return btrfs_free_extent(trans, root, blocknr, 1, 1); } - parent = btrfs_buffer_node(parent_buf); - - if (btrfs_header_nritems(&mid->header) > + if (btrfs_header_nritems(mid) > BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; - if (btrfs_header_nritems(&mid->header) < 2) + if (btrfs_header_nritems(mid) < 2) err_on_enospc = 1; - left_buf = read_node_slot(root, parent_buf, pslot - 1); - if (left_buf) { - wret = btrfs_cow_block(trans, root, left_buf, - parent_buf, pslot - 1, &left_buf); + left = read_node_slot(root, parent, pslot - 1); + if (left) { + wret = btrfs_cow_block(trans, root, left, + parent, pslot - 1, &left); if (wret) { ret = wret; goto enospc; } } - right_buf = read_node_slot(root, parent_buf, pslot + 1); - if (right_buf) { - wret = btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, &right_buf); + right = read_node_slot(root, parent, pslot + 1); + if (right) { + wret = btrfs_cow_block(trans, root, right, + parent, pslot + 1, &right); if (wret) { ret = wret; goto enospc; @@ -569,30 +662,27 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } /* first, try to make some room in the middle buffer */ - if (left_buf) { - left = btrfs_buffer_node(left_buf); - orig_slot += btrfs_header_nritems(&left->header); - wret = push_node_left(trans, root, left_buf, mid_buf); + if (left) { + orig_slot += btrfs_header_nritems(left); + wret = push_node_left(trans, root, left, mid); if (wret < 0) ret = wret; - if (btrfs_header_nritems(&mid->header) < 2) + if (btrfs_header_nritems(mid) < 2) err_on_enospc = 1; } /* * then try to empty the right most buffer into the middle */ - if (right_buf) { - right = btrfs_buffer_node(right_buf); - wret = push_node_left(trans, root, mid_buf, right_buf); + if (right) { + wret = push_node_left(trans, root, mid, right); if (wret < 0 && wret != -ENOSPC) ret = wret; - if (btrfs_header_nritems(&right->header) == 0) { - u64 blocknr = bh_blocknr(right_buf); - clean_tree_block(trans, root, right_buf); - wait_on_buffer(right_buf); - btrfs_block_release(root, right_buf); - right_buf = NULL; + if (btrfs_header_nritems(right) == 0) { + u64 blocknr = extent_buffer_blocknr(right); + clean_tree_block(trans, root, right); + wait_on_tree_block_writeback(root, right); + free_extent_buffer(right); right = NULL; wret = del_ptr(trans, root, path, level + 1, pslot + 1); @@ -602,14 +692,13 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; } else { - btrfs_memcpy(root, parent, - &parent->ptrs[pslot + 1].key, - &right->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); + struct btrfs_disk_key right_key; + btrfs_node_key(right, &right_key, 0); + btrfs_set_node_key(parent, &right_key, pslot + 1); + btrfs_mark_buffer_dirty(parent); } } - if (btrfs_header_nritems(&mid->header) == 1) { + if (btrfs_header_nritems(mid) == 1) { /* * we're not allowed to leave a node with one item in the * tree during a delete. A deletion from lower in the tree @@ -619,21 +708,20 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root * otherwise we would have pulled some pointers from the * right */ - BUG_ON(!left_buf); - wret = balance_node_right(trans, root, mid_buf, left_buf); + BUG_ON(!left); + wret = balance_node_right(trans, root, mid, left); if (wret < 0) { ret = wret; goto enospc; } BUG_ON(wret == 1); } - if (btrfs_header_nritems(&mid->header) == 0) { + if (btrfs_header_nritems(mid) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = bh_blocknr(mid_buf); - clean_tree_block(trans, root, mid_buf); - wait_on_buffer(mid_buf); - btrfs_block_release(root, mid_buf); - mid_buf = NULL; + u64 blocknr = extent_buffer_blocknr(mid); + clean_tree_block(trans, root, mid); + wait_on_tree_block_writeback(root, mid); + free_extent_buffer(mid); mid = NULL; wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) @@ -643,37 +731,36 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; } else { /* update the parent key to reflect our changes */ - btrfs_memcpy(root, parent, - &parent->ptrs[pslot].key, &mid->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); + struct btrfs_disk_key mid_key; + btrfs_node_key(mid, &mid_key, 0); + btrfs_set_node_key(parent, &mid_key, pslot); + btrfs_mark_buffer_dirty(parent); } /* update the path */ - if (left_buf) { - if (btrfs_header_nritems(&left->header) > orig_slot) { - get_bh(left_buf); - path->nodes[level] = left_buf; + if (left) { + if (btrfs_header_nritems(left) > orig_slot) { + extent_buffer_get(left); + path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; - if (mid_buf) - btrfs_block_release(root, mid_buf); + if (mid) + free_extent_buffer(mid); } else { - orig_slot -= btrfs_header_nritems(&left->header); + orig_slot -= btrfs_header_nritems(left); path->slots[level] = orig_slot; } } /* double check we haven't messed things up */ check_block(root, path, level); if (orig_ptr != - btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), - path->slots[level])) + btrfs_node_blockptr(path->nodes[level], path->slots[level])) BUG(); enospc: - if (right_buf) - btrfs_block_release(root, right_buf); - if (left_buf) - btrfs_block_release(root, left_buf); + if (right) + free_extent_buffer(right); + if (left) + free_extent_buffer(left); return ret; } @@ -682,14 +769,10 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct buffer_head *right_buf; - struct buffer_head *mid_buf; - struct buffer_head *left_buf; - struct buffer_head *parent_buf = NULL; - struct btrfs_node *right = NULL; - struct btrfs_node *mid; - struct btrfs_node *left = NULL; - struct btrfs_node *parent = NULL; + struct extent_buffer *right = NULL; + struct extent_buffer *mid; + struct extent_buffer *left = NULL; + struct extent_buffer *parent = NULL; int ret = 0; int wret; int pslot; @@ -699,107 +782,101 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (level == 0) return 1; - mid_buf = path->nodes[level]; - mid = btrfs_buffer_node(mid_buf); + mid = path->nodes[level]; orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) - parent_buf = path->nodes[level + 1]; + parent = path->nodes[level + 1]; pslot = path->slots[level + 1]; - if (!parent_buf) + if (!parent) return 1; - parent = btrfs_buffer_node(parent_buf); - left_buf = read_node_slot(root, parent_buf, pslot - 1); + left = read_node_slot(root, parent, pslot - 1); /* first, try to make some room in the middle buffer */ - if (left_buf) { + if (left) { u32 left_nr; - left = btrfs_buffer_node(left_buf); - left_nr = btrfs_header_nritems(&left->header); + left_nr = btrfs_header_nritems(left); if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - ret = btrfs_cow_block(trans, root, left_buf, parent_buf, - pslot - 1, &left_buf); + ret = btrfs_cow_block(trans, root, left, parent, + pslot - 1, &left); if (ret) wret = 1; else { - left = btrfs_buffer_node(left_buf); wret = push_node_left(trans, root, - left_buf, mid_buf); + left, mid); } } if (wret < 0) ret = wret; if (wret == 0) { + struct btrfs_disk_key disk_key; orig_slot += left_nr; - btrfs_memcpy(root, parent, - &parent->ptrs[pslot].key, - &mid->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); - if (btrfs_header_nritems(&left->header) > orig_slot) { - path->nodes[level] = left_buf; + btrfs_node_key(mid, &disk_key, 0); + btrfs_set_node_key(parent, &disk_key, pslot); + btrfs_mark_buffer_dirty(parent); + if (btrfs_header_nritems(left) > orig_slot) { + path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); } else { orig_slot -= - btrfs_header_nritems(&left->header); + btrfs_header_nritems(left); path->slots[level] = orig_slot; - btrfs_block_release(root, left_buf); + free_extent_buffer(left); } check_node(root, path, level); return 0; } - btrfs_block_release(root, left_buf); + free_extent_buffer(left); } - right_buf = read_node_slot(root, parent_buf, pslot + 1); + right= read_node_slot(root, parent, pslot + 1); /* * then try to empty the right most buffer into the middle */ - if (right_buf) { + if (right) { u32 right_nr; - right = btrfs_buffer_node(right_buf); - right_nr = btrfs_header_nritems(&right->header); + right_nr = btrfs_header_nritems(right); if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - ret = btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, - &right_buf); + ret = btrfs_cow_block(trans, root, right, + parent, pslot + 1, + &right); if (ret) wret = 1; else { - right = btrfs_buffer_node(right_buf); wret = balance_node_right(trans, root, - right_buf, mid_buf); + right, mid); } } if (wret < 0) ret = wret; if (wret == 0) { - btrfs_memcpy(root, parent, - &parent->ptrs[pslot + 1].key, - &right->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); - if (btrfs_header_nritems(&mid->header) <= orig_slot) { - path->nodes[level] = right_buf; + struct btrfs_disk_key disk_key; + + btrfs_node_key(right, &disk_key, 0); + btrfs_set_node_key(parent, &disk_key, pslot + 1); + btrfs_mark_buffer_dirty(parent); + + if (btrfs_header_nritems(mid) <= orig_slot) { + path->nodes[level] = right; path->slots[level + 1] += 1; path->slots[level] = orig_slot - - btrfs_header_nritems(&mid->header); - btrfs_block_release(root, mid_buf); + btrfs_header_nritems(mid); + free_extent_buffer(mid); } else { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } check_node(root, path, level); return 0; } - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } check_node(root, path, level); return 1; @@ -811,10 +888,9 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - struct btrfs_node *node; + struct extent_buffer *node; int i; u32 nritems; - u64 item_objectid; u64 blocknr; u64 search; u64 cluster_start; @@ -823,7 +899,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int direction = path->reada; struct radix_tree_root found; unsigned long gang[8]; - struct buffer_head *bh; + struct extent_buffer *eb; if (level == 0) return; @@ -831,18 +907,17 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, if (!path->nodes[level]) return; - node = btrfs_buffer_node(path->nodes[level]); + node = path->nodes[level]; search = btrfs_node_blockptr(node, slot); - bh = btrfs_find_tree_block(root, search); - if (bh) { - brelse(bh); + eb = btrfs_find_tree_block(root, search); + if (eb) { + free_extent_buffer(eb); return; } init_bit_radix(&found); - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(node); for (i = slot; i < nritems; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); blocknr = btrfs_node_blockptr(node, i); set_radix_bit(&found, blocknr); } @@ -886,8 +961,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { - struct buffer_head *b; - struct btrfs_node *c; + struct extent_buffer *b; u64 blocknr; int slot; int ret; @@ -901,10 +975,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); again: b = root->node; - get_bh(b); + extent_buffer_get(b); while (b) { - c = btrfs_buffer_node(b); - level = btrfs_header_level(&c->header); + level = btrfs_header_level(b); if (cow) { int wret; wret = btrfs_cow_block(trans, root, b, @@ -912,32 +985,30 @@ again: p->slots[level + 1], &b); if (wret) { - btrfs_block_release(root, b); + free_extent_buffer(b); return wret; } - c = btrfs_buffer_node(b); } BUG_ON(!cow && ins_len); - if (level != btrfs_header_level(&c->header)) + if (level != btrfs_header_level(b)) WARN_ON(1); - level = btrfs_header_level(&c->header); + level = btrfs_header_level(b); p->nodes[level] = b; ret = check_block(root, p, level); if (ret) return -1; - ret = bin_search(c, key, &slot); - if (!btrfs_is_leaf(c)) { + ret = bin_search(b, key, level, &slot); + if (level != 0) { if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len > 0 && btrfs_header_nritems(&c->header) >= + if (ins_len > 0 && btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); if (sret) return sret; b = p->nodes[level]; - c = btrfs_buffer_node(b); slot = p->slots[level]; } else if (ins_len < 0) { int sret = balance_level(trans, root, p, @@ -947,22 +1018,19 @@ again: b = p->nodes[level]; if (!b) goto again; - c = btrfs_buffer_node(b); slot = p->slots[level]; - BUG_ON(btrfs_header_nritems(&c->header) == 1); + BUG_ON(btrfs_header_nritems(b) == 1); } /* this is only true while dropping a snapshot */ if (level == lowest_level) break; - blocknr = btrfs_node_blockptr(c, slot); + blocknr = btrfs_node_blockptr(b, slot); if (should_reada) reada_for_search(root, p, level, slot); - b = read_tree_block(root, btrfs_node_blockptr(c, slot)); - + b = read_tree_block(root, btrfs_node_blockptr(b, slot)); } else { - struct btrfs_leaf *l = (struct btrfs_leaf *)c; p->slots[level] = slot; - if (ins_len > 0 && btrfs_leaf_free_space(root, l) < + if (ins_len > 0 && btrfs_leaf_free_space(root, b) < sizeof(struct btrfs_item) + ins_len) { int sret = split_leaf(trans, root, key, p, ins_len); @@ -986,19 +1054,20 @@ again: * If this fails to write a tree block, it returns -1, but continues * fixing up the blocks in ram so the tree is consistent. */ -static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, struct btrfs_disk_key - *key, int level) +static int fixup_low_keys(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_disk_key *key, int level) { int i; int ret = 0; + struct extent_buffer *t; + for (i = level; i < BTRFS_MAX_LEVEL; i++) { - struct btrfs_node *t; int tslot = path->slots[i]; if (!path->nodes[i]) break; - t = btrfs_buffer_node(path->nodes[i]); - btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key)); + t = path->nodes[i]; + btrfs_set_node_key(t, key, tslot); btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; @@ -1014,18 +1083,16 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root * error, and > 0 if there was no room in the left hand block. */ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *dst_buf, struct - buffer_head *src_buf) + *root, struct extent_buffer *dst, + struct extent_buffer *src) { - struct btrfs_node *src = btrfs_buffer_node(src_buf); - struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int src_nritems; int dst_nritems; int ret = 0; - src_nritems = btrfs_header_nritems(&src->header); - dst_nritems = btrfs_header_nritems(&dst->header); + src_nritems = btrfs_header_nritems(src); + dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; if (push_items <= 0) { @@ -1035,17 +1102,21 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root if (src_nritems < push_items) push_items = src_nritems; - btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs, - push_items * sizeof(struct btrfs_key_ptr)); + copy_extent_buffer(dst, src, + btrfs_node_key_ptr_offset(dst_nritems), + btrfs_node_key_ptr_offset(0), + push_items * sizeof(struct btrfs_key_ptr)); + if (push_items < src_nritems) { - btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items, - (src_nritems - push_items) * - sizeof(struct btrfs_key_ptr)); + memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(push_items), + (src_nritems - push_items) * + sizeof(struct btrfs_key_ptr)); } - btrfs_set_header_nritems(&src->header, src_nritems - push_items); - btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - btrfs_mark_buffer_dirty(src_buf); - btrfs_mark_buffer_dirty(dst_buf); + btrfs_set_header_nritems(src, src_nritems - push_items); + btrfs_set_header_nritems(dst, dst_nritems + push_items); + btrfs_mark_buffer_dirty(src); + btrfs_mark_buffer_dirty(dst); return ret; } @@ -1058,24 +1129,22 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root * * this will only push up to 1/2 the contents of the left node over */ -static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct buffer_head *dst_buf, - struct buffer_head *src_buf) +static int balance_node_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *dst, + struct extent_buffer *src) { - struct btrfs_node *src = btrfs_buffer_node(src_buf); - struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int max_push; int src_nritems; int dst_nritems; int ret = 0; - src_nritems = btrfs_header_nritems(&src->header); - dst_nritems = btrfs_header_nritems(&dst->header); + src_nritems = btrfs_header_nritems(src); + dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; - if (push_items <= 0) { + if (push_items <= 0) return 1; - } max_push = src_nritems / 2 + 1; /* don't try to empty the node */ @@ -1085,18 +1154,21 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct if (max_push < push_items) push_items = max_push; - btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs, - dst_nritems * sizeof(struct btrfs_key_ptr)); + memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), + btrfs_node_key_ptr_offset(0), + (dst_nritems) * + sizeof(struct btrfs_key_ptr)); - btrfs_memcpy(root, dst, dst->ptrs, - src->ptrs + src_nritems - push_items, - push_items * sizeof(struct btrfs_key_ptr)); + copy_extent_buffer(dst, src, + btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(src_nritems - push_items), + push_items * sizeof(struct btrfs_key_ptr)); - btrfs_set_header_nritems(&src->header, src_nritems - push_items); - btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); + btrfs_set_header_nritems(src, src_nritems - push_items); + btrfs_set_header_nritems(dst, dst_nritems + push_items); - btrfs_mark_buffer_dirty(src_buf); - btrfs_mark_buffer_dirty(dst_buf); + btrfs_mark_buffer_dirty(src); + btrfs_mark_buffer_dirty(dst); return ret; } @@ -1107,45 +1179,46 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct * * returns zero on success or < 0 on failure. */ -static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int level) +static int insert_new_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) { - struct buffer_head *t; - struct btrfs_node *lower; - struct btrfs_node *c; - struct btrfs_disk_key *lower_key; + struct extent_buffer *lower; + struct extent_buffer *c; + struct btrfs_disk_key lower_key; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0); - if (IS_ERR(t)) - return PTR_ERR(t); - c = btrfs_buffer_node(t); - memset(c, 0, root->blocksize); - btrfs_set_header_nritems(&c->header, 1); - btrfs_set_header_level(&c->header, level); - btrfs_set_header_blocknr(&c->header, bh_blocknr(t)); - btrfs_set_header_generation(&c->header, trans->transid); - btrfs_set_header_owner(&c->header, root->root_key.objectid); - lower = btrfs_buffer_node(path->nodes[level-1]); - memcpy(c->header.fsid, root->fs_info->disk_super->fsid, - sizeof(c->header.fsid)); - if (btrfs_is_leaf(lower)) - lower_key = &((struct btrfs_leaf *)lower)->items[0].key; - else - lower_key = &lower->ptrs[0].key; - btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key, - sizeof(struct btrfs_disk_key)); - btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1])); + c = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(root->node), 0); + if (IS_ERR(c)) + return PTR_ERR(c); + memset_extent_buffer(c, 0, 0, root->nodesize); + btrfs_set_header_nritems(c, 1); + btrfs_set_header_level(c, level); + btrfs_set_header_blocknr(c, extent_buffer_blocknr(c)); + btrfs_set_header_generation(c, trans->transid); + btrfs_set_header_owner(c, root->root_key.objectid); + lower = path->nodes[level-1]; - btrfs_mark_buffer_dirty(t); + write_extent_buffer(c, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(c), + BTRFS_FSID_SIZE); + if (level == 1) + btrfs_item_key(lower, &lower_key, 0); + else + btrfs_node_key(lower, &lower_key, 0); + btrfs_set_node_key(c, &lower_key, 0); + btrfs_set_node_blockptr(c, 0, extent_buffer_blocknr(lower)); + + btrfs_mark_buffer_dirty(c); /* the super has an extra ref to root->node */ - btrfs_block_release(root, root->node); - root->node = t; - get_bh(t); - path->nodes[level] = t; + free_extent_buffer(root->node); + root->node = c; + extent_buffer_get(c); + path->nodes[level] = c; path->slots[level] = 0; return 0; } @@ -1163,26 +1236,26 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key *key, u64 blocknr, int slot, int level) { - struct btrfs_node *lower; + struct extent_buffer *lower; int nritems; BUG_ON(!path->nodes[level]); - lower = btrfs_buffer_node(path->nodes[level]); - nritems = btrfs_header_nritems(&lower->header); + lower = path->nodes[level]; + nritems = btrfs_header_nritems(lower); if (slot > nritems) BUG(); if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) BUG(); if (slot != nritems) { - btrfs_memmove(root, lower, lower->ptrs + slot + 1, - lower->ptrs + slot, + memmove_extent_buffer(lower, + btrfs_node_key_ptr_offset(slot + 1), + btrfs_node_key_ptr_offset(slot), (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - btrfs_memcpy(root, lower, &lower->ptrs[slot].key, - key, sizeof(struct btrfs_disk_key)); + btrfs_set_node_key(lower, key, slot); btrfs_set_node_blockptr(lower, slot, blocknr); - btrfs_set_header_nritems(&lower->header, nritems + 1); - btrfs_mark_buffer_dirty(path->nodes[level]); + btrfs_set_header_nritems(lower, nritems + 1); + btrfs_mark_buffer_dirty(lower); check_node(root, path, level); return 0; } @@ -1199,69 +1272,73 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct buffer_head *t; - struct btrfs_node *c; - struct buffer_head *split_buffer; - struct btrfs_node *split; + struct extent_buffer *c; + struct extent_buffer *split; + struct btrfs_disk_key disk_key; int mid; int ret; int wret; u32 c_nritems; - t = path->nodes[level]; - c = btrfs_buffer_node(t); - if (t == root->node) { + c = path->nodes[level]; + if (c == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); if (ret) return ret; } else { ret = push_nodes_for_insert(trans, root, path, level); - t = path->nodes[level]; - c = btrfs_buffer_node(t); - if (!ret && - btrfs_header_nritems(&c->header) < + c = path->nodes[level]; + if (!ret && btrfs_header_nritems(c) < BTRFS_NODEPTRS_PER_BLOCK(root) - 1) return 0; if (ret < 0) return ret; } - c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0); - if (IS_ERR(split_buffer)) - return PTR_ERR(split_buffer); + c_nritems = btrfs_header_nritems(c); + split = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(c), 0); + if (IS_ERR(split)) + return PTR_ERR(split); + + btrfs_set_header_flags(split, btrfs_header_flags(c)); + btrfs_set_header_level(split, btrfs_header_level(c)); + btrfs_set_header_blocknr(split, extent_buffer_blocknr(split)); + btrfs_set_header_generation(split, trans->transid); + btrfs_set_header_owner(split, root->root_key.objectid); + write_extent_buffer(split, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(split), + BTRFS_FSID_SIZE); - split = btrfs_buffer_node(split_buffer); - btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); - btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); - btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer)); - btrfs_set_header_generation(&split->header, trans->transid); - btrfs_set_header_owner(&split->header, root->root_key.objectid); - memcpy(split->header.fsid, root->fs_info->disk_super->fsid, - sizeof(split->header.fsid)); mid = (c_nritems + 1) / 2; - btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid, - (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); - btrfs_set_header_nritems(&split->header, c_nritems - mid); - btrfs_set_header_nritems(&c->header, mid); + + copy_extent_buffer(split, c, + btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(mid), + (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); + btrfs_set_header_nritems(split, c_nritems - mid); + btrfs_set_header_nritems(c, mid); ret = 0; - btrfs_mark_buffer_dirty(t); - btrfs_mark_buffer_dirty(split_buffer); - wret = insert_ptr(trans, root, path, &split->ptrs[0].key, - bh_blocknr(split_buffer), path->slots[level + 1] + 1, + btrfs_mark_buffer_dirty(c); + btrfs_mark_buffer_dirty(split); + + btrfs_node_key(split, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, + extent_buffer_blocknr(split), + path->slots[level + 1] + 1, level + 1); if (wret) ret = wret; if (path->slots[level] >= mid) { path->slots[level] -= mid; - btrfs_block_release(root, t); - path->nodes[level] = split_buffer; + free_extent_buffer(c); + path->nodes[level] = split; path->slots[level + 1] += 1; } else { - btrfs_block_release(root, split_buffer); + free_extent_buffer(split); } return ret; } @@ -1271,16 +1348,16 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root * and nr indicate which items in the leaf to check. This totals up the * space used both by the item structs and the item data */ -static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) +static int leaf_space_used(struct extent_buffer *l, int start, int nr) { int data_len; - int nritems = btrfs_header_nritems(&l->header); + int nritems = btrfs_header_nritems(l); int end = min(nritems, start + nr) - 1; if (!nr) return 0; - data_len = btrfs_item_end(l->items + start); - data_len = data_len - btrfs_item_offset(l->items + end); + data_len = btrfs_item_end_nr(l, start); + data_len = data_len - btrfs_item_offset_nr(l, end); data_len += sizeof(struct btrfs_item) * nr; WARN_ON(data_len < 0); return data_len; @@ -1291,10 +1368,17 @@ static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) +int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) { - int nritems = btrfs_header_nritems(&leaf->header); - return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); + int nritems = btrfs_header_nritems(leaf); + int ret; + ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); + if (ret < 0) { + printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n", + ret, BTRFS_LEAF_DATA_SIZE(root), + leaf_space_used(leaf, 0, nritems), nritems); + } + return ret; } /* @@ -1307,12 +1391,10 @@ int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct buffer_head *left_buf = path->nodes[0]; - struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf); - struct btrfs_leaf *right; - struct buffer_head *right_buf; - struct buffer_head *upper; - struct btrfs_node *upper_node; + struct extent_buffer *left = path->nodes[0]; + struct extent_buffer *right; + struct extent_buffer *upper; + struct btrfs_disk_key disk_key; int slot; int i; int free_space; @@ -1321,6 +1403,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_item *item; u32 left_nritems; u32 right_nritems; + u32 data_end; int ret; slot = path->slots[1]; @@ -1328,102 +1411,109 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } upper = path->nodes[1]; - upper_node = btrfs_buffer_node(upper); - if (slot >= btrfs_header_nritems(&upper_node->header) - 1) { + if (slot >= btrfs_header_nritems(upper) - 1) return 1; - } - right_buf = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1)); - right = btrfs_buffer_leaf(right_buf); + + right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1)); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, right_buf); - return 1; - } - /* cow and double check */ - ret = btrfs_cow_block(trans, root, right_buf, upper, - slot + 1, &right_buf); - if (ret) { - btrfs_block_release(root, right_buf); - return 1; - } - right = btrfs_buffer_leaf(right_buf); - free_space = btrfs_leaf_free_space(root, right); - if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } - left_nritems = btrfs_header_nritems(&left->header); - if (left_nritems == 0) { - btrfs_block_release(root, right_buf); + /* cow and double check */ + ret = btrfs_cow_block(trans, root, right, upper, + slot + 1, &right); + if (ret) { + free_extent_buffer(right); return 1; } + free_space = btrfs_leaf_free_space(root, right); + if (free_space < data_size + sizeof(struct btrfs_item)) { + free_extent_buffer(right); + return 1; + } + + left_nritems = btrfs_header_nritems(left); + if (left_nritems == 0) { + free_extent_buffer(right); + return 1; + } + for (i = left_nritems - 1; i >= 1; i--) { - item = left->items + i; + item = btrfs_item_nr(left, i); if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(item) + sizeof(*item) + push_space > + if (btrfs_item_size(left, item) + sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(item) + sizeof(*item); + push_space += btrfs_item_size(left, item) + sizeof(*item); } + if (push_items == 0) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } + if (push_items == left_nritems) WARN_ON(1); - right_nritems = btrfs_header_nritems(&right->header); + /* push left to right */ - push_space = btrfs_item_end(left->items + left_nritems - push_items); + right_nritems = btrfs_header_nritems(right); + push_space = btrfs_item_end_nr(left, left_nritems - push_items); push_space -= leaf_data_end(root, left); + /* make room in the right data area */ - btrfs_memmove(root, right, btrfs_leaf_data(right) + - leaf_data_end(root, right) - push_space, - btrfs_leaf_data(right) + - leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) - - leaf_data_end(root, right)); + data_end = leaf_data_end(root, right); + memmove_extent_buffer(right, + btrfs_leaf_data(right) + data_end - push_space, + btrfs_leaf_data(right) + data_end, + BTRFS_LEAF_DATA_SIZE(root) - data_end); + /* copy from the left data area */ - btrfs_memcpy(root, right, btrfs_leaf_data(right) + + copy_extent_buffer(right, left, btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space, btrfs_leaf_data(left) + leaf_data_end(root, left), push_space); - btrfs_memmove(root, right, right->items + push_items, right->items, - right_nritems * sizeof(struct btrfs_item)); + + memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), + btrfs_item_nr_offset(0), + right_nritems * sizeof(struct btrfs_item)); + /* copy the items from left to right */ - btrfs_memcpy(root, right, right->items, left->items + - left_nritems - push_items, - push_items * sizeof(struct btrfs_item)); + copy_extent_buffer(right, left, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(left_nritems - push_items), + push_items * sizeof(struct btrfs_item)); /* update the item pointers */ right_nritems += push_items; - btrfs_set_header_nritems(&right->header, right_nritems); + btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { - btrfs_set_item_offset(right->items + i, push_space - - btrfs_item_size(right->items + i)); - push_space = btrfs_item_offset(right->items + i); + item = btrfs_item_nr(right, i); + btrfs_set_item_offset(right, item, push_space - + btrfs_item_size(right, item)); + push_space = btrfs_item_offset(right, item); } left_nritems -= push_items; - btrfs_set_header_nritems(&left->header, left_nritems); + btrfs_set_header_nritems(left, left_nritems); - btrfs_mark_buffer_dirty(left_buf); - btrfs_mark_buffer_dirty(right_buf); + btrfs_mark_buffer_dirty(left); + btrfs_mark_buffer_dirty(right); - btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key, - &right->items[0].key, sizeof(struct btrfs_disk_key)); + btrfs_item_key(right, &disk_key, 0); + btrfs_set_node_key(upper, &disk_key, slot + 1); btrfs_mark_buffer_dirty(upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { path->slots[0] -= left_nritems; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buf; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[1] += 1; } else { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } if (path->nodes[1]) check_node(root, path, 1); @@ -1436,10 +1526,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct buffer_head *right_buf = path->nodes[0]; - struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf); - struct buffer_head *t; - struct btrfs_leaf *left; + struct btrfs_disk_key disk_key; + struct extent_buffer *right = path->nodes[0]; + struct extent_buffer *left; int slot; int i; int free_space; @@ -1447,119 +1536,128 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root int push_items = 0; struct btrfs_item *item; u32 old_left_nritems; + u32 right_nritems; int ret = 0; int wret; slot = path->slots[1]; - if (slot == 0) { + if (slot == 0) return 1; - } - if (!path->nodes[1]) { + if (!path->nodes[1]) return 1; - } - t = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1)); - left = btrfs_buffer_leaf(t); + + left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1], + slot - 1)); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } /* cow and double check */ - ret = btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); + ret = btrfs_cow_block(trans, root, left, + path->nodes[1], slot - 1, &left); if (ret) { /* we hit -ENOSPC, but it isn't fatal here */ - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } - left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } - if (btrfs_header_nritems(&right->header) == 0) { - btrfs_block_release(root, t); + right_nritems = btrfs_header_nritems(right); + if (right_nritems == 0) { + free_extent_buffer(left); return 1; } - for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) { - item = right->items + i; + for (i = 0; i < right_nritems - 1; i++) { + item = btrfs_item_nr(right, i); if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(item) + sizeof(*item) + push_space > + if (btrfs_item_size(right, item) + sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(item) + sizeof(*item); + push_space += btrfs_item_size(right, item) + sizeof(*item); } if (push_items == 0) { - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } - if (push_items == btrfs_header_nritems(&right->header)) + if (push_items == btrfs_header_nritems(right)) WARN_ON(1); + /* push data from right to left */ - btrfs_memcpy(root, left, left->items + - btrfs_header_nritems(&left->header), - right->items, push_items * sizeof(struct btrfs_item)); + copy_extent_buffer(left, right, + btrfs_item_nr_offset(btrfs_header_nritems(left)), + btrfs_item_nr_offset(0), + push_items * sizeof(struct btrfs_item)); + push_space = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset(right->items + push_items -1); - btrfs_memcpy(root, left, btrfs_leaf_data(left) + + btrfs_item_offset_nr(right, push_items -1); + + copy_extent_buffer(left, right, btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space, btrfs_leaf_data(right) + - btrfs_item_offset(right->items + push_items - 1), + btrfs_item_offset_nr(right, push_items - 1), push_space); - old_left_nritems = btrfs_header_nritems(&left->header); + old_left_nritems = btrfs_header_nritems(left); BUG_ON(old_left_nritems < 0); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { - u32 ioff = btrfs_item_offset(left->items + i); - btrfs_set_item_offset(left->items + i, ioff - - (BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset(left->items + - old_left_nritems - 1))); + u32 ioff; + item = btrfs_item_nr(left, i); + ioff = btrfs_item_offset(left, item); + btrfs_set_item_offset(left, item, + ioff - (BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_offset_nr(left, old_left_nritems - 1))); } - btrfs_set_header_nritems(&left->header, old_left_nritems + push_items); + btrfs_set_header_nritems(left, old_left_nritems + push_items); /* fixup right node */ - push_space = btrfs_item_offset(right->items + push_items - 1) - - leaf_data_end(root, right); - btrfs_memmove(root, right, btrfs_leaf_data(right) + - BTRFS_LEAF_DATA_SIZE(root) - push_space, - btrfs_leaf_data(right) + - leaf_data_end(root, right), push_space); - btrfs_memmove(root, right, right->items, right->items + push_items, - (btrfs_header_nritems(&right->header) - push_items) * - sizeof(struct btrfs_item)); - btrfs_set_header_nritems(&right->header, - btrfs_header_nritems(&right->header) - - push_items); + push_space = btrfs_item_offset_nr(right, push_items - 1) - + leaf_data_end(root, right); + memmove_extent_buffer(right, btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(right) + + leaf_data_end(root, right), push_space); + + memmove_extent_buffer(right, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(push_items), + (btrfs_header_nritems(right) - push_items) * + sizeof(struct btrfs_item)); + + right_nritems = btrfs_header_nritems(right) - push_items; + btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); - for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - btrfs_set_item_offset(right->items + i, push_space - - btrfs_item_size(right->items + i)); - push_space = btrfs_item_offset(right->items + i); + for (i = 0; i < right_nritems; i++) { + item = btrfs_item_nr(right, i); + btrfs_set_item_offset(right, item, push_space - + btrfs_item_size(right, item)); + push_space = btrfs_item_offset(right, item); } - btrfs_mark_buffer_dirty(t); - btrfs_mark_buffer_dirty(right_buf); + btrfs_mark_buffer_dirty(left); + btrfs_mark_buffer_dirty(right); - wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); + btrfs_item_key(right, &disk_key, 0); + wret = fixup_low_keys(trans, root, path, &disk_key, 1); if (wret) ret = wret; /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { path->slots[0] += old_left_nritems; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = t; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = left; path->slots[1] -= 1; } else { - btrfs_block_release(root, t); + free_extent_buffer(left); path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); @@ -1578,13 +1676,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, struct btrfs_path *path, int data_size) { - struct buffer_head *l_buf; - struct btrfs_leaf *l; + struct extent_buffer *l; u32 nritems; int mid; int slot; - struct btrfs_leaf *right; - struct buffer_head *right_buffer; + struct extent_buffer *right; int space_needed = data_size + sizeof(struct btrfs_item); int data_copy_size; int rt_data_off; @@ -1603,8 +1699,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) return wret; } - l_buf = path->nodes[0]; - l = btrfs_buffer_leaf(l_buf); + l = path->nodes[0]; /* did the pushes work? */ if (btrfs_leaf_free_space(root, l) >= @@ -1617,36 +1712,38 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } slot = path->slots[0]; - nritems = btrfs_header_nritems(&l->header); + nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); - if (IS_ERR(right_buffer)) - return PTR_ERR(right_buffer); + right = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(l), 0); + if (IS_ERR(right)) + return PTR_ERR(right); + + memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_generation(right, trans->transid); + btrfs_set_header_owner(right, root->root_key.objectid); + btrfs_set_header_level(right, 0); + write_extent_buffer(right, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(right), + BTRFS_FSID_SIZE); - right = btrfs_buffer_leaf(right_buffer); - memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); - btrfs_set_header_generation(&right->header, trans->transid); - btrfs_set_header_owner(&right->header, root->root_key.objectid); - btrfs_set_header_level(&right->header, 0); - memcpy(right->header.fsid, root->fs_info->disk_super->fsid, - sizeof(right->header.fsid)); if (mid <= slot) { if (nritems == 1 || leaf_space_used(l, mid, nritems - mid) + space_needed > BTRFS_LEAF_DATA_SIZE(root)) { if (slot >= nritems) { btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1] + 1, 1); if (wret) ret = wret; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; path->slots[1] += 1; return ret; @@ -1659,15 +1756,15 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BTRFS_LEAF_DATA_SIZE(root)) { if (slot == 0) { btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1], 1); if (wret) ret = wret; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; if (path->slots[1] == 0) { wret = fixup_low_keys(trans, root, @@ -1681,61 +1778,74 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root double_split = 1; } } - btrfs_set_header_nritems(&right->header, nritems - mid); - data_copy_size = btrfs_item_end(l->items + mid) - - leaf_data_end(root, l); - btrfs_memcpy(root, right, right->items, l->items + mid, - (nritems - mid) * sizeof(struct btrfs_item)); - btrfs_memcpy(root, right, + nritems = nritems - mid; + btrfs_set_header_nritems(right, nritems); + data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); + + copy_extent_buffer(right, l, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(mid), + nritems * sizeof(struct btrfs_item)); + + copy_extent_buffer(right, l, btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - data_copy_size, btrfs_leaf_data(l) + leaf_data_end(root, l), data_copy_size); - rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_end(l->items + mid); - for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - u32 ioff = btrfs_item_offset(right->items + i); - btrfs_set_item_offset(right->items + i, ioff + rt_data_off); + rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_end_nr(l, mid); + + for (i = 0; i < nritems; i++) { + struct btrfs_item *item = btrfs_item_nr(right, i); + u32 ioff = btrfs_item_offset(right, item); + btrfs_set_item_offset(right, item, ioff + rt_data_off); } - btrfs_set_header_nritems(&l->header, mid); + btrfs_set_header_nritems(l, mid); ret = 0; - wret = insert_ptr(trans, root, path, &right->items[0].key, - bh_blocknr(right_buffer), path->slots[1] + 1, 1); + btrfs_item_key(right, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, + extent_buffer_blocknr(right), path->slots[1] + 1, 1); if (wret) ret = wret; - btrfs_mark_buffer_dirty(right_buffer); - btrfs_mark_buffer_dirty(l_buf); + + btrfs_mark_buffer_dirty(right); + btrfs_mark_buffer_dirty(l); BUG_ON(path->slots[0] != slot); + if (mid <= slot) { - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] -= mid; path->slots[1] += 1; } else - btrfs_block_release(root, right_buffer); + free_extent_buffer(right); + BUG_ON(path->slots[0] < 0); check_node(root, path, 1); + check_leaf(root, path, 0); if (!double_split) return ret; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); - if (IS_ERR(right_buffer)) - return PTR_ERR(right_buffer); - right = btrfs_buffer_leaf(right_buffer); - memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); - btrfs_set_header_generation(&right->header, trans->transid); - btrfs_set_header_owner(&right->header, root->root_key.objectid); - btrfs_set_header_level(&right->header, 0); - memcpy(right->header.fsid, root->fs_info->disk_super->fsid, - sizeof(right->header.fsid)); + right = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(l), 0); + if (IS_ERR(right)) + return PTR_ERR(right); + + memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_generation(right, trans->transid); + btrfs_set_header_owner(right, root->root_key.objectid); + btrfs_set_header_level(right, 0); + write_extent_buffer(right, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(right), + BTRFS_FSID_SIZE); + btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1], 1); if (wret) ret = wret; @@ -1744,8 +1854,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; } - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; check_node(root, path, 1); check_leaf(root, path, 0); @@ -1760,8 +1870,8 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; u32 nritems; unsigned int data_end; unsigned int old_data_start; @@ -1770,15 +1880,14 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int i; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); slot = path->slots[0]; - old_data_start = btrfs_item_offset(leaf->items + slot); - old_size = btrfs_item_size(leaf->items + slot); + old_data_start = btrfs_item_offset_nr(leaf, slot); + old_size = btrfs_item_size_nr(leaf, slot); BUG_ON(old_size <= new_size); size_diff = old_size - new_size; @@ -1790,32 +1899,38 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff + size_diff); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff + size_diff); } /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + size_diff, btrfs_leaf_data(leaf) + data_end, old_data_start + new_size - data_end); - btrfs_set_item_size(leaf->items + slot, new_size); - btrfs_mark_buffer_dirty(leaf_buf); + + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_size(leaf, item, new_size); + btrfs_mark_buffer_dirty(leaf); ret = 0; - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); return ret; } -int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u32 data_size) +int btrfs_extend_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u32 data_size) { int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; u32 nritems; unsigned int data_end; unsigned int old_data; @@ -1823,16 +1938,17 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root int i; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); - if (btrfs_leaf_free_space(root, leaf) < data_size) + if (btrfs_leaf_free_space(root, leaf) < data_size) { + btrfs_print_leaf(root, leaf); BUG(); + } slot = path->slots[0]; - old_data = btrfs_item_end(leaf->items + slot); + old_data = btrfs_item_end_nr(leaf, slot); BUG_ON(slot < 0); BUG_ON(slot >= nritems); @@ -1842,22 +1958,28 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff - data_size); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff - data_size); } + /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + data_end, old_data - data_end); + data_end = old_data; - old_size = btrfs_item_size(leaf->items + slot); - btrfs_set_item_size(leaf->items + slot, old_size + data_size); - btrfs_mark_buffer_dirty(leaf_buf); + old_size = btrfs_item_size_nr(leaf, slot); + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_size(leaf, item, old_size + data_size); + btrfs_mark_buffer_dirty(leaf); ret = 0; - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); return ret; } @@ -1866,15 +1988,16 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, struct btrfs_key - *cpu_key, u32 data_size) +int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 data_size) { + struct extent_buffer *leaf; + struct btrfs_item *item; int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; u32 nritems; unsigned int data_end; struct btrfs_disk_key disk_key; @@ -1884,6 +2007,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root /* create a root if there isn't one */ if (!root->node) BUG(); + ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1); if (ret == 0) { return -EEXIST; @@ -1892,57 +2016,68 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root goto out; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); if (btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item) + data_size) { BUG(); } + slot = path->slots[0]; BUG_ON(slot < 0); + if (slot != nritems) { int i; - unsigned int old_data = btrfs_item_end(leaf->items + slot); + unsigned int old_data = btrfs_item_end_nr(leaf, slot); + if (old_data < data_end) { + btrfs_print_leaf(root, leaf); + printk("slot %d old_data %d data_end %d\n", + slot, old_data, data_end); + BUG_ON(1); + } /* * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff - data_size); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff - data_size); } /* shift the items */ - btrfs_memmove(root, leaf, leaf->items + slot + 1, - leaf->items + slot, + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), + btrfs_item_nr_offset(slot), (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + data_end, old_data - data_end); data_end = old_data; } + /* setup the item for the new data */ - btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key, - sizeof(struct btrfs_disk_key)); - btrfs_set_item_offset(leaf->items + slot, data_end - data_size); - btrfs_set_item_size(leaf->items + slot, data_size); - btrfs_set_header_nritems(&leaf->header, nritems + 1); - btrfs_mark_buffer_dirty(leaf_buf); + btrfs_set_item_key(leaf, &disk_key, slot); + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_offset(leaf, item, data_end - data_size); + btrfs_set_item_size(leaf, item, data_size); + btrfs_set_header_nritems(leaf, nritems + 1); + btrfs_mark_buffer_dirty(leaf); ret = 0; if (slot == 0) ret = fixup_low_keys(trans, root, path, &disk_key, 1); - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); out: return ret; @@ -1958,17 +2093,17 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root { int ret = 0; struct btrfs_path *path; - u8 *ptr; + struct extent_buffer *leaf; + unsigned long ptr; path = btrfs_alloc_path(); BUG_ON(!path); ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) { - ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], u8); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, data, data_size); - btrfs_mark_buffer_dirty(path->nodes[0]); + leaf = path->nodes[0]; + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + write_extent_buffer(leaf, data, ptr, data_size); + btrfs_mark_buffer_dirty(leaf); } btrfs_free_path(path); return ret; @@ -1984,30 +2119,30 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - struct btrfs_node *node; - struct buffer_head *parent = path->nodes[level]; + struct extent_buffer *parent = path->nodes[level]; u32 nritems; int ret = 0; int wret; - node = btrfs_buffer_node(parent); - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(parent); if (slot != nritems -1) { - btrfs_memmove(root, node, node->ptrs + slot, - node->ptrs + slot + 1, + memmove_extent_buffer(parent, + btrfs_node_key_ptr_offset(slot), + btrfs_node_key_ptr_offset(slot + 1), sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); } nritems--; - btrfs_set_header_nritems(&node->header, nritems); + btrfs_set_header_nritems(parent, nritems); if (nritems == 0 && parent == root->node) { - struct btrfs_header *header = btrfs_buffer_header(root->node); - BUG_ON(btrfs_header_level(header) != 1); + BUG_ON(btrfs_header_level(root->node) != 1); /* just turn the root into a leaf and break */ - btrfs_set_header_level(header, 0); + btrfs_set_header_level(root->node, 0); } else if (slot == 0) { - wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key, - level + 1); + struct btrfs_disk_key disk_key; + + btrfs_node_key(parent, &disk_key, 0); + wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); if (wret) ret = wret; } @@ -2023,59 +2158,67 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path) { int slot; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; int doff; int dsize; int ret = 0; int wret; u32 nritems; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; slot = path->slots[0]; - doff = btrfs_item_offset(leaf->items + slot); - dsize = btrfs_item_size(leaf->items + slot); - nritems = btrfs_header_nritems(&leaf->header); + doff = btrfs_item_offset_nr(leaf, slot); + dsize = btrfs_item_size_nr(leaf, slot); + nritems = btrfs_header_nritems(leaf); if (slot != nritems - 1) { int i; int data_end = leaf_data_end(root, leaf); - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + dsize, btrfs_leaf_data(leaf) + data_end, doff - data_end); + for (i = slot + 1; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, ioff + dsize); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff + dsize); } - btrfs_memmove(root, leaf, leaf->items + slot, - leaf->items + slot + 1, + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), + btrfs_item_nr_offset(slot + 1), sizeof(struct btrfs_item) * (nritems - slot - 1)); } - btrfs_set_header_nritems(&leaf->header, nritems - 1); + btrfs_set_header_nritems(leaf, nritems - 1); nritems--; + /* delete the leaf if we've emptied it */ if (nritems == 0) { - if (leaf_buf == root->node) { - btrfs_set_header_level(&leaf->header, 0); + if (leaf == root->node) { + btrfs_set_header_level(leaf, 0); } else { - clean_tree_block(trans, root, leaf_buf); - wait_on_buffer(leaf_buf); + clean_tree_block(trans, root, leaf); + wait_on_tree_block_writeback(root, leaf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - bh_blocknr(leaf_buf), 1, 1); + extent_buffer_blocknr(leaf), + 1, 1); if (wret) ret = wret; } } else { int used = leaf_space_used(leaf, 0, nritems); if (slot == 0) { + struct btrfs_disk_key disk_key; + + btrfs_item_key(leaf, &disk_key, 0); wret = fixup_low_keys(trans, root, path, - &leaf->items[0].key, 1); + &disk_key, 1); if (wret) ret = wret; } @@ -2087,34 +2230,40 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, * for possible call to del_ptr below */ slot = path->slots[1]; - get_bh(leaf_buf); + extent_buffer_get(leaf); + wret = push_leaf_left(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; - if (path->nodes[0] == leaf_buf && - btrfs_header_nritems(&leaf->header)) { + + if (path->nodes[0] == leaf && + btrfs_header_nritems(leaf)) { wret = push_leaf_right(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; } - if (btrfs_header_nritems(&leaf->header) == 0) { - u64 blocknr = bh_blocknr(leaf_buf); - clean_tree_block(trans, root, leaf_buf); - wait_on_buffer(leaf_buf); + + if (btrfs_header_nritems(leaf) == 0) { + u64 blocknr = extent_buffer_blocknr(leaf); + + clean_tree_block(trans, root, leaf); + wait_on_tree_block_writeback(root, leaf); + wret = del_ptr(trans, root, path, 1, slot); if (wret) ret = wret; - btrfs_block_release(root, leaf_buf); + + free_extent_buffer(leaf); wret = btrfs_free_extent(trans, root, blocknr, 1, 1); if (wret) ret = wret; } else { - btrfs_mark_buffer_dirty(leaf_buf); - btrfs_block_release(root, leaf_buf); + btrfs_mark_buffer_dirty(leaf); + free_extent_buffer(leaf); } } else { - btrfs_mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf); } } return ret; @@ -2130,25 +2279,27 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int slot; int level = 1; u64 blocknr; - struct buffer_head *c; - struct btrfs_node *c_node; - struct buffer_head *next = NULL; + struct extent_buffer *c; + struct extent_buffer *next = NULL; while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; + slot = path->slots[level] + 1; c = path->nodes[level]; - c_node = btrfs_buffer_node(c); - if (slot >= btrfs_header_nritems(&c_node->header)) { + if (slot >= btrfs_header_nritems(c)) { level++; continue; } - blocknr = btrfs_node_blockptr(c_node, slot); + + blocknr = btrfs_node_blockptr(c, slot); if (next) - btrfs_block_release(root, next); + free_extent_buffer(next); + if (path->reada) reada_for_search(root, path, level, slot); + next = read_tree_block(root, blocknr); break; } @@ -2156,15 +2307,14 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) while(1) { level--; c = path->nodes[level]; - btrfs_block_release(root, c); + free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; if (!level) break; if (path->reada) reada_for_search(root, path, level, 0); - next = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(next), 0)); + next = read_tree_block(root, btrfs_node_blockptr(next, 0)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 800a3499cc3f..c4b829806855 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -20,10 +20,10 @@ #define __BTRFS__ #include -#include #include #include #include "bit-radix.h" +#include "extent_map.h" struct btrfs_trans_handle; struct btrfs_transaction; @@ -32,7 +32,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; -#define BTRFS_MAGIC "_BtRfS_M" +#define BTRFS_MAGIC "_B2RfS_M" #define BTRFS_ROOT_TREE_OBJECTID 1ULL #define BTRFS_EXTENT_TREE_OBJECTID 2ULL @@ -78,41 +78,41 @@ extern struct kmem_cache *btrfs_path_cachep; */ struct btrfs_disk_key { __le64 objectid; - __le32 flags; + u8 type; __le64 offset; } __attribute__ ((__packed__)); struct btrfs_key { u64 objectid; - u32 flags; + u8 type; u64 offset; } __attribute__ ((__packed__)); +#define BTRFS_FSID_SIZE 16 /* * every tree block (leaf or node) starts with this header. */ struct btrfs_header { u8 csum[BTRFS_CSUM_SIZE]; - u8 fsid[16]; /* FS specific uuid */ + u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; __le64 owner; - __le16 nritems; + __le32 nritems; __le16 flags; u8 level; } __attribute__ ((__packed__)); #define BTRFS_MAX_LEVEL 8 -#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->blocksize - \ +#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ sizeof(struct btrfs_header)) / \ (sizeof(struct btrfs_disk_key) + sizeof(u64))) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) -#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) +#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) -struct buffer_head; /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -128,7 +128,9 @@ struct btrfs_super_block { __le64 total_blocks; __le64 blocks_used; __le64 root_dir_objectid; - __le32 blocksize; + __le32 sectorsize; + __le32 nodesize; + __le32 leafsize; } __attribute__ ((__packed__)); /* @@ -138,7 +140,7 @@ struct btrfs_super_block { struct btrfs_item { struct btrfs_disk_key key; __le32 offset; - __le16 size; + __le32 size; } __attribute__ ((__packed__)); /* @@ -176,7 +178,7 @@ struct btrfs_node { * used while walking the tree. */ struct btrfs_path { - struct buffer_head *nodes[BTRFS_MAX_LEVEL]; + struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; int reada; int lowest_level; @@ -292,6 +294,7 @@ struct btrfs_block_group_cache { }; struct btrfs_fs_info { + u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; @@ -304,9 +307,8 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; - struct btrfs_super_block *disk_super; struct btrfs_super_block super_copy; - struct buffer_head *sb_buffer; + struct extent_buffer *sb_buffer; struct super_block *sb; struct inode *btree_inode; struct mutex trans_mutex; @@ -325,8 +327,8 @@ struct btrfs_fs_info { * and for the extent tree extent_root root. */ struct btrfs_root { - struct buffer_head *node; - struct buffer_head *commit_root; + struct extent_buffer *node; + struct extent_buffer *commit_root; struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; @@ -336,7 +338,16 @@ struct btrfs_root { struct rw_semaphore snap_sem; u64 objectid; u64 last_trans; - u32 blocksize; + + /* data allocations are done in sectorsize units */ + u32 sectorsize; + + /* node allocations are done in nodesize units */ + u32 nodesize; + + /* leaf allocations are done in leafsize units */ + u32 leafsize; + u32 type; u64 highest_inode; u64 last_inode_alloc; @@ -347,12 +358,6 @@ struct btrfs_root { char *name; }; -/* the lower bits in the key flags defines the item type */ -#define BTRFS_KEY_TYPE_MAX 256 -#define BTRFS_KEY_TYPE_SHIFT 24 -#define BTRFS_KEY_TYPE_MASK (((u32)BTRFS_KEY_TYPE_MAX - 1) << \ - BTRFS_KEY_TYPE_SHIFT) - /* * inode items have the data typically returned from stat and store other * info about object characteristics. There is one for every file and dir in @@ -402,246 +407,253 @@ struct btrfs_root { */ #define BTRFS_STRING_ITEM_KEY 253 +/* some macros to generate set/get funcs for the struct fields. This + * assumes there is a lefoo_to_cpu for every type, so lets make a simple + * one for u8: + */ +#define le8_to_cpu(v) (v) +#define cpu_to_le8(v) (v) +#define __le8 u8 -static inline u64 btrfs_block_group_used(struct btrfs_block_group_item *bi) -{ - return le64_to_cpu(bi->used); +#define read_eb_member(eb, ptr, type, member, result) ( \ + read_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define write_eb_member(eb, ptr, type, member, result) ( \ + write_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(struct extent_buffer *eb, \ + type *s) \ +{ \ + __le##bits res; \ + read_eb_member(eb, s, type, member, &res); \ + return le##bits##_to_cpu(res); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + type *s, u##bits val) \ +{ \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, s, type, member, &val); \ } -static inline void btrfs_set_block_group_used(struct - btrfs_block_group_item *bi, - u64 val) -{ - bi->used = cpu_to_le64(val); +#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(struct extent_buffer *eb) \ +{ \ + __le##bits res; \ + read_eb_member(eb, NULL, type, member, &res); \ + return le##bits##_to_cpu(res); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + u##bits val) \ +{ \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, NULL, type, member, &val); \ } -static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->generation); +#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(type *s) \ +{ \ + return le##bits##_to_cpu(s->member); \ +} \ +static inline void btrfs_set_##name(type *s, u##bits val) \ +{ \ + s->member = cpu_to_le##bits(val); \ } -static inline void btrfs_set_inode_generation(struct btrfs_inode_item *i, - u64 val) +/* struct btrfs_block_group_item */ +BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, + used, 64); +BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, + used, 64); + +/* struct btrfs_inode_item */ +BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); +BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); +BTRFS_SETGET_FUNCS(inode_nblocks, struct btrfs_inode_item, nblocks, 64); +BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); +BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); +BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 32); +BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16); +BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item, + compat_flags, 16); + +static inline struct btrfs_inode_timespec * +btrfs_inode_atime(struct btrfs_inode_item *inode_item) { - i->generation = cpu_to_le64(val); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, atime); + return (struct btrfs_inode_timespec *)ptr; } -static inline u64 btrfs_inode_size(struct btrfs_inode_item *i) +static inline struct btrfs_inode_timespec * +btrfs_inode_mtime(struct btrfs_inode_item *inode_item) { - return le64_to_cpu(i->size); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, mtime); + return (struct btrfs_inode_timespec *)ptr; } -static inline void btrfs_set_inode_size(struct btrfs_inode_item *i, u64 val) +static inline struct btrfs_inode_timespec * +btrfs_inode_ctime(struct btrfs_inode_item *inode_item) { - i->size = cpu_to_le64(val); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, ctime); + return (struct btrfs_inode_timespec *)ptr; } -static inline u64 btrfs_inode_nblocks(struct btrfs_inode_item *i) +static inline struct btrfs_inode_timespec * +btrfs_inode_otime(struct btrfs_inode_item *inode_item) { - return le64_to_cpu(i->nblocks); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, otime); + return (struct btrfs_inode_timespec *)ptr; } -static inline void btrfs_set_inode_nblocks(struct btrfs_inode_item *i, u64 val) +BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_inode_timespec, sec, 64); +BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32); + +/* struct btrfs_extent_item */ +BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); +BTRFS_SETGET_FUNCS(extent_owner, struct btrfs_extent_item, owner, 32); + +BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, + refs, 32); +BTRFS_SETGET_STACK_FUNCS(stack_extent_owner, struct btrfs_extent_item, + owner, 32); + +/* struct btrfs_node */ +BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); + +static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) { - i->nblocks = cpu_to_le64(val); + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr); } -static inline u64 btrfs_inode_block_group(struct btrfs_inode_item *i) +static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, + int nr, u64 val) { - return le64_to_cpu(i->block_group); + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); } -static inline void btrfs_set_inode_block_group(struct btrfs_inode_item *i, - u64 val) +static unsigned long btrfs_node_key_ptr_offset(int nr) { - i->block_group = cpu_to_le64(val); + return offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; } -static inline u32 btrfs_inode_nlink(struct btrfs_inode_item *i) +static void btrfs_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - return le32_to_cpu(i->nlink); + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + read_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); +} +static inline void btrfs_set_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) +{ + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + write_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); } -static inline void btrfs_set_inode_nlink(struct btrfs_inode_item *i, u32 val) +/* struct btrfs_item */ +BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); +BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); + +static inline unsigned long btrfs_item_nr_offset(int nr) { - i->nlink = cpu_to_le32(val); + return offsetof(struct btrfs_leaf, items) + + sizeof(struct btrfs_item) * nr; } -static inline u32 btrfs_inode_uid(struct btrfs_inode_item *i) +static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb, + int nr) { - return le32_to_cpu(i->uid); + return (struct btrfs_item *)btrfs_item_nr_offset(nr); } -static inline void btrfs_set_inode_uid(struct btrfs_inode_item *i, u32 val) +static inline u32 btrfs_item_end(struct extent_buffer *eb, + struct btrfs_item *item) { - i->uid = cpu_to_le32(val); + return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item); } -static inline u32 btrfs_inode_gid(struct btrfs_inode_item *i) +static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) { - return le32_to_cpu(i->gid); + return btrfs_item_end(eb, btrfs_item_nr(eb, nr)); } -static inline void btrfs_set_inode_gid(struct btrfs_inode_item *i, u32 val) +static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr) { - i->gid = cpu_to_le32(val); + return btrfs_item_offset(eb, btrfs_item_nr(eb, nr)); } -static inline u32 btrfs_inode_mode(struct btrfs_inode_item *i) +static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) { - return le32_to_cpu(i->mode); + return btrfs_item_size(eb, btrfs_item_nr(eb, nr)); } -static inline void btrfs_set_inode_mode(struct btrfs_inode_item *i, u32 val) +static inline void btrfs_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - i->mode = cpu_to_le32(val); + struct btrfs_item *item = btrfs_item_nr(eb, nr); + read_eb_member(eb, item, struct btrfs_item, key, disk_key); } -static inline u32 btrfs_inode_rdev(struct btrfs_inode_item *i) +static inline void btrfs_set_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - return le32_to_cpu(i->rdev); + struct btrfs_item *item = btrfs_item_nr(eb, nr); + write_eb_member(eb, item, struct btrfs_item, key, disk_key); } -static inline void btrfs_set_inode_rdev(struct btrfs_inode_item *i, u32 val) +/* struct btrfs_dir_item */ +BTRFS_SETGET_FUNCS(dir_flags, struct btrfs_dir_item, flags, 16); +BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); +BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); + +static inline void btrfs_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) { - i->rdev = cpu_to_le32(val); + read_eb_member(eb, item, struct btrfs_dir_item, location, key); } -static inline u16 btrfs_inode_flags(struct btrfs_inode_item *i) +static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) { - return le16_to_cpu(i->flags); + write_eb_member(eb, item, struct btrfs_dir_item, location, key); } -static inline void btrfs_set_inode_flags(struct btrfs_inode_item *i, u16 val) -{ - i->flags = cpu_to_le16(val); -} - -static inline u16 btrfs_inode_compat_flags(struct btrfs_inode_item *i) -{ - return le16_to_cpu(i->compat_flags); -} - -static inline void btrfs_set_inode_compat_flags(struct btrfs_inode_item *i, - u16 val) -{ - i->compat_flags = cpu_to_le16(val); -} - -static inline u64 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) -{ - return le64_to_cpu(ts->sec); -} - -static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts, - u64 val) -{ - ts->sec = cpu_to_le64(val); -} - -static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts) -{ - return le32_to_cpu(ts->nsec); -} - -static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts, - u32 val) -{ - ts->nsec = cpu_to_le32(val); -} - -static inline u32 btrfs_extent_refs(struct btrfs_extent_item *ei) -{ - return le32_to_cpu(ei->refs); -} - -static inline void btrfs_set_extent_refs(struct btrfs_extent_item *ei, u32 val) -{ - ei->refs = cpu_to_le32(val); -} - -static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) -{ - return le64_to_cpu(ei->owner); -} - -static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) -{ - ei->owner = cpu_to_le64(val); -} - -static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) -{ - return le64_to_cpu(n->ptrs[nr].blockptr); -} - - -static inline void btrfs_set_node_blockptr(struct btrfs_node *n, int nr, - u64 val) -{ - n->ptrs[nr].blockptr = cpu_to_le64(val); -} - -static inline u32 btrfs_item_offset(struct btrfs_item *item) -{ - return le32_to_cpu(item->offset); -} - -static inline void btrfs_set_item_offset(struct btrfs_item *item, u32 val) -{ - item->offset = cpu_to_le32(val); -} - -static inline u32 btrfs_item_end(struct btrfs_item *item) -{ - return le32_to_cpu(item->offset) + le16_to_cpu(item->size); -} - -static inline u16 btrfs_item_size(struct btrfs_item *item) -{ - return le16_to_cpu(item->size); -} - -static inline void btrfs_set_item_size(struct btrfs_item *item, u16 val) -{ - item->size = cpu_to_le16(val); -} - -static inline u16 btrfs_dir_flags(struct btrfs_dir_item *d) -{ - return le16_to_cpu(d->flags); -} - -static inline void btrfs_set_dir_flags(struct btrfs_dir_item *d, u16 val) -{ - d->flags = cpu_to_le16(val); -} - -static inline u8 btrfs_dir_type(struct btrfs_dir_item *d) -{ - return d->type; -} - -static inline void btrfs_set_dir_type(struct btrfs_dir_item *d, u8 val) -{ - d->type = val; -} - -static inline u16 btrfs_dir_name_len(struct btrfs_dir_item *d) -{ - return le16_to_cpu(d->name_len); -} - -static inline void btrfs_set_dir_name_len(struct btrfs_dir_item *d, u16 val) -{ - d->name_len = cpu_to_le16(val); -} +/* struct btrfs_disk_key */ +BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, + objectid, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, struct btrfs_disk_key *disk) { cpu->offset = le64_to_cpu(disk->offset); - cpu->flags = le32_to_cpu(disk->flags); + cpu->type = disk->type; cpu->objectid = le64_to_cpu(disk->objectid); } @@ -649,400 +661,167 @@ static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, struct btrfs_key *cpu) { disk->offset = cpu_to_le64(cpu->offset); - disk->flags = cpu_to_le32(cpu->flags); + disk->type = cpu->type; disk->objectid = cpu_to_le64(cpu->objectid); } -static inline u64 btrfs_disk_key_objectid(struct btrfs_disk_key *disk) +static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) { - return le64_to_cpu(disk->objectid); + struct btrfs_disk_key disk_key; + btrfs_node_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline void btrfs_set_disk_key_objectid(struct btrfs_disk_key *disk, - u64 val) +static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) { - disk->objectid = cpu_to_le64(val); + struct btrfs_disk_key disk_key; + btrfs_item_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline u64 btrfs_disk_key_offset(struct btrfs_disk_key *disk) +static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_key *key) { - return le64_to_cpu(disk->offset); + struct btrfs_disk_key disk_key; + btrfs_dir_item_key(eb, item, &disk_key); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline void btrfs_set_disk_key_offset(struct btrfs_disk_key *disk, - u64 val) + +static inline u8 btrfs_key_type(struct btrfs_key *key) { - disk->offset = cpu_to_le64(val); + return key->type; } -static inline u32 btrfs_disk_key_flags(struct btrfs_disk_key *disk) +static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val) { - return le32_to_cpu(disk->flags); + key->type = val; } -static inline void btrfs_set_disk_key_flags(struct btrfs_disk_key *disk, - u32 val) +/* struct btrfs_header */ +BTRFS_SETGET_HEADER_FUNCS(header_blocknr, struct btrfs_header, blocknr, 64); +BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, + generation, 64); +BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); +BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 16); +BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); + +static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) { - disk->flags = cpu_to_le32(val); + unsigned long ptr = offsetof(struct btrfs_header, fsid); + return (u8 *)ptr; } -static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key) +static inline u8 *btrfs_super_fsid(struct extent_buffer *eb) { - return le32_to_cpu(key->flags) >> BTRFS_KEY_TYPE_SHIFT; + unsigned long ptr = offsetof(struct btrfs_super_block, fsid); + return (u8 *)ptr; } -static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, - u32 val) +static inline u8 *btrfs_header_csum(struct extent_buffer *eb) { - u32 flags = btrfs_disk_key_flags(key); - BUG_ON(val >= BTRFS_KEY_TYPE_MAX); - val = val << BTRFS_KEY_TYPE_SHIFT; - flags = (flags & ~BTRFS_KEY_TYPE_MASK) | val; - btrfs_set_disk_key_flags(key, flags); + unsigned long ptr = offsetof(struct btrfs_header, csum); + return (u8 *)ptr; } -static inline u32 btrfs_key_type(struct btrfs_key *key) +static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb) { - return key->flags >> BTRFS_KEY_TYPE_SHIFT; + return NULL; } -static inline void btrfs_set_key_type(struct btrfs_key *key, u32 val) +static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb) { - BUG_ON(val >= BTRFS_KEY_TYPE_MAX); - val = val << BTRFS_KEY_TYPE_SHIFT; - key->flags = (key->flags & ~(BTRFS_KEY_TYPE_MASK)) | val; + return NULL; } -static inline u64 btrfs_header_blocknr(struct btrfs_header *h) +static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb) { - return le64_to_cpu(h->blocknr); + return NULL; } -static inline void btrfs_set_header_blocknr(struct btrfs_header *h, u64 blocknr) +static inline int btrfs_is_leaf(struct extent_buffer *eb) { - h->blocknr = cpu_to_le64(blocknr); + return (btrfs_header_level(eb) == 0); } -static inline u64 btrfs_header_generation(struct btrfs_header *h) +/* struct btrfs_root_item */ +BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_FUNCS(disk_root_blocknr, struct btrfs_root_item, blocknr, 64); + +BTRFS_SETGET_STACK_FUNCS(root_blocknr, struct btrfs_root_item, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); +BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32); +BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, block_limit, 64); + +/* struct btrfs_super_block */ +BTRFS_SETGET_STACK_FUNCS(super_blocknr, struct btrfs_super_block, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); +BTRFS_SETGET_STACK_FUNCS(super_total_blocks, struct btrfs_super_block, + total_blocks, 64); +BTRFS_SETGET_STACK_FUNCS(super_blocks_used, struct btrfs_super_block, + blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, + sectorsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, + nodesize, 32); +BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block, + leafsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, + root_dir_objectid, 64); + +static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) { - return le64_to_cpu(h->generation); + return offsetof(struct btrfs_leaf, items); } -static inline void btrfs_set_header_generation(struct btrfs_header *h, - u64 val) -{ - h->generation = cpu_to_le64(val); -} +/* struct btrfs_file_extent_item */ +BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); -static inline u64 btrfs_header_owner(struct btrfs_header *h) -{ - return le64_to_cpu(h->owner); -} - -static inline void btrfs_set_header_owner(struct btrfs_header *h, - u64 val) -{ - h->owner = cpu_to_le64(val); -} - -static inline u16 btrfs_header_nritems(struct btrfs_header *h) -{ - return le16_to_cpu(h->nritems); -} - -static inline void btrfs_set_header_nritems(struct btrfs_header *h, u16 val) -{ - h->nritems = cpu_to_le16(val); -} - -static inline u16 btrfs_header_flags(struct btrfs_header *h) -{ - return le16_to_cpu(h->flags); -} - -static inline void btrfs_set_header_flags(struct btrfs_header *h, u16 val) -{ - h->flags = cpu_to_le16(val); -} - -static inline int btrfs_header_level(struct btrfs_header *h) -{ - return h->level; -} - -static inline void btrfs_set_header_level(struct btrfs_header *h, int level) -{ - BUG_ON(level > BTRFS_MAX_LEVEL); - h->level = level; -} - -static inline int btrfs_is_leaf(struct btrfs_node *n) -{ - return (btrfs_header_level(&n->header) == 0); -} - -static inline u64 btrfs_root_blocknr(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->blocknr); -} - -static inline void btrfs_set_root_blocknr(struct btrfs_root_item *item, u64 val) -{ - item->blocknr = cpu_to_le64(val); -} - -static inline u64 btrfs_root_dirid(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->root_dirid); -} - -static inline void btrfs_set_root_dirid(struct btrfs_root_item *item, u64 val) -{ - item->root_dirid = cpu_to_le64(val); -} - -static inline u32 btrfs_root_refs(struct btrfs_root_item *item) -{ - return le32_to_cpu(item->refs); -} - -static inline void btrfs_set_root_refs(struct btrfs_root_item *item, u32 val) -{ - item->refs = cpu_to_le32(val); -} - -static inline u32 btrfs_root_flags(struct btrfs_root_item *item) -{ - return le32_to_cpu(item->flags); -} - -static inline void btrfs_set_root_flags(struct btrfs_root_item *item, u32 val) -{ - item->flags = cpu_to_le32(val); -} - -static inline void btrfs_set_root_blocks_used(struct btrfs_root_item *item, - u64 val) -{ - item->blocks_used = cpu_to_le64(val); -} - -static inline u64 btrfs_root_blocks_used(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->blocks_used); -} - -static inline void btrfs_set_root_block_limit(struct btrfs_root_item *item, - u64 val) -{ - item->block_limit = cpu_to_le64(val); -} - -static inline u64 btrfs_root_block_limit(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->block_limit); -} - -static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->blocknr); -} - -static inline void btrfs_set_super_blocknr(struct btrfs_super_block *s, u64 val) -{ - s->blocknr = cpu_to_le64(val); -} - -static inline u64 btrfs_super_generation(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->generation); -} - -static inline void btrfs_set_super_generation(struct btrfs_super_block *s, - u64 val) -{ - s->generation = cpu_to_le64(val); -} - -static inline u64 btrfs_super_root(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->root); -} - -static inline void btrfs_set_super_root(struct btrfs_super_block *s, u64 val) -{ - s->root = cpu_to_le64(val); -} - -static inline u64 btrfs_super_total_blocks(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->total_blocks); -} - -static inline void btrfs_set_super_total_blocks(struct btrfs_super_block *s, - u64 val) -{ - s->total_blocks = cpu_to_le64(val); -} - -static inline u64 btrfs_super_blocks_used(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->blocks_used); -} - -static inline void btrfs_set_super_blocks_used(struct btrfs_super_block *s, - u64 val) -{ - s->blocks_used = cpu_to_le64(val); -} - -static inline u32 btrfs_super_blocksize(struct btrfs_super_block *s) -{ - return le32_to_cpu(s->blocksize); -} - -static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s, - u32 val) -{ - s->blocksize = cpu_to_le32(val); -} - -static inline u64 btrfs_super_root_dir(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->root_dir_objectid); -} - -static inline void btrfs_set_super_root_dir(struct btrfs_super_block *s, u64 - val) -{ - s->root_dir_objectid = cpu_to_le64(val); -} - -static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) -{ - return (u8 *)l->items; -} - -static inline int btrfs_file_extent_type(struct btrfs_file_extent_item *e) -{ - return e->type; -} -static inline void btrfs_set_file_extent_type(struct btrfs_file_extent_item *e, - u8 val) -{ - e->type = val; -} - -static inline char *btrfs_file_extent_inline_start(struct +static inline unsigned long btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) { - return (char *)(&e->disk_blocknr); + unsigned long offset = (unsigned long)e; + offset += offsetof(struct btrfs_file_extent_item, disk_blocknr); + return offset; } static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) { - return (unsigned long)(&((struct - btrfs_file_extent_item *)NULL)->disk_blocknr) + datasize; + return offsetof(struct btrfs_file_extent_item, disk_blocknr) + datasize; } -static inline u32 btrfs_file_extent_inline_len(struct btrfs_item *e) +static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, + struct btrfs_item *e) { - struct btrfs_file_extent_item *fe = NULL; - return btrfs_item_size(e) - (unsigned long)(&fe->disk_blocknr); + unsigned long offset; + offset = offsetof(struct btrfs_file_extent_item, disk_blocknr); + return btrfs_item_size(eb, e) - offset; } -static inline u64 btrfs_file_extent_disk_blocknr(struct btrfs_file_extent_item - *e) -{ - return le64_to_cpu(e->disk_blocknr); -} - -static inline void btrfs_set_file_extent_disk_blocknr(struct - btrfs_file_extent_item - *e, u64 val) -{ - e->disk_blocknr = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_generation(struct btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->generation); -} - -static inline void btrfs_set_file_extent_generation(struct - btrfs_file_extent_item *e, - u64 val) -{ - e->generation = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_disk_num_blocks(struct - btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->disk_num_blocks); -} - -static inline void btrfs_set_file_extent_disk_num_blocks(struct - btrfs_file_extent_item - *e, u64 val) -{ - e->disk_num_blocks = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_offset(struct btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->offset); -} - -static inline void btrfs_set_file_extent_offset(struct btrfs_file_extent_item - *e, u64 val) -{ - e->offset = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_num_blocks(struct btrfs_file_extent_item - *e) -{ - return le64_to_cpu(e->num_blocks); -} - -static inline void btrfs_set_file_extent_num_blocks(struct - btrfs_file_extent_item *e, - u64 val) -{ - e->num_blocks = cpu_to_le64(val); -} +BTRFS_SETGET_FUNCS(file_extent_disk_blocknr, struct btrfs_file_extent_item, + disk_blocknr, 64); +BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, + generation, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_num_blocks, struct btrfs_file_extent_item, + disk_num_blocks, 64); +BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, + offset, 64); +BTRFS_SETGET_FUNCS(file_extent_num_blocks, struct btrfs_file_extent_item, + num_blocks, 64); static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; } -static inline void btrfs_check_bounds(void *vptr, size_t len, - void *vcontainer, size_t container_len) -{ - char *ptr = vptr; - char *container = vcontainer; - WARN_ON(ptr < container); - WARN_ON(ptr + len > container + container_len); -} - -static inline void btrfs_memcpy(struct btrfs_root *root, - void *dst_block, - void *dst, const void *src, size_t nr) -{ - btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); - memcpy(dst, src, nr); -} - -static inline void btrfs_memmove(struct btrfs_root *root, - void *dst_block, - void *dst, void *src, size_t nr) -{ - btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); - memmove(dst, src, nr); -} - static inline int btrfs_set_root_name(struct btrfs_root *root, const char *name, int len) { @@ -1063,7 +842,11 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ - btrfs_item_offset((leaf)->items + (slot)))) + btrfs_item_offset_nr(leaf, slot))) + +#define btrfs_item_ptr_offset(leaf, slot) \ + ((unsigned long)(btrfs_leaf_data(leaf) + \ + btrfs_item_offset_nr(leaf, slot))) /* mount option defines and helpers */ #define BTRFS_MOUNT_SUBVOL 0x000001 @@ -1084,7 +867,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data, int owner); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, +struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 hint, u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, @@ -1092,7 +875,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 num_blocks, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); + struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, @@ -1106,10 +889,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); /* ctree.c */ -int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret); +int btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret); int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); int btrfs_truncate_item(struct btrfs_trans_handle *trans, @@ -1120,7 +903,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); int btrfs_realloc_node(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *parent, + struct btrfs_root *root, struct extent_buffer *parent, int cache_only, u64 *last_ret); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); @@ -1134,7 +917,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *cpu_key, u32 data_size); int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); -int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); +int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root); /* root-item.c */ @@ -1179,9 +962,9 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid); /* inode-item.c */ -int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 objectid, struct btrfs_inode_item - *inode_item); +int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid); int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *location, int mod); @@ -1224,8 +1007,6 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to); -int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create); struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 end, int create); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 49db5fa7ced3..6f19de41b878 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -33,7 +33,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle int ret; char *ptr; struct btrfs_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (ret == -EEXIST) { @@ -49,11 +49,11 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle if (ret < 0) return ERR_PTR(ret); WARN_ON(ret > 0); - leaf = btrfs_buffer_leaf(path->nodes[0]); - item = leaf->items + path->slots[0]; + leaf = path->nodes[0]; + item = btrfs_item_nr(leaf, path->slots[0]); ptr = btrfs_item_ptr(leaf, path->slots[0], char); - BUG_ON(data_size > btrfs_item_size(item)); - ptr += btrfs_item_size(item) - data_size; + BUG_ON(data_size > btrfs_item_size(leaf, item)); + ptr += btrfs_item_size(leaf, item) - data_size; return (struct btrfs_dir_item *)ptr; } @@ -65,12 +65,13 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int ret2 = 0; struct btrfs_path *path; struct btrfs_dir_item *dir_item; - char *name_ptr; + struct extent_buffer *leaf; + unsigned long name_ptr; struct btrfs_key key; + struct btrfs_disk_key disk_key; u32 data_size; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); @@ -85,14 +86,16 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root goto out; } - btrfs_cpu_key_to_disk(&dir_item->location, location); - btrfs_set_dir_type(dir_item, type); - btrfs_set_dir_flags(dir_item, 0); - btrfs_set_dir_name_len(dir_item, name_len); - name_ptr = (char *)(dir_item + 1); + leaf = path->nodes[0]; + btrfs_cpu_key_to_disk(&disk_key, location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, type); + btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + name_ptr = (unsigned long)(dir_item + 1); - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + write_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_mark_buffer_dirty(leaf); second_insert: /* FIXME, use some real flag for selecting the extra index */ @@ -110,13 +113,15 @@ second_insert: ret2 = PTR_ERR(dir_item); goto out; } - btrfs_cpu_key_to_disk(&dir_item->location, location); - btrfs_set_dir_type(dir_item, type); - btrfs_set_dir_flags(dir_item, 0); - btrfs_set_dir_name_len(dir_item, name_len); - name_ptr = (char *)(dir_item + 1); - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + leaf = path->nodes[0]; + btrfs_cpu_key_to_disk(&disk_key, location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, type); + btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + name_ptr = (unsigned long)(dir_item + 1); + write_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); if (ret) @@ -136,14 +141,15 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; - struct btrfs_disk_key *found_key; - struct btrfs_leaf *leaf; + struct btrfs_key found_key; + struct extent_buffer *leaf; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); @@ -152,12 +158,13 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, return NULL; path->slots[0]--; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - if (btrfs_disk_key_objectid(found_key) != dir || - btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || - btrfs_disk_key_offset(found_key) != key.offset) + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != dir || + btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY || + found_key.offset != key.offset) return NULL; return btrfs_match_dir_item_name(root, path, name, name_len); @@ -176,7 +183,6 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, int cow = mod != 0; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); key.offset = objectid; @@ -193,21 +199,22 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, const char *name, int name_len) { struct btrfs_dir_item *dir_item; - char *name_ptr; + unsigned long name_ptr; u32 total_len; u32 cur = 0; u32 this_len; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); - total_len = btrfs_item_size(leaf->items + path->slots[0]); + total_len = btrfs_item_size_nr(leaf, path->slots[0]); while(cur < total_len) { - this_len = sizeof(*dir_item) + btrfs_dir_name_len(dir_item); - name_ptr = (char *)(dir_item + 1); + this_len = sizeof(*dir_item) + + btrfs_dir_name_len(leaf, dir_item); + name_ptr = (unsigned long)(dir_item + 1); - if (btrfs_dir_name_len(dir_item) == name_len && - memcmp(name_ptr, name, name_len) == 0) + if (btrfs_dir_name_len(leaf, dir_item) == name_len && + memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) return dir_item; cur += this_len; @@ -223,20 +230,23 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di) { - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u32 sub_item_len; u32 item_len; int ret = 0; - leaf = btrfs_buffer_leaf(path->nodes[0]); - sub_item_len = sizeof(*di) + btrfs_dir_name_len(di); - item_len = btrfs_item_size(leaf->items + path->slots[0]); - if (sub_item_len == btrfs_item_size(leaf->items + path->slots[0])) { + leaf = path->nodes[0]; + sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di); + item_len = btrfs_item_size_nr(leaf, path->slots[0]); + if (sub_item_len == item_len) { ret = btrfs_del_item(trans, root, path); } else { - char *ptr = (char *)di; - char *start = btrfs_item_ptr(leaf, path->slots[0], char); - btrfs_memmove(root, leaf, ptr, ptr + sub_item_len, + /* MARKER */ + unsigned long ptr = (unsigned long)di; + unsigned long start; + + start = btrfs_item_ptr_offset(leaf, path->slots[0]); + memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_len - (ptr + sub_item_len - start)); ret = btrfs_truncate_item(trans, root, path, item_len - sub_item_len); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ef6d76f2ec20..0c1f90cbedb0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -23,138 +23,132 @@ #include #include #include +#include // for block_sync_page #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -u64 bh_blocknr(struct buffer_head *bh) +#if 0 +static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) { - return bh->b_blocknr; -} - -static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) -{ - struct btrfs_node *node = btrfs_buffer_node(buf); - if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { - printk(KERN_CRIT "bh_blocknr(buf) is %llu, header is %llu\n", - (unsigned long long)bh_blocknr(buf), - (unsigned long long)btrfs_header_blocknr(&node->header)); + if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) { + printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n", + (unsigned long long)extent_buffer_blocknr(buf), + (unsigned long long)btrfs_header_blocknr(buf)); return 1; } return 0; } +#endif -struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 blocknr) { - struct address_space *mapping = root->fs_info->btree_inode->i_mapping; - int blockbits = root->fs_info->sb->s_blocksize_bits; - unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); - struct page *page; - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *ret = NULL; - - - page = find_lock_page(mapping, index); - if (!page) - return NULL; - - if (!page_has_buffers(page)) - goto out_unlock; - - head = page_buffers(page); - bh = head; - do { - if (buffer_mapped(bh) && bh_blocknr(bh) == blocknr) { - ret = bh; - get_bh(bh); - goto out_unlock; - } - bh = bh->b_this_page; - } while (bh != head); -out_unlock: - unlock_page(page); - page_cache_release(page); - return ret; + struct inode *btree_inode = root->fs_info->btree_inode; + return find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + blocknr * root->sectorsize, + root->sectorsize, GFP_NOFS); } -int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, - u64 logical) -{ - if (logical == 0) { - bh->b_bdev = NULL; - bh->b_blocknr = 0; - set_buffer_mapped(bh); - } else { - map_bh(bh, root->fs_info->sb, logical); - } - return 0; -} - -struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { - struct address_space *mapping = root->fs_info->btree_inode->i_mapping; - int blockbits = root->fs_info->sb->s_blocksize_bits; - unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); - struct page *page; - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *ret = NULL; - int err; - u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); + struct inode *btree_inode = root->fs_info->btree_inode; + return alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + blocknr * root->sectorsize, + root->sectorsize, GFP_NOFS); +} - page = find_or_create_page(mapping, index, GFP_NOFS); - if (!page) - return NULL; +struct extent_map *btree_get_extent(struct inode *inode, struct page *page, + size_t page_offset, u64 start, u64 end, + int create) +{ + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *em; + int ret; - if (!page_has_buffers(page)) - create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0); - head = page_buffers(page); - bh = head; - do { - if (!buffer_mapped(bh)) { - err = btrfs_map_bh_to_logical(root, bh, first_block); - BUG_ON(err); - } - if (bh_blocknr(bh) == blocknr) { - ret = bh; - get_bh(bh); - goto out_unlock; - } - bh = bh->b_this_page; - first_block++; - } while (bh != head); -out_unlock: - unlock_page(page); - if (ret) - touch_buffer(ret); - page_cache_release(page); +again: + em = lookup_extent_mapping(em_tree, start, end); + if (em) { + goto out; + } + em = alloc_extent_map(GFP_NOFS); + if (!em) { + em = ERR_PTR(-ENOMEM); + goto out; + } + em->start = 0; + em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1; + em->block_start = 0; + em->block_end = em->end; + em->bdev = inode->i_sb->s_bdev; + ret = add_extent_mapping(em_tree, em); + if (ret == -EEXIST) { + free_extent_map(em); + em = NULL; + goto again; + } else if (ret) { + em = ERR_PTR(ret); + } +out: + return em; +} + +static int btree_writepage(struct page *page, struct writeback_control *wbc) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_write_full_page(tree, page, btree_get_extent, wbc); +} +int btree_readpage(struct file *file, struct page *page) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_read_full_page(tree, page, btree_get_extent); +} + +static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) +{ + struct extent_map_tree *tree; + int ret; + + BUG_ON(page->private != 1); + tree = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(tree, page); + if (ret == 1) { + ClearPagePrivate(page); + set_page_private(page, 0); + page_cache_release(page); + } return ret; } -static int btree_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) +static void btree_invalidatepage(struct page *page, unsigned long offset) { - int err; - struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - err = btrfs_map_bh_to_logical(root, bh, iblock); - return err; + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + extent_invalidatepage(tree, page, offset); + btree_releasepage(page, GFP_NOFS); } int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result) { + return 0; +#if 0 u32 crc; crc = crc32c(0, data, len); memcpy(result, &crc, BTRFS_CRC32_SIZE); return 0; +#endif } -static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, +#if 0 +static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { + return 0; char result[BTRFS_CRC32_SIZE]; int ret; struct btrfs_node *node; @@ -176,7 +170,9 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, } return 0; } +#endif +#if 0 static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct buffer_head *bh; @@ -195,87 +191,65 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) } while (bh != head); return block_write_full_page(page, btree_get_block, wbc); } - -static int btree_readpage(struct file * file, struct page * page) -{ - return block_read_full_page(page, btree_get_block); -} +#endif static struct address_space_operations btree_aops = { .readpage = btree_readpage, .writepage = btree_writepage, + .releasepage = btree_releasepage, + .invalidatepage = btree_invalidatepage, .sync_page = block_sync_page, }; int readahead_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *bh = NULL; + struct extent_buffer *buf = NULL; + struct inode *btree_inode = root->fs_info->btree_inode; int ret = 0; - bh = btrfs_find_create_tree_block(root, blocknr); - if (!bh) + buf = btrfs_find_create_tree_block(root, blocknr); + if (!buf) return 0; - if (buffer_uptodate(bh)) { - ret = 1; - goto done; - } - if (test_set_buffer_locked(bh)) { - ret = 1; - goto done; - } - if (!buffer_uptodate(bh)) { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); - } else { - unlock_buffer(bh); - ret = 1; - } -done: - brelse(bh); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 0); + free_extent_buffer(buf); return ret; } -struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *bh = NULL; + struct extent_buffer *buf = NULL; + struct inode *btree_inode = root->fs_info->btree_inode; - bh = btrfs_find_create_tree_block(root, blocknr); - if (!bh) - return bh; - if (buffer_uptodate(bh)) - goto uptodate; - lock_buffer(bh); - if (!buffer_uptodate(bh)) { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - goto fail; - } else { - unlock_buffer(bh); - } -uptodate: - if (!buffer_checked(bh)) { - csum_tree_block(root, bh, 1); - set_buffer_checked(bh); - } - if (check_tree_block(root, bh)) - goto fail; - return bh; -fail: - brelse(bh); - return NULL; + buf = btrfs_find_create_tree_block(root, blocknr); + if (!buf) + return NULL; + read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 1); + return buf; } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) + struct extent_buffer *buf) { - WARN_ON(atomic_read(&buf->b_count) == 0); - lock_buffer(buf); - clear_buffer_dirty(buf); - unlock_buffer(buf); + struct inode *btree_inode = root->fs_info->btree_inode; + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + return 0; +} + +int wait_on_tree_block_writeback(struct btrfs_root *root, + struct extent_buffer *buf) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree, + buf); + return 0; +} + +int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); return 0; } @@ -287,7 +261,9 @@ static int __setup_root(int blocksize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; - root->blocksize = blocksize; + root->sectorsize = blocksize; + root->nodesize = blocksize; + root->leafsize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -332,7 +308,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_path *path; - struct btrfs_leaf *l; + struct extent_buffer *l; u64 highest_inode; int ret = 0; @@ -361,11 +337,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, ret = -ENOENT; goto out; } - l = btrfs_buffer_leaf(path->nodes[0]); - memcpy(&root->root_item, - btrfs_item_ptr(l, path->slots[0], struct btrfs_root_item), + l = path->nodes[0]; + read_extent_buffer(l, &root->root_item, + btrfs_item_ptr_offset(l, path->slots[0]), sizeof(root->root_item)); - memcpy(&root->root_key, location, sizeof(*location)); ret = 0; out: btrfs_release_path(root, path); @@ -406,21 +381,21 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, (unsigned long)root->root_key.objectid, root); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root); return ERR_PTR(ret); } ret = btrfs_set_root_name(root, name, namelen); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root); return ERR_PTR(ret); } ret = btrfs_sysfs_add_root(root); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root->name); kfree(root); return ERR_PTR(ret); @@ -471,6 +446,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + fs_info->btree_inode->i_mapping, + GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -493,10 +471,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!fs_info->sb_buffer) goto fail_iput; - disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; - fs_info->disk_super = disk_super; - memcpy(&fs_info->super_copy, disk_super, sizeof(fs_info->super_copy)); + read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0, + sizeof(fs_info->super_copy)); + + read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, + (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), + BTRFS_FSID_SIZE); + disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; @@ -530,9 +512,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) return tree_root; fail_tree_root: - btrfs_block_release(tree_root, tree_root->node); + free_extent_buffer(tree_root->node); fail_sb_buffer: - btrfs_block_release(tree_root, fs_info->sb_buffer); + free_extent_buffer(fs_info->sb_buffer); fail_iput: iput(fs_info->btree_inode); fail: @@ -546,31 +528,13 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; - struct buffer_head *bh = root->fs_info->sb_buffer; + struct extent_buffer *super = root->fs_info->sb_buffer; + struct inode *btree_inode = root->fs_info->btree_inode; - lock_buffer(bh); - WARN_ON(atomic_read(&bh->b_count) < 1); - clear_buffer_dirty(bh); - csum_tree_block(root, bh, 0); - bh->b_end_io = end_buffer_write_sync; - get_bh(bh); - if (root->fs_info->do_barriers) - ret = submit_bh(WRITE_BARRIER, bh); - else - ret = submit_bh(WRITE, bh); - if (ret == -EOPNOTSUPP) { - get_bh(bh); - lock_buffer(bh); - set_buffer_uptodate(bh); - root->fs_info->do_barriers = 0; - ret = submit_bh(WRITE, bh); - } - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - WARN_ON(1); - return -EIO; - } - return 0; + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); + ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, + super->start, super->len); + return ret; } int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) @@ -581,9 +545,9 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) if (root->inode) iput(root->inode); if (root->node) - brelse(root->node); + free_extent_buffer(root->node); if (root->commit_root) - brelse(root->commit_root); + free_extent_buffer(root->commit_root); if (root->name) kfree(root->name); kfree(root); @@ -629,12 +593,10 @@ int close_ctree(struct btrfs_root *root) mutex_unlock(&fs_info->fs_mutex); if (fs_info->extent_root->node) - btrfs_block_release(fs_info->extent_root, - fs_info->extent_root->node); + free_extent_buffer(fs_info->extent_root->node); if (fs_info->tree_root->node) - btrfs_block_release(fs_info->tree_root, - fs_info->tree_root->node); - btrfs_block_release(root, fs_info->sb_buffer); + free_extent_buffer(fs_info->tree_root->node); + free_extent_buffer(fs_info->sb_buffer); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); @@ -645,25 +607,32 @@ int close_ctree(struct btrfs_root *root) return 0; } -void btrfs_mark_buffer_dirty(struct buffer_head *bh) +int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + struct inode *btree_inode = buf->pages[0]->mapping->host; + return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); +} - WARN_ON(!atomic_read(&bh->b_count)); +int btrfs_set_buffer_uptodate(struct extent_buffer *buf) +{ + struct inode *btree_inode = buf->pages[0]->mapping->host; + return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, + buf); +} + +void btrfs_mark_buffer_dirty(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + u64 transid = btrfs_header_generation(buf); + struct inode *btree_inode = root->fs_info->btree_inode; if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", - (unsigned long long)bh->b_blocknr, + (unsigned long long)extent_buffer_blocknr(buf), transid, root->fs_info->generation); WARN_ON(1); } - mark_buffer_dirty(bh); -} - -void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) -{ - brelse(buf); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index fc6b520c6e0b..70d9413c599f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -19,68 +19,35 @@ #ifndef __DISKIO__ #define __DISKIO__ -#include - #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) -enum btrfs_bh_state_bits { - BH_Checked = BH_PrivateStart, - BH_Defrag, - BH_DefragDone, -}; -BUFFER_FNS(Checked, checked); -BUFFER_FNS(Defrag, defrag); -BUFFER_FNS(DefragDone, defrag_done); - -static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) -{ - return (struct btrfs_node *)bh->b_data; -} - -static inline struct btrfs_leaf *btrfs_buffer_leaf(struct buffer_head *bh) -{ - return (struct btrfs_leaf *)bh->b_data; -} - -static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) -{ - return &((struct btrfs_node *)bh->b_data)->header; -} - -struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); int readahead_tree_block(struct btrfs_root *root, u64 blocknr); -struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr); -int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); -int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr); int clean_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *buf); -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, struct extent_buffer *buf); struct btrfs_root *open_ctree(struct super_block *sb); int close_ctree(struct btrfs_root *root); -void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); -int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, - char *result); +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 blocknr); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_key *location); -u64 bh_blocknr(struct buffer_head *bh); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, u64 device_id, u64 block_start, u64 num_blocks); -int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, - u64 logical); void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); -void btrfs_mark_buffer_dirty(struct buffer_head *bh); +void btrfs_mark_buffer_dirty(struct extent_buffer *buf); +int btrfs_buffer_uptodate(struct extent_buffer *buf); +int btrfs_set_buffer_uptodate(struct extent_buffer *buf); +int wait_on_tree_block_writeback(struct btrfs_root *root, + struct extent_buffer *buf); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f261a8326cdf..089c41cbca74 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,7 +33,7 @@ static int cache_block_group(struct btrfs_root *root, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct radix_tree_root *extent_radix; int slot; u64 i; @@ -56,7 +56,6 @@ static int cache_block_group(struct btrfs_root *root, path->reada = 2; first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; - key.flags = 0; key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -69,9 +68,9 @@ static int cache_block_group(struct btrfs_root *root, path->slots[0]--; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&leaf->header)) { + if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); if (ret < 0) goto err; @@ -82,7 +81,7 @@ static int cache_block_group(struct btrfs_root *root, } } - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, slot); if (key.objectid < block_group->key.objectid) { if (key.objectid + key.offset > first_free) first_free = key.objectid + key.offset; @@ -116,8 +115,7 @@ next: hole_size = block_group->key.objectid + block_group->key.offset - last; for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, - last + i); + set_radix_bit(extent_radix, last + i); } } block_group->cached = 1; @@ -366,7 +364,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_extent_item *item; u32 refs; @@ -375,7 +373,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return -ENOMEM; key.objectid = blocknr; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, @@ -386,10 +383,10 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, BUG(); } BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - refs = btrfs_extent_refs(item); - btrfs_set_extent_refs(item, refs + 1); + refs = btrfs_extent_refs(l, item); + btrfs_set_extent_refs(l, item, refs + 1); btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root->fs_info->extent_root, path); @@ -414,23 +411,25 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_extent_item *item; path = btrfs_alloc_path(); key.objectid = blocknr; key.offset = num_blocks; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto out; - if (ret != 0) + if (ret != 0) { + btrfs_print_leaf(root, path->nodes[0]); + printk("failed to find block number %Lu\n", blocknr); BUG(); - l = btrfs_buffer_leaf(path->nodes[0]); + } + l = path->nodes[0]; item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - *refs = btrfs_extent_refs(item); + *refs = btrfs_extent_refs(l, item); out: btrfs_free_path(path); return 0; @@ -439,16 +438,16 @@ out: int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1); + return btrfs_inc_extent_ref(trans, root, + extent_buffer_blocknr(root->node), 1); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) + struct extent_buffer *buf) { u64 blocknr; - struct btrfs_node *buf_node; - struct btrfs_leaf *buf_leaf; - struct btrfs_disk_key *key; + u32 nritems; + struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int leaf; @@ -458,31 +457,31 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (!root->ref_cows) return 0; - buf_node = btrfs_buffer_node(buf); - leaf = btrfs_is_leaf(buf_node); - buf_leaf = btrfs_buffer_leaf(buf); - for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { + + leaf = btrfs_is_leaf(buf); + nritems = btrfs_header_nritems(buf); + for (i = 0; i < nritems; i++) { if (leaf) { u64 disk_blocknr; - key = &buf_leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + btrfs_item_key_to_cpu(buf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; - fi = btrfs_item_ptr(buf_leaf, i, + fi = btrfs_item_ptr(buf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); if (disk_blocknr == 0) continue; ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi)); + btrfs_file_extent_disk_num_blocks(buf, fi)); if (ret) { faili = i; goto fail; } } else { - blocknr = btrfs_node_blockptr(buf_node, i); + blocknr = btrfs_node_blockptr(buf, i); ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); if (ret) { faili = i; @@ -496,22 +495,23 @@ fail: for (i =0; i < faili; i++) { if (leaf) { u64 disk_blocknr; - key = &buf_leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + btrfs_item_key_to_cpu(buf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; - fi = btrfs_item_ptr(buf_leaf, i, + fi = btrfs_item_ptr(buf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); if (disk_blocknr == 0) continue; err = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi), 0); + btrfs_file_extent_disk_num_blocks(buf, + fi), 0); BUG_ON(err); } else { - blocknr = btrfs_node_blockptr(buf_node, i); + blocknr = btrfs_node_blockptr(buf, i); err = btrfs_free_extent(trans, root, blocknr, 1, 0); BUG_ON(err); } @@ -527,16 +527,18 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, int ret; int pending_ret; struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_block_group_item *bi; + unsigned long bi; + struct extent_buffer *leaf; ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); if (ret < 0) goto fail; BUG_ON(ret); - bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_block_group_item); - memcpy(bi, &cache->item, sizeof(*bi)); - btrfs_mark_buffer_dirty(path->nodes[0]); + + leaf = path->nodes[0]; + bi = btrfs_item_ptr_offset(leaf, path->slots[0]); + write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); + btrfs_mark_buffer_dirty(leaf); btrfs_release_path(extent_root, path); fail: finish_current_insert(trans, extent_root); @@ -768,11 +770,11 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct unsigned long gang[8]; struct btrfs_fs_info *info = extent_root->fs_info; - btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_stack_extent_refs(&extent_item, 1); ins.offset = 1; - ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); - btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); + btrfs_set_stack_extent_owner(&extent_item, + extent_root->root_key.objectid); while(1) { ret = find_first_radix_bit(&info->extent_ins_radix, gang, 0, @@ -795,23 +797,20 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) { int err; - struct btrfs_header *header; - struct buffer_head *bh; + struct extent_buffer *buf; if (!pending) { - bh = btrfs_find_tree_block(root, blocknr); - if (bh) { - if (buffer_uptodate(bh)) { + buf = btrfs_find_tree_block(root, blocknr); + if (buf) { + if (btrfs_buffer_uptodate(buf)) { u64 transid = root->fs_info->running_transaction->transid; - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - transid) { - btrfs_block_release(root, bh); + if (btrfs_header_generation(buf) == transid) { + free_extent_buffer(buf); return 0; } } - btrfs_block_release(root, bh); + free_extent_buffer(buf); } err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); if (!err) { @@ -839,12 +838,12 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key key; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; + struct extent_buffer *leaf; int ret; struct btrfs_extent_item *ei; u32 refs; key.objectid = blocknr; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; @@ -856,12 +855,16 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) return ret; BUG_ON(ret); - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); - BUG_ON(ei->refs == 0); - refs = btrfs_extent_refs(ei) - 1; - btrfs_set_extent_refs(ei, refs); - btrfs_mark_buffer_dirty(path->nodes[0]); + refs = btrfs_extent_refs(leaf, ei); + BUG_ON(refs == 0); + refs -= 1; + btrfs_set_extent_refs(leaf, ei, refs); + btrfs_mark_buffer_dirty(leaf); + if (refs == 0) { u64 super_blocks_used, root_blocks_used; @@ -876,8 +879,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root super_blocks_used - num_blocks); /* block accounting for root item */ - root_blocks_used = btrfs_root_blocks_used(&root->root_item); - btrfs_set_root_blocks_used(&root->root_item, + root_blocks_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); @@ -984,7 +987,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 test_block; u64 orig_search_start = search_start; int start_found; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; int total_needed = num_blocks; @@ -994,10 +997,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int wrapped = 0; WARN_ON(num_blocks < 1); - ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); + if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(&info->super_copy); if (hint_block) { @@ -1034,8 +1037,9 @@ check_failed: path->slots[0]--; } - l = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key); + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &key, path->slots[0]); + /* * a rare case, go back one key if we hit a block group item * instead of an extent item @@ -1055,9 +1059,9 @@ check_failed: } while (1) { - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&l->header)) { + if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -1075,7 +1079,7 @@ check_failed: goto check_pending; } - btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + btrfs_item_key_to_cpu(l, &key, slot); if (key.objectid >= search_start && key.objectid > last_block && start_found) { if (last_block < search_start) @@ -1183,8 +1187,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; - btrfs_set_extent_refs(&extent_item, 1); - btrfs_set_extent_owner(&extent_item, owner); + btrfs_set_stack_extent_refs(&extent_item, 1); + btrfs_set_stack_extent_owner(&extent_item, owner); WARN_ON(num_blocks < 1); ret = find_free_extent(trans, root, num_blocks, empty_size, @@ -1201,8 +1205,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, num_blocks); /* block accounting for root item */ - root_blocks_used = btrfs_root_blocks_used(&root->root_item); - btrfs_set_root_blocks_used(&root->root_item, root_blocks_used + + root_blocks_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_blocks_used + num_blocks); if (root == extent_root) { @@ -1241,13 +1245,13 @@ update_block: * helper function to allocate a block for a given tree * returns the tree buffer or NULL. */ -struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint, - u64 empty_size) +struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 hint, + u64 empty_size) { struct btrfs_key ins; int ret; - struct buffer_head *buf; + struct extent_buffer *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, 1, empty_size, hint, (u64)-1, &ins, 0); @@ -1260,53 +1264,57 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_free_extent(trans, root, ins.objectid, 1, 0); return ERR_PTR(-ENOMEM); } - WARN_ON(buffer_dirty(buf)); - set_buffer_uptodate(buf); + btrfs_set_buffer_uptodate(buf); + set_extent_dirty(&trans->transaction->dirty_pages, buf->start, + buf->start + buf->len - 1, GFP_NOFS); + /* set_buffer_checked(buf); set_buffer_defrag(buf); - set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); + */ + /* FIXME!!!!!!!!!!!!!!!! + set_radix_bit(&trans->transaction->dirty_pages, buf->pages[0]->index); + */ trans->blocks_used++; return buf; } static int drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *cur) + struct btrfs_root *root, struct extent_buffer *leaf) { - struct btrfs_disk_key *key; - struct btrfs_leaf *leaf; + struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int nritems; int ret; - BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur))); - leaf = btrfs_buffer_leaf(cur); - nritems = btrfs_header_nritems(&leaf->header); + BUG_ON(!btrfs_is_leaf(leaf)); + nritems = btrfs_header_nritems(leaf); for (i = 0; i < nritems; i++) { u64 disk_blocknr; - key = &leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + + btrfs_item_key_to_cpu(leaf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE) + if (btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) continue; /* * FIXME make sure to insert a trans record that * repeats the snapshot del on crash */ - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(leaf, fi); if (disk_blocknr == 0) continue; ret = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi), - 0); + btrfs_file_extent_disk_num_blocks(leaf, fi), 0); BUG_ON(ret); } return 0; } static void reada_walk_down(struct btrfs_root *root, - struct btrfs_node *node) + struct extent_buffer *node) { int i; u32 nritems; @@ -1314,7 +1322,7 @@ static void reada_walk_down(struct btrfs_root *root, int ret; u32 refs; - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { blocknr = btrfs_node_blockptr(node, i); ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs); @@ -1337,16 +1345,17 @@ static void reada_walk_down(struct btrfs_root *root, static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { - struct buffer_head *next; - struct buffer_head *cur; + struct extent_buffer *next; + struct extent_buffer *cur; u64 blocknr; int ret; u32 refs; WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]), - 1, &refs); + ret = lookup_extent_ref(trans, root, + extent_buffer_blocknr(path->nodes[*level]), + 1, &refs); BUG_ON(ret); if (refs > 1) goto out; @@ -1360,21 +1369,20 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root cur = path->nodes[*level]; if (*level > 0 && path->slots[*level] == 0) - reada_walk_down(root, btrfs_buffer_node(cur)); + reada_walk_down(root, cur); - if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + if (btrfs_header_level(cur) != *level) WARN_ON(1); if (path->slots[*level] >= - btrfs_header_nritems(btrfs_buffer_header(cur))) + btrfs_header_nritems(cur)) break; if (*level == 0) { ret = drop_leaf_ref(trans, root, cur); BUG_ON(ret); break; } - blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), - path->slots[*level]); + blocknr = btrfs_node_blockptr(cur, path->slots[*level]); ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); BUG_ON(ret); if (refs != 1) { @@ -1384,8 +1392,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root continue; } next = btrfs_find_tree_block(root, blocknr); - if (!next || !buffer_uptodate(next)) { - brelse(next); + if (!next || !btrfs_buffer_uptodate(next)) { + free_extent_buffer(next); mutex_unlock(&root->fs_info->fs_mutex); next = read_tree_block(root, blocknr); mutex_lock(&root->fs_info->fs_mutex); @@ -1395,7 +1403,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); if (refs != 1) { path->slots[*level]++; - brelse(next); + free_extent_buffer(next); ret = btrfs_free_extent(trans, root, blocknr, 1, 1); BUG_ON(ret); @@ -1404,17 +1412,17 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root } WARN_ON(*level <= 0); if (path->nodes[*level-1]) - btrfs_block_release(root, path->nodes[*level-1]); + free_extent_buffer(path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(btrfs_buffer_header(next)); + *level = btrfs_header_level(next); path->slots[*level] = 0; } out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = btrfs_free_extent(trans, root, - bh_blocknr(path->nodes[*level]), 1, 1); - btrfs_block_release(root, path->nodes[*level]); + extent_buffer_blocknr(path->nodes[*level]), 1, 1); + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; BUG_ON(ret); @@ -1436,24 +1444,24 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < btrfs_header_nritems( - btrfs_buffer_header(path->nodes[i])) - 1) { - struct btrfs_node *node; - node = btrfs_buffer_node(path->nodes[i]); + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { + struct extent_buffer *node; + struct btrfs_disk_key disk_key; + node = path->nodes[i]; path->slots[i]++; *level = i; WARN_ON(*level == 0); + btrfs_node_key(node, &disk_key, path->slots[i]); memcpy(&root_item->drop_progress, - &node->ptrs[path->slots[i]].key, - sizeof(root_item->drop_progress)); + &disk_key, sizeof(disk_key)); root_item->drop_level = i; return 0; } else { ret = btrfs_free_extent(trans, root, - bh_blocknr(path->nodes[*level]), - 1, 1); + extent_buffer_blocknr(path->nodes[*level]), + 1, 1); BUG_ON(ret); - btrfs_block_release(root, path->nodes[*level]); + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; } @@ -1480,15 +1488,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); orig_level = level; if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { path->nodes[level] = root->node; path->slots[level] = 0; } else { struct btrfs_key key; - struct btrfs_disk_key *found_key; - struct btrfs_node *node; + struct btrfs_disk_key found_key; + struct extent_buffer *node; btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); level = root_item->drop_level; @@ -1498,10 +1506,10 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; goto out; } - node = btrfs_buffer_node(path->nodes[level]); - found_key = &node->ptrs[path->slots[level]].key; - WARN_ON(memcmp(found_key, &root_item->drop_progress, - sizeof(*found_key))); + node = path->nodes[level]; + btrfs_node_key(node, &found_key, path->slots[level]); + WARN_ON(memcmp(&found_key, &root_item->drop_progress, + sizeof(found_key))); } while(1) { wret = walk_down_tree(trans, root, path, &level); @@ -1516,12 +1524,12 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; ret = -EAGAIN; - get_bh(root->node); + extent_buffer_get(root->node); break; } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { - btrfs_block_release(root, path->nodes[i]); + free_extent_buffer(path->nodes[i]); path->nodes[i] = 0; } } @@ -1581,13 +1589,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_path *path; int ret; int err = 0; - struct btrfs_block_group_item *bi; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; struct radix_tree_root *radix; struct btrfs_key key; struct btrfs_key found_key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 group_size_blocks; u64 used; @@ -1596,7 +1603,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) root = info->extent_root; key.objectid = 0; key.offset = group_size_blocks; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); path = btrfs_alloc_path(); @@ -1610,18 +1616,18 @@ int btrfs_read_block_groups(struct btrfs_root *root) err = ret; break; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); cache = kmalloc(sizeof(*cache), GFP_NOFS); if (!cache) { err = -1; break; } - bi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_block_group_item); - if (bi->flags & BTRFS_BLOCK_GROUP_DATA) { + read_extent_buffer(leaf, &cache->item, + btrfs_item_ptr_offset(leaf, path->slots[0]), + sizeof(cache->item)); + if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { radix = &info->block_group_data_radix; cache->data = 1; } else { @@ -1629,7 +1635,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->data = 0; } - memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; @@ -1640,11 +1645,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); + ret = radix_tree_insert(radix, found_key.objectid + found_key.offset - 1, (void *)cache); BUG_ON(ret); - used = btrfs_block_group_used(bi); + used = btrfs_block_group_used(&cache->item); if (used < div_factor(key.offset, 8)) { radix_tree_tag_set(radix, found_key.objectid + found_key.offset - 1, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 0ab368e091f9..55fefdfab84c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -26,16 +26,6 @@ struct tree_entry { struct rb_node rb_node; }; -/* bits for the extent state */ -#define EXTENT_DIRTY 1 -#define EXTENT_WRITEBACK (1 << 1) -#define EXTENT_UPTODATE (1 << 2) -#define EXTENT_LOCKED (1 << 3) -#define EXTENT_NEW (1 << 4) -#define EXTENT_DELALLOC (1 << 5) - -#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) - void __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", @@ -223,7 +213,8 @@ int add_extent_mapping(struct extent_map_tree *tree, if (rb) prev = rb_entry(rb, struct extent_map, rb_node); if (prev && prev->end + 1 == em->start && - ((em->block_start == 0 && prev->block_start == 0) || + ((em->block_start == EXTENT_MAP_HOLE && + prev->block_start == EXTENT_MAP_HOLE) || (em->block_start == prev->block_end + 1))) { em->start = prev->start; em->block_start = prev->block_start; @@ -926,6 +917,40 @@ int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(set_range_writeback); +int find_first_extent_bit(struct extent_map_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 1; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->state & bits) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + } + node = rb_next(node); + if (!node) + break; + } +out: + write_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(find_first_extent_bit); + u64 find_lock_delalloc_range(struct extent_map_tree *tree, u64 start, u64 lock_start, u64 *end, u64 max_bytes) { @@ -1450,7 +1475,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, em = NULL; /* we've found a hole, just zero and go on */ - if (block_start == 0) { + if (block_start == EXTENT_MAP_HOLE) { zero_user_page(page, page_offset, iosize, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -1593,7 +1618,8 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, free_extent_map(em); em = NULL; - if (block_start == 0 || block_start == EXTENT_MAP_INLINE) { + if (block_start == EXTENT_MAP_HOLE || + block_start == EXTENT_MAP_INLINE) { clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); cur = cur + iosize; @@ -1630,7 +1656,6 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, nr++; } done: - WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0)); unlock_extent(tree, start, page_end, GFP_NOFS); unlock_page(page); return 0; @@ -1827,8 +1852,623 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, // XXX(hch): block 0 is valid in some cases, e.g. XFS RT device if (em->block_start == EXTENT_MAP_INLINE || - em->block_start == 0) - return 0; + em->block_start == EXTENT_MAP_HOLE) + return 0; return (em->block_start + start - em->start) >> inode->i_blkbits; } + +struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask) +{ + unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + int uptodate = 0; + + eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask); + if (!eb || IS_ERR(eb)) + return NULL; + + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + for (i = 0; i < num_pages; i++, index++) { + p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); + if (!p) + goto fail; + eb->pages[i] = p; + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); + } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; + return eb; +fail: + free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(alloc_extent_buffer); + +struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask) +{ + unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + + eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask); + if (!eb || IS_ERR(eb)) + return NULL; + + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + for (i = 0; i < num_pages; i++, index++) { + p = find_get_page(mapping, index); + if (!p) + goto fail; + eb->pages[i] = p; + } + return eb; +fail: + free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(find_extent_buffer); + +void free_extent_buffer(struct extent_buffer *eb) +{ + unsigned long i; + unsigned long num_pages; + + if (!eb) + return; + + if (!atomic_dec_and_test(&eb->refs)) + return; + + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + + for (i = 0; i < num_pages; i++) { + if (eb->pages[i]) + page_cache_release(eb->pages[i]); + } + kfree(eb); +} +EXPORT_SYMBOL(free_extent_buffer); + +int clear_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + int set; + unsigned long i; + unsigned long num_pages; + struct page *page; + + u64 start = eb->start; + u64 end = start + eb->len - 1; + + set = clear_extent_dirty(tree, start, end, GFP_NOFS); + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + lock_page(page); + /* + * if we're on the last page or the first page and the + * block isn't aligned on a page boundary, do extra checks + * to make sure we don't clean page that is partially dirty + */ + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { + start = page->index << PAGE_CACHE_SHIFT; + end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, + EXTENT_DIRTY, 0)) { + unlock_page(page); + continue; + } + } + clear_page_dirty_for_io(page); + unlock_page(page); + } + return 0; +} +EXPORT_SYMBOL(clear_extent_buffer_dirty); + +int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + return wait_on_extent_writeback(tree, eb->start, + eb->start + eb->len - 1); +} +EXPORT_SYMBOL(wait_on_extent_buffer_writeback); + +int set_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + return set_range_dirty(tree, eb->start, eb->start + eb->len - 1); +} +EXPORT_SYMBOL(set_extent_buffer_dirty); + +int set_extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + struct page *page; + unsigned long num_pages; + + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + + set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { + check_page_uptodate(tree, page); + continue; + } + SetPageUptodate(page); + } + return 0; +} +EXPORT_SYMBOL(set_extent_buffer_uptodate); + +int extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + if (eb->flags & EXTENT_UPTODATE) + return 1; + return test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1); +} +EXPORT_SYMBOL(extent_buffer_uptodate); + +int read_extent_buffer_pages(struct extent_map_tree *tree, + struct extent_buffer *eb, int wait) +{ + unsigned long i; + struct page *page; + int err; + int ret = 0; + unsigned long num_pages; + + if (eb->flags & EXTENT_UPTODATE) + return 0; + + if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1)) { + return 0; + } + + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + if (PageUptodate(page)) { + continue; + } + if (!wait) { + if (TestSetPageLocked(page)) { + continue; + } + } else { + lock_page(page); + } + if (!PageUptodate(page)) { + err = page->mapping->a_ops->readpage(NULL, page); + if (err) { + ret = err; + } + } else { + unlock_page(page); + } + } + + if (ret || !wait) { + return ret; + } + + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + wait_on_page_locked(page); + if (!PageUptodate(page)) { + ret = -EIO; + } + } + eb->flags |= EXTENT_UPTODATE; + return ret; +} +EXPORT_SYMBOL(read_extent_buffer_pages); + +void read_extent_buffer(struct extent_buffer *eb, void *dstv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *dst = (char *)dstv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + memcpy(dst, kaddr + offset, cur); + // kunmap_atomic(kaddr, KM_USER0); + + dst += cur; + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } +} +EXPORT_SYMBOL(read_extent_buffer); + +int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + size_t offset; + char *kaddr; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + + if (i == 0) { + offset = start_offset; + *map_start = 0; + } else { + offset = 0; + *map_start = (i << PAGE_CACHE_SHIFT) - offset; + } + + // kaddr = kmap_atomic(eb->pages[i], km); + kaddr = page_address(eb->pages[i]); + *token = kaddr; + *map = kaddr + offset; + *map_len = PAGE_CACHE_SIZE - offset; + return 0; +} +EXPORT_SYMBOL(map_extent_buffer); + +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) +{ + // kunmap_atomic(token, km); +} +EXPORT_SYMBOL(unmap_extent_buffer); + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *ptr = (char *)ptrv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + int ret = 0; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + ret = memcmp(ptr, kaddr + offset, cur); + // kunmap_atomic(kaddr, KM_USER0); + if (ret) + break; + + ptr += cur; + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } + return ret; +} +EXPORT_SYMBOL(memcmp_extent_buffer); + +void write_extent_buffer(struct extent_buffer *eb, const void *srcv, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *src = (char *)srcv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + memcpy(kaddr + offset, src, cur); + // kunmap_atomic(kaddr, KM_USER0); + + src += cur; + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } +} +EXPORT_SYMBOL(write_extent_buffer); + +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + memset(kaddr + offset, c, cur); + // kunmap_atomic(kaddr, KM_USER0); + + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } +} +EXPORT_SYMBOL(memset_extent_buffer); + +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len) +{ + u64 dst_len = dst->len; + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + + WARN_ON(src->len != dst_len); + + offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + page = dst->pages[i]; + WARN_ON(!PageUptodate(page)); + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); + + // kaddr = kmap_atomic(page, KM_USER1); + kaddr = page_address(page); + read_extent_buffer(src, kaddr + offset, src_offset, cur); + // kunmap_atomic(kaddr, KM_USER1); + + src_offset += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(copy_extent_buffer); + +static void move_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + // char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + char *dst_kaddr = page_address(dst_page); + if (dst_page == src_page) { + memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); + } else { + // char *src_kaddr = kmap_atomic(src_page, KM_USER1); + char *src_kaddr = page_address(src_page); + char *p = dst_kaddr + dst_off + len; + char *s = src_kaddr + src_off + len; + + while (len--) + *--p = *--s; + + // kunmap_atomic(src_kaddr, KM_USER1); + } + // kunmap_atomic(dst_kaddr, KM_USER0); +} + +static void copy_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + //kmap_atomic(dst_page, KM_USER0); + char *dst_kaddr = page_address(dst_page); + char *src_kaddr; + + if (dst_page != src_page) + src_kaddr = page_address(src_page); // kmap_atomic(src_page, KM_USER1); + else + src_kaddr = dst_kaddr; + + memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); + /* + kunmap_atomic(dst_kaddr, KM_USER0); + if (dst_page != src_page) + kunmap_atomic(src_kaddr, KM_USER1); + */ +} + +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + + while(len > 0) { + dst_off_in_page = dst_offset & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = src_offset & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; + + if (src_i == 0) + src_off_in_page += start_offset; + if (dst_i == 0) + dst_off_in_page += start_offset; + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - + src_off_in_page)); + cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE - + dst_off_in_page)); + + copy_pages(dst->pages[dst_i], dst->pages[src_i], + dst_off_in_page, src_off_in_page, cur); + + src_offset += cur; + dst_offset += cur; + len -= cur; + } +} +EXPORT_SYMBOL(memcpy_extent_buffer); + +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + unsigned long dst_end = dst_offset + len - 1; + unsigned long src_end = src_offset + len - 1; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset < src_offset) { + memcpy_extent_buffer(dst, dst_offset, src_offset, len); + return; + } + while(len > 0) { + dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; + + dst_off_in_page = dst_end & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = src_end & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + if (src_i == 0) + src_off_in_page += start_offset; + if (dst_i == 0) + dst_off_in_page += start_offset; + + cur = min(len, src_off_in_page + 1); + cur = min(cur, dst_off_in_page + 1); +// printk("move pages orig dst %lu src %lu len %lu, this %lu %lu %lu\n", dst_offset, src_offset, len, dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); + move_pages(dst->pages[dst_i], dst->pages[src_i], + dst_off_in_page - cur + 1, + src_off_in_page - cur + 1, cur); + + dst_end -= cur - 1; + src_end -= cur - 1; + len -= cur; + } +} +EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index d913ce01248d..430b997a70f6 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -3,9 +3,20 @@ #include +#define EXTENT_MAP_HOLE (u64)-3 #define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_DELALLOC (u64)-1 +/* bits for the extent state */ +#define EXTENT_DIRTY 1 +#define EXTENT_WRITEBACK (1 << 1) +#define EXTENT_UPTODATE (1 << 2) +#define EXTENT_LOCKED (1 << 3) +#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + + struct extent_map_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); @@ -51,10 +62,13 @@ struct extent_state { struct list_head list; }; +#define EXTENT_BUFFER_SIZE(nr) (sizeof(struct extent_buffer) + \ + (nr - 1) * sizeof(struct page *)) struct extent_buffer { u64 start; - u64 end; /* inclusive */ - char *addr; + unsigned long len; + atomic_t refs; + int flags; struct page *pages[]; }; @@ -87,8 +101,12 @@ int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int find_first_extent_bit(struct extent_map_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits); int extent_invalidatepage(struct extent_map_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_map_tree *tree, struct page *page, @@ -106,4 +124,57 @@ int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); void set_page_extent_mapped(struct page *page); + +struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +void free_extent_buffer(struct extent_buffer *eb); +int read_extent_buffer_pages(struct extent_map_tree *tree, + struct extent_buffer *eb, int wait); + +static inline void extent_buffer_get(struct extent_buffer *eb) +{ + atomic_inc(&eb->refs); +} + +static inline u64 extent_buffer_blocknr(struct extent_buffer *eb) +{ + return eb->start / 4096; +} + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len); +void read_extent_buffer(struct extent_buffer *eb, void *dst, + unsigned long start, + unsigned long len); +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len); +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len); +int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, + struct extent_buffer *eb); +int clear_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb); +int extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb); +int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ea4dd4c5fce3..226f6d028c3f 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -34,12 +34,12 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item *item; struct btrfs_key file_key; struct btrfs_path *path; + struct extent_buffer *leaf; path = btrfs_alloc_path(); BUG_ON(!path); file_key.objectid = objectid; file_key.offset = pos; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); ret = btrfs_insert_empty_item(trans, root, path, &file_key, @@ -47,15 +47,16 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, if (ret < 0) goto out; BUG_ON(ret); - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(item, offset); - btrfs_set_file_extent_disk_num_blocks(item, disk_num_blocks); - btrfs_set_file_extent_offset(item, 0); - btrfs_set_file_extent_num_blocks(item, num_blocks); - btrfs_set_file_extent_generation(item, trans->transid); - btrfs_set_file_extent_type(item, BTRFS_FILE_EXTENT_REG); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_set_file_extent_disk_blocknr(leaf, item, offset); + btrfs_set_file_extent_disk_num_blocks(leaf, item, disk_num_blocks); + btrfs_set_file_extent_offset(leaf, item, 0); + btrfs_set_file_extent_num_blocks(leaf, item, num_blocks); + btrfs_set_file_extent_generation(leaf, item, trans->transid); + btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); + btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); return ret; @@ -71,32 +72,30 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_csum_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 csum_offset = 0; int csums_in_item; file_key.objectid = objectid; file_key.offset = offset; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); if (ret < 0) goto fail; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; if (ret > 0) { ret = 1; if (path->slots[0] == 0) goto fail; path->slots[0]--; - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid) { goto fail; } csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; - csums_in_item = btrfs_item_size(leaf->items + path->slots[0]); + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); csums_in_item /= BTRFS_CRC32_SIZE; if (csum_offset >= csums_in_item) { @@ -127,7 +126,6 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, file_key.objectid = objectid; file_key.offset = offset; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); return ret; @@ -138,12 +136,14 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 objectid, u64 offset, char *data, size_t len) { + return 0; +#if 0 int ret; struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_path *path; struct btrfs_csum_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 csum_offset; path = btrfs_alloc_path(); @@ -161,8 +161,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, if (ret == -EFBIG) { u32 item_size; /* we found one, but it isn't big enough yet */ - leaf = btrfs_buffer_leaf(path->nodes[0]); - item_size = btrfs_item_size(leaf->items + path->slots[0]); + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); if ((item_size / BTRFS_CRC32_SIZE) >= MAX_CSUM_ITEMS(root)) { /* already at max size, make a new one */ goto insert; @@ -188,8 +188,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, goto insert; } path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || @@ -197,10 +197,10 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, csum_offset >= MAX_CSUM_ITEMS(root)) { goto insert; } - if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / + if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) / BTRFS_CRC32_SIZE) { u32 diff = (csum_offset + 1) * BTRFS_CRC32_SIZE; - diff = diff - btrfs_item_size(leaf->items + path->slots[0]); + diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); if (diff != BTRFS_CRC32_SIZE) goto insert; ret = btrfs_extend_item(trans, root, path, diff); @@ -220,21 +220,20 @@ insert: goto fail; } csum: - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_csum_item); + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); ret = 0; item = (struct btrfs_csum_item *)((unsigned char *)item + csum_offset * BTRFS_CRC32_SIZE); found: - btrfs_check_bounds(&item->csum, BTRFS_CRC32_SIZE, - path->nodes[0]->b_data, - root->fs_info->sb->s_blocksize); + /* FIXME!!!!!!!!!!!! */ ret = btrfs_csum_data(root, data, len, &item->csum); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); btrfs_free_path(path); return ret; +#endif } int btrfs_csum_truncate(struct btrfs_trans_handle *trans, @@ -242,21 +241,21 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, u64 isize) { struct btrfs_key key; - struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[0]); + struct extent_buffer *leaf = path->nodes[0]; int slot = path->slots[0]; int ret; u32 new_item_size; u64 new_item_span; u64 blocks; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, slot); if (isize <= key.offset) return 0; new_item_span = isize - key.offset; - blocks = (new_item_span + root->blocksize - 1) >> + blocks = (new_item_span + root->sectorsize - 1) >> root->fs_info->sb->s_blocksize_bits; new_item_size = blocks * BTRFS_CRC32_SIZE; - if (new_item_size >= btrfs_item_size(leaf->items + slot)) + if (new_item_size >= btrfs_item_size_nr(leaf, slot)) return 0; ret = btrfs_truncate_item(trans, root, path, new_item_size); BUG_ON(ret); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4cc459c943ec..1734ca695555 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include @@ -88,7 +87,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, { struct btrfs_key key; struct btrfs_path *path; - char *ptr, *kaddr; + struct extent_buffer *leaf; + char *kaddr; + unsigned long ptr; struct btrfs_file_extent_item *ei; u32 datasize; int err = 0; @@ -102,7 +103,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = offset; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); BUG_ON(size >= PAGE_CACHE_SIZE); datasize = btrfs_file_extent_calc_inline_size(size); @@ -113,18 +113,17 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, err = ret; goto fail; } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); kaddr = kmap_atomic(page, KM_USER0); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, kaddr + page_offset, size); + write_extent_buffer(leaf, kaddr + page_offset, ptr, size); kunmap_atomic(kaddr, KM_USER0); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); return err; @@ -156,8 +155,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, em->bdev = inode->i_sb->s_bdev; - start_pos = pos & ~((u64)root->blocksize - 1); - num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + start_pos = pos & ~((u64)root->sectorsize - 1); + num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> inode->i_blkbits; down_read(&BTRFS_I(inode)->root->snap_sem); @@ -184,7 +183,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, if (inode->i_size < start_pos) { u64 last_pos_in_file; u64 hole_size; - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; @@ -227,8 +226,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, /* step one, delete the existing extents in this range */ /* FIXME blocksize != pagesize */ err = btrfs_drop_extents(trans, root, inode, start_pos, - (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); + (pos + write_bytes + root->sectorsize -1) & + ~((u64)root->sectorsize - 1), &hint_block); if (err) goto failed; @@ -288,7 +287,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, { int ret; struct btrfs_key key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; struct btrfs_file_extent_item *extent; u64 extent_end = 0; @@ -327,10 +326,10 @@ next_slot: found_extent = 0; found_inline = 0; extent = NULL; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; slot = path->slots[0]; ret = 0; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, slot); if (key.offset >= end || key.objectid != inode->i_ino) { goto out; } @@ -344,17 +343,18 @@ next_slot: if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(extent); + found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << + (btrfs_file_extent_num_blocks(leaf, extent) << inode->i_blkbits); found_extent = 1; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + struct btrfs_item *item; + item = btrfs_item_nr(leaf, slot); found_inline = 1; extent_end = key.offset + - btrfs_file_extent_inline_len(leaf->items + - slot); + btrfs_file_extent_inline_len(leaf, item); } } else { extent_end = search_start; @@ -365,8 +365,7 @@ next_slot: search_start >= extent_end) { int nextret; u32 nritems; - nritems = btrfs_header_nritems( - btrfs_buffer_header(path->nodes[0])); + nritems = btrfs_header_nritems(leaf); if (slot >= nritems - 1) { nextret = btrfs_next_leaf(root, path); if (nextret) @@ -380,7 +379,7 @@ next_slot: /* FIXME, there's only one inline extent allowed right now */ if (found_inline) { - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; @@ -388,10 +387,13 @@ next_slot: if (end < extent_end && end >= key.offset) { if (found_extent) { u64 disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf,extent); u64 disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); - memcpy(&old, extent, sizeof(old)); + btrfs_file_extent_disk_num_blocks(leaf, + extent); + read_extent_buffer(leaf, &old, + (unsigned long)extent, + sizeof(old)); if (disk_blocknr != 0) { ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, disk_num_blocks); @@ -406,20 +408,24 @@ next_slot: u64 new_num; u64 old_num; keep = 1; - WARN_ON(start & (root->blocksize - 1)); + WARN_ON(start & (root->sectorsize - 1)); if (found_extent) { new_num = (start - key.offset) >> inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(extent); + old_num = btrfs_file_extent_num_blocks(leaf, + extent); *hint_block = - btrfs_file_extent_disk_blocknr(extent); - if (btrfs_file_extent_disk_blocknr(extent)) { + btrfs_file_extent_disk_blocknr(leaf, + extent); + if (btrfs_file_extent_disk_blocknr(leaf, + extent)) { inode->i_blocks -= (old_num - new_num) << 3; } - btrfs_set_file_extent_num_blocks(extent, + btrfs_set_file_extent_num_blocks(leaf, + extent, new_num); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); } else { WARN_ON(1); } @@ -431,13 +437,17 @@ next_slot: u64 extent_num_blocks = 0; if (found_extent) { disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf, + extent); disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); + btrfs_file_extent_disk_num_blocks(leaf, + extent); extent_num_blocks = - btrfs_file_extent_num_blocks(extent); + btrfs_file_extent_num_blocks(leaf, + extent); *hint_block = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf, + extent); } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ @@ -464,42 +474,37 @@ next_slot: struct btrfs_key ins; ins.objectid = inode->i_ino; ins.offset = end; - ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); + leaf = path->nodes[0]; if (ret) { - btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); - printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end, keep); + btrfs_print_leaf(root, leaf); + printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.type, ins.offset, start, end, key.offset, extent_end, keep); } BUG_ON(ret); - extent = btrfs_item_ptr( - btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(extent, - btrfs_file_extent_disk_blocknr(&old)); - btrfs_set_file_extent_disk_num_blocks(extent, - btrfs_file_extent_disk_num_blocks(&old)); + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + write_extent_buffer(leaf, &old, + (unsigned long)extent, sizeof(old)); - btrfs_set_file_extent_offset(extent, - btrfs_file_extent_offset(&old) + + btrfs_set_file_extent_offset(leaf, extent, + le64_to_cpu(old.offset) + ((end - key.offset) >> inode->i_blkbits)); - WARN_ON(btrfs_file_extent_num_blocks(&old) < + WARN_ON(le64_to_cpu(old.num_blocks) < (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_num_blocks(extent, + btrfs_set_file_extent_num_blocks(leaf, extent, (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_type(extent, + btrfs_set_file_extent_type(leaf, extent, BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_generation(extent, - btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); - if (btrfs_file_extent_disk_blocknr(&old) != 0) { + if (le64_to_cpu(old.disk_blocknr) != 0) { inode->i_blocks += - btrfs_file_extent_num_blocks(extent) << 3; + btrfs_file_extent_num_blocks(leaf, + extent) << 3; } ret = 0; goto out; @@ -529,8 +534,8 @@ static int prepare_pages(struct btrfs_root *root, u64 num_blocks; u64 start_pos; - start_pos = pos & ~((u64)root->blocksize - 1); - num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + start_pos = pos & ~((u64)root->sectorsize - 1); + num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index b78346177539..35d2608f8918 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -20,24 +20,18 @@ #include "disk-io.h" #include "transaction.h" -int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 objectid, struct btrfs_inode_item - *inode_item) +int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid) { - struct btrfs_path *path; struct btrfs_key key; int ret; key.objectid = objectid; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - path = btrfs_alloc_path(); - BUG_ON(!path); - ret = btrfs_insert_item(trans, root, &key, inode_item, - sizeof(*inode_item)); - btrfs_release_path(root, path); - btrfs_free_path(path); + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(struct btrfs_inode_item)); if (ret == 0 && objectid > root->highest_inode) root->highest_inode = objectid; return ret; @@ -51,15 +45,15 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root int cow = mod != 0; int ret; int slot; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_key found_key; ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY && location->offset == (u64)-1 && path->slots[0] != 0) { slot = path->slots[0] - 1; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, &leaf->items[slot].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, slot); if (found_key.objectid == location->objectid && btrfs_key_type(&found_key) == btrfs_key_type(location)) { path->slots[0]--; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 405470866254..ab74977adf5c 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -24,8 +24,9 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) { struct btrfs_path *path; int ret; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_key search_key; + struct btrfs_key found_key; int slot; path = btrfs_alloc_path(); @@ -39,8 +40,9 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) BUG_ON(ret == 0); if (path->slots[0] > 0) { slot = path->slots[0] - 1; - l = btrfs_buffer_leaf(path->nodes[0]); - *objectid = btrfs_disk_key_objectid(&l->items[slot].key); + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &found_key, slot); + *objectid = found_key.objectid; } else { *objectid = BTRFS_FIRST_FREE_OBJECTID; } @@ -64,13 +66,12 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int slot = 0; u64 last_ino = 0; int start_found; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_key search_key; u64 search_start = dirid; path = btrfs_alloc_path(); BUG_ON(!path); - search_key.flags = 0; search_start = root->last_inode_alloc; search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; @@ -86,9 +87,9 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, path->slots[0]--; while (1) { - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&l->header)) { + if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -103,7 +104,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, last_ino : search_start; goto found; } - btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + btrfs_item_key_to_cpu(l, &key, slot); if (key.objectid >= search_start) { if (start_found) { if (last_ino < search_start) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b03d40a907ca..fbe2836364e0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -159,10 +159,8 @@ out: int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) { - char csum[BTRFS_CRC32_SIZE]; size_t offset = start - (page->index << PAGE_CACHE_SHIFT); struct inode *inode = page->mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; char *kaddr; u64 private; @@ -173,11 +171,15 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) if (ret) { goto zeroit; } + /* + struct btrfs_root *root = BTRFS_I(inode)->root; + char csum[BTRFS_CRC32_SIZE]; ret = btrfs_csum_data(root, kaddr + offset, end - start + 1, csum); BUG_ON(ret); if (memcmp(csum, &private, BTRFS_CRC32_SIZE)) { goto zeroit; } + */ kunmap_atomic(kaddr, KM_IRQ0); return 0; @@ -192,7 +194,9 @@ zeroit: void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; + struct extent_buffer *leaf; struct btrfs_inode_item *inode_item; + struct btrfs_inode_timespec *tspec; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; u64 alloc_group_block; @@ -205,29 +209,37 @@ void btrfs_read_locked_inode(struct inode *inode) memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); - if (ret) { + if (ret) goto make_bad; - } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_inode_item); - inode->i_mode = btrfs_inode_mode(inode_item); - inode->i_nlink = btrfs_inode_nlink(inode_item); - inode->i_uid = btrfs_inode_uid(inode_item); - inode->i_gid = btrfs_inode_gid(inode_item); - inode->i_size = btrfs_inode_size(inode_item); - inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime); - inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime); - inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime); - inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime); - inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime); - inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); - inode->i_blocks = btrfs_inode_nblocks(inode_item); - inode->i_generation = btrfs_inode_generation(inode_item); + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_inode_item); + + inode->i_mode = btrfs_inode_mode(leaf, inode_item); + inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); + inode->i_uid = btrfs_inode_uid(leaf, inode_item); + inode->i_gid = btrfs_inode_gid(leaf, inode_item); + inode->i_size = btrfs_inode_size(leaf, inode_item); + + tspec = btrfs_inode_atime(inode_item); + inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + + tspec = btrfs_inode_mtime(inode_item); + inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + + tspec = btrfs_inode_ctime(inode_item); + inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + + inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item); + inode->i_generation = btrfs_inode_generation(leaf, inode_item); inode->i_rdev = 0; - rdev = btrfs_inode_rdev(inode_item); - alloc_group_block = btrfs_inode_block_group(inode_item); + rdev = btrfs_inode_rdev(leaf, inode_item); + + alloc_group_block = btrfs_inode_block_group(leaf, inode_item); BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, alloc_group_block); @@ -267,24 +279,35 @@ make_bad: make_bad_inode(inode); } -static void fill_inode_item(struct btrfs_inode_item *item, +static void fill_inode_item(struct extent_buffer *leaf, + struct btrfs_inode_item *item, struct inode *inode) { - btrfs_set_inode_uid(item, inode->i_uid); - btrfs_set_inode_gid(item, inode->i_gid); - btrfs_set_inode_size(item, inode->i_size); - btrfs_set_inode_mode(item, inode->i_mode); - btrfs_set_inode_nlink(item, inode->i_nlink); - btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); - btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); - btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); - btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); - btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); - btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); - btrfs_set_inode_nblocks(item, inode->i_blocks); - btrfs_set_inode_generation(item, inode->i_generation); - btrfs_set_inode_rdev(item, inode->i_rdev); - btrfs_set_inode_block_group(item, + btrfs_set_inode_uid(leaf, item, inode->i_uid); + btrfs_set_inode_gid(leaf, item, inode->i_gid); + btrfs_set_inode_size(leaf, item, inode->i_size); + btrfs_set_inode_mode(leaf, item, inode->i_mode); + btrfs_set_inode_nlink(leaf, item, inode->i_nlink); + + btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), + inode->i_atime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), + inode->i_atime.tv_nsec); + + btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), + inode->i_mtime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), + inode->i_mtime.tv_nsec); + + btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), + inode->i_ctime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), + inode->i_ctime.tv_nsec); + + btrfs_set_inode_nblocks(leaf, item, inode->i_blocks); + btrfs_set_inode_generation(leaf, item, inode->i_generation); + btrfs_set_inode_rdev(leaf, item, inode->i_rdev); + btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group->key.objectid); } @@ -294,6 +317,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, { struct btrfs_inode_item *inode_item; struct btrfs_path *path; + struct extent_buffer *leaf; int ret; path = btrfs_alloc_path(); @@ -306,12 +330,12 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, goto failed; } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - fill_inode_item(inode_item, inode); - btrfs_mark_buffer_dirty(path->nodes[0]); + fill_inode_item(leaf, inode_item, inode); + btrfs_mark_buffer_dirty(leaf); btrfs_set_inode_last_trans(trans, inode); ret = 0; failed: @@ -330,8 +354,9 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, const char *name = dentry->d_name.name; int name_len = dentry->d_name.len; int ret = 0; - u64 objectid; + struct extent_buffer *leaf; struct btrfs_dir_item *di; + struct btrfs_key key; path = btrfs_alloc_path(); if (!path) { @@ -349,14 +374,15 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, ret = -ENOENT; goto err; } - objectid = btrfs_disk_key_objectid(&di->location); + leaf = path->nodes[0]; + btrfs_dir_item_key_to_cpu(leaf, di, &key); ret = btrfs_delete_one_dir_name(trans, root, path, di); if (ret) goto err; btrfs_release_path(root, path); di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, - objectid, name, name_len, -1); + key.objectid, name, name_len, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); goto err; @@ -391,12 +417,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); ret = btrfs_unlink_trans(trans, root, dir, dentry); nr = trans->blocks_used; + btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); + return ret; } @@ -411,7 +440,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; struct btrfs_key found_key; int found_type; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; char *goodnames = ".."; unsigned long nr; @@ -419,10 +448,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = (u32)-1; + key.type = (u8)-1; while(1) { ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { @@ -435,9 +465,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out; } path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); if (found_key.objectid != inode->i_ino) { err = -ENOENT; @@ -513,9 +542,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int ret; struct btrfs_path *path; struct btrfs_key key; - struct btrfs_disk_key *found_key; + struct btrfs_key found_key; u32 found_type; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; u64 extent_start = 0; u64 extent_num_blocks = 0; @@ -527,10 +556,12 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); path->reada = -1; BUG_ON(!path); + /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = (u32)-1; + key.type = (u8)-1; + while(1) { btrfs_init_path(path); fi = NULL; @@ -542,27 +573,28 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(path->slots[0] == 0); path->slots[0]--; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); - if (btrfs_disk_key_objectid(found_key) != inode->i_ino) + if (found_key.objectid != inode->i_ino) break; + if (found_type != BTRFS_CSUM_ITEM_KEY && found_type != BTRFS_DIR_ITEM_KEY && found_type != BTRFS_DIR_INDEX_KEY && found_type != BTRFS_EXTENT_DATA_KEY) break; - item_end = btrfs_disk_key_offset(found_key); + item_end = found_key.offset; if (found_type == BTRFS_EXTENT_DATA_KEY) { - fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], + fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) != + if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { - item_end += btrfs_file_extent_num_blocks(fi) << - inode->i_blkbits; + item_end += + btrfs_file_extent_num_blocks(leaf, fi) << + inode->i_blkbits; } } if (found_type == BTRFS_CSUM_ITEM_KEY) { @@ -583,7 +615,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, found_type); continue; } - if (btrfs_disk_key_offset(found_key) >= inode->i_size) + if (found_key.offset >= inode->i_size) del_item = 1; else del_item = 0; @@ -591,30 +623,31 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, /* FIXME, shrink the extent if the ref count is only 1 */ if (found_type == BTRFS_EXTENT_DATA_KEY && - btrfs_file_extent_type(fi) != + btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; - extent_start = btrfs_file_extent_disk_blocknr(fi); + extent_start = btrfs_file_extent_disk_blocknr(leaf, fi); if (!del_item) { u64 orig_num_blocks = - btrfs_file_extent_num_blocks(fi); + btrfs_file_extent_num_blocks(leaf, fi); extent_num_blocks = inode->i_size - - btrfs_disk_key_offset(found_key) + - root->blocksize - 1; + found_key.offset + root->sectorsize - 1; extent_num_blocks >>= inode->i_blkbits; - btrfs_set_file_extent_num_blocks(fi, + btrfs_set_file_extent_num_blocks(leaf, fi, extent_num_blocks); num_dec = (orig_num_blocks - extent_num_blocks) << 3; if (extent_start != 0) { inode->i_blocks -= num_dec; } - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); } else { extent_num_blocks = - btrfs_file_extent_disk_num_blocks(fi); + btrfs_file_extent_disk_num_blocks(leaf, + fi); /* FIXME blocksize != 4096 */ - num_dec = btrfs_file_extent_num_blocks(fi) << 3; + num_dec = btrfs_file_extent_num_blocks(leaf, + fi) << 3; if (extent_start != 0) { found_extent = 1; inode->i_blocks -= num_dec; @@ -725,7 +758,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; u64 pos = (inode->i_size + mask) & ~mask; u64 block_end = attr->ia_size | mask; u64 hole_size; @@ -771,9 +804,11 @@ void btrfs_delete_inode(struct inode *inode) if (is_bad_inode(inode)) { goto no_delete; } + inode->i_size = 0; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); if (ret) @@ -782,6 +817,7 @@ void btrfs_delete_inode(struct inode *inode) if (ret) goto no_delete_lock; nr = trans->blocks_used; + btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); @@ -819,7 +855,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ret = 0; goto out; } - btrfs_disk_key_to_cpu(location, &di->location); + btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); out: btrfs_release_path(root, path); btrfs_free_path(path); @@ -856,7 +892,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, ri = &(*sub_root)->root_item; location->objectid = btrfs_root_dirid(ri); - location->flags = 0; btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); location->offset = 0; @@ -908,11 +943,14 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); + mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &location); mutex_unlock(&root->fs_info->fs_mutex); + if (ret < 0) return ERR_PTR(ret); + inode = NULL; if (location.objectid) { ret = fixup_tree_root_location(root, &location, &sub_root, @@ -952,10 +990,11 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct btrfs_item *item; struct btrfs_dir_item *di; struct btrfs_key key; + struct btrfs_key found_key; struct btrfs_path *path; int ret; u32 nritems; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; int advance; unsigned char d_type; @@ -964,15 +1003,19 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) u32 di_total; u32 di_len; int key_type = BTRFS_DIR_INDEX_KEY; + char tmp_name[32]; + char *name_ptr; + int name_len; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) key_type = BTRFS_DIR_ITEM_KEY; + mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; - key.flags = 0; btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; + path = btrfs_alloc_path(); path->reada = 2; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -980,16 +1023,16 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) goto err; advance = 0; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { ret = btrfs_next_leaf(root, path); if (ret) break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; } else { slot++; @@ -997,28 +1040,48 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } } advance = 1; - item = leaf->items + slot; - if (btrfs_disk_key_objectid(&item->key) != key.objectid) + item = btrfs_item_nr(leaf, slot); + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + if (found_key.objectid != key.objectid) break; - if (btrfs_disk_key_type(&item->key) != key_type) + if (btrfs_key_type(&found_key) != key_type) break; - if (btrfs_disk_key_offset(&item->key) < filp->f_pos) + if (found_key.offset < filp->f_pos) continue; - filp->f_pos = btrfs_disk_key_offset(&item->key); + + filp->f_pos = found_key.offset; advance = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); di_cur = 0; - di_total = btrfs_item_size(leaf->items + slot); + di_total = btrfs_item_size(leaf, item); while(di_cur < di_total) { - d_type = btrfs_filetype_table[btrfs_dir_type(di)]; - over = filldir(dirent, (const char *)(di + 1), - btrfs_dir_name_len(di), - btrfs_disk_key_offset(&item->key), - btrfs_disk_key_objectid(&di->location), + struct btrfs_key location; + + name_len = btrfs_dir_name_len(leaf, di); + if (name_len < 32) { + name_ptr = tmp_name; + } else { + name_ptr = kmalloc(name_len, GFP_NOFS); + BUG_ON(!name_ptr); + } + read_extent_buffer(leaf, name_ptr, + (unsigned long)(di + 1), name_len); + + d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; + btrfs_dir_item_key_to_cpu(leaf, di, &location); + + over = filldir(dirent, name_ptr, name_len, + found_key.offset, + location.objectid, d_type); + + if (name_ptr != tmp_name) + kfree(name_ptr); + if (over) goto nopos; - di_len = btrfs_dir_name_len(di) + sizeof(*di); + di_len = btrfs_dir_name_len(leaf, di) + sizeof(*di); di_cur += di_len; di = (struct btrfs_dir_item *)((char *)di + di_len); } @@ -1075,11 +1138,15 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, int mode) { struct inode *inode; - struct btrfs_inode_item inode_item; + struct btrfs_inode_item *inode_item; struct btrfs_key *location; + struct btrfs_path *path; int ret; int owner; + path = btrfs_alloc_path(); + BUG_ON(!path); + inode = new_inode(root->fs_info->sb); if (!inode) return ERR_PTR(-ENOMEM); @@ -1095,24 +1162,32 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, group = btrfs_find_block_group(root, group, 0, 0, owner); BTRFS_I(inode)->block_group = group; + ret = btrfs_insert_empty_inode(trans, root, path, objectid); + if (ret) + goto fail; + inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_mode = mode; inode->i_ino = objectid; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - fill_inode_item(&inode_item, inode); + inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + fill_inode_item(path->nodes[0], inode_item, inode); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + location = &BTRFS_I(inode)->location; location->objectid = objectid; - location->flags = 0; location->offset = 0; btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - ret = btrfs_insert_inode(trans, root, objectid, &inode_item); - if (ret) - return ERR_PTR(ret); insert_inode_hash(inode); return inode; +fail: + btrfs_free_path(path); + return ERR_PTR(ret); } static inline u8 btrfs_inode_type(struct inode *inode) @@ -1127,8 +1202,8 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; struct inode *parent_inode; + key.objectid = inode->i_ino; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; @@ -1285,14 +1360,18 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, inc_nlink(inode); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); atomic_inc(&inode->i_count); err = btrfs_add_nondir(trans, dentry, inode); + if (err) drop_inode = 1; + dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); + if (err) drop_inode = 1; @@ -1321,13 +1400,13 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, key.objectid = objectid; key.offset = 0; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, &key, BTRFS_FT_DIR); if (ret) goto error; + key.objectid = dirid; ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, &key, BTRFS_FT_DIR); @@ -1350,6 +1429,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); + if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out_unlock; @@ -1367,6 +1447,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = PTR_ERR(inode); goto out_fail; } + drop_on_err = 1; inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; @@ -1380,9 +1461,11 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = btrfs_update_inode(trans, root, inode); if (err) goto out_fail; + err = btrfs_add_link(trans, dentry, inode); if (err) goto out_fail; + d_instantiate(dentry, inode); drop_on_err = 0; dir->i_sb->s_dirt = 1; @@ -1392,6 +1475,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; btrfs_end_transaction(trans, root); + out_unlock: mutex_unlock(&root->fs_info->fs_mutex); if (drop_on_err) @@ -1415,8 +1499,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; - struct btrfs_leaf *leaf; - struct btrfs_disk_key *found_key; + struct extent_buffer *leaf; + struct btrfs_key found_key; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct btrfs_trans_handle *trans = NULL; @@ -1436,8 +1520,8 @@ again: err = -ENOMEM; goto out; } - em->start = 0; - em->end = 0; + em->start = EXTENT_MAP_HOLE; + em->end = EXTENT_MAP_HOLE; } em->bdev = inode->i_sb->s_bdev; ret = btrfs_lookup_file_extent(NULL, root, path, @@ -1453,25 +1537,27 @@ again: path->slots[0]--; } - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - leaf = btrfs_buffer_leaf(path->nodes[0]); - blocknr = btrfs_file_extent_disk_blocknr(item); - blocknr += btrfs_file_extent_offset(item); + + blocknr = btrfs_file_extent_disk_blocknr(leaf, item); + blocknr += btrfs_file_extent_offset(leaf, item); /* are we inside the extent that was found? */ - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); - if (btrfs_disk_key_objectid(found_key) != objectid || + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); + if (found_key.objectid != objectid || found_type != BTRFS_EXTENT_DATA_KEY) { goto not_found; } - found_type = btrfs_file_extent_type(item); - extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); + found_type = btrfs_file_extent_type(leaf, item); + extent_start = found_key.offset; if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = extent_start + - (btrfs_file_extent_num_blocks(item) << inode->i_blkbits); + (btrfs_file_extent_num_blocks(leaf, item) << + inode->i_blkbits); err = 0; if (start < extent_start || start >= extent_end) { em->start = start; @@ -1484,28 +1570,29 @@ again: } goto not_found_em; } - if (btrfs_file_extent_disk_blocknr(item) == 0) { + if (btrfs_file_extent_disk_blocknr(leaf, item) == 0) { em->start = extent_start; em->end = extent_end - 1; - em->block_start = 0; - em->block_end = 0; + em->block_start = EXTENT_MAP_HOLE; + em->block_end = EXTENT_MAP_HOLE; goto insert; } em->block_start = blocknr << inode->i_blkbits; em->block_end = em->block_start + - (btrfs_file_extent_num_blocks(item) << + (btrfs_file_extent_num_blocks(leaf, item) << inode->i_blkbits) - 1; em->start = extent_start; em->end = extent_end - 1; goto insert; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - char *ptr; + unsigned long ptr; char *map; u32 size; - size = btrfs_file_extent_inline_len(leaf->items + - path->slots[0]); - extent_end = extent_start | ((u64)root->blocksize - 1); + size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, + path->slots[0])); + + extent_end = extent_start | ((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { @@ -1517,18 +1604,21 @@ again: } goto not_found_em; } + em->block_start = EXTENT_MAP_INLINE; em->block_end = EXTENT_MAP_INLINE; em->start = extent_start; em->end = extent_end; + if (!page) { goto insert; } + ptr = btrfs_file_extent_inline_start(item); map = kmap(page); - memcpy(map + page_offset, ptr, size); + read_extent_buffer(leaf, map + page_offset, ptr, size); memset(map + page_offset + size, 0, - root->blocksize - (page_offset + size)); + root->sectorsize - (page_offset + size)); flush_dcache_page(page); kunmap(page); set_extent_uptodate(em_tree, extent_start, @@ -1542,8 +1632,8 @@ not_found: em->start = start; em->end = end; not_found_em: - em->block_start = 0; - em->block_end = 0; + em->block_start = EXTENT_MAP_HOLE; + em->block_end = EXTENT_MAP_HOLE; insert: btrfs_release_path(root, path); if (em->start > start || em->end < start) { @@ -1712,6 +1802,7 @@ static void btrfs_truncate(struct inode *inode) ret = btrfs_truncate_in_trans(trans, root, inode); btrfs_update_inode(trans, root, inode); nr = trans->blocks_used; + ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1731,8 +1822,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) struct btrfs_key key; struct btrfs_root_item root_item; struct btrfs_inode_item *inode_item; - struct buffer_head *subvol; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_root *new_root; struct inode *inode; struct inode *dir; @@ -1746,34 +1836,37 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root, 0, 0); - if (IS_ERR(subvol)) - return PTR_ERR(subvol); - leaf = btrfs_buffer_leaf(subvol); - btrfs_set_header_nritems(&leaf->header, 0); - btrfs_set_header_level(&leaf->header, 0); - btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); - btrfs_set_header_generation(&leaf->header, trans->transid); - btrfs_set_header_owner(&leaf->header, root->root_key.objectid); - memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, - sizeof(leaf->header.fsid)); - btrfs_mark_buffer_dirty(subvol); + leaf = btrfs_alloc_free_block(trans, root, 0, 0); + if (IS_ERR(leaf)) + return PTR_ERR(leaf); + + btrfs_set_header_nritems(leaf, 0); + btrfs_set_header_level(leaf, 0); + btrfs_set_header_blocknr(leaf, extent_buffer_blocknr(leaf)); + btrfs_set_header_generation(leaf, trans->transid); + btrfs_set_header_owner(leaf, root->root_key.objectid); + write_extent_buffer(leaf, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(leaf), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(leaf); inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); - btrfs_set_inode_generation(inode_item, 1); - btrfs_set_inode_size(inode_item, 3); - btrfs_set_inode_nlink(inode_item, 1); - btrfs_set_inode_nblocks(inode_item, 1); - btrfs_set_inode_mode(inode_item, S_IFDIR | 0755); + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nblocks = cpu_to_le64(1); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); + btrfs_set_root_blocknr(&root_item, extent_buffer_blocknr(leaf)); btrfs_set_root_refs(&root_item, 1); - btrfs_set_root_blocks_used(&root_item, 0); + btrfs_set_root_used(&root_item, 0); + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); root_item.drop_level = 0; - brelse(subvol); - subvol = NULL; + + free_extent_buffer(leaf); + leaf = NULL; ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); @@ -1784,7 +1877,6 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &root_item); @@ -1845,7 +1937,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) struct btrfs_trans_handle *trans; struct btrfs_key key; struct btrfs_root_item new_root_item; - struct buffer_head *tmp; + struct extent_buffer *tmp; int ret; int err; u64 objectid; @@ -1876,10 +1968,11 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node)); + btrfs_set_root_blocknr(&new_root_item, + extent_buffer_blocknr(root->node)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); @@ -1904,8 +1997,10 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) fail: nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); + if (err && !ret) ret = err; + mutex_unlock(&root->fs_info->fs_mutex); up_write(&root->snap_sem); btrfs_btree_balance_dirty(root, nr); @@ -1986,7 +2081,7 @@ static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) if (copy_from_user(&vol_args, arg, sizeof(vol_args))) return -EFAULT; - + namelen = strlen(vol_args.name); if (namelen > BTRFS_VOL_NAME_MAX) return -EINVAL; @@ -2164,8 +2259,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { return -ENOTEMPTY; } + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, new_dir); path = btrfs_alloc_path(); if (!path) { @@ -2177,9 +2274,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; + if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { struct btrfs_key *location = &BTRFS_I(new_dir)->location; - u64 old_parent_oid; + struct btrfs_key old_parent_key; di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino, "..", 2, -1); if (IS_ERR(di)) { @@ -2190,7 +2288,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, ret = -ENOENT; goto out_fail; } - old_parent_oid = btrfs_disk_key_objectid(&di->location); + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &old_parent_key); ret = btrfs_del_item(trans, root, path); if (ret) { goto out_fail; @@ -2199,7 +2297,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, di = btrfs_lookup_dir_index_item(trans, root, path, old_inode->i_ino, - old_parent_oid, + old_parent_key.objectid, "..", 2, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); @@ -2257,8 +2355,9 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, u64 objectid; int name_len; int datasize; - char *ptr; + unsigned long ptr; struct btrfs_file_extent_item *ei; + struct extent_buffer *leaf; unsigned long nr; name_len = strlen(symname) + 1; @@ -2302,7 +2401,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, BUG_ON(!path); key.objectid = inode->i_ino; key.offset = 0; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); datasize = btrfs_file_extent_calc_inline_size(name_len); err = btrfs_insert_empty_item(trans, root, path, &key, @@ -2311,16 +2409,17 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, drop_inode = 1; goto out_unlock; } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, symname, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + write_extent_buffer(leaf, symname, ptr, name_len); + btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); + inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_size = name_len - 1; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index bafa78603bd2..a825ce078a54 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -20,10 +20,10 @@ #include "disk-io.h" #include "print-tree.h" -void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) +void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; - u32 nr = btrfs_header_nritems(&l->header); + u32 nr = btrfs_header_nritems(l); struct btrfs_item *item; struct btrfs_extent_item *ei; struct btrfs_root_item *ri; @@ -31,119 +31,113 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_inode_item *ii; struct btrfs_block_group_item *bi; struct btrfs_file_extent_item *fi; + struct btrfs_key key; + struct btrfs_key found_key; u32 type; printk("leaf %llu total ptrs %d free space %d\n", - (unsigned long long)btrfs_header_blocknr(&l->header), nr, + (unsigned long long)btrfs_header_blocknr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { - item = l->items + i; - type = btrfs_disk_key_type(&item->key); + item = btrfs_item_nr(l, i); + btrfs_item_key_to_cpu(l, &key, i); + type = btrfs_key_type(&key); printk("\titem %d key (%llu %x %llu) itemoff %d itemsize %d\n", i, - (unsigned long long)btrfs_disk_key_objectid(&item->key), - btrfs_disk_key_flags(&item->key), - (unsigned long long)btrfs_disk_key_offset(&item->key), - btrfs_item_offset(item), - btrfs_item_size(item)); + (unsigned long long)key.objectid, type, + (unsigned long long)key.offset, + btrfs_item_offset(l, item), btrfs_item_size(l, item)); switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); printk("\t\tinode generation %llu size %llu mode %o\n", - (unsigned long long)btrfs_inode_generation(ii), - (unsigned long long)btrfs_inode_size(ii), - btrfs_inode_mode(ii)); + (unsigned long long)btrfs_inode_generation(l, ii), + (unsigned long long)btrfs_inode_size(l, ii), + btrfs_inode_mode(l, ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); + btrfs_dir_item_key_to_cpu(l, di, &found_key); printk("\t\tdir oid %llu flags %u type %u\n", - (unsigned long long)btrfs_disk_key_objectid( - &di->location), - btrfs_dir_flags(di), - btrfs_dir_type(di)); - printk("\t\tname %.*s\n", - btrfs_dir_name_len(di),(char *)(di + 1)); + (unsigned long long)found_key.objectid, + btrfs_dir_flags(l, di), + btrfs_dir_type(l, di)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); printk("\t\troot data blocknr %llu refs %u\n", - (unsigned long long)btrfs_root_blocknr(ri), - btrfs_root_refs(ri)); + (unsigned long long)btrfs_disk_root_blocknr(l, ri), + btrfs_disk_root_refs(l, ri)); break; case BTRFS_EXTENT_ITEM_KEY: ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); printk("\t\textent data refs %u\n", - btrfs_extent_refs(ei)); + btrfs_extent_refs(l, ei)); break; case BTRFS_EXTENT_DATA_KEY: fi = btrfs_item_ptr(l, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(l, fi) == BTRFS_FILE_EXTENT_INLINE) { printk("\t\tinline extent data size %u\n", - btrfs_file_extent_inline_len(l->items + i)); + btrfs_file_extent_inline_len(l, item)); break; } printk("\t\textent data disk block %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_disk_blocknr(fi), - (unsigned long long)btrfs_file_extent_disk_num_blocks(fi)); + (unsigned long long)btrfs_file_extent_disk_blocknr(l, fi), + (unsigned long long)btrfs_file_extent_disk_num_blocks(l, fi)); printk("\t\textent data offset %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_offset(fi), - (unsigned long long)btrfs_file_extent_num_blocks(fi)); + (unsigned long long)btrfs_file_extent_offset(l, fi), + (unsigned long long)btrfs_file_extent_num_blocks(l, fi)); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); printk("\t\tblock group used %llu\n", - (unsigned long long)btrfs_block_group_used(bi)); - break; - case BTRFS_STRING_ITEM_KEY: - printk("\t\titem data %.*s\n", btrfs_item_size(item), - btrfs_leaf_data(l) + btrfs_item_offset(item)); + (unsigned long long)btrfs_disk_block_group_used(l, bi)); break; }; } } -void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t) +void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) { int i; u32 nr; - struct btrfs_node *c; + struct btrfs_key key; - if (!t) + if (!c) return; - c = btrfs_buffer_node(t); - nr = btrfs_header_nritems(&c->header); + nr = btrfs_header_nritems(c); if (btrfs_is_leaf(c)) { - btrfs_print_leaf(root, (struct btrfs_leaf *)c); + btrfs_print_leaf(root, c); return; } printk("node %llu level %d total ptrs %d free spc %u\n", - (unsigned long long)btrfs_header_blocknr(&c->header), - btrfs_header_level(&c->header), nr, + (unsigned long long)btrfs_header_blocknr(c), + btrfs_header_level(c), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { + btrfs_node_key_to_cpu(c, &key, i); printk("\tkey %d (%llu %u %llu) block %llu\n", i, - (unsigned long long)c->ptrs[i].key.objectid, - c->ptrs[i].key.flags, - (unsigned long long)c->ptrs[i].key.offset, + (unsigned long long)key.objectid, + key.type, + (unsigned long long)key.offset, (unsigned long long)btrfs_node_blockptr(c, i)); } for (i = 0; i < nr; i++) { - struct buffer_head *next_buf = read_tree_block(root, + struct extent_buffer *next = read_tree_block(root, btrfs_node_blockptr(c, i)); - struct btrfs_node *next = btrfs_buffer_node(next_buf); if (btrfs_is_leaf(next) && - btrfs_header_level(&c->header) != 1) + btrfs_header_level(c) != 1) BUG(); - if (btrfs_header_level(&next->header) != - btrfs_header_level(&c->header) - 1) + if (btrfs_header_level(next) != + btrfs_header_level(c) - 1) BUG(); - btrfs_print_tree(root, next_buf); - btrfs_block_release(root, next_buf); + btrfs_print_tree(root, next); + free_extent_buffer(next); } } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 205cd03601ab..da75efe534d5 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -18,6 +18,6 @@ #ifndef __PRINT_TREE_ #define __PRINT_TREE_ -void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); -void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t); +void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l); +void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t); #endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 3b5926dfbeba..88bcdd33f56e 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -26,12 +26,13 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, { struct btrfs_path *path; struct btrfs_key search_key; - struct btrfs_leaf *l; + struct btrfs_key found_key; + struct extent_buffer *l; int ret; int slot; search_key.objectid = objectid; - search_key.flags = (u32)-1; + search_key.type = (u8)-1; search_key.offset = (u64)-1; path = btrfs_alloc_path(); @@ -39,17 +40,19 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) goto out; + BUG_ON(ret == 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; BUG_ON(path->slots[0] == 0); slot = path->slots[0] - 1; - if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { + btrfs_item_key_to_cpu(l, &found_key, slot); + if (found_key.objectid != objectid) { ret = 1; goto out; } - memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), - sizeof(*item)); - btrfs_disk_key_to_cpu(key, &l->items[slot].key); + read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), + sizeof(*item)); + memcpy(key, &found_key, sizeof(found_key)); ret = 0; out: btrfs_release_path(root, path); @@ -62,10 +65,10 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *item) { struct btrfs_path *path; - struct btrfs_leaf *l; + struct extent_buffer *l; int ret; int slot; - struct btrfs_root_item *update_item; + unsigned long ptr; path = btrfs_alloc_path(); BUG_ON(!path); @@ -73,10 +76,10 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) goto out; BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - update_item = btrfs_item_ptr(l, slot, struct btrfs_root_item); - btrfs_memcpy(root, l, update_item, item, sizeof(*item)); + ptr = btrfs_item_ptr_offset(l, slot); + write_extent_buffer(l, item, ptr, sizeof(*item)); btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_release_path(root, path); @@ -103,11 +106,10 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_path *path; int ret; u32 nritems; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; key.objectid = objectid; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); key.offset = 0; path = btrfs_alloc_path(); @@ -117,19 +119,19 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, if (ret < 0) goto err; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; if (slot >= nritems) { ret = btrfs_next_leaf(root, path); if (ret) break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; } - item = leaf->items + slot; - btrfs_disk_key_to_cpu(&key, &item->key); + item = btrfs_item_nr(leaf, slot); + btrfs_item_key_to_cpu(leaf, &key, slot); if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) goto next; @@ -140,7 +142,7 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, break; ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item); - if (btrfs_root_refs(ri) != 0) + if (btrfs_disk_root_refs(leaf, ri) != 0) goto next; dead_root = btrfs_read_fs_root_no_radix(root->fs_info, &key); @@ -170,6 +172,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, int ret; u32 refs; struct btrfs_root_item *ri; + struct extent_buffer *leaf; path = btrfs_alloc_path(); BUG_ON(!path); @@ -177,10 +180,10 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (ret < 0) goto out; BUG_ON(ret != 0); - ri = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_root_item); + leaf = path->nodes[0]; + ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); - refs = btrfs_root_refs(ri); + refs = btrfs_disk_root_refs(leaf, ri); BUG_ON(refs != 0); ret = btrfs_del_item(trans, root, path); out: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 66a01cbbbea1..39a1435c68f1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -41,7 +41,7 @@ #include "ioctl.h" #include "print-tree.h" -#define BTRFS_SUPER_MAGIC 0x9123682E +#define BTRFS_SUPER_MAGIC 0x9123683E static struct super_operations btrfs_super_ops; @@ -115,13 +115,12 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) return -EIO; } sb->s_fs_info = tree_root; - disk_super = tree_root->fs_info->disk_super; + disk_super = &tree_root->fs_info->super_copy; inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), tree_root); bi = BTRFS_I(inode); bi->location.objectid = inode->i_ino; bi->location.offset = 0; - bi->location.flags = 0; bi->root = tree_root; btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); @@ -281,6 +280,7 @@ error_s: error_bdev: close_bdev_excl(bdev); error: +printk("get_sb failed\n"); return error; } /* end copy & paste */ @@ -295,6 +295,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, btrfs_fill_super, mnt, subvol_name ? subvol_name : "default"); +printk("btrfs_get_sb returns %d\n", ret); return ret; } diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 2058783373eb..9654e90eec89 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -31,31 +31,31 @@ static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_root_blocks_used(&root->root_item)); + (unsigned long long)btrfs_root_used(&root->root_item)); } static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_root_block_limit(&root->root_item)); + (unsigned long long)btrfs_root_limit(&root->root_item)); } static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocks_used(fs->disk_super)); + (unsigned long long)btrfs_super_blocks_used(&fs->super_copy)); } static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_total_blocks(fs->disk_super)); + (unsigned long long)btrfs_super_total_blocks(&fs->super_copy)); } static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocksize(fs->disk_super)); + (unsigned long long)btrfs_super_sectorsize(&fs->super_copy)); } /* this is for root attrs (subvols/snapshots) */ diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 087074db0bd5..750f35a37aae 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -66,7 +67,9 @@ static int join_transaction(struct btrfs_root *root) cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); - init_bit_radix(&cur_trans->dirty_pages); + extent_map_tree_init(&cur_trans->dirty_pages, + root->fs_info->btree_inode->i_mapping, + GFP_NOFS); } else { cur_trans->num_writers++; cur_trans->num_joined++; @@ -88,7 +91,7 @@ static int record_root_in_trans(struct btrfs_root *root) (unsigned long)root->root_key.objectid, BTRFS_ROOT_DEFRAG_TAG); root->commit_root = root->node; - get_bh(root->node); + extent_buffer_get(root->node); } else { WARN_ON(1); } @@ -144,29 +147,30 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - unsigned long gang[16]; int ret; - int i; int err; int werr = 0; + struct extent_map_tree *dirty_pages; struct page *page; - struct radix_tree_root *dirty_pages; struct inode *btree_inode = root->fs_info->btree_inode; + u64 start; + u64 end; + unsigned long index; if (!trans || !trans->transaction) { return filemap_write_and_wait(btree_inode->i_mapping); } dirty_pages = &trans->transaction->dirty_pages; while(1) { - ret = find_first_radix_bit(dirty_pages, gang, - 0, ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(dirty_pages, 0, &start, &end, + EXTENT_DIRTY); + if (ret) break; - for (i = 0; i < ret; i++) { - /* FIXME EIO */ - clear_radix_bit(dirty_pages, gang[i]); - page = find_lock_page(btree_inode->i_mapping, - gang[i]); + clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); + while(start <= end) { + index = start >> PAGE_CACHE_SHIFT; + start = (index + 1) << PAGE_CACHE_SHIFT; + page = find_lock_page(btree_inode->i_mapping, index); if (!page) continue; if (PageWriteback(page)) { @@ -202,10 +206,11 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_write_dirty_block_groups(trans, extent_root); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == bh_blocknr(extent_root->node)) + if (old_extent_block == + extent_buffer_blocknr(extent_root->node)) break; btrfs_set_root_blocknr(&extent_root->root_item, - bh_blocknr(extent_root->node)); + extent_buffer_blocknr(extent_root->node)); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -279,9 +284,9 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { - WARN_ON(bh_blocknr(root->node) != + WARN_ON(extent_buffer_blocknr(root->node) != btrfs_root_blocknr(&root->root_item)); - brelse(root->commit_root); + free_extent_buffer(root->commit_root); root->commit_root = NULL; /* make sure to update the root on disk @@ -310,7 +315,7 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, - bh_blocknr(root->node)); + extent_buffer_blocknr(root->node)); err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); @@ -389,10 +394,10 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) for (i = 0; i < ret; i++) { root = gang[i]; last = root->root_key.objectid + 1; - btrfs_defrag_root(root, 1); + // btrfs_defrag_root(root, 1); } } - btrfs_defrag_root(info->extent_root, 1); + // btrfs_defrag_root(info->extent_root, 1); return err; } @@ -414,7 +419,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); - num_blocks = btrfs_root_blocks_used(&dirty->root->root_item); + num_blocks = btrfs_root_used(&dirty->root->root_item); root = dirty->latest_root; while(1) { @@ -441,11 +446,11 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, } BUG_ON(ret); - num_blocks -= btrfs_root_blocks_used(&dirty->root->root_item); - blocks_used = btrfs_root_blocks_used(&root->root_item); + num_blocks -= btrfs_root_used(&dirty->root->root_item); + blocks_used = btrfs_root_used(&root->root_item); if (num_blocks) { record_root_in_trans(root); - btrfs_set_root_blocks_used(&root->root_item, + btrfs_set_root_used(&root->root_item, blocks_used - num_blocks); } ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); @@ -553,9 +558,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_super_generation(&root->fs_info->super_copy, cur_trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, - bh_blocknr(root->fs_info->tree_root->node)); - memcpy(root->fs_info->disk_super, &root->fs_info->super_copy, - sizeof(root->fs_info->super_copy)); + extent_buffer_blocknr(root->fs_info->tree_root->node)); + + write_extent_buffer(root->fs_info->sb_buffer, + &root->fs_info->super_copy, 0, + sizeof(root->fs_info->super_copy)); btrfs_copy_pinned(root, &pinned_copy); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 4bc328cbb24c..ae39fcfc169a 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -28,7 +28,7 @@ struct btrfs_transaction { int use_count; int commit_done; struct list_head list; - struct radix_tree_root dirty_pages; + struct extent_map_tree dirty_pages; unsigned long start_time; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; @@ -83,5 +83,6 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); - +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 420597127ed1..daf019afa0a1 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -23,14 +23,14 @@ #include "transaction.h" static void reada_defrag(struct btrfs_root *root, - struct btrfs_node *node) + struct extent_buffer *node) { int i; u32 nritems; u64 blocknr; int ret; - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { blocknr = btrfs_node_blockptr(node, i); ret = readahead_tree_block(root, blocknr); @@ -44,8 +44,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, struct btrfs_path *path, int *level, int cache_only, u64 *last_ret) { - struct buffer_head *next; - struct buffer_head *cur; + struct extent_buffer *next; + struct extent_buffer *cur; u64 blocknr; int ret = 0; int is_extent = 0; @@ -62,13 +62,13 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, cur = path->nodes[*level]; if (!cache_only && *level > 1 && path->slots[*level] == 0) - reada_defrag(root, btrfs_buffer_node(cur)); + reada_defrag(root, cur); - if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + if (btrfs_header_level(cur) != *level) WARN_ON(1); if (path->slots[*level] >= - btrfs_header_nritems(btrfs_buffer_header(cur))) + btrfs_header_nritems(cur)) break; if (*level == 1) { @@ -80,14 +80,13 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; } - blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), - path->slots[*level]); + blocknr = btrfs_node_blockptr(cur, path->slots[*level]); if (cache_only) { next = btrfs_find_tree_block(root, blocknr); - if (!next || !buffer_uptodate(next) || - buffer_locked(next) || !buffer_defrag(next)) { - brelse(next); + /* FIXME, test for defrag */ + if (!next || !btrfs_buffer_uptodate(next)) { + free_extent_buffer(next); path->slots[*level]++; continue; } @@ -106,16 +105,18 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, WARN_ON(*level <= 0); if (path->nodes[*level-1]) - btrfs_block_release(root, path->nodes[*level-1]); + free_extent_buffer(path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(btrfs_buffer_header(next)); + *level = btrfs_header_level(next); path->slots[*level] = 0; } WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); +#if 0 clear_buffer_defrag(path->nodes[*level]); clear_buffer_defrag_done(path->nodes[*level]); - btrfs_block_release(root, path->nodes[*level]); +#endif + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; WARN_ON(ret); @@ -129,24 +130,25 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, { int i; int slot; - struct btrfs_node *node; + struct extent_buffer *node; for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < btrfs_header_nritems( - btrfs_buffer_header(path->nodes[i])) - 1) { + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { path->slots[i]++; *level = i; - node = btrfs_buffer_node(path->nodes[i]); + node = path->nodes[i]; WARN_ON(i == 0); - btrfs_disk_key_to_cpu(&root->defrag_progress, - &node->ptrs[path->slots[i]].key); + btrfs_node_key_to_cpu(node, &root->defrag_progress, + path->slots[i]); root->defrag_level = i; return 0; } else { + /* clear_buffer_defrag(path->nodes[*level]); clear_buffer_defrag_done(path->nodes[*level]); - btrfs_block_release(root, path->nodes[*level]); + */ + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; } @@ -158,7 +160,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only) { struct btrfs_path *path = NULL; - struct buffer_head *tmp; + struct extent_buffer *tmp; int ret = 0; int wret; int level; @@ -172,17 +174,18 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (root->ref_cows == 0 && !is_extent) goto out; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); orig_level = level; if (level == 0) { goto out; } if (root->defrag_progress.objectid == 0) { - get_bh(root->node); + extent_buffer_get(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); BUG_ON(ret); ret = btrfs_realloc_node(trans, root, root->node, cache_only, @@ -200,12 +203,15 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (is_extent) btrfs_extent_post_op(trans, root); + if (wret < 0) { ret = wret; goto out; } + while(level > 0 && !path->nodes[level]) level--; + if (!path->nodes[level]) { ret = 0; goto out; @@ -230,7 +236,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { - btrfs_block_release(root, path->nodes[i]); + free_extent_buffer(path->nodes[i]); path->nodes[i] = 0; } } From 479965d66e320f1a095bb76027171daa675a9c72 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:27 -0400 Subject: [PATCH 0242/2985] Btrfs: Optimizations for the extent_buffer code Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 51 ++++++++++++++++++++----------------------- fs/btrfs/ctree.h | 48 +++++++++++++++++++++++++++++++++++----- fs/btrfs/extent_map.c | 11 +++++++--- fs/btrfs/extent_map.h | 2 +- 4 files changed, 76 insertions(+), 36 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f60920e8a0e0..9427b79c5d79 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -464,22 +464,12 @@ static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { struct extent_buffer *buf = path->nodes[level]; - char fsid[BTRFS_FSID_SIZE]; - read_extent_buffer(buf, fsid, (unsigned long)btrfs_header_fsid(buf), - BTRFS_FSID_SIZE); - - if (memcmp(fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) { - int i = 0; + if (memcmp_extent_buffer(buf, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(buf), + BTRFS_FSID_SIZE)) { printk("warning bad block %Lu\n", buf->start); - if (!btrfs_buffer_uptodate(buf)) { - WARN_ON(1); - } - for (i = 0; i < BTRFS_FSID_SIZE; i++) { - printk("%x:%x ", root->fs_info->fsid[i], fsid[i]); - } - printk("\n"); - // BUG(); + BUG(); } if (level == 0) return check_leaf(root, path, level); @@ -504,13 +494,14 @@ static int generic_bin_search(struct extent_buffer *eb, unsigned long p, int high = max; int mid; int ret; - struct btrfs_disk_key *tmp; + struct btrfs_disk_key *tmp = NULL; struct btrfs_disk_key unaligned; unsigned long offset; char *map_token = NULL; char *kaddr = NULL; unsigned long map_start = 0; unsigned long map_len = 0; + int err; while(low < high) { mid = (low + high) / 2; @@ -519,19 +510,24 @@ static int generic_bin_search(struct extent_buffer *eb, unsigned long p, if (!map_token || offset < map_start || (offset + sizeof(struct btrfs_disk_key)) > map_start + map_len) { - if (map_token) + if (map_token) { unmap_extent_buffer(eb, map_token, KM_USER0); - map_extent_buffer(eb, offset, &map_token, &kaddr, - &map_start, &map_len, KM_USER0); + map_token = NULL; + } + err = map_extent_buffer(eb, offset, + sizeof(struct btrfs_disk_key), + &map_token, &kaddr, + &map_start, &map_len, KM_USER0); + + if (!err) { + tmp = (struct btrfs_disk_key *)(kaddr + offset - + map_start); + } else { + read_extent_buffer(eb, &unaligned, + offset, sizeof(unaligned)); + tmp = &unaligned; + } - } - if (offset + sizeof(struct btrfs_disk_key) > - map_start + map_len) { - unmap_extent_buffer(eb, map_token, KM_USER0); - read_extent_buffer(eb, &unaligned, - offset, sizeof(unaligned)); - map_token = NULL; - tmp = &unaligned; } else { tmp = (struct btrfs_disk_key *)(kaddr + offset - map_start); @@ -544,7 +540,8 @@ static int generic_bin_search(struct extent_buffer *eb, unsigned long p, high = mid; else { *slot = mid; - unmap_extent_buffer(eb, map_token, KM_USER0); + if (map_token) + unmap_extent_buffer(eb, map_token, KM_USER0); return 0; } } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c4b829806855..30fbbd7221a9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "bit-radix.h" #include "extent_map.h" @@ -431,15 +432,52 @@ struct btrfs_root { static inline u##bits btrfs_##name(struct extent_buffer *eb, \ type *s) \ { \ - __le##bits res; \ - read_eb_member(eb, s, type, member, &res); \ - return le##bits##_to_cpu(res); \ + int err; \ + char *map_token; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + unsigned long offset = (unsigned long)s + \ + offsetof(type, member); \ + err = map_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &map_token, &kaddr, \ + &map_start, &map_len, KM_USER0); \ + if (!err) { \ + __le##bits *tmp = (__le##bits *)(kaddr + offset - \ + map_start); \ + u##bits res = le##bits##_to_cpu(*tmp); \ + unmap_extent_buffer(eb, map_token, KM_USER0); \ + return res; \ + } else { \ + __le##bits res; \ + read_eb_member(eb, s, type, member, &res); \ + return le##bits##_to_cpu(res); \ + } \ } \ static inline void btrfs_set_##name(struct extent_buffer *eb, \ type *s, u##bits val) \ { \ - val = cpu_to_le##bits(val); \ - write_eb_member(eb, s, type, member, &val); \ + int err; \ + char *map_token; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + unsigned long offset = (unsigned long)s + \ + offsetof(type, member); \ + err = map_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &map_token, &kaddr, \ + &map_start, &map_len, KM_USER0); \ + if (!err) { \ + __le##bits *tmp = (__le##bits *)(kaddr + offset - \ + map_start); \ + *tmp = cpu_to_le##bits(val); \ + unmap_extent_buffer(eb, map_token, KM_USER0); \ + } else { \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, s, type, member, &val); \ + } \ } #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 55fefdfab84c..8bef309e1b37 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2140,14 +2140,20 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, EXPORT_SYMBOL(read_extent_buffer); int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, char **token, char **map, unsigned long *map_start, unsigned long *map_len, int km) { - size_t offset; + size_t offset = start & (PAGE_CACHE_SIZE - 1); char *kaddr; size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long end_i = (start_offset + start + min_len) >> + PAGE_CACHE_SHIFT; + + if (i != end_i) + return -EINVAL; WARN_ON(start > eb->len); @@ -2155,8 +2161,7 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, offset = start_offset; *map_start = 0; } else { - offset = 0; - *map_start = (i << PAGE_CACHE_SHIFT) - offset; + *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; } // kaddr = kmap_atomic(eb->pages[i], km); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 430b997a70f6..895789039ddd 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -173,7 +173,7 @@ int set_extent_buffer_uptodate(struct extent_map_tree *tree, int extent_buffer_uptodate(struct extent_map_tree *tree, struct extent_buffer *eb); int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, - char **token, char **map, + unsigned long min_len, char **token, char **map, unsigned long *map_start, unsigned long *map_len, int km); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); From 6d36dcd48f1e4e7446d603a3df9638bd314a182d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:37 -0400 Subject: [PATCH 0243/2985] Btrfs: Avoid memcpy where possible in extent_buffers Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 45 ++++++++++++-- fs/btrfs/disk-io.c | 6 +- fs/btrfs/extent_map.c | 132 ++++++++++++++++++++++++++++++++---------- fs/btrfs/extent_map.h | 5 +- 4 files changed, 147 insertions(+), 41 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 30fbbd7221a9..aed08618aca6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -483,15 +483,50 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ { \ - __le##bits res; \ - read_eb_member(eb, NULL, type, member, &res); \ - return le##bits##_to_cpu(res); \ + int err; \ + char *map_token; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + unsigned long offset = offsetof(type, member); \ + err = map_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &map_token, &kaddr, \ + &map_start, &map_len, KM_USER0); \ + if (!err) { \ + __le##bits *tmp = (__le##bits *)(kaddr + offset - \ + map_start); \ + u##bits res = le##bits##_to_cpu(*tmp); \ + unmap_extent_buffer(eb, map_token, KM_USER0); \ + return res; \ + } else { \ + __le##bits res; \ + read_eb_member(eb, NULL, type, member, &res); \ + return le##bits##_to_cpu(res); \ + } \ } \ static inline void btrfs_set_##name(struct extent_buffer *eb, \ u##bits val) \ { \ - val = cpu_to_le##bits(val); \ - write_eb_member(eb, NULL, type, member, &val); \ + int err; \ + char *map_token; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + unsigned long offset = offsetof(type, member); \ + err = map_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &map_token, &kaddr, \ + &map_start, &map_len, KM_USER0); \ + if (!err) { \ + __le##bits *tmp = (__le##bits *)(kaddr + offset - \ + map_start); \ + *tmp = cpu_to_le##bits(val); \ + unmap_extent_buffer(eb, map_token, KM_USER0); \ + } else { \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, NULL, type, member, &val); \ + } \ } #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0c1f90cbedb0..8242933a1d8b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -609,20 +609,20 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->first_page->mapping->host; return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->first_page->mapping->host; return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 8bef309e1b37..d2c733c68b4c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -18,6 +18,11 @@ struct kmem_cache *btrfs_cache_create(const char *name, size_t size, static struct kmem_cache *extent_map_cache; static struct kmem_cache *extent_state_cache; +static struct kmem_cache *extent_buffer_cache; +static LIST_HEAD(extent_buffers); +static spinlock_t extent_buffers_lock; +static int nr_extent_buffers; +#define MAX_EXTENT_BUFFER_CACHE 128 struct tree_entry { u64 start; @@ -29,21 +34,33 @@ struct tree_entry { void __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", - sizeof(struct extent_map), - SLAB_DESTROY_BY_RCU, + sizeof(struct extent_map), 0, NULL); extent_state_cache = btrfs_cache_create("extent_state", - sizeof(struct extent_state), - SLAB_DESTROY_BY_RCU, + sizeof(struct extent_state), 0, NULL); + extent_buffer_cache = btrfs_cache_create("extent_buffers", + sizeof(struct extent_buffer), 0, + NULL); + spin_lock_init(&extent_buffers_lock); } void __exit extent_map_exit(void) { + struct extent_buffer *eb; + + while (!list_empty(&extent_buffers)) { + eb = list_entry(extent_buffers.next, + struct extent_buffer, list); + list_del(&eb->list); + kmem_cache_free(extent_buffer_cache, eb); + } if (extent_map_cache) kmem_cache_destroy(extent_map_cache); if (extent_state_cache) kmem_cache_destroy(extent_state_cache); + if (extent_buffer_cache) + kmem_cache_destroy(extent_buffer_cache); } void extent_map_tree_init(struct extent_map_tree *tree, @@ -1858,6 +1875,48 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, return (em->block_start + start - em->start) >> inode->i_blkbits; } +static struct extent_buffer *__alloc_extent_buffer(gfp_t mask) +{ + struct extent_buffer *eb = NULL; + spin_lock(&extent_buffers_lock); + if (!list_empty(&extent_buffers)) { + eb = list_entry(extent_buffers.next, struct extent_buffer, + list); + list_del(&eb->list); + WARN_ON(nr_extent_buffers == 0); + nr_extent_buffers--; + } + spin_unlock(&extent_buffers_lock); + if (eb) { + memset(eb, 0, sizeof(*eb)); + return eb; + } + return kmem_cache_zalloc(extent_buffer_cache, mask); +} + +static void __free_extent_buffer(struct extent_buffer *eb) +{ + if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) { + kmem_cache_free(extent_buffer_cache, eb); + } else { + spin_lock(&extent_buffers_lock); + list_add(&eb->list, &extent_buffers); + nr_extent_buffers++; + spin_unlock(&extent_buffers_lock); + } +} + +static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i) +{ + struct page *p; + if (i == 0) + return eb->first_page; + i += eb->start >> PAGE_CACHE_SHIFT; + p = find_get_page(eb->first_page->mapping, i); + page_cache_release(p); + return p; +} + struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask) @@ -1871,7 +1930,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, struct address_space *mapping = tree->mapping; int uptodate = 0; - eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask); + eb = __alloc_extent_buffer(mask); if (!eb || IS_ERR(eb)) return NULL; @@ -1881,9 +1940,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); - if (!p) + if (!p) { + /* make sure the free only frees the pages we've + * grabbed a reference on + */ + eb->len = i << PAGE_CACHE_SHIFT; + eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; - eb->pages[i] = p; + } + if (i == 0) + eb->first_page = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -1909,7 +1975,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, struct page *p; struct address_space *mapping = tree->mapping; - eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask); + eb = __alloc_extent_buffer(mask); if (!eb || IS_ERR(eb)) return NULL; @@ -1919,9 +1985,16 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, for (i = 0; i < num_pages; i++, index++) { p = find_get_page(mapping, index); - if (!p) + if (!p) { + /* make sure the free only frees the pages we've + * grabbed a reference on + */ + eb->len = i << PAGE_CACHE_SHIFT; + eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; - eb->pages[i] = p; + } + if (i == 0) + eb->first_page = p; } return eb; fail: @@ -1944,11 +2017,12 @@ void free_extent_buffer(struct extent_buffer *eb) num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - (eb->start >> PAGE_CACHE_SHIFT) + 1; - for (i = 0; i < num_pages; i++) { - if (eb->pages[i]) - page_cache_release(eb->pages[i]); + if (eb->first_page) + page_cache_release(eb->first_page); + for (i = 1; i < num_pages; i++) { + page_cache_release(extent_buffer_page(eb, i)); } - kfree(eb); + __free_extent_buffer(eb); } EXPORT_SYMBOL(free_extent_buffer); @@ -1968,7 +2042,7 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, (eb->start >> PAGE_CACHE_SHIFT) + 1; for (i = 0; i < num_pages; i++) { - page = eb->pages[i]; + page = extent_buffer_page(eb, i); lock_page(page); /* * if we're on the last page or the first page and the @@ -2021,7 +2095,7 @@ int set_extent_buffer_uptodate(struct extent_map_tree *tree, set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); for (i = 0; i < num_pages; i++) { - page = eb->pages[i]; + page = extent_buffer_page(eb, i); if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || ((i == num_pages - 1) && ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { @@ -2064,7 +2138,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - (eb->start >> PAGE_CACHE_SHIFT) + 1; for (i = 0; i < num_pages; i++) { - page = eb->pages[i]; + page = extent_buffer_page(eb, i); if (PageUptodate(page)) { continue; } @@ -2090,7 +2164,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, } for (i = 0; i < num_pages; i++) { - page = eb->pages[i]; + page = extent_buffer_page(eb, i); wait_on_page_locked(page); if (!PageUptodate(page)) { ret = -EIO; @@ -2116,12 +2190,12 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - page = eb->pages[i]; offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); if (i == 0) offset += start_offset; while(len > 0) { + page = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); @@ -2134,7 +2208,6 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, len -= cur; offset = 0; i++; - page = eb->pages[i]; } } EXPORT_SYMBOL(read_extent_buffer); @@ -2165,7 +2238,7 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, } // kaddr = kmap_atomic(eb->pages[i], km); - kaddr = page_address(eb->pages[i]); + kaddr = page_address(extent_buffer_page(eb, i)); *token = kaddr; *map = kaddr + offset; *map_len = PAGE_CACHE_SIZE - offset; @@ -2195,12 +2268,12 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - page = eb->pages[i]; offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); if (i == 0) offset += start_offset; while(len > 0) { + page = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); @@ -2216,7 +2289,6 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, len -= cur; offset = 0; i++; - page = eb->pages[i]; } return ret; } @@ -2236,12 +2308,12 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - page = eb->pages[i]; offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); if (i == 0) offset += start_offset; while(len > 0) { + page = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(page)); cur = min(len, PAGE_CACHE_SIZE - offset); @@ -2254,7 +2326,6 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, len -= cur; offset = 0; i++; - page = eb->pages[i]; } } EXPORT_SYMBOL(write_extent_buffer); @@ -2272,12 +2343,12 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - page = eb->pages[i]; offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); if (i == 0) offset += start_offset; while(len > 0) { + page = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(page)); cur = min(len, PAGE_CACHE_SIZE - offset); @@ -2289,7 +2360,6 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, len -= cur; offset = 0; i++; - page = eb->pages[i]; } } EXPORT_SYMBOL(memset_extent_buffer); @@ -2313,7 +2383,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, offset += start_offset; while(len > 0) { - page = dst->pages[i]; + page = extent_buffer_page(dst, i); WARN_ON(!PageUptodate(page)); cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); @@ -2414,7 +2484,8 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); - copy_pages(dst->pages[dst_i], dst->pages[src_i], + copy_pages(extent_buffer_page(dst, dst_i), + extent_buffer_page(dst, src_i), dst_off_in_page, src_off_in_page, cur); src_offset += cur; @@ -2467,7 +2538,8 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min(len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); // printk("move pages orig dst %lu src %lu len %lu, this %lu %lu %lu\n", dst_offset, src_offset, len, dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); - move_pages(dst->pages[dst_i], dst->pages[src_i], + move_pages(extent_buffer_page(dst, dst_i), + extent_buffer_page(dst, src_i), dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 895789039ddd..3b3abf34c403 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -62,14 +62,13 @@ struct extent_state { struct list_head list; }; -#define EXTENT_BUFFER_SIZE(nr) (sizeof(struct extent_buffer) + \ - (nr - 1) * sizeof(struct page *)) struct extent_buffer { u64 start; unsigned long len; atomic_t refs; int flags; - struct page *pages[]; + struct list_head list; + struct page *first_page; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, From ae5252bd51a252b7b8b02289337c36774835101c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:41 -0400 Subject: [PATCH 0244/2985] Btrfs: Go back to kmaps instead of page_address in extent_buffers Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 50 ++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index d2c733c68b4c..f150188f621c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1867,7 +1867,6 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, if (!em || IS_ERR(em)) return 0; - // XXX(hch): block 0 is valid in some cases, e.g. XFS RT device if (em->block_start == EXTENT_MAP_INLINE || em->block_start == EXTENT_MAP_HOLE) return 0; @@ -2199,10 +2198,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); - // kaddr = kmap_atomic(page, KM_USER0); - kaddr = page_address(page); + kaddr = kmap_atomic(page, KM_USER0); memcpy(dst, kaddr + offset, cur); - // kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER0); dst += cur; len -= cur; @@ -2237,8 +2235,7 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; } - // kaddr = kmap_atomic(eb->pages[i], km); - kaddr = page_address(extent_buffer_page(eb, i)); + kaddr = kmap_atomic(extent_buffer_page(eb, i), km); *token = kaddr; *map = kaddr + offset; *map_len = PAGE_CACHE_SIZE - offset; @@ -2248,7 +2245,7 @@ EXPORT_SYMBOL(map_extent_buffer); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) { - // kunmap_atomic(token, km); + kunmap_atomic(token, km); } EXPORT_SYMBOL(unmap_extent_buffer); @@ -2278,10 +2275,9 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, cur = min(len, (PAGE_CACHE_SIZE - offset)); - // kaddr = kmap_atomic(page, KM_USER0); - kaddr = page_address(page); + kaddr = kmap_atomic(page, KM_USER0); ret = memcmp(ptr, kaddr + offset, cur); - // kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER0); if (ret) break; @@ -2317,10 +2313,9 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(!PageUptodate(page)); cur = min(len, PAGE_CACHE_SIZE - offset); - // kaddr = kmap_atomic(page, KM_USER0); - kaddr = page_address(page); + kaddr = kmap_atomic(page, KM_USER0); memcpy(kaddr + offset, src, cur); - // kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER0); src += cur; len -= cur; @@ -2352,10 +2347,9 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, WARN_ON(!PageUptodate(page)); cur = min(len, PAGE_CACHE_SIZE - offset); - // kaddr = kmap_atomic(page, KM_USER0); - kaddr = page_address(page); + kaddr = kmap_atomic(page, KM_USER0); memset(kaddr + offset, c, cur); - // kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER0); len -= cur; offset = 0; @@ -2388,10 +2382,9 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); - // kaddr = kmap_atomic(page, KM_USER1); - kaddr = page_address(page); + kaddr = kmap_atomic(page, KM_USER1); read_extent_buffer(src, kaddr + offset, src_offset, cur); - // kunmap_atomic(kaddr, KM_USER1); + kunmap_atomic(kaddr, KM_USER1); src_offset += cur; len -= cur; @@ -2405,43 +2398,38 @@ static void move_pages(struct page *dst_page, struct page *src_page, unsigned long dst_off, unsigned long src_off, unsigned long len) { - // char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); - char *dst_kaddr = page_address(dst_page); + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); if (dst_page == src_page) { memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); } else { - // char *src_kaddr = kmap_atomic(src_page, KM_USER1); - char *src_kaddr = page_address(src_page); + char *src_kaddr = kmap_atomic(src_page, KM_USER1); char *p = dst_kaddr + dst_off + len; char *s = src_kaddr + src_off + len; while (len--) *--p = *--s; - // kunmap_atomic(src_kaddr, KM_USER1); + kunmap_atomic(src_kaddr, KM_USER1); } - // kunmap_atomic(dst_kaddr, KM_USER0); + kunmap_atomic(dst_kaddr, KM_USER0); } static void copy_pages(struct page *dst_page, struct page *src_page, unsigned long dst_off, unsigned long src_off, unsigned long len) { - //kmap_atomic(dst_page, KM_USER0); - char *dst_kaddr = page_address(dst_page); + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); char *src_kaddr; if (dst_page != src_page) - src_kaddr = page_address(src_page); // kmap_atomic(src_page, KM_USER1); + src_kaddr = kmap_atomic(src_page, KM_USER1); else src_kaddr = dst_kaddr; memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); - /* kunmap_atomic(dst_kaddr, KM_USER0); if (dst_page != src_page) kunmap_atomic(src_kaddr, KM_USER1); - */ } void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, @@ -2537,7 +2525,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min(len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); -// printk("move pages orig dst %lu src %lu len %lu, this %lu %lu %lu\n", dst_offset, src_offset, len, dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); + move_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), dst_off_in_page - cur + 1, From f510cfecfc98759d75283823cfccf0cc0d59a4c6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:48 -0400 Subject: [PATCH 0245/2985] Btrfs: Fix extent_buffer and extent_state leaks Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 10 +++- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 17 +++++- fs/btrfs/extent-tree.c | 126 +++++++++++++++++++---------------------- fs/btrfs/extent_map.c | 64 +++++++++++++++++---- fs/btrfs/extent_map.h | 2 + fs/btrfs/transaction.c | 5 +- 7 files changed, 139 insertions(+), 87 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9427b79c5d79..50ef351ef6b3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -87,6 +87,7 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (IS_ERR(cow)) return PTR_ERR(cow); + cow->alloc_addr = (unsigned long)__builtin_return_address(0); if (buf->len != root->sectorsize || cow->len != root->sectorsize) WARN_ON(1); @@ -132,6 +133,7 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer **cow_ret) { u64 search_start; + int ret; if (trans->transaction != root->fs_info->running_transaction) { printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, root->fs_info->running_transaction->transid); @@ -148,8 +150,10 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, } search_start = extent_buffer_blocknr(buf) & ~((u64)65535); - return __btrfs_cow_block(trans, root, buf, parent, + ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); + (*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0); + return ret; } static int close_blocks(u64 blocknr, u64 other) @@ -1013,8 +1017,10 @@ again: if (sret) return sret; b = p->nodes[level]; - if (!b) + if (!b) { + btrfs_release_path(NULL, p); goto again; + } slot = p->slots[level]; BUG_ON(btrfs_header_nritems(b) == 1); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index aed08618aca6..5262b28f468c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -303,8 +303,8 @@ struct btrfs_fs_info { struct radix_tree_root pinned_radix; struct radix_tree_root block_group_radix; struct radix_tree_root block_group_data_radix; - struct radix_tree_root extent_map_radix; struct radix_tree_root extent_ins_radix; + struct extent_map_tree free_space_cache; u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8242933a1d8b..09f4e694624d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -46,18 +46,25 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) { struct inode *btree_inode = root->fs_info->btree_inode; - return find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + struct extent_buffer *eb; + eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, blocknr * root->sectorsize, root->sectorsize, GFP_NOFS); + if (eb) + eb->alloc_addr = (unsigned long)__builtin_return_address(0); + return eb; } struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { struct inode *btree_inode = root->fs_info->btree_inode; - return alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + struct extent_buffer *eb; + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, blocknr * root->sectorsize, root->sectorsize, GFP_NOFS); + eb->alloc_addr = (unsigned long)__builtin_return_address(0); + return eb; } struct extent_map *btree_get_extent(struct inode *inode, struct page *page, @@ -226,6 +233,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); + buf->alloc_addr = (unsigned long)__builtin_return_address(0); return buf; } @@ -426,7 +434,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); - init_bit_radix(&fs_info->extent_map_radix); init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); @@ -449,6 +456,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->free_space_cache, + fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -594,8 +603,10 @@ int close_ctree(struct btrfs_root *root) if (fs_info->extent_root->node) free_extent_buffer(fs_info->extent_root->node); + if (fs_info->tree_root->node) free_extent_buffer(fs_info->tree_root->node); + free_extent_buffer(fs_info->sb_buffer); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 089c41cbca74..74cfbee2ff33 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -34,21 +34,19 @@ static int cache_block_group(struct btrfs_root *root, int ret; struct btrfs_key key; struct extent_buffer *leaf; - struct radix_tree_root *extent_radix; + struct extent_map_tree *free_space_cache; int slot; - u64 i; u64 last = 0; u64 hole_size; u64 first_free; int found = 0; root = root->fs_info->extent_root; - extent_radix = &root->fs_info->extent_map_radix; + free_space_cache = &root->fs_info->free_space_cache; if (block_group->cached) return 0; - if (block_group->data) - return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -98,9 +96,11 @@ static int cache_block_group(struct btrfs_root *root, last = first_free; found = 1; } - hole_size = key.objectid - last; - for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, last + i); + if (key.objectid > last) { + hole_size = key.objectid - last; + set_extent_dirty(free_space_cache, last, + last + hole_size - 1, + GFP_NOFS); } last = key.objectid + key.offset; } @@ -114,9 +114,8 @@ next: block_group->key.offset > last) { hole_size = block_group->key.objectid + block_group->key.offset - last; - for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, last + i); - } + set_extent_dirty(free_space_cache, last, + last + hole_size - 1, GFP_NOFS); } block_group->cached = 1; err: @@ -150,47 +149,33 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return NULL; } -static u64 leaf_range(struct btrfs_root *root) -{ - u64 size = BTRFS_LEAF_DATA_SIZE(root); - do_div(size, sizeof(struct btrfs_extent_item) + - sizeof(struct btrfs_item)); - return size; -} - static u64 find_search_start(struct btrfs_root *root, struct btrfs_block_group_cache **cache_ret, - u64 search_start, int num) + u64 search_start, int num, int data) { - unsigned long gang[8]; int ret; struct btrfs_block_group_cache *cache = *cache_ret; u64 last = max(search_start, cache->key.objectid); + u64 start = 0; + u64 end = 0; - if (cache->data) - goto out; again: ret = cache_block_group(root, cache); if (ret) goto out; while(1) { - ret = find_first_radix_bit(&root->fs_info->extent_map_radix, - gang, last, ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(&root->fs_info->free_space_cache, + last, &start, &end, EXTENT_DIRTY); + if (ret) goto out; - last = gang[ret-1] + 1; - if (num > 1) { - if (ret != ARRAY_SIZE(gang)) { - goto new_group; - } - if (gang[ret-1] - gang[0] > leaf_range(root)) { - continue; - } - } - if (gang[0] >= cache->key.objectid + cache->key.offset) { + + start = max(last, start); + last = end + 1; + if (end + 1 - start < num) + continue; + if (start + num > cache->key.objectid + cache->key.offset) goto new_group; - } - return gang[0]; + return start; } out: return max(cache->last_alloc, search_start); @@ -202,7 +187,7 @@ new_group: return max((*cache_ret)->last_alloc, search_start); } cache = btrfs_find_block_group(root, cache, - last + cache->key.offset - 1, 0, 0); + last + cache->key.offset - 1, data, 0); *cache_ret = cache; goto again; } @@ -625,7 +610,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 total = num; u64 old_val; u64 block_in_group; - u64 i; int ret; while(total) { @@ -644,12 +628,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, if (alloc) { if (blocknr > cache->last_alloc) cache->last_alloc = blocknr; - if (!cache->data) { - for (i = 0; i < num; i++) { - clear_radix_bit(&info->extent_map_radix, - blocknr + i); - } - } if (cache->data != data && old_val < (cache->key.offset >> 1)) { cache->data = data; @@ -677,11 +655,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val -= num; if (blocknr < cache->first_free) cache->first_free = blocknr; - if (!cache->data && mark_free) { - for (i = 0; i < num; i++) { - set_radix_bit(&info->extent_map_radix, - blocknr + i); - } + if (mark_free) { + set_extent_dirty(&info->free_space_cache, + blocknr, blocknr + num - 1, + GFP_NOFS); } if (old_val < (cache->key.offset >> 1) && old_val + num >= (cache->key.offset >> 1)) { @@ -732,7 +709,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, int ret; int i; struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; - struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix; + struct extent_map_tree *free_space_cache; + + free_space_cache = &root->fs_info->free_space_cache; while(1) { ret = find_first_radix_bit(unpin_radix, gang, 0, @@ -751,8 +730,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, block_group->pinned--; if (gang[i] < block_group->last_alloc) block_group->last_alloc = gang[i]; - if (!block_group->data) - set_radix_bit(extent_radix, gang[i]); + if (!block_group->data) { + set_extent_dirty(free_space_cache, + gang[i], gang[i], + GFP_NOFS); + } } } } @@ -995,6 +977,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; + u64 cached_search_start = 0; WARN_ON(num_blocks < 1); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); @@ -1017,11 +1000,9 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); check_failed: - if (!block_group->data) - search_start = find_search_start(root, &block_group, - search_start, total_needed); - else if (!full_scan) - search_start = max(block_group->last_alloc, search_start); + search_start = find_search_start(root, &block_group, + search_start, total_needed, data); + cached_search_start = search_start; btrfs_init_path(path); ins->objectid = search_start; @@ -1097,6 +1078,7 @@ check_failed: start_found = 1; last_block = key.objectid + key.offset; + if (!full_scan && last_block >= block_group->key.objectid + block_group->key.offset) { btrfs_release_path(root, path); @@ -1138,6 +1120,9 @@ check_pending: } ins->offset = num_blocks; btrfs_free_path(path); + if (0 && ins->objectid != cached_search_start) { +printk("\tcached was %Lu found %Lu\n", cached_search_start, ins->objectid); + } return 0; new_group: @@ -1209,6 +1194,10 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_root_used(&root->root_item, root_blocks_used + num_blocks); + clear_extent_dirty(&root->fs_info->free_space_cache, + ins->objectid, ins->objectid + ins->offset - 1, + GFP_NOFS); + if (root == extent_root) { BUG_ON(num_blocks != 1); set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid); @@ -1227,6 +1216,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); + if (ret) { return ret; } @@ -1265,6 +1255,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); } btrfs_set_buffer_uptodate(buf); + buf->alloc_addr = (unsigned long)__builtin_return_address(0); set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); /* @@ -1492,6 +1483,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root orig_level = level; if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { path->nodes[level] = root->node; + extent_buffer_get(root->node); path->slots[level] = 0; } else { struct btrfs_key key; @@ -1524,7 +1516,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; ret = -EAGAIN; - extent_buffer_get(root->node); break; } for (i = 0; i <= orig_level; i++) { @@ -1562,8 +1553,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) { int ret; int ret2; - unsigned long gang[16]; - int i; + u64 start; + u64 end; ret = free_block_group_radix(&info->block_group_radix); ret2 = free_block_group_radix(&info->block_group_data_radix); @@ -1573,13 +1564,12 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return ret2; while(1) { - ret = find_first_radix_bit(&info->extent_map_radix, - gang, 0, ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(&info->free_space_cache, 0, + &start, &end, EXTENT_DIRTY); + if (ret) break; - for (i = 0; i < ret; i++) { - clear_radix_bit(&info->extent_map_radix, gang[i]); - } + clear_extent_dirty(&info->free_space_cache, start, + end, GFP_NOFS); } return 0; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f150188f621c..5b7dbcaacd11 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -19,8 +19,13 @@ struct kmem_cache *btrfs_cache_create(const char *name, size_t size, static struct kmem_cache *extent_map_cache; static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; + static LIST_HEAD(extent_buffers); +static LIST_HEAD(buffers); +static LIST_HEAD(states); + static spinlock_t extent_buffers_lock; +static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; static int nr_extent_buffers; #define MAX_EXTENT_BUFFER_CACHE 128 @@ -48,6 +53,7 @@ void __init extent_map_init(void) void __exit extent_map_exit(void) { struct extent_buffer *eb; + struct extent_state *state; while (!list_empty(&extent_buffers)) { eb = list_entry(extent_buffers.next, @@ -55,6 +61,22 @@ void __exit extent_map_exit(void) list_del(&eb->list); kmem_cache_free(extent_buffer_cache, eb); } + while (!list_empty(&states)) { + state = list_entry(states.next, struct extent_state, list); + printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); + list_del(&state->list); + kmem_cache_free(extent_state_cache, state); + + } + while (!list_empty(&buffers)) { + eb = list_entry(buffers.next, + struct extent_buffer, leak_list); + printk("buffer leak start %Lu len %lu return %lX\n", eb->start, eb->len, eb->alloc_addr); + list_del(&eb->leak_list); + kmem_cache_free(extent_buffer_cache, eb); + } + + if (extent_map_cache) kmem_cache_destroy(extent_map_cache); if (extent_state_cache) @@ -101,12 +123,19 @@ EXPORT_SYMBOL(free_extent_map); struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; + unsigned long flags; + state = kmem_cache_alloc(extent_state_cache, mask); if (!state || IS_ERR(state)) return state; state->state = 0; state->in_tree = 0; state->private = 0; + + spin_lock_irqsave(&state_lock, flags); + list_add(&state->list, &states); + spin_unlock_irqrestore(&state_lock, flags); + atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); return state; @@ -115,10 +144,14 @@ EXPORT_SYMBOL(alloc_extent_state); void free_extent_state(struct extent_state *state) { + unsigned long flags; if (!state) return; if (atomic_dec_and_test(&state->refs)) { WARN_ON(state->in_tree); + spin_lock_irqsave(&state_lock, flags); + list_del(&state->list); + spin_unlock_irqrestore(&state_lock, flags); kmem_cache_free(extent_state_cache, state); } } @@ -361,10 +394,6 @@ static int insert_state(struct extent_map_tree *tree, state->state |= bits; state->start = start; state->end = end; - if ((end & 4095) == 0) { - printk("insert state %Lu %Lu strange end\n", start, end); - WARN_ON(1); - } node = tree_insert(&tree->state, end, &state->rb_node); if (node) { struct extent_state *found; @@ -399,11 +428,7 @@ static int split_state(struct extent_map_tree *tree, struct extent_state *orig, prealloc->end = split - 1; prealloc->state = orig->state; orig->start = split; - if ((prealloc->end & 4095) == 0) { - printk("insert state %Lu %Lu strange end\n", prealloc->start, - prealloc->end); - WARN_ON(1); - } + node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); if (node) { struct extent_state *found; @@ -957,6 +982,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start, *start_ret = state->start; *end_ret = state->end; ret = 0; + break; } node = rb_next(node); if (!node) @@ -1877,6 +1903,7 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, static struct extent_buffer *__alloc_extent_buffer(gfp_t mask) { struct extent_buffer *eb = NULL; + spin_lock(&extent_buffers_lock); if (!list_empty(&extent_buffers)) { eb = list_entry(extent_buffers.next, struct extent_buffer, @@ -1886,15 +1913,26 @@ static struct extent_buffer *__alloc_extent_buffer(gfp_t mask) nr_extent_buffers--; } spin_unlock(&extent_buffers_lock); + if (eb) { memset(eb, 0, sizeof(*eb)); - return eb; + } else { + eb = kmem_cache_zalloc(extent_buffer_cache, mask); } - return kmem_cache_zalloc(extent_buffer_cache, mask); + spin_lock(&extent_buffers_lock); + list_add(&eb->leak_list, &buffers); + spin_unlock(&extent_buffers_lock); + + return eb; } static void __free_extent_buffer(struct extent_buffer *eb) { + + spin_lock(&extent_buffers_lock); + list_del_init(&eb->leak_list); + spin_unlock(&extent_buffers_lock); + if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) { kmem_cache_free(extent_buffer_cache, eb); } else { @@ -1933,6 +1971,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; + eb->alloc_addr = __builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -1947,6 +1986,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; } + set_page_extent_mapped(p); if (i == 0) eb->first_page = p; if (!PageUptodate(p)) @@ -1978,6 +2018,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; + eb->alloc_addr = __builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -1992,6 +2033,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; } + set_page_extent_mapped(p); if (i == 0) eb->first_page = p; } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 3b3abf34c403..d100f7c1476c 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -68,7 +68,9 @@ struct extent_buffer { atomic_t refs; int flags; struct list_head list; + struct list_head leak_list; struct page *first_page; + unsigned long alloc_addr; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 750f35a37aae..372b61f5733f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -352,7 +352,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) return 0; trans = btrfs_start_transaction(root, 1); - while (1) { + while (0) { root->defrag_running = 1; ret = btrfs_defrag_leaves(trans, root, cacheonly); nr = trans->blocks_used; @@ -394,7 +394,7 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) for (i = 0; i < ret; i++) { root = gang[i]; last = root->root_key.objectid + 1; - // btrfs_defrag_root(root, 1); + btrfs_defrag_root(root, 1); } } // btrfs_defrag_root(info->extent_root, 1); @@ -462,6 +462,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); + free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); From 96b5179d0d9b6368c203856f2ad6e8e12a8b2a2c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:19 -0400 Subject: [PATCH 0246/2985] Btrfs: Stop using radix trees for the block group cache Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 +- fs/btrfs/disk-io.c | 4 +- fs/btrfs/extent-tree.c | 364 +++++++++++++++++------------------------ fs/btrfs/extent_map.c | 18 +- fs/btrfs/extent_map.h | 4 + 5 files changed, 172 insertions(+), 224 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5262b28f468c..c6174b27fd13 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -271,8 +271,6 @@ struct btrfs_csum_item { } __attribute__ ((__packed__)); /* tag for the radix tree of block groups in ram */ -#define BTRFS_BLOCK_GROUP_DIRTY 0 -#define BTRFS_BLOCK_GROUP_AVAIL 1 #define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) @@ -285,7 +283,6 @@ struct btrfs_block_group_item { struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; - struct radix_tree_root *radix; u64 first_free; u64 last_alloc; u64 pinned; @@ -301,10 +298,9 @@ struct btrfs_fs_info { struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; - struct radix_tree_root block_group_radix; - struct radix_tree_root block_group_data_radix; struct radix_tree_root extent_ins_radix; struct extent_map_tree free_space_cache; + struct extent_map_tree block_group_cache; u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 09f4e694624d..aac7c82b0dce 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -436,8 +436,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); - INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); - INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); @@ -458,6 +456,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); extent_map_tree_init(&fs_info->free_space_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->block_group_cache, + fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 74cfbee2ff33..4bc639565d1c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -22,6 +22,10 @@ #include "print-tree.h" #include "transaction.h" +#define BLOCK_GROUP_DATA EXTENT_WRITEBACK +#define BLOCK_GROUP_METADATA EXTENT_UPTODATE +#define BLOCK_GROUP_DIRTY EXTENT_DIRTY + static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -127,25 +131,31 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 blocknr) { - struct btrfs_block_group_cache *block_group; + struct extent_map_tree *block_group_cache; + struct btrfs_block_group_cache *block_group = NULL; + u64 ptr; + u64 start; + u64 end; int ret; - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&block_group, - blocknr, 1); + block_group_cache = &info->block_group_cache; + ret = find_first_extent_bit(block_group_cache, + blocknr, &start, &end, + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA); if (ret) { - if (block_group->key.objectid <= blocknr && blocknr <= - block_group->key.objectid + block_group->key.offset) - return block_group; - } - ret = radix_tree_gang_lookup(&info->block_group_data_radix, - (void **)&block_group, - blocknr, 1); - if (ret) { - if (block_group->key.objectid <= blocknr && blocknr <= - block_group->key.objectid + block_group->key.offset) - return block_group; + return NULL; } + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + return NULL; + + block_group = (struct btrfs_block_group_cache *)ptr; + + + if (block_group->key.objectid <= blocknr && blocknr <= + block_group->key.objectid + block_group->key.offset) + return block_group; + return NULL; } @@ -173,7 +183,7 @@ again: last = end + 1; if (end + 1 - start < num) continue; - if (start + num > cache->key.objectid + cache->key.offset) + if (start + num >= cache->key.objectid + cache->key.offset) goto new_group; return start; } @@ -189,6 +199,7 @@ new_group: cache = btrfs_find_block_group(root, cache, last + cache->key.offset - 1, data, 0); *cache_ret = cache; + last = min(cache->key.objectid, last); goto again; } @@ -204,30 +215,32 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, *hint, u64 search_start, int data, int owner) { - struct btrfs_block_group_cache *cache[8]; + struct btrfs_block_group_cache *cache; + struct extent_map_tree *block_group_cache; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; - struct radix_tree_root *radix; - struct radix_tree_root *swap_radix; u64 used; u64 last = 0; u64 hint_last; - int i; + u64 start; + u64 end; + u64 free_check; + u64 ptr; + int bit; int ret; int full_search = 0; int factor = 8; int data_swap = 0; + block_group_cache = &info->block_group_cache; + if (!owner) factor = 5; - if (data) { - radix = &info->block_group_data_radix; - swap_radix = &info->block_group_radix; - } else { - radix = &info->block_group_radix; - swap_radix = &info->block_group_data_radix; - } + if (data) + bit = BLOCK_GROUP_DATA; + else + bit = BLOCK_GROUP_METADATA; if (search_start) { struct btrfs_block_group_cache *shint; @@ -246,12 +259,6 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, div_factor(hint->key.offset, factor)) { return hint; } - if (used >= div_factor(hint->key.offset, 8)) { - radix_tree_tag_clear(radix, - hint->key.objectid + - hint->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - } last = hint->key.offset * 3; if (hint->key.objectid >= last) last = max(search_start + hint->key.offset - 1, @@ -267,51 +274,29 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, last = hint_last; } - while(1) { - ret = radix_tree_gang_lookup_tag(radix, (void **)cache, - last, ARRAY_SIZE(cache), - BTRFS_BLOCK_GROUP_AVAIL); - if (!ret) - break; - for (i = 0; i < ret; i++) { - last = cache[i]->key.objectid + - cache[i]->key.offset; - used = btrfs_block_group_used(&cache[i]->item); - if (used + cache[i]->pinned < - div_factor(cache[i]->key.offset, factor)) { - found_group = cache[i]; - goto found; - } - if (used >= div_factor(cache[i]->key.offset, 8)) { - radix_tree_tag_clear(radix, - cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - } - } - cond_resched(); - } - last = hint_last; again: while(1) { - ret = radix_tree_gang_lookup(radix, (void **)cache, - last, ARRAY_SIZE(cache)); - if (!ret) + ret = find_first_extent_bit(block_group_cache, last, + &start, &end, bit); + if (ret) break; - for (i = 0; i < ret; i++) { - last = cache[i]->key.objectid + - cache[i]->key.offset; - used = btrfs_block_group_used(&cache[i]->item); - if (used + cache[i]->pinned < cache[i]->key.offset) { - found_group = cache[i]; - goto found; - } - if (used >= cache[i]->key.offset) { - radix_tree_tag_clear(radix, - cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - } + + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + break; + + cache = (struct btrfs_block_group_cache *)ptr; + last = cache->key.objectid + cache->key.offset; + used = btrfs_block_group_used(&cache->item); + + if (full_search) + free_check = cache->key.offset; + else + free_check = div_factor(cache->key.offset, factor); + + if (used + cache->pinned < free_check) { + found_group = cache; + goto found; } cond_resched(); } @@ -321,23 +306,11 @@ again: goto again; } if (!data_swap) { - struct radix_tree_root *tmp = radix; data_swap = 1; - radix = swap_radix; - swap_radix = tmp; + bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; last = search_start; goto again; } - if (!found_group) { - ret = radix_tree_gang_lookup(radix, - (void **)&found_group, 0, 1); - if (ret == 0) { - ret = radix_tree_gang_lookup(swap_radix, - (void **)&found_group, - 0, 1); - } - BUG_ON(ret != 1); - } found: return found_group; } @@ -538,68 +511,55 @@ fail: } -static int write_dirty_block_radix(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct radix_tree_root *radix) +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { - struct btrfs_block_group_cache *cache[8]; + struct extent_map_tree *block_group_cache; + struct btrfs_block_group_cache *cache; int ret; int err = 0; int werr = 0; - int i; struct btrfs_path *path; - unsigned long off = 0; + u64 last = 0; + u64 start; + u64 end; + u64 ptr; + block_group_cache = &root->fs_info->block_group_cache; path = btrfs_alloc_path(); if (!path) return -ENOMEM; while(1) { - ret = radix_tree_gang_lookup_tag(radix, (void **)cache, - off, ARRAY_SIZE(cache), - BTRFS_BLOCK_GROUP_DIRTY); - if (!ret) + ret = find_first_extent_bit(block_group_cache, last, + &start, &end, BLOCK_GROUP_DIRTY); + if (ret) break; - for (i = 0; i < ret; i++) { - err = write_one_cache_group(trans, root, - path, cache[i]); - /* - * if we fail to write the cache group, we want - * to keep it marked dirty in hopes that a later - * write will work - */ - if (err) { - werr = err; - off = cache[i]->key.objectid + - cache[i]->key.offset; - continue; - } - radix_tree_tag_clear(radix, cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_DIRTY); + last = end + 1; + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + break; + + cache = (struct btrfs_block_group_cache *)ptr; + err = write_one_cache_group(trans, root, + path, cache); + /* + * if we fail to write the cache group, we want + * to keep it marked dirty in hopes that a later + * write will work + */ + if (err) { + werr = err; + continue; } + clear_extent_bits(block_group_cache, start, end, + BLOCK_GROUP_DIRTY, GFP_NOFS); } btrfs_free_path(path); return werr; } -int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - int ret; - int ret2; - ret = write_dirty_block_radix(trans, root, - &root->fs_info->block_group_radix); - ret2 = write_dirty_block_radix(trans, root, - &root->fs_info->block_group_data_radix); - if (ret) - return ret; - if (ret2) - return ret2; - return 0; -} - static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num, int alloc, int mark_free, @@ -610,7 +570,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 total = num; u64 old_val; u64 block_in_group; - int ret; + u64 start; + u64 end; while(total) { cache = btrfs_lookup_block_group(info, blocknr); @@ -619,9 +580,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, } block_in_group = blocknr - cache->key.objectid; WARN_ON(block_in_group > cache->key.offset); - radix_tree_tag_set(cache->radix, cache->key.objectid + - cache->key.offset - 1, - BTRFS_BLOCK_GROUP_DIRTY); + start = cache->key.objectid; + end = start + cache->key.offset - 1; + set_extent_bits(&info->block_group_cache, start, end, + BLOCK_GROUP_DIRTY, GFP_NOFS); old_val = btrfs_block_group_used(&cache->item); num = min(total, cache->key.offset - block_in_group); @@ -630,25 +592,27 @@ static int update_block_group(struct btrfs_trans_handle *trans, cache->last_alloc = blocknr; if (cache->data != data && old_val < (cache->key.offset >> 1)) { - cache->data = data; - radix_tree_delete(cache->radix, - cache->key.objectid + - cache->key.offset - 1); + int bit_to_clear; + int bit_to_set; + cache->data = data; if (data) { - cache->radix = - &info->block_group_data_radix; + bit_to_clear = BLOCK_GROUP_DATA; + bit_to_set = BLOCK_GROUP_METADATA; cache->item.flags |= BTRFS_BLOCK_GROUP_DATA; } else { - cache->radix = &info->block_group_radix; + bit_to_clear = BLOCK_GROUP_METADATA; + bit_to_set = BLOCK_GROUP_DATA; cache->item.flags &= ~BTRFS_BLOCK_GROUP_DATA; } - ret = radix_tree_insert(cache->radix, - cache->key.objectid + - cache->key.offset - 1, - (void *)cache); + clear_extent_bits(&info->block_group_cache, + start, end, bit_to_clear, + GFP_NOFS); + set_extent_bits(&info->block_group_cache, + start, end, bit_to_set, + GFP_NOFS); } old_val += num; } else { @@ -660,13 +624,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, blocknr, blocknr + num - 1, GFP_NOFS); } - if (old_val < (cache->key.offset >> 1) && - old_val + num >= (cache->key.offset >> 1)) { - radix_tree_tag_set(cache->radix, - cache->key.objectid + - cache->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - } } btrfs_set_block_group_used(&cache->item, old_val); total -= num; @@ -730,11 +687,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, block_group->pinned--; if (gang[i] < block_group->last_alloc) block_group->last_alloc = gang[i]; - if (!block_group->data) { - set_extent_dirty(free_space_cache, - gang[i], gang[i], - GFP_NOFS); - } + set_extent_dirty(free_space_cache, + gang[i], gang[i], GFP_NOFS); } } } @@ -1059,8 +1013,8 @@ check_failed: ins->offset = search_end - ins->objectid; goto check_pending; } - btrfs_item_key_to_cpu(l, &key, slot); + if (key.objectid >= search_start && key.objectid > last_block && start_found) { if (last_block < search_start) @@ -1072,9 +1026,14 @@ check_failed: goto check_pending; } } - - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { + if (!start_found) { + last_block = key.objectid; + start_found = 1; + } goto next; + } + start_found = 1; last_block = key.objectid + key.offset; @@ -1120,9 +1079,6 @@ check_pending: } ins->offset = num_blocks; btrfs_free_path(path); - if (0 && ins->objectid != cached_search_start) { -printk("\tcached was %Lu found %Lu\n", cached_search_start, ins->objectid); - } return 0; new_group: @@ -1529,40 +1485,20 @@ out: return ret; } -static int free_block_group_radix(struct radix_tree_root *radix) -{ - int ret; - struct btrfs_block_group_cache *cache[8]; - int i; - - while(1) { - ret = radix_tree_gang_lookup(radix, (void **)cache, 0, - ARRAY_SIZE(cache)); - if (!ret) - break; - for (i = 0; i < ret; i++) { - radix_tree_delete(radix, cache[i]->key.objectid + - cache[i]->key.offset - 1); - kfree(cache[i]); - } - } - return 0; -} - int btrfs_free_block_groups(struct btrfs_fs_info *info) { - int ret; - int ret2; u64 start; u64 end; + int ret; - ret = free_block_group_radix(&info->block_group_radix); - ret2 = free_block_group_radix(&info->block_group_data_radix); - if (ret) - return ret; - if (ret2) - return ret2; - + while(1) { + ret = find_first_extent_bit(&info->block_group_cache, 0, + &start, &end, (unsigned int)-1); + if (ret) + break; + clear_extent_bits(&info->block_group_cache, start, + end, (unsigned int)-1, GFP_NOFS); + } while(1) { ret = find_first_extent_bit(&info->free_space_cache, 0, &start, &end, EXTENT_DIRTY); @@ -1579,17 +1515,20 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_path *path; int ret; int err = 0; + int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - struct radix_tree_root *radix; + struct extent_map_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; u64 group_size_blocks; - u64 used; + + block_group_cache = &info->block_group_cache; group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >> - root->fs_info->sb->s_blocksize_bits; + info->sb->s_blocksize_bits; + root = info->extent_root; key.objectid = 0; key.offset = group_size_blocks; @@ -1617,35 +1556,30 @@ int btrfs_read_block_groups(struct btrfs_root *root) read_extent_buffer(leaf, &cache->item, btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); - if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { - radix = &info->block_group_data_radix; - cache->data = 1; - } else { - radix = &info->block_group_radix; - cache->data = 0; - } - memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; cache->pinned = 0; cache->cached = 0; - cache->radix = radix; - key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); - ret = radix_tree_insert(radix, found_key.objectid + - found_key.offset - 1, - (void *)cache); - BUG_ON(ret); - used = btrfs_block_group_used(&cache->item); - if (used < div_factor(key.offset, 8)) { - radix_tree_tag_set(radix, found_key.objectid + - found_key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); + if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { + bit = BLOCK_GROUP_DATA; + cache->data = 1; + } else { + bit = BLOCK_GROUP_METADATA; + cache->data = 0; } + + /* use EXTENT_LOCKED to prevent merging */ + set_extent_bits(block_group_cache, found_key.objectid, + found_key.objectid + found_key.offset - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, found_key.objectid, + (u64)cache); + if (key.objectid >= btrfs_super_total_blocks(&info->super_copy)) break; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 5b7dbcaacd11..1b2f9e059de6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -574,7 +574,7 @@ out: return set; search_again: - if (start >= end) + if (start > end) goto out; write_unlock_irqrestore(&tree->lock, flags); if (mask & __GFP_WAIT) @@ -819,6 +819,21 @@ int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_dirty); +int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return set_extent_bit(tree, start, end, bits, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_bits); + +int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return clear_extent_bit(tree, start, end, bits, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_bits); + int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) { @@ -1138,7 +1153,6 @@ int set_state_private(struct extent_map_tree *tree, u64 start, u64 private) out: write_unlock_irq(&tree->lock); return ret; - } int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private) diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index d100f7c1476c..5a63b417034a 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -96,6 +96,10 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, void __init extent_map_init(void); void __exit extent_map_exit(void); int extent_clean_all_trees(struct extent_map_tree *tree); +int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); +int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, From 1a5bc167f6707542b79a55452075525620ed43f5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:26 -0400 Subject: [PATCH 0247/2985] Btrfs: Change the remaining radix trees used by extent-tree.c to extent_map trees Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 16 ++-- fs/btrfs/disk-io.c | 9 +- fs/btrfs/extent-tree.c | 193 +++++++++++++++-------------------------- fs/btrfs/extent_map.c | 5 +- fs/btrfs/extent_map.h | 4 +- fs/btrfs/transaction.c | 5 +- 6 files changed, 94 insertions(+), 138 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c6174b27fd13..256689551eb0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -283,10 +283,6 @@ struct btrfs_block_group_item { struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; - u64 first_free; - u64 last_alloc; - u64 pinned; - u64 last_prealloc; int data; int cached; }; @@ -296,11 +292,13 @@ struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; - struct radix_tree_root pending_del_radix; - struct radix_tree_root pinned_radix; - struct radix_tree_root extent_ins_radix; + struct extent_map_tree free_space_cache; struct extent_map_tree block_group_cache; + struct extent_map_tree pinned_extents; + struct extent_map_tree pending_del; + struct extent_map_tree extent_ins; + u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; @@ -926,7 +924,7 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, /* extent-tree.c */ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy); +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 blocknr); @@ -949,7 +947,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *unpin_radix); + struct extent_map_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index aac7c82b0dce..2b86a1d779b7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -432,9 +432,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) err = -ENOMEM; goto fail; } - init_bit_radix(&fs_info->pinned_radix); - init_bit_radix(&fs_info->pending_del_radix); - init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); @@ -458,6 +455,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_mapping, GFP_NOFS); extent_map_tree_init(&fs_info->block_group_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->pinned_extents, + fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->pending_del, + fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->extent_ins, + fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4bc639565d1c..477466d167a4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -188,13 +188,13 @@ again: return start; } out: - return max(cache->last_alloc, search_start); + return search_start; new_group: cache = btrfs_lookup_block_group(root->fs_info, last + cache->key.offset - 1); if (!cache) { - return max((*cache_ret)->last_alloc, search_start); + return search_start; } cache = btrfs_find_block_group(root, cache, last + cache->key.offset - 1, data, 0); @@ -247,16 +247,14 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, shint = btrfs_lookup_block_group(info, search_start); if (shint && shint->data == data) { used = btrfs_block_group_used(&shint->item); - if (used + shint->pinned < - div_factor(shint->key.offset, factor)) { + if (used < div_factor(shint->key.offset, factor)) { return shint; } } } if (hint && hint->data == data) { used = btrfs_block_group_used(&hint->item); - if (used + hint->pinned < - div_factor(hint->key.offset, factor)) { + if (used < div_factor(hint->key.offset, factor)) { return hint; } last = hint->key.offset * 3; @@ -294,7 +292,7 @@ again: else free_check = div_factor(cache->key.offset, factor); - if (used + cache->pinned < free_check) { + if (used < free_check) { found_group = cache; goto found; } @@ -505,8 +503,6 @@ fail: return ret; if (pending_ret) return pending_ret; - if (cache->data) - cache->last_alloc = cache->first_free; return 0; } @@ -588,8 +584,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val = btrfs_block_group_used(&cache->item); num = min(total, cache->key.offset - block_in_group); if (alloc) { - if (blocknr > cache->last_alloc) - cache->last_alloc = blocknr; if (cache->data != data && old_val < (cache->key.offset >> 1)) { int bit_to_clear; @@ -617,8 +611,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val += num; } else { old_val -= num; - if (blocknr < cache->first_free) - cache->first_free = blocknr; if (mark_free) { set_extent_dirty(&info->free_space_cache, blocknr, blocknr + num - 1, @@ -632,65 +624,47 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } -int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy) +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) { - unsigned long gang[8]; u64 last = 0; - struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; + u64 start; + u64 end; + struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents; int ret; - int i; while(1) { - ret = find_first_radix_bit(pinned_radix, gang, last, - ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(pinned_extents, last, + &start, &end, EXTENT_DIRTY); + if (ret) break; - for (i = 0 ; i < ret; i++) { - set_radix_bit(copy, gang[i]); - last = gang[i] + 1; - } + set_extent_dirty(copy, start, end, GFP_NOFS); + last = end + 1; } - ret = find_first_radix_bit(&root->fs_info->extent_ins_radix, gang, 0, - ARRAY_SIZE(gang)); - WARN_ON(ret); return 0; } int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct radix_tree_root *unpin_radix) + struct extent_map_tree *unpin) { - unsigned long gang[8]; - struct btrfs_block_group_cache *block_group; - u64 first = 0; + u64 start; + u64 end; int ret; - int i; - struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; + struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents; struct extent_map_tree *free_space_cache; free_space_cache = &root->fs_info->free_space_cache; while(1) { - ret = find_first_radix_bit(unpin_radix, gang, 0, - ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(unpin, 0, &start, &end, + EXTENT_DIRTY); + if (ret) break; - if (!first) - first = gang[0]; - for (i = 0; i < ret; i++) { - clear_radix_bit(pinned_radix, gang[i]); - clear_radix_bit(unpin_radix, gang[i]); - block_group = btrfs_lookup_block_group(root->fs_info, - gang[i]); - if (block_group) { - WARN_ON(block_group->pinned == 0); - block_group->pinned--; - if (gang[i] < block_group->last_alloc) - block_group->last_alloc = gang[i]; - set_extent_dirty(free_space_cache, - gang[i], gang[i], GFP_NOFS); - } - } + + clear_extent_dirty(pinned_extents, start, end, + GFP_NOFS); + clear_extent_dirty(unpin, start, end, GFP_NOFS); + set_extent_dirty(free_space_cache, start, end, GFP_NOFS); } return 0; } @@ -700,39 +674,36 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct { struct btrfs_key ins; struct btrfs_extent_item extent_item; - int i; int ret; - int err; - unsigned long gang[8]; + int err = 0; + u64 start; + u64 end; struct btrfs_fs_info *info = extent_root->fs_info; btrfs_set_stack_extent_refs(&extent_item, 1); - ins.offset = 1; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); btrfs_set_stack_extent_owner(&extent_item, extent_root->root_key.objectid); while(1) { - ret = find_first_radix_bit(&info->extent_ins_radix, gang, 0, - ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(&info->extent_ins, 0, &start, + &end, EXTENT_LOCKED); + if (ret) break; - for (i = 0; i < ret; i++) { - ins.objectid = gang[i]; - err = btrfs_insert_item(trans, extent_root, &ins, - &extent_item, - sizeof(extent_item)); - clear_radix_bit(&info->extent_ins_radix, gang[i]); - WARN_ON(err); - } + ins.objectid = start; + ins.offset = end + 1 - start; + err = btrfs_insert_item(trans, extent_root, &ins, + &extent_item, sizeof(extent_item)); + clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, + GFP_NOFS); } return 0; } static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) { - int err; + int err = 0; struct extent_buffer *buf; if (!pending) { @@ -748,16 +719,11 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) } free_extent_buffer(buf); } - err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); - if (!err) { - struct btrfs_block_group_cache *cache; - cache = btrfs_lookup_block_group(root->fs_info, - blocknr); - if (cache) - cache->pinned++; - } + set_extent_dirty(&root->fs_info->pinned_extents, + blocknr, blocknr, GFP_NOFS); } else { - err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); + set_extent_bits(&root->fs_info->pending_del, + blocknr, blocknr, EXTENT_LOCKED, GFP_NOFS); } BUG_ON(err < 0); return 0; @@ -840,43 +806,28 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root) { int ret; - int wret; int err = 0; - unsigned long gang[4]; - int i; - struct radix_tree_root *pending_radix; - struct radix_tree_root *pinned_radix; - struct btrfs_block_group_cache *cache; + u64 start; + u64 end; + struct extent_map_tree *pending_del; + struct extent_map_tree *pinned_extents; - pending_radix = &extent_root->fs_info->pending_del_radix; - pinned_radix = &extent_root->fs_info->pinned_radix; + pending_del = &extent_root->fs_info->pending_del; + pinned_extents = &extent_root->fs_info->pinned_extents; while(1) { - ret = find_first_radix_bit(pending_radix, gang, 0, - ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(pending_del, 0, &start, &end, + EXTENT_LOCKED); + if (ret) break; - for (i = 0; i < ret; i++) { - wret = set_radix_bit(pinned_radix, gang[i]); - if (wret == 0) { - cache = - btrfs_lookup_block_group(extent_root->fs_info, - gang[i]); - if (cache) - cache->pinned++; - } - if (wret < 0) { - printk(KERN_CRIT "set_radix_bit, err %d\n", - wret); - BUG_ON(wret < 0); - } - wret = clear_radix_bit(pending_radix, gang[i]); - BUG_ON(wret); - wret = __free_extent(trans, extent_root, - gang[i], 1, 0, 0); - if (wret) - err = wret; - } + + set_extent_dirty(pinned_extents, start, end, GFP_NOFS); + clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, + GFP_NOFS); + ret = __free_extent(trans, extent_root, + start, end + 1 - start, 0, 0); + if (ret) + err = ret; } return err; } @@ -920,7 +871,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 hole_size = 0; int slot = 0; u64 last_block = 0; - u64 test_block; u64 orig_search_start = search_start; int start_found; struct extent_buffer *l; @@ -1059,13 +1009,15 @@ check_pending: if (ins->objectid + num_blocks >= search_end) goto enospc; - for (test_block = ins->objectid; - test_block < ins->objectid + num_blocks; test_block++) { - if (test_radix_bit(&info->pinned_radix, test_block) || - test_radix_bit(&info->extent_ins_radix, test_block)) { - search_start = test_block + 1; - goto new_group; - } + if (test_range_bit(&info->extent_ins, ins->objectid, + ins->objectid + num_blocks -1, EXTENT_LOCKED, 0)) { + search_start = ins->objectid + num_blocks; + goto new_group; + } + if (test_range_bit(&info->pinned_extents, ins->objectid, + ins->objectid + num_blocks -1, EXTENT_DIRTY, 0)) { + search_start = ins->objectid + num_blocks; + goto new_group; } if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start && ins->objectid < exclude_start + exclude_nr)) { @@ -1156,7 +1108,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (root == extent_root) { BUG_ON(num_blocks != 1); - set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid); + set_extent_bits(&root->fs_info->extent_ins, ins->objectid, + ins->objectid + ins->offset - 1, + EXTENT_LOCKED, GFP_NOFS); goto update_block; } @@ -1557,9 +1511,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); memcpy(&cache->key, &found_key, sizeof(found_key)); - cache->last_alloc = cache->key.objectid; - cache->first_free = cache->key.objectid; - cache->pinned = 0; cache->cached = 0; key.objectid = found_key.objectid + found_key.offset; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 1b2f9e059de6..e081558d52ff 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1188,8 +1188,8 @@ out: * has the bits set. Otherwise, 1 is returned if any bit in the * range is found set. */ -static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int filled) +int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, + int bits, int filled) { struct extent_state *state = NULL; struct rb_node *node; @@ -1222,6 +1222,7 @@ static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, read_unlock_irq(&tree->lock); return bitset; } +EXPORT_SYMBOL(test_range_bit); /* * helper function to set a given page up to date if all the diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 5a63b417034a..75dc600dc0f9 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -95,7 +95,9 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent); void __init extent_map_init(void); void __exit extent_map_exit(void); -int extent_clean_all_trees(struct extent_map_tree *tree); + +int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, + int bits, int filled); int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, int bits, gfp_t mask); int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 372b61f5733f..55289b71056e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -481,11 +481,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; - struct radix_tree_root pinned_copy; + struct extent_map_tree pinned_copy; DEFINE_WAIT(wait); int ret; - init_bit_radix(&pinned_copy); + extent_map_tree_init(&pinned_copy, + root->fs_info->btree_inode->i_mapping, GFP_NOFS); INIT_LIST_HEAD(&dirty_fs_roots); mutex_lock(&root->fs_info->trans_mutex); From db94535db75e67fab12ccbb7f5ee548e33fed891 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:53 -0400 Subject: [PATCH 0248/2985] Btrfs: Allow tree blocks larger than the page size Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 319 +++++++++++++++++++++++++++++------------ fs/btrfs/ctree.h | 112 +++++++++------ fs/btrfs/disk-io.c | 92 +++++++----- fs/btrfs/disk-io.h | 9 +- fs/btrfs/extent-tree.c | 285 ++++++++++++++++++------------------ fs/btrfs/extent_map.c | 91 ++++++++---- fs/btrfs/extent_map.h | 10 +- fs/btrfs/file-item.c | 10 +- fs/btrfs/file.c | 126 ++++++++-------- fs/btrfs/inode.c | 90 ++++++------ fs/btrfs/print-tree.c | 23 +-- fs/btrfs/super.c | 6 +- fs/btrfs/sysfs.c | 5 +- fs/btrfs/transaction.c | 39 ++--- fs/btrfs/tree-defrag.c | 18 ++- 15 files changed, 738 insertions(+), 497 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 50ef351ef6b3..34de83630ae9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -83,16 +83,15 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(root->ref_cows && trans->transid != root->last_trans); - cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); + cow = btrfs_alloc_free_block(trans, root, buf->len, + search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); cow->alloc_addr = (unsigned long)__builtin_return_address(0); - if (buf->len != root->sectorsize || cow->len != root->sectorsize) - WARN_ON(1); copy_extent_buffer(cow, buf, 0, 0, cow->len); - btrfs_set_header_blocknr(cow, extent_buffer_blocknr(cow)); + btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); btrfs_set_header_owner(cow, root->root_key.objectid); @@ -110,16 +109,16 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, root->node = cow; extent_buffer_get(cow); if (buf != root->commit_root) { - btrfs_free_extent(trans, root, - extent_buffer_blocknr(buf), 1, 1); + btrfs_free_extent(trans, root, buf->start, + buf->len, 1); } free_extent_buffer(buf); } else { btrfs_set_node_blockptr(parent, parent_slot, - extent_buffer_blocknr(cow)); + cow->start); btrfs_mark_buffer_dirty(parent); WARN_ON(btrfs_header_generation(parent) != trans->transid); - btrfs_free_extent(trans, root, extent_buffer_blocknr(buf),1,1); + btrfs_free_extent(trans, root, buf->start, buf->len, 1); } free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); @@ -149,13 +148,14 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } - search_start = extent_buffer_blocknr(buf) & ~((u64)65535); + search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); (*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0); return ret; } +#if 0 static int close_blocks(u64 blocknr, u64 other) { if (blocknr < other && other - blocknr < 8) @@ -165,7 +165,6 @@ static int close_blocks(u64 blocknr, u64 other) return 0; } -#if 0 static int should_defrag_leaf(struct extent_buffer *eb) { return 0; @@ -355,7 +354,7 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(memcmp(&parent_key, &node_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(node)); + btrfs_header_bytenr(node)); } BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != 0) { @@ -398,7 +397,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(memcmp(&parent_key, &leaf_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(leaf)); + btrfs_header_bytenr(leaf)); } #if 0 for (i = 0; nritems > 1 && i < nritems - 2; i++) { @@ -467,14 +466,16 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { +#if 0 struct extent_buffer *buf = path->nodes[level]; if (memcmp_extent_buffer(buf, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(buf), BTRFS_FSID_SIZE)) { printk("warning bad block %Lu\n", buf->start); - BUG(); + return 1; } +#endif if (level == 0) return check_leaf(root, path, level); return check_node(root, path, level); @@ -585,7 +586,8 @@ static struct extent_buffer *read_node_slot(struct btrfs_root *root, return NULL; if (slot >= btrfs_header_nritems(parent)) return NULL; - return read_tree_block(root, btrfs_node_blockptr(parent, slot)); + return read_tree_block(root, btrfs_node_blockptr(parent, slot), + btrfs_level_size(root, btrfs_header_level(parent) - 1)); } static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root @@ -618,7 +620,6 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root */ if (!parent) { struct extent_buffer *child; - u64 blocknr = extent_buffer_blocknr(mid); if (btrfs_header_nritems(mid) != 1) return 0; @@ -632,9 +633,10 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root wait_on_tree_block_writeback(root, mid); /* once for the path */ free_extent_buffer(mid); + ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1); /* once for the root ptr */ free_extent_buffer(mid); - return btrfs_free_extent(trans, root, blocknr, 1, 1); + return ret; } if (btrfs_header_nritems(mid) > BTRFS_NODEPTRS_PER_BLOCK(root) / 4) @@ -680,7 +682,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0 && wret != -ENOSPC) ret = wret; if (btrfs_header_nritems(right) == 0) { - u64 blocknr = extent_buffer_blocknr(right); + u64 bytenr = right->start; + u32 blocksize = right->len; + clean_tree_block(trans, root, right); wait_on_tree_block_writeback(root, right); free_extent_buffer(right); @@ -689,7 +693,8 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root 1); if (wret) ret = wret; - wret = btrfs_free_extent(trans, root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, bytenr, + blocksize, 1); if (wret) ret = wret; } else { @@ -719,7 +724,8 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } if (btrfs_header_nritems(mid) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = extent_buffer_blocknr(mid); + u64 bytenr = mid->start; + u32 blocksize = mid->len; clean_tree_block(trans, root, mid); wait_on_tree_block_writeback(root, mid); free_extent_buffer(mid); @@ -727,7 +733,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) ret = wret; - wret = btrfs_free_extent(trans, root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1); if (wret) ret = wret; } else { @@ -830,7 +836,6 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, path->slots[level] = orig_slot; free_extent_buffer(left); } - check_node(root, path, level); return 0; } free_extent_buffer(left); @@ -874,12 +879,10 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, } else { free_extent_buffer(right); } - check_node(root, path, level); return 0; } free_extent_buffer(right); } - check_node(root, path, level); return 1; } @@ -889,19 +892,23 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { + return; +#if 0 struct extent_buffer *node; int i; u32 nritems; - u64 blocknr; + u64 bytenr; u64 search; u64 cluster_start; int ret; int nread = 0; int direction = path->reada; + int level; struct radix_tree_root found; unsigned long gang[8]; struct extent_buffer *eb; + if (level == 0) return; @@ -918,8 +925,9 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, init_bit_radix(&found); nritems = btrfs_header_nritems(node); + level = btrfs_header_level(node) - 1; for (i = slot; i < nritems; i++) { - blocknr = btrfs_node_blockptr(node, i); + bytenr = btrfs_node_blockptr(node, i); set_radix_bit(&found, blocknr); } if (direction > 0) { @@ -944,6 +952,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, } } } +#endif } /* * look for key in the tree. path is filled in with nodes along the way @@ -963,7 +972,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow) { struct extent_buffer *b; - u64 blocknr; + u64 bytenr; int slot; int ret; int level; @@ -1027,10 +1036,11 @@ again: /* this is only true while dropping a snapshot */ if (level == lowest_level) break; - blocknr = btrfs_node_blockptr(b, slot); + bytenr = btrfs_node_blockptr(b, slot); if (should_reada) reada_for_search(root, p, level, slot); - b = read_tree_block(root, btrfs_node_blockptr(b, slot)); + b = read_tree_block(root, bytenr, + btrfs_level_size(root, level - 1)); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -1193,14 +1203,14 @@ static int insert_new_root(struct btrfs_trans_handle *trans, BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - c = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(root->node), 0); + c = btrfs_alloc_free_block(trans, root, root->nodesize, + root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); memset_extent_buffer(c, 0, 0, root->nodesize); btrfs_set_header_nritems(c, 1); btrfs_set_header_level(c, level); - btrfs_set_header_blocknr(c, extent_buffer_blocknr(c)); + btrfs_set_header_bytenr(c, c->start); btrfs_set_header_generation(c, trans->transid); btrfs_set_header_owner(c, root->root_key.objectid); lower = path->nodes[level-1]; @@ -1213,7 +1223,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, else btrfs_node_key(lower, &lower_key, 0); btrfs_set_node_key(c, &lower_key, 0); - btrfs_set_node_blockptr(c, 0, extent_buffer_blocknr(lower)); + btrfs_set_node_blockptr(c, 0, lower->start); btrfs_mark_buffer_dirty(c); @@ -1237,7 +1247,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, */ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key - *key, u64 blocknr, int slot, int level) + *key, u64 bytenr, int slot, int level) { struct extent_buffer *lower; int nritems; @@ -1256,10 +1266,9 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root (nritems - slot) * sizeof(struct btrfs_key_ptr)); } btrfs_set_node_key(lower, key, slot); - btrfs_set_node_blockptr(lower, slot, blocknr); + btrfs_set_node_blockptr(lower, slot, bytenr); btrfs_set_header_nritems(lower, nritems + 1); btrfs_mark_buffer_dirty(lower); - check_node(root, path, level); return 0; } @@ -1300,14 +1309,14 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(c); - split = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(c), 0); + split = btrfs_alloc_free_block(trans, root, root->nodesize, + c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); btrfs_set_header_flags(split, btrfs_header_flags(c)); btrfs_set_header_level(split, btrfs_header_level(c)); - btrfs_set_header_blocknr(split, extent_buffer_blocknr(split)); + btrfs_set_header_bytenr(split, split->start); btrfs_set_header_generation(split, trans->transid); btrfs_set_header_owner(split, root->root_key.objectid); write_extent_buffer(split, root->fs_info->fsid, @@ -1328,8 +1337,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(split); btrfs_node_key(split, &disk_key, 0); - wret = insert_ptr(trans, root, path, &disk_key, - extent_buffer_blocknr(split), + wret = insert_ptr(trans, root, path, &disk_key, split->start, path->slots[level + 1] + 1, level + 1); if (wret) @@ -1407,6 +1415,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root u32 left_nritems; u32 right_nritems; u32 data_end; + u32 this_item_size; int ret; slot = path->slots[1]; @@ -1417,7 +1426,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; - right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1)); + right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1), + root->leafsize); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(right); @@ -1445,13 +1455,27 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root for (i = left_nritems - 1; i >= 1; i--) { item = btrfs_item_nr(left, i); + if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(left, item) + sizeof(*item) + push_space > - free_space) + + if (!left->map_token) { + map_extent_buffer(left, (unsigned long)item, + sizeof(struct btrfs_item), + &left->map_token, &left->kaddr, + &left->map_start, &left->map_len, + KM_USER1); + } + + this_item_size = btrfs_item_size(left, item); + if (this_item_size + sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(left, item) + sizeof(*item); + push_space += this_item_size + sizeof(*item); + } + if (left->map_token) { + unmap_extent_buffer(left, left->map_token, KM_USER1); + left->map_token = NULL; } if (push_items == 0) { @@ -1493,11 +1517,23 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root right_nritems += push_items; btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); + for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); - btrfs_set_item_offset(right, item, push_space - - btrfs_item_size(right, item)); - push_space = btrfs_item_offset(right, item); + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + push_space -= btrfs_item_size(right, item); + btrfs_set_item_offset(right, item, push_space); + } + + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; } left_nritems -= push_items; btrfs_set_header_nritems(left, left_nritems); @@ -1518,8 +1554,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root } else { free_extent_buffer(right); } - if (path->nodes[1]) - check_node(root, path, 1); return 0; } /* @@ -1542,6 +1576,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root u32 right_nritems; int ret = 0; int wret; + u32 this_item_size; + u32 old_left_item_size; slot = path->slots[1]; if (slot == 0) @@ -1550,7 +1586,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1], - slot - 1)); + slot - 1), root->leafsize); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(left); @@ -1579,14 +1615,30 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i < right_nritems - 1; i++) { item = btrfs_item_nr(right, i); + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(right, item) + sizeof(*item) + push_space > - free_space) + + this_item_size = btrfs_item_size(right, item); + if (this_item_size + sizeof(*item) + push_space > free_space) break; + push_items++; - push_space += btrfs_item_size(right, item) + sizeof(*item); + push_space += this_item_size + sizeof(*item); } + + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; + } + if (push_items == 0) { free_extent_buffer(left); return 1; @@ -1611,15 +1663,28 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root old_left_nritems = btrfs_header_nritems(left); BUG_ON(old_left_nritems < 0); + old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { u32 ioff; + item = btrfs_item_nr(left, i); + if (!left->map_token) { + map_extent_buffer(left, (unsigned long)item, + sizeof(struct btrfs_item), + &left->map_token, &left->kaddr, + &left->map_start, &left->map_len, + KM_USER1); + } + ioff = btrfs_item_offset(left, item); btrfs_set_item_offset(left, item, - ioff - (BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset_nr(left, old_left_nritems - 1))); + ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); } btrfs_set_header_nritems(left, old_left_nritems + push_items); + if (left->map_token) { + unmap_extent_buffer(left, left->map_token, KM_USER1); + left->map_token = NULL; + } /* fixup right node */ push_space = btrfs_item_offset_nr(right, push_items - 1) - @@ -1640,9 +1705,21 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); - btrfs_set_item_offset(right, item, push_space - - btrfs_item_size(right, item)); - push_space = btrfs_item_offset(right, item); + + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + + push_space = push_space - btrfs_item_size(right, item); + btrfs_set_item_offset(right, item, push_space); + } + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; } btrfs_mark_buffer_dirty(left); @@ -1664,8 +1741,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); - if (path->nodes[1]) - check_node(root, path, 1); return ret; } @@ -1718,13 +1793,13 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; - right = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(l), 0); + right = btrfs_alloc_free_block(trans, root, root->leafsize, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); - btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_bytenr(right, right->start); btrfs_set_header_generation(right, trans->transid); btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); @@ -1740,8 +1815,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, - &disk_key, - extent_buffer_blocknr(right), + &disk_key, right->start, path->slots[1] + 1, 1); if (wret) ret = wret; @@ -1762,7 +1836,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - extent_buffer_blocknr(right), + right->start, path->slots[1], 1); if (wret) ret = wret; @@ -1799,15 +1873,30 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i < nritems; i++) { struct btrfs_item *item = btrfs_item_nr(right, i); - u32 ioff = btrfs_item_offset(right, item); + u32 ioff; + + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + + ioff = btrfs_item_offset(right, item); btrfs_set_item_offset(right, item, ioff + rt_data_off); } + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; + } + btrfs_set_header_nritems(l, mid); ret = 0; btrfs_item_key(right, &disk_key, 0); - wret = insert_ptr(trans, root, path, &disk_key, - extent_buffer_blocknr(right), path->slots[1] + 1, 1); + wret = insert_ptr(trans, root, path, &disk_key, right->start, + path->slots[1] + 1, 1); if (wret) ret = wret; @@ -1824,19 +1913,17 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root free_extent_buffer(right); BUG_ON(path->slots[0] < 0); - check_node(root, path, 1); - check_leaf(root, path, 0); if (!double_split) return ret; - right = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(l), 0); + right = btrfs_alloc_free_block(trans, root, root->leafsize, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); - btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_bytenr(right, right->start); btrfs_set_header_generation(right, trans->transid); btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); @@ -1847,8 +1934,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, - &disk_key, - extent_buffer_blocknr(right), + &disk_key, right->start, path->slots[1], 1); if (wret) ret = wret; @@ -1860,8 +1946,6 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[0] = 0; - check_node(root, path, 1); - check_leaf(root, path, 0); return ret; } @@ -1904,9 +1988,24 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(leaf, i); + + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } + ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + size_diff); } + + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + /* shift the data */ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + size_diff, btrfs_leaf_data(leaf) + @@ -1921,7 +2020,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - check_leaf(root, path, 0); return ret; } @@ -1963,10 +2061,23 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(leaf, i); + + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - data_size); } + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + /* shift the data */ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + @@ -1983,7 +2094,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - check_leaf(root, path, 0); return ret; } @@ -2046,12 +2156,26 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ + WARN_ON(leaf->map_token); for (i = slot; i < nritems; i++) { u32 ioff; + item = btrfs_item_nr(leaf, i); + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } + ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - data_size); } + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } /* shift the items */ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), @@ -2081,7 +2205,6 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - check_leaf(root, path, 0); out: return ret; } @@ -2186,10 +2309,24 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, for (i = slot + 1; i < nritems; i++) { u32 ioff; + item = btrfs_item_nr(leaf, i); + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + dsize); } + + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), btrfs_item_nr_offset(slot + 1), sizeof(struct btrfs_item) * @@ -2209,8 +2346,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - extent_buffer_blocknr(leaf), - 1, 1); + leaf->start, leaf->len, 1); if (wret) ret = wret; } @@ -2247,7 +2383,8 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, } if (btrfs_header_nritems(leaf) == 0) { - u64 blocknr = extent_buffer_blocknr(leaf); + u64 bytenr = leaf->start; + u32 blocksize = leaf->len; clean_tree_block(trans, root, leaf); wait_on_tree_block_writeback(root, leaf); @@ -2257,8 +2394,8 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = wret; free_extent_buffer(leaf); - wret = btrfs_free_extent(trans, root, blocknr, - 1, 1); + wret = btrfs_free_extent(trans, root, bytenr, + blocksize, 1); if (wret) ret = wret; } else { @@ -2281,7 +2418,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) { int slot; int level = 1; - u64 blocknr; + u64 bytenr; struct extent_buffer *c; struct extent_buffer *next = NULL; @@ -2296,14 +2433,15 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) continue; } - blocknr = btrfs_node_blockptr(c, slot); + bytenr = btrfs_node_blockptr(c, slot); if (next) free_extent_buffer(next); if (path->reada) reada_for_search(root, path, level, slot); - next = read_tree_block(root, blocknr); + next = read_tree_block(root, bytenr, + btrfs_level_size(root, level -1)); break; } path->slots[level] = slot; @@ -2317,7 +2455,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) break; if (path->reada) reada_for_search(root, path, level, 0); - next = read_tree_block(root, btrfs_node_blockptr(next, 0)); + next = read_tree_block(root, btrfs_node_blockptr(next, 0), + btrfs_level_size(root, level - 1)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 256689551eb0..4d05456ec32f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -96,7 +96,7 @@ struct btrfs_key { struct btrfs_header { u8 csum[BTRFS_CSUM_SIZE]; u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ - __le64 blocknr; /* which block this node is supposed to live in */ + __le64 bytenr; /* which block this node is supposed to live in */ __le64 generation; __le64 owner; __le32 nritems; @@ -122,16 +122,17 @@ struct btrfs_super_block { u8 csum[BTRFS_CSUM_SIZE]; /* the first 3 fields must match struct btrfs_header */ u8 fsid[16]; /* FS specific uuid */ - __le64 blocknr; /* this block number */ + __le64 bytenr; /* this block number */ __le64 magic; __le64 generation; __le64 root; - __le64 total_blocks; - __le64 blocks_used; + __le64 total_bytes; + __le64 bytes_used; __le64 root_dir_objectid; __le32 sectorsize; __le32 nodesize; __le32 leafsize; + u8 root_level; } __attribute__ ((__packed__)); /* @@ -231,13 +232,14 @@ struct btrfs_dir_item { struct btrfs_root_item { struct btrfs_inode_item inode; __le64 root_dirid; - __le64 blocknr; - __le64 block_limit; - __le64 blocks_used; + __le64 bytenr; + __le64 byte_limit; + __le64 bytes_used; __le32 flags; __le32 refs; struct btrfs_disk_key drop_progress; u8 drop_level; + u8 level; } __attribute__ ((__packed__)); #define BTRFS_FILE_EXTENT_REG 0 @@ -250,8 +252,8 @@ struct btrfs_file_extent_item { * disk space consumed by the extent, checksum blocks are included * in these numbers */ - __le64 disk_blocknr; - __le64 disk_num_blocks; + __le64 disk_bytenr; + __le64 disk_num_bytes; /* * the logical offset in file blocks (no csums) * this extent record is for. This allows a file extent to point @@ -263,7 +265,7 @@ struct btrfs_file_extent_item { /* * the logical number of file blocks (no csums included) */ - __le64 num_blocks; + __le64 num_bytes; } __attribute__ ((__packed__)); struct btrfs_csum_item { @@ -429,6 +431,7 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb, \ int err; \ char *map_token; \ char *kaddr; \ + int unmap_on_exit = (eb->map_token == NULL); \ unsigned long map_start; \ unsigned long map_len; \ unsigned long offset = (unsigned long)s + \ @@ -436,12 +439,13 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb, \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ u##bits res = le##bits##_to_cpu(*tmp); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ return res; \ } else { \ __le##bits res; \ @@ -457,17 +461,19 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ char *kaddr; \ unsigned long map_start; \ unsigned long map_len; \ + int unmap_on_exit = (eb->map_token == NULL); \ unsigned long offset = (unsigned long)s + \ offsetof(type, member); \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ *tmp = cpu_to_le##bits(val); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ } else { \ val = cpu_to_le##bits(val); \ write_eb_member(eb, s, type, member, &val); \ @@ -483,15 +489,17 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ unsigned long map_start; \ unsigned long map_len; \ unsigned long offset = offsetof(type, member); \ + int unmap_on_exit = (eb->map_token == NULL); \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ u##bits res = le##bits##_to_cpu(*tmp); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ return res; \ } else { \ __le##bits res; \ @@ -508,15 +516,17 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ unsigned long map_start; \ unsigned long map_len; \ unsigned long offset = offsetof(type, member); \ + int unmap_on_exit = (eb->map_token == NULL); \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ *tmp = cpu_to_le##bits(val); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ } else { \ val = cpu_to_le##bits(val); \ write_eb_member(eb, NULL, type, member, &val); \ @@ -769,7 +779,7 @@ static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val) } /* struct btrfs_header */ -BTRFS_SETGET_HEADER_FUNCS(header_blocknr, struct btrfs_header, blocknr, 64); +BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64); BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, generation, 64); BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); @@ -817,24 +827,28 @@ static inline int btrfs_is_leaf(struct extent_buffer *eb) /* struct btrfs_root_item */ BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); -BTRFS_SETGET_FUNCS(disk_root_blocknr, struct btrfs_root_item, blocknr, 64); +BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64); +BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8); -BTRFS_SETGET_STACK_FUNCS(root_blocknr, struct btrfs_root_item, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8); BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32); -BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, blocks_used, 64); -BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, block_limit, 64); +BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); /* struct btrfs_super_block */ -BTRFS_SETGET_STACK_FUNCS(super_blocknr, struct btrfs_super_block, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, generation, 64); BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); -BTRFS_SETGET_STACK_FUNCS(super_total_blocks, struct btrfs_super_block, - total_blocks, 64); -BTRFS_SETGET_STACK_FUNCS(super_blocks_used, struct btrfs_super_block, - blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, + root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, + bytes_used, 64); BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, sectorsize, 32); BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, @@ -856,33 +870,33 @@ static inline unsigned long btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) { unsigned long offset = (unsigned long)e; - offset += offsetof(struct btrfs_file_extent_item, disk_blocknr); + offset += offsetof(struct btrfs_file_extent_item, disk_bytenr); return offset; } static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) { - return offsetof(struct btrfs_file_extent_item, disk_blocknr) + datasize; + return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; } static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, struct btrfs_item *e) { unsigned long offset; - offset = offsetof(struct btrfs_file_extent_item, disk_blocknr); + offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); return btrfs_item_size(eb, e) - offset; } -BTRFS_SETGET_FUNCS(file_extent_disk_blocknr, struct btrfs_file_extent_item, - disk_blocknr, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, + disk_bytenr, 64); BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, generation, 64); -BTRFS_SETGET_FUNCS(file_extent_disk_num_blocks, struct btrfs_file_extent_item, - disk_num_blocks, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item, + disk_num_bytes, 64); BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, offset, 64); -BTRFS_SETGET_FUNCS(file_extent_num_blocks, struct btrfs_file_extent_item, - num_blocks, 64); +BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, + num_bytes, 64); static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { @@ -906,6 +920,12 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, return 0; } +static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { + if (level == 0) + return root->leafsize; + return root->nodesize; +} + /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -927,7 +947,7 @@ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, - u64 blocknr); + u64 bytenr); struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, @@ -935,22 +955,22 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint, - u64 empty_size); + struct btrfs_root *root, u32 size, + u64 hint, u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 empty_size, u64 search_start, + u64 num_bytes, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin); + *root, u64 bytenr, u64 num_bytes, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_map_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num_blocks); + u64 bytenr, u64 num_bytes); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); @@ -1040,12 +1060,12 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, u64 offset, - u64 disk_num_blocks, - u64 num_blocks); + u64 disk_num_bytes, + u64 num_bytes); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, - u64 blocknr, int mod); + u64 bytenr, int mod); int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2b86a1d779b7..fad9298c6962 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,6 +28,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "print-tree.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -43,26 +44,25 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) #endif struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 blocknr) + u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - blocknr * root->sectorsize, - root->sectorsize, GFP_NOFS); + bytenr, blocksize, GFP_NOFS); if (eb) eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr) + u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - blocknr * root->sectorsize, - root->sectorsize, GFP_NOFS); + bytenr, blocksize, GFP_NOFS); eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } @@ -208,13 +208,13 @@ static struct address_space_operations btree_aops = { .sync_page = block_sync_page, }; -int readahead_tree_block(struct btrfs_root *root, u64 blocknr) +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; int ret = 0; - buf = btrfs_find_create_tree_block(root, blocknr); + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, @@ -223,12 +223,13 @@ int readahead_tree_block(struct btrfs_root *root, u64 blocknr) return ret; } -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, + u32 blocksize) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; - buf = btrfs_find_create_tree_block(root, blocknr); + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, @@ -261,7 +262,7 @@ int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) return 0; } -static int __setup_root(int blocksize, +static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) @@ -269,9 +270,9 @@ static int __setup_root(int blocksize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; - root->sectorsize = blocksize; - root->nodesize = blocksize; - root->leafsize = blocksize; + root->sectorsize = sectorsize; + root->nodesize = nodesize; + root->leafsize = leafsize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -291,21 +292,23 @@ static int __setup_root(int blocksize, return 0; } -static int find_and_setup_root(int blocksize, - struct btrfs_root *tree_root, +static int find_and_setup_root(struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, struct btrfs_root *root) { int ret; + u32 blocksize; - __setup_root(blocksize, root, fs_info, objectid); + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); - root->node = read_tree_block(root, - btrfs_root_blocknr(&root->root_item)); + blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), + blocksize); BUG_ON(!root->node); return 0; } @@ -318,14 +321,14 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_path *path; struct extent_buffer *l; u64 highest_inode; + u32 blocksize; int ret = 0; root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); if (location->offset == (u64)-1) { - ret = find_and_setup_root(fs_info->sb->s_blocksize, - fs_info->tree_root, fs_info, + ret = find_and_setup_root(tree_root, fs_info, location->objectid, root); if (ret) { kfree(root); @@ -334,7 +337,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, goto insert; } - __setup_root(fs_info->sb->s_blocksize, root, fs_info, + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, root, fs_info, location->objectid); path = btrfs_alloc_path(); @@ -357,8 +361,9 @@ out: kfree(root); return ERR_PTR(ret); } - root->node = read_tree_block(root, - btrfs_root_blocknr(&root->root_item)); + blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), + blocksize); BUG_ON(!root->node); insert: root->ref_cows = 1; @@ -418,6 +423,10 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *open_ctree(struct super_block *sb) { + u32 sectorsize; + u32 nodesize; + u32 leafsize; + u32 blocksize; struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -474,12 +483,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - __setup_root(sb->s_blocksize, tree_root, + __setup_root(512, 512, 512, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); fs_info->sb_buffer = read_tree_block(tree_root, - BTRFS_SUPER_INFO_OFFSET / - sb->s_blocksize); + BTRFS_SUPER_INFO_OFFSET, + 512); if (!fs_info->sb_buffer) goto fail_iput; @@ -494,9 +503,15 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; + nodesize = btrfs_super_nodesize(disk_super); + leafsize = btrfs_super_leafsize(disk_super); + sectorsize = btrfs_super_sectorsize(disk_super); + tree_root->nodesize = nodesize; + tree_root->leafsize = leafsize; + tree_root->sectorsize = sectorsize; + i_size_write(fs_info->btree_inode, - btrfs_super_total_blocks(disk_super) << - fs_info->btree_inode->i_blkbits); + btrfs_super_total_bytes(disk_super)); if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, @@ -504,13 +519,22 @@ struct btrfs_root *open_ctree(struct super_block *sb) printk("btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } + blocksize = btrfs_level_size(tree_root, + btrfs_super_root_level(disk_super)); tree_root->node = read_tree_block(tree_root, - btrfs_super_root(disk_super)); + btrfs_super_root(disk_super), + blocksize); if (!tree_root->node) goto fail_sb_buffer; +#if 0 + btrfs_print_leaf(tree_root, tree_root->node); + err = -EIO; + goto fail_tree_root; +#endif mutex_lock(&fs_info->fs_mutex); - ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, + + ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); if (ret) { mutex_unlock(&fs_info->fs_mutex); @@ -611,11 +635,11 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(fs_info->tree_root->node); free_extent_buffer(fs_info->sb_buffer); - truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); - iput(fs_info->btree_inode); btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); + truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + iput(fs_info->btree_inode); kfree(fs_info->extent_root); kfree(fs_info->tree_root); return 0; @@ -642,7 +666,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", - (unsigned long long)extent_buffer_blocknr(buf), + (unsigned long long)buf->start, transid, root->fs_info->generation); WARN_ON(1); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 70d9413c599f..fd4db5f810cf 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -21,10 +21,11 @@ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); -int readahead_tree_block(struct btrfs_root *root, u64 blocknr); +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, + u32 blocksize); +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr); + u64 bytenr, u32 blocksize); int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); struct btrfs_root *open_ctree(struct super_block *sb); @@ -32,7 +33,7 @@ int close_ctree(struct btrfs_root *root); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 blocknr); + u64 bytenr, u32 blocksize); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 477466d167a4..1be8f9f04a13 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -129,7 +129,7 @@ err: struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, - u64 blocknr) + u64 bytenr) { struct extent_map_tree *block_group_cache; struct btrfs_block_group_cache *block_group = NULL; @@ -140,7 +140,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct block_group_cache = &info->block_group_cache; ret = find_first_extent_bit(block_group_cache, - blocknr, &start, &end, + bytenr, &start, &end, BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA); if (ret) { return NULL; @@ -152,7 +152,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct block_group = (struct btrfs_block_group_cache *)ptr; - if (block_group->key.objectid <= blocknr && blocknr <= + if (block_group->key.objectid <= bytenr && bytenr <= block_group->key.objectid + block_group->key.offset) return block_group; @@ -315,7 +315,7 @@ found: int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num_blocks) + u64 bytenr, u64 num_bytes) { struct btrfs_path *path; int ret; @@ -324,13 +324,14 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_extent_item *item; u32 refs; + WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); if (!path) return -ENOMEM; - key.objectid = blocknr; + key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = num_blocks; + key.offset = num_bytes; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); if (ret < 0) @@ -361,8 +362,8 @@ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, } static int lookup_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 blocknr, - u64 num_blocks, u32 *refs) + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u32 *refs) { struct btrfs_path *path; int ret; @@ -370,9 +371,10 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct extent_buffer *l; struct btrfs_extent_item *item; + WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); - key.objectid = blocknr; - key.offset = num_blocks; + key.objectid = bytenr; + key.offset = num_bytes; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); @@ -380,7 +382,7 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, goto out; if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); - printk("failed to find block number %Lu\n", blocknr); + printk("failed to find block number %Lu\n", bytenr); BUG(); } l = path->nodes[0]; @@ -394,19 +396,19 @@ out: int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return btrfs_inc_extent_ref(trans, root, - extent_buffer_blocknr(root->node), 1); + return btrfs_inc_extent_ref(trans, root, root->node->start, + root->node->len); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) { - u64 blocknr; + u64 bytenr; u32 nritems; struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; - int leaf; + int level; int ret; int faili; int err; @@ -414,11 +416,11 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (!root->ref_cows) return 0; - leaf = btrfs_is_leaf(buf); + level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); for (i = 0; i < nritems; i++) { - if (leaf) { - u64 disk_blocknr; + if (level == 0) { + u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -427,18 +429,19 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); - if (disk_blocknr == 0) + disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (disk_bytenr == 0) continue; - ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(buf, fi)); + ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, + btrfs_file_extent_disk_num_bytes(buf, fi)); if (ret) { faili = i; goto fail; } } else { - blocknr = btrfs_node_blockptr(buf, i); - ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); + bytenr = btrfs_node_blockptr(buf, i); + ret = btrfs_inc_extent_ref(trans, root, bytenr, + btrfs_level_size(root, level - 1)); if (ret) { faili = i; goto fail; @@ -449,8 +452,8 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, fail: WARN_ON(1); for (i =0; i < faili; i++) { - if (leaf) { - u64 disk_blocknr; + if (level == 0) { + u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -459,16 +462,17 @@ fail: if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); - if (disk_blocknr == 0) + disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (disk_bytenr == 0) continue; - err = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(buf, + err = btrfs_free_extent(trans, root, disk_bytenr, + btrfs_file_extent_disk_num_bytes(buf, fi), 0); BUG_ON(err); } else { - blocknr = btrfs_node_blockptr(buf, i); - err = btrfs_free_extent(trans, root, blocknr, 1, 0); + bytenr = btrfs_node_blockptr(buf, i); + err = btrfs_free_extent(trans, root, bytenr, + btrfs_level_size(root, level - 1), 0); BUG_ON(err); } } @@ -558,31 +562,31 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num, int alloc, int mark_free, - int data) + u64 bytenr, u64 num_bytes, int alloc, + int mark_free, int data) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - u64 total = num; + u64 total = num_bytes; u64 old_val; - u64 block_in_group; + u64 byte_in_group; u64 start; u64 end; while(total) { - cache = btrfs_lookup_block_group(info, blocknr); + cache = btrfs_lookup_block_group(info, bytenr); if (!cache) { return -1; } - block_in_group = blocknr - cache->key.objectid; - WARN_ON(block_in_group > cache->key.offset); + byte_in_group = bytenr - cache->key.objectid; + WARN_ON(byte_in_group > cache->key.offset); start = cache->key.objectid; end = start + cache->key.offset - 1; set_extent_bits(&info->block_group_cache, start, end, BLOCK_GROUP_DIRTY, GFP_NOFS); old_val = btrfs_block_group_used(&cache->item); - num = min(total, cache->key.offset - block_in_group); + num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { if (cache->data != data && old_val < (cache->key.offset >> 1)) { @@ -608,18 +612,18 @@ static int update_block_group(struct btrfs_trans_handle *trans, start, end, bit_to_set, GFP_NOFS); } - old_val += num; + old_val += num_bytes; } else { - old_val -= num; + old_val -= num_bytes; if (mark_free) { set_extent_dirty(&info->free_space_cache, - blocknr, blocknr + num - 1, + bytenr, bytenr + num_bytes - 1, GFP_NOFS); } } btrfs_set_block_group_used(&cache->item, old_val); - total -= num; - blocknr += num; + total -= num_bytes; + bytenr += num_bytes; } return 0; } @@ -701,13 +705,14 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct return 0; } -static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) +static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, + int pending) { int err = 0; struct extent_buffer *buf; if (!pending) { - buf = btrfs_find_tree_block(root, blocknr); + buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { if (btrfs_buffer_uptodate(buf)) { u64 transid = @@ -720,10 +725,11 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) free_extent_buffer(buf); } set_extent_dirty(&root->fs_info->pinned_extents, - blocknr, blocknr, GFP_NOFS); + bytenr, bytenr + num_bytes - 1, GFP_NOFS); } else { set_extent_bits(&root->fs_info->pending_del, - blocknr, blocknr, EXTENT_LOCKED, GFP_NOFS); + bytenr, bytenr + num_bytes - 1, + EXTENT_LOCKED, GFP_NOFS); } BUG_ON(err < 0); return 0; @@ -733,7 +739,7 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin, + *root, u64 bytenr, u64 num_bytes, int pin, int mark_free) { struct btrfs_path *path; @@ -745,9 +751,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item *ei; u32 refs; - key.objectid = blocknr; + key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = num_blocks; + key.offset = num_bytes; path = btrfs_alloc_path(); if (!path) @@ -768,28 +774,29 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(leaf); if (refs == 0) { - u64 super_blocks_used, root_blocks_used; + u64 super_used; + u64 root_used; if (pin) { - ret = pin_down_block(root, blocknr, 0); + ret = pin_down_bytes(root, bytenr, num_bytes, 0); BUG_ON(ret); } /* block accounting for super block */ - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); - btrfs_set_super_blocks_used(&info->super_copy, - super_blocks_used - num_blocks); + super_used = btrfs_super_bytes_used(&info->super_copy); + btrfs_set_super_bytes_used(&info->super_copy, + super_used - num_bytes); /* block accounting for root item */ - root_blocks_used = btrfs_root_used(&root->root_item); + root_used = btrfs_root_used(&root->root_item); btrfs_set_root_used(&root->root_item, - root_blocks_used - num_blocks); + root_used - num_bytes); ret = btrfs_del_item(trans, extent_root, path); if (ret) { return ret; } - ret = update_block_group(trans, root, blocknr, num_blocks, 0, + ret = update_block_group(trans, root, bytenr, num_bytes, 0, mark_free, 0); BUG_ON(ret); } @@ -836,17 +843,18 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct * remove an extent from the root, returns 0 on success */ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin) + *root, u64 bytenr, u64 num_bytes, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; int pending_ret; int ret; + WARN_ON(num_bytes < root->sectorsize); if (root == extent_root) { - pin_down_block(root, blocknr, 1); + pin_down_bytes(root, bytenr, num_bytes, 1); return 0; } - ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0); + ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } @@ -860,8 +868,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root * Any available blocks before search_start are skipped. */ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 empty_size, - u64 search_start, u64 search_end, u64 hint_block, + *orig_root, u64 num_bytes, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_byte, struct btrfs_key *ins, u64 exclude_start, u64 exclude_nr, int data) { @@ -870,30 +878,29 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int ret; u64 hole_size = 0; int slot = 0; - u64 last_block = 0; + u64 last_byte = 0; u64 orig_search_start = search_start; int start_found; struct extent_buffer *l; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; - int total_needed = num_blocks; + u64 total_needed = num_bytes; int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; - u64 cached_search_start = 0; - WARN_ON(num_blocks < 1); + WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(root->node); if (search_end == (u64)-1) - search_end = btrfs_super_total_blocks(&info->super_copy); - if (hint_block) { - block_group = btrfs_lookup_block_group(info, hint_block); + search_end = btrfs_super_total_bytes(&info->super_copy); + if (hint_byte) { + block_group = btrfs_lookup_block_group(info, hint_byte); block_group = btrfs_find_block_group(root, block_group, - hint_block, data, 1); + hint_byte, data, 1); } else { block_group = btrfs_find_block_group(root, trans->block_group, 0, @@ -906,7 +913,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); - cached_search_start = search_start; btrfs_init_path(path); ins->objectid = search_start; @@ -958,27 +964,27 @@ check_failed: start_found = 1; goto check_pending; } - ins->objectid = last_block > search_start ? - last_block : search_start; + ins->objectid = last_byte > search_start ? + last_byte : search_start; ins->offset = search_end - ins->objectid; goto check_pending; } btrfs_item_key_to_cpu(l, &key, slot); - if (key.objectid >= search_start && key.objectid > last_block && + if (key.objectid >= search_start && key.objectid > last_byte && start_found) { - if (last_block < search_start) - last_block = search_start; - hole_size = key.objectid - last_block; - if (hole_size >= num_blocks) { - ins->objectid = last_block; + if (last_byte < search_start) + last_byte = search_start; + hole_size = key.objectid - last_byte; + if (hole_size >= num_bytes) { + ins->objectid = last_byte; ins->offset = hole_size; goto check_pending; } } if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { if (!start_found) { - last_block = key.objectid; + last_byte = key.objectid; start_found = 1; } goto next; @@ -986,9 +992,9 @@ check_failed: start_found = 1; - last_block = key.objectid + key.offset; + last_byte = key.objectid + key.offset; - if (!full_scan && last_block >= block_group->key.objectid + + if (!full_scan && last_byte >= block_group->key.objectid + block_group->key.offset) { btrfs_release_path(root, path); search_start = block_group->key.objectid + @@ -1006,20 +1012,20 @@ check_pending: btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); - if (ins->objectid + num_blocks >= search_end) + if (ins->objectid + num_bytes >= search_end) goto enospc; if (test_range_bit(&info->extent_ins, ins->objectid, - ins->objectid + num_blocks -1, EXTENT_LOCKED, 0)) { - search_start = ins->objectid + num_blocks; + ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { + search_start = ins->objectid + num_bytes; goto new_group; } if (test_range_bit(&info->pinned_extents, ins->objectid, - ins->objectid + num_blocks -1, EXTENT_DIRTY, 0)) { - search_start = ins->objectid + num_blocks; + ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { + search_start = ins->objectid + num_bytes; goto new_group; } - if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start && + if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; goto new_group; @@ -1029,12 +1035,12 @@ check_pending: if (block_group) trans->block_group = block_group; } - ins->offset = num_blocks; + ins->offset = num_bytes; btrfs_free_path(path); return 0; new_group: - if (search_start + num_blocks >= search_end) { + if (search_start + num_bytes >= search_end) { enospc: search_start = orig_search_start; if (full_scan) { @@ -1069,12 +1075,12 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 empty_size, u64 hint_block, + u64 num_bytes, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, int data) { int ret; int pending_ret; - u64 super_blocks_used, root_blocks_used; + u64 super_used, root_used; u64 search_start = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -1083,9 +1089,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_stack_extent_refs(&extent_item, 1); btrfs_set_stack_extent_owner(&extent_item, owner); - WARN_ON(num_blocks < 1); - ret = find_free_extent(trans, root, num_blocks, empty_size, - search_start, search_end, hint_block, ins, + WARN_ON(num_bytes < root->sectorsize); + ret = find_free_extent(trans, root, num_bytes, empty_size, + search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); BUG_ON(ret); @@ -1093,21 +1099,18 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, return ret; /* block accounting for super block */ - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); - btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + - num_blocks); + super_used = btrfs_super_bytes_used(&info->super_copy); + btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); /* block accounting for root item */ - root_blocks_used = btrfs_root_used(&root->root_item); - btrfs_set_root_used(&root->root_item, root_blocks_used + - num_blocks); + root_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_used + num_bytes); clear_extent_dirty(&root->fs_info->free_space_cache, ins->objectid, ins->objectid + ins->offset - 1, GFP_NOFS); if (root == extent_root) { - BUG_ON(num_blocks != 1); set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, EXTENT_LOCKED, GFP_NOFS); @@ -1146,7 +1149,8 @@ update_block: * returns the tree buffer or NULL. */ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint, + struct btrfs_root *root, + u32 blocksize, u64 hint, u64 empty_size) { struct btrfs_key ins; @@ -1154,14 +1158,15 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, empty_size, hint, (u64)-1, &ins, 0); + blocksize, empty_size, hint, + (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); return ERR_PTR(ret); } - buf = btrfs_find_create_tree_block(root, ins.objectid); + buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize); if (!buf) { - btrfs_free_extent(trans, root, ins.objectid, 1, 0); + btrfs_free_extent(trans, root, ins.objectid, blocksize, 0); return ERR_PTR(-ENOMEM); } btrfs_set_buffer_uptodate(buf); @@ -1191,7 +1196,7 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, BUG_ON(!btrfs_is_leaf(leaf)); nritems = btrfs_header_nritems(leaf); for (i = 0; i < nritems; i++) { - u64 disk_blocknr; + u64 disk_bytenr; btrfs_item_key_to_cpu(leaf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) @@ -1204,11 +1209,11 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, * FIXME make sure to insert a trans record that * repeats the snapshot del on crash */ - disk_blocknr = btrfs_file_extent_disk_blocknr(leaf, fi); - if (disk_blocknr == 0) + disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + if (disk_bytenr == 0) continue; - ret = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(leaf, fi), 0); + ret = btrfs_free_extent(trans, root, disk_bytenr, + btrfs_file_extent_disk_num_bytes(leaf, fi), 0); BUG_ON(ret); } return 0; @@ -1219,19 +1224,23 @@ static void reada_walk_down(struct btrfs_root *root, { int i; u32 nritems; - u64 blocknr; + u64 bytenr; int ret; u32 refs; + int level; + u32 blocksize; nritems = btrfs_header_nritems(node); + level = btrfs_header_level(node); for (i = 0; i < nritems; i++) { - blocknr = btrfs_node_blockptr(node, i); - ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs); + bytenr = btrfs_node_blockptr(node, i); + blocksize = btrfs_level_size(root, level - 1); + ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) continue; mutex_unlock(&root->fs_info->fs_mutex); - ret = readahead_tree_block(root, blocknr); + ret = readahead_tree_block(root, bytenr, blocksize); cond_resched(); mutex_lock(&root->fs_info->fs_mutex); if (ret) @@ -1248,15 +1257,16 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root { struct extent_buffer *next; struct extent_buffer *cur; - u64 blocknr; + u64 bytenr; + u32 blocksize; int ret; u32 refs; WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = lookup_extent_ref(trans, root, - extent_buffer_blocknr(path->nodes[*level]), - 1, &refs); + path->nodes[*level]->start, + path->nodes[*level]->len, &refs); BUG_ON(ret); if (refs > 1) goto out; @@ -1283,30 +1293,33 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); break; } - blocknr = btrfs_node_blockptr(cur, path->slots[*level]); - ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + blocksize = btrfs_level_size(root, *level - 1); + ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) { path->slots[*level]++; - ret = btrfs_free_extent(trans, root, blocknr, 1, 1); + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, 1); BUG_ON(ret); continue; } - next = btrfs_find_tree_block(root, blocknr); + next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); mutex_unlock(&root->fs_info->fs_mutex); - next = read_tree_block(root, blocknr); + next = read_tree_block(root, bytenr, blocksize); mutex_lock(&root->fs_info->fs_mutex); /* we dropped the lock, check one more time */ - ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); + ret = lookup_extent_ref(trans, root, bytenr, + blocksize, &refs); BUG_ON(ret); if (refs != 1) { path->slots[*level]++; free_extent_buffer(next); ret = btrfs_free_extent(trans, root, - blocknr, 1, 1); + bytenr, blocksize, 1); BUG_ON(ret); continue; } @@ -1321,8 +1334,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = btrfs_free_extent(trans, root, - extent_buffer_blocknr(path->nodes[*level]), 1, 1); + ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, + path->nodes[*level]->len, 1); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -1359,8 +1372,8 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } else { ret = btrfs_free_extent(trans, root, - extent_buffer_blocknr(path->nodes[*level]), - 1, 1); + path->nodes[*level]->start, + path->nodes[*level]->len, 1); BUG_ON(ret); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; @@ -1476,16 +1489,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; - u64 group_size_blocks; block_group_cache = &info->block_group_cache; - group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >> - info->sb->s_blocksize_bits; - root = info->extent_root; key.objectid = 0; - key.offset = group_size_blocks; + key.offset = BTRFS_BLOCK_GROUP_SIZE; btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); path = btrfs_alloc_path(); @@ -1532,7 +1541,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) (u64)cache); if (key.objectid >= - btrfs_super_total_blocks(&info->super_copy)) + btrfs_super_total_bytes(&info->super_copy)) break; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index e081558d52ff..f658703c42e6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1963,18 +1963,27 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i) struct page *p; if (i == 0) return eb->first_page; + i += eb->start >> PAGE_CACHE_SHIFT; + if (eb->last_page && eb->last_page->index == i) + return eb->last_page; + p = find_get_page(eb->first_page->mapping, i); page_cache_release(p); + eb->last_page = p; return p; } +static inline unsigned long num_extent_pages(u64 start, u64 len) +{ + return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT); +} struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask) { - unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long num_pages = num_extent_pages(start, len); unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; @@ -1986,7 +1995,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = __builtin_return_address(0); + eb->alloc_addr = (unsigned long)__builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -1994,6 +2003,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); if (!p) { + WARN_ON(1); /* make sure the free only frees the pages we've * grabbed a reference on */ @@ -2021,8 +2031,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask) { - unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long num_pages = num_extent_pages(start, len); unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; @@ -2033,7 +2042,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = __builtin_return_address(0); + eb->alloc_addr = (unsigned long)__builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -2070,8 +2079,7 @@ void free_extent_buffer(struct extent_buffer *eb) if (!atomic_dec_and_test(&eb->refs)) return; - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); if (eb->first_page) page_cache_release(eb->first_page); @@ -2094,8 +2102,7 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, u64 end = start + eb->len - 1; set = clear_extent_dirty(tree, start, end, GFP_NOFS); - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); @@ -2145,8 +2152,7 @@ int set_extent_buffer_uptodate(struct extent_map_tree *tree, struct page *page; unsigned long num_pages; - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); @@ -2191,8 +2197,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, return 0; } - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (PageUptodate(page)) { @@ -2267,14 +2272,14 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } EXPORT_SYMBOL(read_extent_buffer); -int map_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, - char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) +static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) { size_t offset = start & (PAGE_CACHE_SIZE - 1); char *kaddr; + struct page *p; size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; unsigned long end_i = (start_offset + start + min_len) >> @@ -2283,21 +2288,59 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, if (i != end_i) return -EINVAL; - WARN_ON(start > eb->len); + if (start >= eb->len) { + printk("bad start in map eb start %Lu len %lu caller start %lu min %lu\n", eb->start, eb->len, start, min_len); + WARN_ON(1); + } if (i == 0) { offset = start_offset; *map_start = 0; } else { + offset = 0; *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; } - kaddr = kmap_atomic(extent_buffer_page(eb, i), km); + p = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(p)); + kaddr = kmap_atomic(p, km); *token = kaddr; *map = kaddr + offset; *map_len = PAGE_CACHE_SIZE - offset; return 0; } + +int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + int err; + int save = 0; + if (eb->map_token) { + if (start >= eb->map_start && + start + min_len <= eb->map_start + eb->map_len) { + *token = eb->map_token; + *map = eb->kaddr; + *map_start = eb->map_start; + *map_len = eb->map_len; + return 0; + } + unmap_extent_buffer(eb, eb->map_token, km); + eb->map_token = NULL; + save = 1; + } + err = __map_extent_buffer(eb, start, min_len, token, map, + map_start, map_len, km); + if (!err && save) { + eb->map_token = *token; + eb->kaddr = *map; + eb->map_start = *map_start; + eb->map_len = *map_len; + } + return err; +} EXPORT_SYMBOL(map_extent_buffer); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) @@ -2574,7 +2617,6 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, ((unsigned long)PAGE_CACHE_SIZE - 1); src_off_in_page = src_end & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (src_i == 0) src_off_in_page += start_offset; if (dst_i == 0) @@ -2582,14 +2624,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min(len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); - move_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); - dst_end -= cur - 1; - src_end -= cur - 1; + dst_end -= cur; + src_end -= cur; len -= cur; } } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 75dc600dc0f9..4ef8bdd68348 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -70,7 +70,12 @@ struct extent_buffer { struct list_head list; struct list_head leak_list; struct page *first_page; + struct page *last_page; unsigned long alloc_addr; + char *map_token; + char *kaddr; + unsigned long map_start; + unsigned long map_len; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, @@ -147,11 +152,6 @@ static inline void extent_buffer_get(struct extent_buffer *eb) atomic_inc(&eb->refs); } -static inline u64 extent_buffer_blocknr(struct extent_buffer *eb) -{ - return eb->start / 4096; -} - int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, unsigned long start, unsigned long len); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 226f6d028c3f..9a65e97a4e28 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -27,8 +27,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, - u64 offset, u64 disk_num_blocks, - u64 num_blocks) + u64 offset, u64 disk_num_bytes, + u64 num_bytes) { int ret = 0; struct btrfs_file_extent_item *item; @@ -50,10 +50,10 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(leaf, item, offset); - btrfs_set_file_extent_disk_num_blocks(leaf, item, disk_num_blocks); + btrfs_set_file_extent_disk_bytenr(leaf, item, offset); + btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); btrfs_set_file_extent_offset(leaf, item, 0); - btrfs_set_file_extent_num_blocks(leaf, item, num_blocks); + btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); btrfs_set_file_extent_generation(leaf, item, trans->transid); btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(leaf); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1734ca695555..844d8807e44a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -120,9 +120,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page, KM_USER1); write_extent_buffer(leaf, kaddr + page_offset, ptr, size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER1); btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); @@ -142,11 +142,12 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct inode *inode = file->f_path.dentry->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - u64 hint_block; - u64 num_blocks; + u64 hint_byte; + u64 num_bytes; u64 start_pos; u64 end_of_last_block; u64 end_pos = pos + write_bytes; + u32 inline_size; loff_t isize = i_size_read(inode); em = alloc_extent_map(GFP_NOFS); @@ -156,11 +157,12 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, em->bdev = inode->i_sb->s_bdev; start_pos = pos & ~((u64)root->sectorsize - 1); - num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> - inode->i_blkbits; + num_bytes = (write_bytes + pos - start_pos + + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); down_read(&BTRFS_I(inode)->root->snap_sem); - end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; + end_of_last_block = start_pos + num_bytes - 1; + lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -169,8 +171,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, goto out_unlock; } btrfs_set_trans_block_group(trans, inode); - inode->i_blocks += num_blocks << 3; - hint_block = 0; + inode->i_blocks += num_bytes >> 9; + hint_byte = 0; if ((end_of_last_block & 4095) == 0) { printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); @@ -191,11 +193,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, - &hint_block); + &hint_byte); if (err) goto failed; - hole_size >>= inode->i_blkbits; err = btrfs_insert_file_extent(trans, root, inode->i_ino, last_pos_in_file, @@ -209,8 +210,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, * either allocate an extent for the new bytes or setup the key * to show we are doing inline data in the extent */ + inline_size = end_pos - start_pos; if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || - pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root) || + inline_size >= PAGE_CACHE_SIZE) { u64 last_end; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; @@ -224,10 +227,9 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, } else { struct page *p = pages[0]; /* step one, delete the existing extents in this range */ - /* FIXME blocksize != pagesize */ err = btrfs_drop_extents(trans, root, inode, start_pos, (pos + write_bytes + root->sectorsize -1) & - ~((u64)root->sectorsize - 1), &hint_block); + ~((u64)root->sectorsize - 1), &hint_byte); if (err) goto failed; @@ -283,7 +285,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) */ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 *hint_block) + u64 start, u64 end, u64 *hint_byte) { int ret; struct btrfs_key key; @@ -346,8 +348,7 @@ next_slot: found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = key.offset + - (btrfs_file_extent_num_blocks(leaf, extent) << - inode->i_blkbits); + btrfs_file_extent_num_bytes(leaf, extent); found_extent = 1; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { struct btrfs_item *item; @@ -386,17 +387,17 @@ next_slot: if (end < extent_end && end >= key.offset) { if (found_extent) { - u64 disk_blocknr = - btrfs_file_extent_disk_blocknr(leaf,extent); - u64 disk_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, + u64 disk_bytenr = + btrfs_file_extent_disk_bytenr(leaf, extent); + u64 disk_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, extent); read_extent_buffer(leaf, &old, (unsigned long)extent, sizeof(old)); - if (disk_blocknr != 0) { + if (disk_bytenr != 0) { ret = btrfs_inc_extent_ref(trans, root, - disk_blocknr, disk_num_blocks); + disk_bytenr, disk_num_bytes); BUG_ON(ret); } } @@ -410,21 +411,19 @@ next_slot: keep = 1; WARN_ON(start & (root->sectorsize - 1)); if (found_extent) { - new_num = (start - key.offset) >> - inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(leaf, - extent); - *hint_block = - btrfs_file_extent_disk_blocknr(leaf, - extent); - if (btrfs_file_extent_disk_blocknr(leaf, - extent)) { + new_num = start - key.offset; + old_num = btrfs_file_extent_num_bytes(leaf, + extent); + *hint_byte = + btrfs_file_extent_disk_bytenr(leaf, + extent); + if (btrfs_file_extent_disk_bytenr(leaf, + extent)) { inode->i_blocks -= - (old_num - new_num) << 3; + (old_num - new_num) >> 9; } - btrfs_set_file_extent_num_blocks(leaf, - extent, - new_num); + btrfs_set_file_extent_num_bytes(leaf, extent, + new_num); btrfs_mark_buffer_dirty(leaf); } else { WARN_ON(1); @@ -432,33 +431,32 @@ next_slot: } /* delete the entire extent */ if (!keep) { - u64 disk_blocknr = 0; - u64 disk_num_blocks = 0; - u64 extent_num_blocks = 0; + u64 disk_bytenr = 0; + u64 disk_num_bytes = 0; + u64 extent_num_bytes = 0; if (found_extent) { - disk_blocknr = - btrfs_file_extent_disk_blocknr(leaf, + disk_bytenr = + btrfs_file_extent_disk_bytenr(leaf, extent); - disk_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, - extent); - extent_num_blocks = - btrfs_file_extent_num_blocks(leaf, - extent); - *hint_block = - btrfs_file_extent_disk_blocknr(leaf, + disk_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, extent); + extent_num_bytes = + btrfs_file_extent_num_bytes(leaf, extent); + *hint_byte = + btrfs_file_extent_disk_bytenr(leaf, + extent); } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ BUG_ON(ret); btrfs_release_path(root, path); extent = NULL; - if (found_extent && disk_blocknr != 0) { - inode->i_blocks -= extent_num_blocks << 3; + if (found_extent && disk_bytenr != 0) { + inode->i_blocks -= extent_num_bytes >> 9; ret = btrfs_free_extent(trans, root, - disk_blocknr, - disk_num_blocks, 0); + disk_bytenr, + disk_num_bytes, 0); } BUG_ON(ret); @@ -491,20 +489,19 @@ next_slot: (unsigned long)extent, sizeof(old)); btrfs_set_file_extent_offset(leaf, extent, - le64_to_cpu(old.offset) + - ((end - key.offset) >> inode->i_blkbits)); - WARN_ON(le64_to_cpu(old.num_blocks) < - (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_num_blocks(leaf, extent, - (extent_end - end) >> inode->i_blkbits); - + le64_to_cpu(old.offset) + end - key.offset); + WARN_ON(le64_to_cpu(old.num_bytes) < + (extent_end - end)); + btrfs_set_file_extent_num_bytes(leaf, extent, + extent_end - end); btrfs_set_file_extent_type(leaf, extent, BTRFS_FILE_EXTENT_REG); + btrfs_mark_buffer_dirty(path->nodes[0]); - if (le64_to_cpu(old.disk_blocknr) != 0) { + if (le64_to_cpu(old.disk_bytenr) != 0) { inode->i_blocks += - btrfs_file_extent_num_blocks(leaf, - extent) << 3; + btrfs_file_extent_num_bytes(leaf, + extent) >> 9; } ret = 0; goto out; @@ -531,12 +528,9 @@ static int prepare_pages(struct btrfs_root *root, unsigned long index = pos >> PAGE_CACHE_SHIFT; struct inode *inode = file->f_path.dentry->d_inode; int err = 0; - u64 num_blocks; u64 start_pos; start_pos = pos & ~((u64)root->sectorsize - 1); - num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> - inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fbe2836364e0..d6b3a55ed8e0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -77,19 +77,19 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_key ins; u64 alloc_hint = 0; - u64 num_blocks; + u64 num_bytes; int ret; - u64 blocksize = 1 << inode->i_blkbits; + u64 blocksize = root->sectorsize; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); BUG_ON(!trans); - num_blocks = (end - start + blocksize) & ~(blocksize - 1); + num_bytes = (end - start + blocksize) & ~(blocksize - 1); ret = btrfs_drop_extents(trans, root, inode, - start, start + num_blocks, &alloc_hint); - num_blocks = num_blocks >> inode->i_blkbits; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_blocks, 0, + start, start + num_bytes, &alloc_hint); + + ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_bytes, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); @@ -186,7 +186,8 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) zeroit: printk("btrfs csum failed ino %lu off %llu\n", page->mapping->host->i_ino, (unsigned long long)start); - memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); + memset(kaddr + offset, 1, end - start + 1); + flush_dcache_page(page); kunmap_atomic(kaddr, KM_IRQ0); return 0; } @@ -547,7 +548,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; u64 extent_start = 0; - u64 extent_num_blocks = 0; + u64 extent_num_bytes = 0; u64 item_end = 0; int found_extent; int del_item; @@ -593,8 +594,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { item_end += - btrfs_file_extent_num_blocks(leaf, fi) << - inode->i_blkbits; + btrfs_file_extent_num_bytes(leaf, fi); } } if (found_type == BTRFS_CSUM_ITEM_KEY) { @@ -626,28 +626,27 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; - extent_start = btrfs_file_extent_disk_blocknr(leaf, fi); + extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); if (!del_item) { - u64 orig_num_blocks = - btrfs_file_extent_num_blocks(leaf, fi); - extent_num_blocks = inode->i_size - + u64 orig_num_bytes = + btrfs_file_extent_num_bytes(leaf, fi); + extent_num_bytes = inode->i_size - found_key.offset + root->sectorsize - 1; - extent_num_blocks >>= inode->i_blkbits; - btrfs_set_file_extent_num_blocks(leaf, fi, - extent_num_blocks); - num_dec = (orig_num_blocks - - extent_num_blocks) << 3; + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_num_bytes); + num_dec = (orig_num_bytes - + extent_num_bytes) >> 9; if (extent_start != 0) { inode->i_blocks -= num_dec; } btrfs_mark_buffer_dirty(leaf); } else { - extent_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, - fi); + extent_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, + fi); /* FIXME blocksize != 4096 */ - num_dec = btrfs_file_extent_num_blocks(leaf, - fi) << 3; + num_dec = btrfs_file_extent_num_bytes(leaf, + fi) >> 9; if (extent_start != 0) { found_extent = 1; inode->i_blocks -= num_dec; @@ -664,7 +663,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_release_path(root, path); if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, - extent_num_blocks, 0); + extent_num_bytes, 0); BUG_ON(ret); } } @@ -709,7 +708,8 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, static int btrfs_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; - unsigned blocksize = 1 << inode->i_blkbits; + struct btrfs_root *root = BTRFS_I(inode)->root; + u32 blocksize = root->sectorsize; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); struct page *page; @@ -719,7 +719,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if ((offset & (blocksize - 1)) == 0) goto out; - down_read(&BTRFS_I(inode)->root->snap_sem); + down_read(&root->snap_sem); ret = -ENOMEM; page = grab_cache_page(mapping, index); if (!page) @@ -778,8 +778,6 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) err = btrfs_drop_extents(trans, root, inode, pos, pos + hole_size, &alloc_hint); - hole_size >>= inode->i_blkbits; - err = btrfs_insert_file_extent(trans, root, inode->i_ino, pos, 0, 0, hole_size); btrfs_end_transaction(trans, root); @@ -1490,7 +1488,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, { int ret; int err = 0; - u64 blocknr; + u64 bytenr; u64 extent_start = 0; u64 extent_end = 0; u64 objectid = inode->i_ino; @@ -1540,10 +1538,6 @@ again: leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - - blocknr = btrfs_file_extent_disk_blocknr(leaf, item); - blocknr += btrfs_file_extent_offset(leaf, item); - /* are we inside the extent that was found? */ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); @@ -1556,8 +1550,7 @@ again: extent_start = found_key.offset; if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = extent_start + - (btrfs_file_extent_num_blocks(leaf, item) << - inode->i_blkbits); + btrfs_file_extent_num_bytes(leaf, item); err = 0; if (start < extent_start || start >= extent_end) { em->start = start; @@ -1570,17 +1563,18 @@ again: } goto not_found_em; } - if (btrfs_file_extent_disk_blocknr(leaf, item) == 0) { + bytenr = btrfs_file_extent_disk_bytenr(leaf, item); + if (bytenr == 0) { em->start = extent_start; em->end = extent_end - 1; em->block_start = EXTENT_MAP_HOLE; em->block_end = EXTENT_MAP_HOLE; goto insert; } - em->block_start = blocknr << inode->i_blkbits; + bytenr += btrfs_file_extent_offset(leaf, item); + em->block_start = bytenr; em->block_end = em->block_start + - (btrfs_file_extent_num_blocks(leaf, item) << - inode->i_blkbits) - 1; + btrfs_file_extent_num_bytes(leaf, item) - 1; em->start = extent_start; em->end = extent_end - 1; goto insert; @@ -1592,7 +1586,8 @@ again: size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); - extent_end = extent_start | ((u64)root->sectorsize - 1); + extent_end = (extent_start + size) | + ((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { @@ -1617,8 +1612,10 @@ again: ptr = btrfs_file_extent_inline_start(item); map = kmap(page); read_extent_buffer(leaf, map + page_offset, ptr, size); + /* memset(map + page_offset + size, 0, root->sectorsize - (page_offset + size)); + */ flush_dcache_page(page); kunmap(page); set_extent_uptodate(em_tree, extent_start, @@ -1836,13 +1833,13 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - leaf = btrfs_alloc_free_block(trans, root, 0, 0); + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 0); if (IS_ERR(leaf)) return PTR_ERR(leaf); btrfs_set_header_nritems(leaf, 0); btrfs_set_header_level(leaf, 0); - btrfs_set_header_blocknr(leaf, extent_buffer_blocknr(leaf)); + btrfs_set_header_bytenr(leaf, leaf->start); btrfs_set_header_generation(leaf, trans->transid); btrfs_set_header_owner(leaf, root->root_key.objectid); write_extent_buffer(leaf, root->fs_info->fsid, @@ -1858,7 +1855,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) inode_item->nblocks = cpu_to_le64(1); inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - btrfs_set_root_blocknr(&root_item, extent_buffer_blocknr(leaf)); + btrfs_set_root_bytenr(&root_item, leaf->start); + btrfs_set_root_level(&root_item, 0); btrfs_set_root_refs(&root_item, 1); btrfs_set_root_used(&root_item, 0); @@ -1971,8 +1969,8 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - btrfs_set_root_blocknr(&new_root_item, - extent_buffer_blocknr(root->node)); + btrfs_set_root_bytenr(&new_root_item, root->node->start); + btrfs_set_root_level(&new_root_item, btrfs_header_level(root->node)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index a825ce078a54..9f8696c8a8e8 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -36,7 +36,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) u32 type; printk("leaf %llu total ptrs %d free space %d\n", - (unsigned long long)btrfs_header_blocknr(l), nr, + (unsigned long long)btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(l, i); @@ -65,8 +65,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - printk("\t\troot data blocknr %llu refs %u\n", - (unsigned long long)btrfs_disk_root_blocknr(l, ri), + printk("\t\troot data bytenr %llu refs %u\n", + (unsigned long long)btrfs_disk_root_bytenr(l, ri), btrfs_disk_root_refs(l, ri)); break; case BTRFS_EXTENT_ITEM_KEY: @@ -84,12 +84,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) btrfs_file_extent_inline_len(l, item)); break; } - printk("\t\textent data disk block %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_disk_blocknr(l, fi), - (unsigned long long)btrfs_file_extent_disk_num_blocks(l, fi)); + printk("\t\textent data disk bytenr %llu nr %llu\n", + (unsigned long long)btrfs_file_extent_disk_bytenr(l, fi), + (unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi)); printk("\t\textent data offset %llu nr %llu\n", (unsigned long long)btrfs_file_extent_offset(l, fi), - (unsigned long long)btrfs_file_extent_num_blocks(l, fi)); + (unsigned long long)btrfs_file_extent_num_bytes(l, fi)); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, @@ -106,16 +106,18 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) int i; u32 nr; struct btrfs_key key; + int level; if (!c) return; nr = btrfs_header_nritems(c); - if (btrfs_is_leaf(c)) { + level = btrfs_header_level(c); + if (level == 0) { btrfs_print_leaf(root, c); return; } printk("node %llu level %d total ptrs %d free spc %u\n", - (unsigned long long)btrfs_header_blocknr(c), + (unsigned long long)btrfs_header_bytenr(c), btrfs_header_level(c), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { @@ -129,7 +131,8 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) } for (i = 0; i < nr; i++) { struct extent_buffer *next = read_tree_block(root, - btrfs_node_blockptr(c, i)); + btrfs_node_blockptr(c, i), + btrfs_level_size(root, level - 1)); if (btrfs_is_leaf(next) && btrfs_header_level(c) != 1) BUG(); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 39a1435c68f1..5c4370f3a5b8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -303,10 +303,12 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root *root = btrfs_sb(dentry->d_sb); struct btrfs_super_block *disk_super = &root->fs_info->super_copy; + int bits = dentry->d_sb->s_blocksize_bits; buf->f_namelen = BTRFS_NAME_LEN; - buf->f_blocks = btrfs_super_total_blocks(disk_super); - buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super); + buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; + buf->f_bfree = buf->f_blocks - + (btrfs_super_bytes_used(disk_super) >> bits); buf->f_bavail = buf->f_bfree; buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_type = BTRFS_SUPER_MAGIC; diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 9654e90eec89..0bd1fd3d29df 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -42,14 +42,15 @@ static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) { + return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocks_used(&fs->super_copy)); + (unsigned long long)btrfs_super_bytes_used(&fs->super_copy)); } static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_total_blocks(&fs->super_copy)); + (unsigned long long)btrfs_super_total_bytes(&fs->super_copy)); } static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 55289b71056e..60f61345a8d0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -205,12 +205,13 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_write_dirty_block_groups(trans, extent_root); while(1) { - old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == - extent_buffer_blocknr(extent_root->node)) + old_extent_block = btrfs_root_bytenr(&extent_root->root_item); + if (old_extent_block == extent_root->node->start) break; - btrfs_set_root_blocknr(&extent_root->root_item, - extent_buffer_blocknr(extent_root->node)); + btrfs_set_root_bytenr(&extent_root->root_item, + extent_root->node->start); + btrfs_set_root_level(&extent_root->root_item, + btrfs_header_level(extent_root->node)); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -284,8 +285,8 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { - WARN_ON(extent_buffer_blocknr(root->node) != - btrfs_root_blocknr(&root->root_item)); + WARN_ON(root->node->start != + btrfs_root_bytenr(&root->root_item)); free_extent_buffer(root->commit_root); root->commit_root = NULL; @@ -314,8 +315,10 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, root->commit_root = NULL; root->root_key.offset = root->fs_info->generation; - btrfs_set_root_blocknr(&root->root_item, - extent_buffer_blocknr(root->node)); + btrfs_set_root_bytenr(&root->root_item, + root->node->start); + btrfs_set_root_level(&root->root_item, + btrfs_header_level(root->node)); err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); @@ -407,8 +410,8 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, struct dirty_root *dirty; struct btrfs_trans_handle *trans; unsigned long nr; - u64 num_blocks; - u64 blocks_used; + u64 num_bytes; + u64 bytes_used; int ret = 0; int err; @@ -419,7 +422,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); - num_blocks = btrfs_root_used(&dirty->root->root_item); + num_bytes = btrfs_root_used(&dirty->root->root_item); root = dirty->latest_root; while(1) { @@ -446,12 +449,12 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, } BUG_ON(ret); - num_blocks -= btrfs_root_used(&dirty->root->root_item); - blocks_used = btrfs_root_used(&root->root_item); - if (num_blocks) { + num_bytes -= btrfs_root_used(&dirty->root->root_item); + bytes_used = btrfs_root_used(&root->root_item); + if (num_bytes) { record_root_in_trans(root); btrfs_set_root_used(&root->root_item, - blocks_used - num_blocks); + bytes_used - num_bytes); } ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); if (ret) { @@ -560,7 +563,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_super_generation(&root->fs_info->super_copy, cur_trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, - extent_buffer_blocknr(root->fs_info->tree_root->node)); + root->fs_info->tree_root->node->start); + btrfs_set_super_root_level(&root->fs_info->super_copy, + btrfs_header_level(root->fs_info->tree_root->node)); write_extent_buffer(root->fs_info->sb_buffer, &root->fs_info->super_copy, 0, diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index daf019afa0a1..3feac2f28628 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -27,13 +27,15 @@ static void reada_defrag(struct btrfs_root *root, { int i; u32 nritems; - u64 blocknr; + u64 bytenr; + u32 blocksize; int ret; + blocksize = btrfs_level_size(root, btrfs_header_level(node) - 1); nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); + bytenr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, bytenr, blocksize); if (ret) break; } @@ -46,7 +48,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, { struct extent_buffer *next; struct extent_buffer *cur; - u64 blocknr; + u64 bytenr; int ret = 0; int is_extent = 0; @@ -80,10 +82,11 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; } - blocknr = btrfs_node_blockptr(cur, path->slots[*level]); + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); if (cache_only) { - next = btrfs_find_tree_block(root, blocknr); + next = btrfs_find_tree_block(root, bytenr, + btrfs_level_size(root, *level - 1)); /* FIXME, test for defrag */ if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); @@ -91,7 +94,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, continue; } } else { - next = read_tree_block(root, blocknr); + next = read_tree_block(root, bytenr, + btrfs_level_size(root, *level - 1)); } ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); From 14048ed0c415b8729b194e92c16d31c61628d216 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:16:28 -0400 Subject: [PATCH 0249/2985] Btrfs: Cache extent buffer mappings Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 40 ++++++++++++++++++++++++++++++++++++---- fs/btrfs/extent_map.c | 15 +++++++++++++-- 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4d05456ec32f..54c754dd9a14 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -436,10 +436,18 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb, \ unsigned long map_len; \ unsigned long offset = (unsigned long)s + \ offsetof(type, member); \ - err = map_extent_buffer(eb, offset, \ + if (eb->map_token && offset >= eb->map_start && \ + offset + sizeof(((type *)0)->member) <= eb->map_start + \ + eb->map_len) { \ + kaddr = eb->kaddr; \ + map_start = eb->map_start; \ + err = 0; \ + } else { \ + err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ &map_start, &map_len, KM_USER1); \ + } \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ @@ -464,10 +472,18 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ int unmap_on_exit = (eb->map_token == NULL); \ unsigned long offset = (unsigned long)s + \ offsetof(type, member); \ - err = map_extent_buffer(eb, offset, \ + if (eb->map_token && offset >= eb->map_start && \ + offset + sizeof(((type *)0)->member) <= eb->map_start + \ + eb->map_len) { \ + kaddr = eb->kaddr; \ + map_start = eb->map_start; \ + err = 0; \ + } else { \ + err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ &map_start, &map_len, KM_USER1); \ + } \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ @@ -490,10 +506,18 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ unsigned long map_len; \ unsigned long offset = offsetof(type, member); \ int unmap_on_exit = (eb->map_token == NULL); \ - err = map_extent_buffer(eb, offset, \ + if (eb->map_token && offset >= eb->map_start && \ + offset + sizeof(((type *)0)->member) <= eb->map_start + \ + eb->map_len) { \ + kaddr = eb->kaddr; \ + map_start = eb->map_start; \ + err = 0; \ + } else { \ + err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ &map_start, &map_len, KM_USER1); \ + } \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ @@ -517,10 +541,18 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ unsigned long map_len; \ unsigned long offset = offsetof(type, member); \ int unmap_on_exit = (eb->map_token == NULL); \ - err = map_extent_buffer(eb, offset, \ + if (eb->map_token && offset >= eb->map_start && \ + offset + sizeof(((type *)0)->member) <= eb->map_start + \ + eb->map_len) { \ + kaddr = eb->kaddr; \ + map_start = eb->map_start; \ + err = 0; \ + } else { \ + err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ &map_start, &map_len, KM_USER1); \ + } \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f658703c42e6..ea6ee68ef53c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2037,6 +2037,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, struct extent_buffer *eb; struct page *p; struct address_space *mapping = tree->mapping; + int uptodate = 1; eb = __alloc_extent_buffer(mask); if (!eb || IS_ERR(eb)) @@ -2048,7 +2049,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, atomic_set(&eb->refs, 1); for (i = 0; i < num_pages; i++, index++) { - p = find_get_page(mapping, index); + p = find_lock_page(mapping, index); if (!p) { /* make sure the free only frees the pages we've * grabbed a reference on @@ -2060,7 +2061,12 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, set_page_extent_mapped(p); if (i == 0) eb->first_page = p; + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; return eb; fail: free_extent_buffer(eb); @@ -2192,7 +2198,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, if (eb->flags & EXTENT_UPTODATE) return 0; - if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, + if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1, EXTENT_UPTODATE, 1)) { return 0; } @@ -2247,6 +2253,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, char *dst = (char *)dstv; size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long num_pages = num_extent_pages(eb->start, eb->len); WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); @@ -2257,6 +2264,10 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, while(len > 0) { page = extent_buffer_page(eb, i); + if (!PageUptodate(page)) { + printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); + WARN_ON(1); + } WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); From 09e71a326341f40111400c88aaf0498ef622824b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:17:04 -0400 Subject: [PATCH 0250/2985] Btrfs: Use an array of pages in the extent buffers to reduce the cost of find_get_page Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +++--- fs/btrfs/extent_map.c | 22 ++++++++-------------- fs/btrfs/extent_map.h | 4 ++-- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fad9298c6962..0636f79672e9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -647,20 +647,20 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->first_page->mapping->host; + struct inode *btree_inode = buf->pages[0]->mapping->host; return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->first_page->mapping->host; + struct inode *btree_inode = buf->pages[0]->mapping->host; return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index ea6ee68ef53c..7ef3397a266d 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1961,16 +1961,12 @@ static void __free_extent_buffer(struct extent_buffer *eb) static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i) { struct page *p; - if (i == 0) - return eb->first_page; + if (i < EXTENT_INLINE_PAGES) + return eb->pages[i]; i += eb->start >> PAGE_CACHE_SHIFT; - if (eb->last_page && eb->last_page->index == i) - return eb->last_page; - - p = find_get_page(eb->first_page->mapping, i); + p = find_get_page(eb->pages[0]->mapping, i); page_cache_release(p); - eb->last_page = p; return p; } @@ -2012,8 +2008,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i == 0) - eb->first_page = p; + if (i < EXTENT_INLINE_PAGES) + eb->pages[i] = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2059,8 +2055,8 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i == 0) - eb->first_page = p; + if (i < EXTENT_INLINE_PAGES) + eb->pages[i] = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2087,9 +2083,7 @@ void free_extent_buffer(struct extent_buffer *eb) num_pages = num_extent_pages(eb->start, eb->len); - if (eb->first_page) - page_cache_release(eb->first_page); - for (i = 1; i < num_pages; i++) { + for (i = 0; i < num_pages; i++) { page_cache_release(extent_buffer_page(eb, i)); } __free_extent_buffer(eb); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 4ef8bdd68348..d74a2b3e3b5d 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -62,6 +62,7 @@ struct extent_state { struct list_head list; }; +#define EXTENT_INLINE_PAGES 32 struct extent_buffer { u64 start; unsigned long len; @@ -69,13 +70,12 @@ struct extent_buffer { int flags; struct list_head list; struct list_head leak_list; - struct page *first_page; - struct page *last_page; unsigned long alloc_addr; char *map_token; char *kaddr; unsigned long map_start; unsigned long map_len; + struct page *pages[EXTENT_INLINE_PAGES]; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, From 6b80053d02be41886344b5007d04e345311ec0b5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:17:34 -0400 Subject: [PATCH 0251/2985] Btrfs: Add back the online defragging code Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 177 ++++++++++++++++++++--------------------- fs/btrfs/disk-io.c | 60 ++++++++++++++ fs/btrfs/disk-io.h | 7 ++ fs/btrfs/extent-tree.c | 8 +- fs/btrfs/extent_map.h | 2 + fs/btrfs/transaction.c | 4 +- fs/btrfs/tree-defrag.c | 14 ++-- 7 files changed, 166 insertions(+), 106 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 34de83630ae9..08ddf1873ab1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -155,55 +155,49 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, return ret; } -#if 0 -static int close_blocks(u64 blocknr, u64 other) +static int close_blocks(u64 blocknr, u64 other, u32 blocksize) { - if (blocknr < other && other - blocknr < 8) + if (blocknr < other && other - (blocknr + blocksize) < 32768) return 1; - if (blocknr > other && blocknr - other < 8) + if (blocknr > other && blocknr - (other + blocksize) < 32768) return 1; return 0; } -static int should_defrag_leaf(struct extent_buffer *eb) +static int should_defrag_leaf(struct extent_buffer *leaf) { - return 0; - struct btrfs_leaf *leaf = btrfs_buffer_leaf(eb); - struct btrfs_disk_key *key; + struct btrfs_key key; u32 nritems; - if (buffer_defrag(bh)) + if (btrfs_buffer_defrag(leaf)) return 1; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); if (nritems == 0) return 0; - key = &leaf->items[0].key; - if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) + btrfs_item_key_to_cpu(leaf, &key, 0); + if (key.type == BTRFS_DIR_ITEM_KEY) return 1; - key = &leaf->items[nritems-1].key; - if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) + + btrfs_item_key_to_cpu(leaf, &key, nritems - 1); + if (key.type == BTRFS_DIR_ITEM_KEY) return 1; if (nritems > 4) { - key = &leaf->items[nritems/2].key; - if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) + btrfs_item_key_to_cpu(leaf, &key, nritems / 2); + if (key.type == BTRFS_DIR_ITEM_KEY) return 1; } return 0; } -#endif int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int cache_only, u64 *last_ret) { - return 0; -#if 0 - struct btrfs_node *parent_node; - struct extent_buffer *cur_eb; - struct extent_buffer *tmp_eb; + struct extent_buffer *cur; + struct extent_buffer *tmp; u64 blocknr; u64 search_start = *last_ret; u64 last_block = 0; @@ -214,6 +208,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, int i; int err = 0; int parent_level; + int uptodate; + u32 blocksize; if (trans->transaction != root->fs_info->running_transaction) { printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, @@ -225,12 +221,12 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, root->fs_info->generation); WARN_ON(1); } - if (buffer_defrag_done(parent)) + if (btrfs_buffer_defrag_done(parent)) return 0; - parent_node = btrfs_buffer_node(parent); - parent_nritems = btrfs_header_nritems(&parent_node->header); - parent_level = btrfs_header_level(&parent_node->header); + parent_nritems = btrfs_header_nritems(parent); + parent_level = btrfs_header_level(parent); + blocksize = btrfs_level_size(root, parent_level - 1); start_slot = 0; end_slot = parent_nritems; @@ -240,56 +236,60 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, for (i = start_slot; i < end_slot; i++) { int close = 1; - blocknr = btrfs_node_blockptr(parent_node, i); + blocknr = btrfs_node_blockptr(parent, i); if (last_block == 0) last_block = blocknr; if (i > 0) { - other = btrfs_node_blockptr(parent_node, i - 1); - close = close_blocks(blocknr, other); + other = btrfs_node_blockptr(parent, i - 1); + close = close_blocks(blocknr, other, blocksize); } if (close && i < end_slot - 1) { - other = btrfs_node_blockptr(parent_node, i + 1); - close = close_blocks(blocknr, other); + other = btrfs_node_blockptr(parent, i + 1); + close = close_blocks(blocknr, other, blocksize); } if (close) { last_block = blocknr; continue; } - cur_bh = btrfs_find_tree_block(root, blocknr); - if (!cur_bh || !buffer_uptodate(cur_bh) || - buffer_locked(cur_bh) || - (parent_level != 1 && !buffer_defrag(cur_bh)) || - (parent_level == 1 && !should_defrag_leaf(cur_bh))) { + cur = btrfs_find_tree_block(root, blocknr, blocksize); + if (cur) + uptodate = btrfs_buffer_uptodate(cur); + else + uptodate = 0; + if (!cur || !uptodate || + (parent_level != 1 && !btrfs_buffer_defrag(cur)) || + (parent_level == 1 && !should_defrag_leaf(cur))) { if (cache_only) { - brelse(cur_bh); + free_extent_buffer(cur); continue; } - if (!cur_bh || !buffer_uptodate(cur_bh) || - buffer_locked(cur_bh)) { - brelse(cur_bh); - cur_bh = read_tree_block(root, blocknr); + if (!cur) { + cur = read_tree_block(root, blocknr, + blocksize); + } else if (!uptodate) { + btrfs_read_buffer(cur); } } if (search_start == 0) - search_start = last_block & ~((u64)65535); + search_start = last_block; - err = __btrfs_cow_block(trans, root, cur_bh, parent, i, - &tmp_bh, search_start, - min(8, end_slot - i)); + err = __btrfs_cow_block(trans, root, cur, parent, i, + &tmp, search_start, + min(16 * blocksize, + (end_slot - i) * blocksize)); if (err) { - brelse(cur_bh); + free_extent_buffer(cur); break; } - search_start = bh_blocknr(tmp_bh); + search_start = tmp->start; *last_ret = search_start; if (parent_level == 1) - clear_buffer_defrag(tmp_bh); - set_buffer_defrag_done(tmp_bh); - brelse(tmp_bh); + btrfs_clear_buffer_defrag(tmp); + btrfs_set_buffer_defrag_done(tmp); + free_extent_buffer(tmp); } return err; -#endif } /* @@ -892,22 +892,17 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - return; -#if 0 struct extent_buffer *node; - int i; u32 nritems; - u64 bytenr; u64 search; - u64 cluster_start; - int ret; - int nread = 0; + u64 lowest_read; + u64 highest_read; + u64 nread = 0; int direction = path->reada; - int level; - struct radix_tree_root found; - unsigned long gang[8]; struct extent_buffer *eb; - + u32 nr; + u32 blocksize; + u32 nscan = 0; if (level == 0) return; @@ -917,42 +912,46 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, node = path->nodes[level]; search = btrfs_node_blockptr(node, slot); - eb = btrfs_find_tree_block(root, search); + blocksize = btrfs_level_size(root, level - 1); + eb = btrfs_find_tree_block(root, search, blocksize); if (eb) { free_extent_buffer(eb); return; } - init_bit_radix(&found); + highest_read = search; + lowest_read = search; + nritems = btrfs_header_nritems(node); - level = btrfs_header_level(node) - 1; - for (i = slot; i < nritems; i++) { - bytenr = btrfs_node_blockptr(node, i); - set_radix_bit(&found, blocknr); - } - if (direction > 0) { - cluster_start = search - 4; - if (cluster_start > search) - cluster_start = 0; - } else - cluster_start = search + 4; + nr = slot; while(1) { - ret = find_first_radix_bit(&found, gang, 0, ARRAY_SIZE(gang)); - if (!ret) - break; - for (i = 0; i < ret; i++) { - blocknr = gang[i]; - clear_radix_bit(&found, blocknr); - if (path->reada == 1 && nread > 16) - continue; - if (close_blocks(cluster_start, blocknr)) { - readahead_tree_block(root, blocknr); - nread++; - cluster_start = blocknr; - } + if (direction < 0) { + if (nr == 0) + break; + nr--; + } else if (direction > 0) { + nr++; + if (nr >= nritems) + break; } + search = btrfs_node_blockptr(node, nr); + if ((search >= lowest_read && search <= highest_read) || + (search < lowest_read && lowest_read - search <= 32768) || + (search > highest_read && search - highest_read <= 32768)) { + readahead_tree_block(root, search, blocksize); + nread += blocksize; + } + nscan++; + if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32)) + break; + if(nread > (1024 * 1024) || nscan > 128) + break; + + if (search < lowest_read) + lowest_read = search; + if (search > highest_read) + highest_read = search; } -#endif } /* * look for key in the tree. path is filled in with nodes along the way diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0636f79672e9..8e606e6658aa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -678,3 +678,63 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) balance_dirty_pages_ratelimited_nr( root->fs_info->btree_inode->i_mapping, nr); } + +void btrfs_set_buffer_defrag(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); +} + +void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, + GFP_NOFS); +} + +int btrfs_buffer_defrag(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); +} + +int btrfs_buffer_defrag_done(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, + EXTENT_DEFRAG_DONE, 0); +} + +int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, + EXTENT_DEFRAG_DONE, GFP_NOFS); +} + +int btrfs_clear_buffer_defrag(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, + EXTENT_DEFRAG, GFP_NOFS); +} + +int btrfs_read_buffer(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 1); +} diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index fd4db5f810cf..190b07bc642b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -51,4 +51,11 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf); int btrfs_set_buffer_uptodate(struct extent_buffer *buf); int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf); +void btrfs_set_buffer_defrag(struct extent_buffer *buf); +void btrfs_set_buffer_defrag_done(struct extent_buffer *buf); +int btrfs_buffer_defrag(struct extent_buffer *buf); +int btrfs_buffer_defrag_done(struct extent_buffer *buf); +int btrfs_clear_buffer_defrag(struct extent_buffer *buf); +int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf); +int btrfs_read_buffer(struct extent_buffer *buf); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1be8f9f04a13..0b0c947194ae 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1173,13 +1173,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, buf->alloc_addr = (unsigned long)__builtin_return_address(0); set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - /* - set_buffer_checked(buf); - set_buffer_defrag(buf); - */ - /* FIXME!!!!!!!!!!!!!!!! - set_radix_bit(&trans->transaction->dirty_pages, buf->pages[0]->index); - */ + btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index d74a2b3e3b5d..8409b5cbeda7 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -14,6 +14,8 @@ #define EXTENT_LOCKED (1 << 3) #define EXTENT_NEW (1 << 4) #define EXTENT_DELALLOC (1 << 5) +#define EXTENT_DEFRAG (1 << 6) +#define EXTENT_DEFRAG_DONE (1 << 7) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 60f61345a8d0..87456ab74275 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -355,7 +355,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) return 0; trans = btrfs_start_transaction(root, 1); - while (0) { + while (1) { root->defrag_running = 1; ret = btrfs_defrag_leaves(trans, root, cacheonly); nr = trans->blocks_used; @@ -400,7 +400,7 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) btrfs_defrag_root(root, 1); } } - // btrfs_defrag_root(info->extent_root, 1); + btrfs_defrag_root(info->extent_root, 1); return err; } diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 3feac2f28628..d23216aa4ab5 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -116,10 +116,10 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, } WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); -#if 0 - clear_buffer_defrag(path->nodes[*level]); - clear_buffer_defrag_done(path->nodes[*level]); -#endif + + btrfs_clear_buffer_defrag(path->nodes[*level]); + btrfs_clear_buffer_defrag_done(path->nodes[*level]); + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -148,10 +148,8 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, root->defrag_level = i; return 0; } else { - /* - clear_buffer_defrag(path->nodes[*level]); - clear_buffer_defrag_done(path->nodes[*level]); - */ + btrfs_clear_buffer_defrag(path->nodes[*level]); + btrfs_clear_buffer_defrag_done(path->nodes[*level]); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; From e19caa5f0e34b571ed0c2617554af5c43cb124d1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:17:44 -0400 Subject: [PATCH 0252/2985] Btrfs: Fix allocation routines to avoid intermixing data and metadata allocations Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 38 ++++++++++++++++++++++---------------- fs/btrfs/extent_map.c | 6 +++--- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0b0c947194ae..72e6b274a450 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -165,7 +165,7 @@ static u64 find_search_start(struct btrfs_root *root, { int ret; struct btrfs_block_group_cache *cache = *cache_ret; - u64 last = max(search_start, cache->key.objectid); + u64 last; u64 start = 0; u64 end = 0; @@ -173,11 +173,14 @@ again: ret = cache_block_group(root, cache); if (ret) goto out; + last = max(search_start, cache->key.objectid); + while(1) { ret = find_first_extent_bit(&root->fs_info->free_space_cache, last, &start, &end, EXTENT_DIRTY); - if (ret) - goto out; + if (ret) { + goto new_group; + } start = max(last, start); last = end + 1; @@ -191,15 +194,13 @@ out: return search_start; new_group: - cache = btrfs_lookup_block_group(root->fs_info, - last + cache->key.offset - 1); + last = cache->key.objectid + cache->key.offset; + cache = btrfs_lookup_block_group(root->fs_info, last); if (!cache) { return search_start; } - cache = btrfs_find_block_group(root, cache, - last + cache->key.offset - 1, data, 0); + cache = btrfs_find_block_group(root, cache, last, data, 0); *cache_ret = cache; - last = min(cache->key.objectid, last); goto again; } @@ -257,12 +258,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (used < div_factor(hint->key.offset, factor)) { return hint; } - last = hint->key.offset * 3; - if (hint->key.objectid >= last) - last = max(search_start + hint->key.offset - 1, - hint->key.objectid - last); - else - last = hint->key.objectid + hint->key.offset; + last = hint->key.objectid + hint->key.offset; hint_last = last; } else { if (hint) @@ -913,7 +909,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); - btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; @@ -958,6 +953,9 @@ check_failed: continue; if (ret < 0) goto error; + + search_start = max(search_start, + block_group->key.objectid); if (!start_found) { ins->objectid = search_start; ins->offset = search_end - search_start; @@ -967,6 +965,7 @@ check_failed: ins->objectid = last_byte > search_start ? last_byte : search_start; ins->offset = search_end - ins->objectid; + BUG_ON(ins->objectid >= search_end); goto check_pending; } btrfs_item_key_to_cpu(l, &key, slot); @@ -998,7 +997,7 @@ check_failed: block_group->key.offset) { btrfs_release_path(root, path); search_start = block_group->key.objectid + - block_group->key.offset * 2; + block_group->key.offset; goto new_group; } next: @@ -1015,6 +1014,12 @@ check_pending: if (ins->objectid + num_bytes >= search_end) goto enospc; + if (!full_scan && ins->objectid + num_bytes >= block_group-> + key.objectid + block_group->key.offset) { + search_start = block_group->key.objectid + + block_group->key.offset; + goto new_group; + } if (test_range_bit(&info->extent_ins, ins->objectid, ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { search_start = ins->objectid + num_bytes; @@ -1114,6 +1119,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, EXTENT_LOCKED, GFP_NOFS); + WARN_ON(data == 1); goto update_block; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 7ef3397a266d..e241699024da 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -981,7 +981,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start, struct extent_state *state; int ret = 1; - write_lock_irq(&tree->lock); + read_lock_irq(&tree->lock); /* * this search will find all the extents that end after * our range starts. @@ -993,7 +993,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start, while(1) { state = rb_entry(node, struct extent_state, rb_node); - if (state->state & bits) { + if (state->end >= start && (state->state & bits)) { *start_ret = state->start; *end_ret = state->end; ret = 0; @@ -1004,7 +1004,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start, break; } out: - write_unlock_irq(&tree->lock); + read_unlock_irq(&tree->lock); return ret; } EXPORT_SYMBOL(find_first_extent_bit); From 4dc119046d0d8501afa4346472917fb05586ad9c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:18:14 -0400 Subject: [PATCH 0253/2985] Btrfs: Add an extent buffer LRU to reduce radix tree hits Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 - fs/btrfs/disk-io.c | 25 +++--- fs/btrfs/extent-tree.c | 1 - fs/btrfs/extent_map.c | 195 +++++++++++++++++++++++------------------ fs/btrfs/extent_map.h | 16 ++-- fs/btrfs/transaction.c | 5 +- 6 files changed, 134 insertions(+), 111 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 08ddf1873ab1..bef61ee883e1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -88,8 +88,6 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (IS_ERR(cow)) return PTR_ERR(cow); - cow->alloc_addr = (unsigned long)__builtin_return_address(0); - copy_extent_buffer(cow, buf, 0, 0, cow->len); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); @@ -151,7 +149,6 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); - (*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0); return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e606e6658aa..fd7e6c182b9d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -50,8 +50,6 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, struct extent_buffer *eb; eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, bytenr, blocksize, GFP_NOFS); - if (eb) - eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } @@ -63,7 +61,6 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, bytenr, blocksize, GFP_NOFS); - eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } @@ -234,7 +231,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); - buf->alloc_addr = (unsigned long)__builtin_return_address(0); return buf; } @@ -638,6 +634,7 @@ int close_ctree(struct btrfs_root *root) btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); + extent_map_tree_cleanup(&BTRFS_I(fs_info->btree_inode)->extent_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); kfree(fs_info->extent_root); @@ -647,20 +644,20 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->last_page->mapping->host; return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->last_page->mapping->host; return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; @@ -681,7 +678,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) void btrfs_set_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); @@ -689,7 +686,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf) void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, @@ -698,7 +695,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) int btrfs_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); @@ -706,7 +703,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf) int btrfs_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -715,7 +712,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf) int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -724,7 +721,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) int btrfs_clear_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -733,7 +730,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) int btrfs_read_buffer(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 72e6b274a450..525fa845d613 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1176,7 +1176,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); } btrfs_set_buffer_uptodate(buf); - buf->alloc_addr = (unsigned long)__builtin_return_address(0); set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); btrfs_set_buffer_defrag(buf); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index e241699024da..85b28a6a4e05 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -20,14 +21,11 @@ static struct kmem_cache *extent_map_cache; static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; -static LIST_HEAD(extent_buffers); static LIST_HEAD(buffers); static LIST_HEAD(states); -static spinlock_t extent_buffers_lock; static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; -static int nr_extent_buffers; -#define MAX_EXTENT_BUFFER_CACHE 128 +#define BUFFER_LRU_MAX 64 struct tree_entry { u64 start; @@ -47,20 +45,12 @@ void __init extent_map_init(void) extent_buffer_cache = btrfs_cache_create("extent_buffers", sizeof(struct extent_buffer), 0, NULL); - spin_lock_init(&extent_buffers_lock); } void __exit extent_map_exit(void) { - struct extent_buffer *eb; struct extent_state *state; - while (!list_empty(&extent_buffers)) { - eb = list_entry(extent_buffers.next, - struct extent_buffer, list); - list_del(&eb->list); - kmem_cache_free(extent_buffer_cache, eb); - } while (!list_empty(&states)) { state = list_entry(states.next, struct extent_state, list); printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); @@ -68,14 +58,6 @@ void __exit extent_map_exit(void) kmem_cache_free(extent_state_cache, state); } - while (!list_empty(&buffers)) { - eb = list_entry(buffers.next, - struct extent_buffer, leak_list); - printk("buffer leak start %Lu len %lu return %lX\n", eb->start, eb->len, eb->alloc_addr); - list_del(&eb->leak_list); - kmem_cache_free(extent_buffer_cache, eb); - } - if (extent_map_cache) kmem_cache_destroy(extent_map_cache); @@ -92,10 +74,25 @@ void extent_map_tree_init(struct extent_map_tree *tree, tree->state.rb_node = NULL; tree->ops = NULL; rwlock_init(&tree->lock); + spin_lock_init(&tree->lru_lock); tree->mapping = mapping; + INIT_LIST_HEAD(&tree->buffer_lru); + tree->lru_size = 0; } EXPORT_SYMBOL(extent_map_tree_init); +void extent_map_tree_cleanup(struct extent_map_tree *tree) +{ + struct extent_buffer *eb; + while(!list_empty(&tree->buffer_lru)) { + eb = list_entry(tree->buffer_lru.next, struct extent_buffer, + lru); + list_del(&eb->lru); + free_extent_buffer(eb); + } +} +EXPORT_SYMBOL(extent_map_tree_cleanup); + struct extent_map *alloc_extent_map(gfp_t mask) { struct extent_map *em; @@ -1915,59 +1912,43 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, return (em->block_start + start - em->start) >> inode->i_blkbits; } -static struct extent_buffer *__alloc_extent_buffer(gfp_t mask) +static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb) { - struct extent_buffer *eb = NULL; - - spin_lock(&extent_buffers_lock); - if (!list_empty(&extent_buffers)) { - eb = list_entry(extent_buffers.next, struct extent_buffer, - list); - list_del(&eb->list); - WARN_ON(nr_extent_buffers == 0); - nr_extent_buffers--; - } - spin_unlock(&extent_buffers_lock); - - if (eb) { - memset(eb, 0, sizeof(*eb)); - } else { - eb = kmem_cache_zalloc(extent_buffer_cache, mask); - } - spin_lock(&extent_buffers_lock); - list_add(&eb->leak_list, &buffers); - spin_unlock(&extent_buffers_lock); - - return eb; + if (list_empty(&eb->lru)) { + extent_buffer_get(eb); + list_add(&eb->lru, &tree->buffer_lru); + tree->lru_size++; + if (tree->lru_size >= BUFFER_LRU_MAX) { + struct extent_buffer *rm; + rm = list_entry(tree->buffer_lru.prev, + struct extent_buffer, lru); + tree->lru_size--; + list_del(&rm->lru); + free_extent_buffer(rm); + } + } else + list_move(&eb->lru, &tree->buffer_lru); + return 0; } - -static void __free_extent_buffer(struct extent_buffer *eb) +static struct extent_buffer *find_lru(struct extent_map_tree *tree, + u64 start, unsigned long len) { + struct list_head *lru = &tree->buffer_lru; + struct list_head *cur = lru->next; + struct extent_buffer *eb; - spin_lock(&extent_buffers_lock); - list_del_init(&eb->leak_list); - spin_unlock(&extent_buffers_lock); + if (list_empty(lru)) + return NULL; - if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) { - kmem_cache_free(extent_buffer_cache, eb); - } else { - spin_lock(&extent_buffers_lock); - list_add(&eb->list, &extent_buffers); - nr_extent_buffers++; - spin_unlock(&extent_buffers_lock); - } -} - -static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i) -{ - struct page *p; - - if (i < EXTENT_INLINE_PAGES) - return eb->pages[i]; - i += eb->start >> PAGE_CACHE_SHIFT; - p = find_get_page(eb->pages[0]->mapping, i); - page_cache_release(p); - return p; + do { + eb = list_entry(cur, struct extent_buffer, lru); + if (eb->start == start && eb->len == len) { + extent_buffer_get(eb); + return eb; + } + cur = cur->next; + } while (cur != lru); + return NULL; } static inline unsigned long num_extent_pages(u64 start, u64 len) @@ -1975,6 +1956,55 @@ static inline unsigned long num_extent_pages(u64 start, u64 len) return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT); } + +static inline struct page *extent_buffer_page(struct extent_buffer *eb, + unsigned long i) +{ + struct page *p; + + if (i == 0) + return eb->last_page; + i += eb->start >> PAGE_CACHE_SHIFT; + p = find_get_page(eb->last_page->mapping, i); + page_cache_release(p); + return p; +} + +static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree, + u64 start, + unsigned long len, + gfp_t mask) +{ + struct extent_buffer *eb = NULL; + + spin_lock(&tree->lru_lock); + eb = find_lru(tree, start, len); + if (eb) + goto lru_add; + spin_unlock(&tree->lru_lock); + + if (eb) { + memset(eb, 0, sizeof(*eb)); + } else { + eb = kmem_cache_zalloc(extent_buffer_cache, mask); + } + INIT_LIST_HEAD(&eb->lru); + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + spin_lock(&tree->lru_lock); +lru_add: + add_lru(tree, eb); + spin_unlock(&tree->lru_lock); + return eb; +} + +static void __free_extent_buffer(struct extent_buffer *eb) +{ + kmem_cache_free(extent_buffer_cache, eb); +} + struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask) @@ -1987,14 +2017,12 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, struct address_space *mapping = tree->mapping; int uptodate = 0; - eb = __alloc_extent_buffer(mask); + eb = __alloc_extent_buffer(tree, start, len, mask); if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = (unsigned long)__builtin_return_address(0); - eb->start = start; - eb->len = len; - atomic_set(&eb->refs, 1); + if (eb->flags & EXTENT_BUFFER_FILLED) + return eb; for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); @@ -2008,14 +2036,15 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i < EXTENT_INLINE_PAGES) - eb->pages[i] = p; + if (i == 0) + eb->last_page = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); } if (uptodate) eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; return eb; fail: free_extent_buffer(eb); @@ -2035,14 +2064,12 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, struct address_space *mapping = tree->mapping; int uptodate = 1; - eb = __alloc_extent_buffer(mask); + eb = __alloc_extent_buffer(tree, start, len, mask); if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = (unsigned long)__builtin_return_address(0); - eb->start = start; - eb->len = len; - atomic_set(&eb->refs, 1); + if (eb->flags & EXTENT_BUFFER_FILLED) + return eb; for (i = 0; i < num_pages; i++, index++) { p = find_lock_page(mapping, index); @@ -2055,14 +2082,15 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i < EXTENT_INLINE_PAGES) - eb->pages[i] = p; + if (i == 0) + eb->last_page = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); } if (uptodate) eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; return eb; fail: free_extent_buffer(eb); @@ -2231,7 +2259,8 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, ret = -EIO; } } - eb->flags |= EXTENT_UPTODATE; + if (!ret) + eb->flags |= EXTENT_UPTODATE; return ret; } EXPORT_SYMBOL(read_extent_buffer_pages); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 8409b5cbeda7..52a8b9394fc6 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -16,6 +16,7 @@ #define EXTENT_DELALLOC (1 << 5) #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) +#define EXTENT_BUFFER_FILLED (1 << 8) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) @@ -33,6 +34,9 @@ struct extent_map_tree { struct address_space *mapping; rwlock_t lock; struct extent_map_ops *ops; + spinlock_t lru_lock; + struct list_head buffer_lru; + int lru_size; }; /* note, this must start with the same fields as fs/extent_map.c:tree_entry */ @@ -64,20 +68,17 @@ struct extent_state { struct list_head list; }; -#define EXTENT_INLINE_PAGES 32 struct extent_buffer { u64 start; unsigned long len; - atomic_t refs; - int flags; - struct list_head list; - struct list_head leak_list; - unsigned long alloc_addr; char *map_token; char *kaddr; unsigned long map_start; unsigned long map_len; - struct page *pages[EXTENT_INLINE_PAGES]; + struct page *last_page; + struct list_head lru; + atomic_t refs; + int flags; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, @@ -88,6 +89,7 @@ typedef struct extent_map *(get_extent_t)(struct inode *inode, void extent_map_tree_init(struct extent_map_tree *tree, struct address_space *mapping, gfp_t mask); +void extent_map_tree_cleanup(struct extent_map_tree *tree); struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 start, u64 end); int add_extent_mapping(struct extent_map_tree *tree, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 87456ab74275..67e4aca36a62 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -443,8 +443,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, BUG_ON(ret); mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root, nr); - schedule(); - + cond_resched(); mutex_lock(&tree_root->fs_info->fs_mutex); } BUG_ON(ret); @@ -471,7 +470,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root, nr); - schedule(); + cond_resched(); } return ret; } From 3326d1b07c0cb6a2ff5b835b7a2cffa54124d074 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:18:25 -0400 Subject: [PATCH 0254/2985] Btrfs: Allow tails larger than one page Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 24 ++++++-- fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 150 ++++++++++++++++++++++++++++++++++++----------- fs/btrfs/inode.c | 32 ++++++---- fs/btrfs/super.c | 2 - 5 files changed, 155 insertions(+), 55 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index bef61ee883e1..f691542dfa15 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1766,8 +1766,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root /* first try to make some room by pushing left and right */ wret = push_leaf_left(trans, root, path, data_size); - if (wret < 0) + if (wret < 0) { return wret; + } if (wret) { wret = push_leaf_right(trans, root, path, data_size); if (wret < 0) @@ -1777,8 +1778,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root /* did the pushes work? */ if (btrfs_leaf_free_space(root, l) >= - sizeof(struct btrfs_item) + data_size) + sizeof(struct btrfs_item) + data_size) { return 0; + } if (!path->nodes[1]) { ret = insert_new_root(trans, root, path, 1); @@ -1822,7 +1824,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } mid = slot; - double_split = 1; + if (mid != nritems && + leaf_space_used(l, mid, nritems - mid) + + space_needed > BTRFS_LEAF_DATA_SIZE(root)) { + double_split = 1; + } } } else { if (leaf_space_used(l, 0, mid + 1) + space_needed > @@ -1910,8 +1916,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->slots[0] < 0); - if (!double_split) + if (!double_split) { return ret; + } right = btrfs_alloc_free_block(trans, root, root->leafsize, l->start, 0); @@ -2048,7 +2055,11 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, old_data = btrfs_item_end_nr(leaf, slot); BUG_ON(slot < 0); - BUG_ON(slot >= nritems); + if (slot >= nritems) { + btrfs_print_leaf(root, leaf); + printk("slot %d too large, nritems %d\n", slot, nritems); + BUG_ON(1); + } /* * item0..itemN ... dataN.offset..dataN.size .. data0.size @@ -2132,6 +2143,9 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, if (btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item) + data_size) { + btrfs_print_leaf(root, leaf); + printk("not enough freespace need %u have %d\n", + data_size, btrfs_leaf_free_space(root, leaf)); BUG(); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 54c754dd9a14..18994c53106c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1137,7 +1137,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end); extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 *hint_block); + u64 start, u64 end, u64 inline_end, u64 *hint_block); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 844d8807e44a..1af2b6534dad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -82,8 +82,9 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) static int insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 offset, ssize_t size, - struct page *page, size_t page_offset) + u64 offset, size_t size, + struct page **pages, size_t page_offset, + int num_pages) { struct btrfs_key key; struct btrfs_path *path; @@ -91,9 +92,12 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, char *kaddr; unsigned long ptr; struct btrfs_file_extent_item *ei; + struct page *page; u32 datasize; int err = 0; int ret; + int i; + ssize_t cur_size; path = btrfs_alloc_path(); if (!path) @@ -104,25 +108,97 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = offset; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - BUG_ON(size >= PAGE_CACHE_SIZE); - datasize = btrfs_file_extent_calc_inline_size(size); + datasize = btrfs_file_extent_calc_inline_size(offset + size); - ret = btrfs_insert_empty_item(trans, root, path, &key, - datasize); - if (ret) { + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) { err = ret; goto fail; } - leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(leaf, ei, trans->transid); - btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); - ptr = btrfs_file_extent_inline_start(ei); + if (ret == 1) { + path->slots[0]--; + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); - kaddr = kmap_atomic(page, KM_USER1); - write_extent_buffer(leaf, kaddr + page_offset, ptr, size); - kunmap_atomic(kaddr, KM_USER1); + if (btrfs_file_extent_type(leaf, ei) != + BTRFS_FILE_EXTENT_INLINE) { + goto insert; + } + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + ret = 0; + } + if (ret == 0) { + u32 found_size; + u64 found_start; + + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, ei) != + BTRFS_FILE_EXTENT_INLINE) { + err = ret; + btrfs_print_leaf(root, leaf); + printk("found wasn't inline offset %Lu inode %lu\n", + offset, inode->i_ino); + goto fail; + } + found_start = key.offset; + found_size = btrfs_file_extent_inline_len(leaf, + btrfs_item_nr(leaf, path->slots[0])); + + if (found_size < offset + size) { + btrfs_release_path(root, path); + ret = btrfs_search_slot(trans, root, &key, path, + offset + size - found_size - + found_start, 1); + BUG_ON(ret != 0); + ret = btrfs_extend_item(trans, root, path, + offset + size - found_size - + found_start); + if (ret) { + err = ret; + goto fail; + } + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + } + } else { +insert: + btrfs_release_path(root, path); + ret = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + if (ret) { + err = ret; + printk("got bad ret %d\n", ret); + goto fail; + } + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); + } + ptr = btrfs_file_extent_inline_start(ei) + offset; + + cur_size = size; + i = 0; + while (size > 0) { + page = pages[i]; + kaddr = kmap_atomic(page, KM_USER0); + cur_size = min(PAGE_CACHE_SIZE - page_offset, size); + write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size); + kunmap_atomic(kaddr, KM_USER0); + page_offset = 0; + ptr += cur_size; + size -= cur_size; + if (i >= num_pages) { + printk("i %d num_pages %d\n", i, num_pages); + } + i++; + } btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); @@ -193,6 +269,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, + last_pos_in_file, &hint_byte); if (err) goto failed; @@ -210,11 +287,12 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, * either allocate an extent for the new bytes or setup the key * to show we are doing inline data in the extent */ - inline_size = end_pos - start_pos; - if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || - inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size >= PAGE_CACHE_SIZE) { + inline_size = end_pos; + if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || + inline_size > 16384 || + inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; + for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); @@ -225,22 +303,18 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, set_extent_delalloc(em_tree, start_pos, end_of_last_block, GFP_NOFS); } else { - struct page *p = pages[0]; + u64 aligned_end; /* step one, delete the existing extents in this range */ + aligned_end = (pos + write_bytes + root->sectorsize - 1) & + ~((u64)root->sectorsize - 1); err = btrfs_drop_extents(trans, root, inode, start_pos, - (pos + write_bytes + root->sectorsize -1) & - ~((u64)root->sectorsize - 1), &hint_byte); + aligned_end, end_pos, &hint_byte); if (err) goto failed; - err = insert_inline_extent(trans, root, inode, start_pos, - end_pos - start_pos, p, 0); + end_pos - start_pos, pages, 0, + num_pages); BUG_ON(err); - em->start = start_pos; - em->end = end_pos - 1; - em->block_start = EXTENT_MAP_INLINE; - em->block_end = EXTENT_MAP_INLINE; - add_extent_mapping(em_tree, em); } if (end_pos > isize) { i_size_write(inode, end_pos); @@ -285,7 +359,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) */ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 *hint_byte) + u64 start, u64 end, u64 inline_end, u64 *hint_byte) { int ret; struct btrfs_key key; @@ -401,8 +475,8 @@ next_slot: BUG_ON(ret); } } - WARN_ON(found_inline); - bookend = 1; + if (!found_inline) + bookend = 1; } /* truncate existing extent */ if (start > key.offset) { @@ -425,8 +499,14 @@ next_slot: btrfs_set_file_extent_num_bytes(leaf, extent, new_num); btrfs_mark_buffer_dirty(leaf); - } else { - WARN_ON(1); + } else if (end > extent_end && + key.offset < inline_end && + inline_end < extent_end) { + u32 new_size; + new_size = btrfs_file_extent_calc_inline_size( + inline_end - key.offset); + btrfs_truncate_item(trans, root, path, + new_size); } } /* delete the entire extent */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d6b3a55ed8e0..84f496c838c9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -87,7 +87,7 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) BUG_ON(!trans); num_bytes = (end - start + blocksize) & ~(blocksize - 1); ret = btrfs_drop_extents(trans, root, inode, - start, start + num_bytes, &alloc_hint); + start, start + num_bytes, start, &alloc_hint); ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_bytes, 0, alloc_hint, (u64)-1, &ins, 1); @@ -776,7 +776,8 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); err = btrfs_drop_extents(trans, root, inode, - pos, pos + hole_size, &alloc_hint); + pos, pos + hole_size, pos, + &alloc_hint); err = btrfs_insert_file_extent(trans, root, inode->i_ino, pos, 0, 0, hole_size); @@ -1581,7 +1582,9 @@ again: } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { unsigned long ptr; char *map; - u32 size; + size_t size; + size_t extent_offset; + size_t copy_size; size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); @@ -1600,26 +1603,31 @@ again: goto not_found_em; } + extent_offset = (page->index << PAGE_CACHE_SHIFT) - + extent_start; + ptr = btrfs_file_extent_inline_start(item) + extent_offset; + map = kmap(page); + copy_size = min(PAGE_CACHE_SIZE - page_offset, + size - extent_offset); + em->block_start = EXTENT_MAP_INLINE; em->block_end = EXTENT_MAP_INLINE; - em->start = extent_start; - em->end = extent_end; + em->start = extent_start + extent_offset; + em->end = (em->start + copy_size -1) | + ((u64)root->sectorsize -1); if (!page) { goto insert; } - ptr = btrfs_file_extent_inline_start(item); - map = kmap(page); - read_extent_buffer(leaf, map + page_offset, ptr, size); + read_extent_buffer(leaf, map + page_offset, ptr, copy_size); /* - memset(map + page_offset + size, 0, - root->sectorsize - (page_offset + size)); + memset(map + page_offset + copy_size, 0, + PAGE_CACHE_SIZE - copy_size - page_offset); */ flush_dcache_page(page); kunmap(page); - set_extent_uptodate(em_tree, extent_start, - extent_end, GFP_NOFS); + set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS); goto insert; } else { printk("unkknown found_type %d\n", found_type); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5c4370f3a5b8..f94aa1f97a0a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -280,7 +280,6 @@ error_s: error_bdev: close_bdev_excl(bdev); error: -printk("get_sb failed\n"); return error; } /* end copy & paste */ @@ -295,7 +294,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, btrfs_fill_super, mnt, subvol_name ? subvol_name : "default"); -printk("btrfs_get_sb returns %d\n", ret); return ret; } From 810191ff3087e8143b41a944fcf4fd8c693f00e3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:18:55 -0400 Subject: [PATCH 0255/2985] Btrfs: extent_map optimizations to cut down on CPU usage Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 1 + fs/btrfs/ctree.h | 74 ++++++++----------------------------------- fs/btrfs/disk-io.c | 20 ++++++------ fs/btrfs/extent_map.c | 33 ++++++++----------- fs/btrfs/extent_map.h | 2 +- fs/btrfs/file.c | 1 - 6 files changed, 40 insertions(+), 91 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f691542dfa15..54a5d006c562 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -463,6 +463,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { + return 0; #if 0 struct extent_buffer *buf = path->nodes[level]; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 18994c53106c..a942a2427228 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -19,6 +19,8 @@ #ifndef __BTRFS__ #define __BTRFS__ +#include +#include #include #include #include @@ -499,70 +501,22 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ { \ - int err; \ - char *map_token; \ - char *kaddr; \ - unsigned long map_start; \ - unsigned long map_len; \ + char *kaddr = kmap_atomic(eb->first_page, KM_USER0); \ unsigned long offset = offsetof(type, member); \ - int unmap_on_exit = (eb->map_token == NULL); \ - if (eb->map_token && offset >= eb->map_start && \ - offset + sizeof(((type *)0)->member) <= eb->map_start + \ - eb->map_len) { \ - kaddr = eb->kaddr; \ - map_start = eb->map_start; \ - err = 0; \ - } else { \ - err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ - &map_token, &kaddr, \ - &map_start, &map_len, KM_USER1); \ - } \ - if (!err) { \ - __le##bits *tmp = (__le##bits *)(kaddr + offset - \ - map_start); \ - u##bits res = le##bits##_to_cpu(*tmp); \ - if (unmap_on_exit) \ - unmap_extent_buffer(eb, map_token, KM_USER1); \ - return res; \ - } else { \ - __le##bits res; \ - read_eb_member(eb, NULL, type, member, &res); \ - return le##bits##_to_cpu(res); \ - } \ + u##bits res; \ + __le##bits *tmp = (__le##bits *)(kaddr + offset); \ + res = le##bits##_to_cpu(*tmp); \ + kunmap_atomic(kaddr, KM_USER0); \ + return res; \ } \ static inline void btrfs_set_##name(struct extent_buffer *eb, \ u##bits val) \ { \ - int err; \ - char *map_token; \ - char *kaddr; \ - unsigned long map_start; \ - unsigned long map_len; \ + char *kaddr = kmap_atomic(eb->first_page, KM_USER0); \ unsigned long offset = offsetof(type, member); \ - int unmap_on_exit = (eb->map_token == NULL); \ - if (eb->map_token && offset >= eb->map_start && \ - offset + sizeof(((type *)0)->member) <= eb->map_start + \ - eb->map_len) { \ - kaddr = eb->kaddr; \ - map_start = eb->map_start; \ - err = 0; \ - } else { \ - err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ - &map_token, &kaddr, \ - &map_start, &map_len, KM_USER1); \ - } \ - if (!err) { \ - __le##bits *tmp = (__le##bits *)(kaddr + offset - \ - map_start); \ - *tmp = cpu_to_le##bits(val); \ - if (unmap_on_exit) \ - unmap_extent_buffer(eb, map_token, KM_USER1); \ - } else { \ - val = cpu_to_le##bits(val); \ - write_eb_member(eb, NULL, type, member, &val); \ - } \ + __le##bits *tmp = (__le##bits *)(kaddr + offset); \ + *tmp = cpu_to_le##bits(val); \ + kunmap_atomic(kaddr, KM_USER0); \ } #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ @@ -659,13 +613,13 @@ static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); } -static unsigned long btrfs_node_key_ptr_offset(int nr) +static inline unsigned long btrfs_node_key_ptr_offset(int nr) { return offsetof(struct btrfs_node, ptrs) + sizeof(struct btrfs_key_ptr) * nr; } -static void btrfs_node_key(struct extent_buffer *eb, +static inline void btrfs_node_key(struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { unsigned long ptr; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fd7e6c182b9d..16f0260fca66 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -644,20 +644,20 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->last_page->mapping->host; + struct inode *btree_inode = buf->first_page->mapping->host; return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->last_page->mapping->host; + struct inode *btree_inode = buf->first_page->mapping->host; return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; @@ -678,7 +678,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) void btrfs_set_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); @@ -686,7 +686,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf) void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, @@ -695,7 +695,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) int btrfs_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); @@ -703,7 +703,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf) int btrfs_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -712,7 +712,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf) int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -721,7 +721,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) int btrfs_clear_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -730,7 +730,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) int btrfs_read_buffer(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 85b28a6a4e05..f8aaba8a30a2 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1963,9 +1963,9 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, struct page *p; if (i == 0) - return eb->last_page; + return eb->first_page; i += eb->start >> PAGE_CACHE_SHIFT; - p = find_get_page(eb->last_page->mapping, i); + p = find_get_page(eb->first_page->mapping, i); page_cache_release(p); return p; } @@ -2037,7 +2037,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, } set_page_extent_mapped(p); if (i == 0) - eb->last_page = p; + eb->first_page = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2083,7 +2083,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, } set_page_extent_mapped(p); if (i == 0) - eb->last_page = p; + eb->first_page = p; if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2169,7 +2169,15 @@ EXPORT_SYMBOL(wait_on_extent_buffer_writeback); int set_extent_buffer_dirty(struct extent_map_tree *tree, struct extent_buffer *eb) { - return set_range_dirty(tree, eb->start, eb->start + eb->len - 1); + unsigned long i; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + for (i = 0; i < num_pages; i++) { + __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); + } + return set_extent_dirty(tree, eb->start, + eb->start + eb->len - 1, GFP_NOFS); } EXPORT_SYMBOL(set_extent_buffer_dirty); @@ -2317,16 +2325,11 @@ static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start, size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; unsigned long end_i = (start_offset + start + min_len) >> - PAGE_CACHE_SHIFT; + PAGE_CACHE_SHIFT; if (i != end_i) return -EINVAL; - if (start >= eb->len) { - printk("bad start in map eb start %Lu len %lu caller start %lu min %lu\n", eb->start, eb->len, start, min_len); - WARN_ON(1); - } - if (i == 0) { offset = start_offset; *map_start = 0; @@ -2353,14 +2356,6 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, int err; int save = 0; if (eb->map_token) { - if (start >= eb->map_start && - start + min_len <= eb->map_start + eb->map_len) { - *token = eb->map_token; - *map = eb->kaddr; - *map_start = eb->map_start; - *map_len = eb->map_len; - return 0; - } unmap_extent_buffer(eb, eb->map_token, km); eb->map_token = NULL; save = 1; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 52a8b9394fc6..f1dc28d260eb 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -75,7 +75,7 @@ struct extent_buffer { char *kaddr; unsigned long map_start; unsigned long map_len; - struct page *last_page; + struct page *first_page; struct list_head lru; atomic_t refs; int flags; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1af2b6534dad..fe28404ae7f4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -289,7 +289,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > 16384 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; From 0f82731fc56448c2733f58e1f5db6c2cbfc90652 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:18:56 -0400 Subject: [PATCH 0256/2985] Breakout BTRFS_SETGET_FUNCS into a separate C file, the inlines were too big. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 1 - fs/btrfs/ctree.h | 74 ++----------------------------- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/struct-funcs.c | 97 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/tree-defrag.c | 2 +- 6 files changed, 104 insertions(+), 74 deletions(-) create mode 100644 fs/btrfs/struct-funcs.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a3b51085d7f0..551743be5f03 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,7 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o sysfs.o + extent_map.o sysfs.o struct-funcs.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 54a5d006c562..0c6ed17ac1bc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a942a2427228..d1c6f023a302 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -426,77 +426,11 @@ struct btrfs_root { offsetof(type, member), \ sizeof(((type *)0)->member))) +#ifndef BTRFS_SETGET_FUNCS #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ -static inline u##bits btrfs_##name(struct extent_buffer *eb, \ - type *s) \ -{ \ - int err; \ - char *map_token; \ - char *kaddr; \ - int unmap_on_exit = (eb->map_token == NULL); \ - unsigned long map_start; \ - unsigned long map_len; \ - unsigned long offset = (unsigned long)s + \ - offsetof(type, member); \ - if (eb->map_token && offset >= eb->map_start && \ - offset + sizeof(((type *)0)->member) <= eb->map_start + \ - eb->map_len) { \ - kaddr = eb->kaddr; \ - map_start = eb->map_start; \ - err = 0; \ - } else { \ - err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ - &map_token, &kaddr, \ - &map_start, &map_len, KM_USER1); \ - } \ - if (!err) { \ - __le##bits *tmp = (__le##bits *)(kaddr + offset - \ - map_start); \ - u##bits res = le##bits##_to_cpu(*tmp); \ - if (unmap_on_exit) \ - unmap_extent_buffer(eb, map_token, KM_USER1); \ - return res; \ - } else { \ - __le##bits res; \ - read_eb_member(eb, s, type, member, &res); \ - return le##bits##_to_cpu(res); \ - } \ -} \ -static inline void btrfs_set_##name(struct extent_buffer *eb, \ - type *s, u##bits val) \ -{ \ - int err; \ - char *map_token; \ - char *kaddr; \ - unsigned long map_start; \ - unsigned long map_len; \ - int unmap_on_exit = (eb->map_token == NULL); \ - unsigned long offset = (unsigned long)s + \ - offsetof(type, member); \ - if (eb->map_token && offset >= eb->map_start && \ - offset + sizeof(((type *)0)->member) <= eb->map_start + \ - eb->map_len) { \ - kaddr = eb->kaddr; \ - map_start = eb->map_start; \ - err = 0; \ - } else { \ - err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ - &map_token, &kaddr, \ - &map_start, &map_len, KM_USER1); \ - } \ - if (!err) { \ - __le##bits *tmp = (__le##bits *)(kaddr + offset - \ - map_start); \ - *tmp = cpu_to_le##bits(val); \ - if (unmap_on_exit) \ - unmap_extent_buffer(eb, map_token, KM_USER1); \ - } else { \ - val = cpu_to_le##bits(val); \ - write_eb_member(eb, s, type, member, &val); \ - } \ -} +u##bits btrfs_##name(struct extent_buffer *eb, type *s); \ +void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); +#endif #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 525fa845d613..e4e68ea96483 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1443,7 +1443,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { free_extent_buffer(path->nodes[i]); - path->nodes[i] = 0; + path->nodes[i] = NULL; } } out: diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c new file mode 100644 index 000000000000..cf68fcf9b552 --- /dev/null +++ b/fs/btrfs/struct-funcs.c @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ +u##bits btrfs_##name(struct extent_buffer *eb, \ + type *s) \ +{ \ + unsigned long offset = (unsigned long)s + \ + offsetof(type, member); \ + __le##bits *tmp; \ + /* ugly, but we want the fast path here */ \ + if (eb->map_token && offset >= eb->map_start && \ + offset + sizeof(((type *)0)->member) <= eb->map_start + \ + eb->map_len) { \ + tmp = (__le##bits *)(eb->kaddr + offset - \ + eb->map_start); \ + return le##bits##_to_cpu(*tmp); \ + } \ + { \ + int err; \ + char *map_token; \ + char *kaddr; \ + int unmap_on_exit = (eb->map_token == NULL); \ + unsigned long map_start; \ + unsigned long map_len; \ + __le##bits res; \ + err = map_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &map_token, &kaddr, \ + &map_start, &map_len, KM_USER1); \ + if (err) { \ + read_eb_member(eb, s, type, member, &res); \ + return le##bits##_to_cpu(res); \ + } \ + tmp = (__le##bits *)(kaddr + offset - map_start); \ + res = le##bits##_to_cpu(*tmp); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ + return res; \ + } \ +} \ +void btrfs_set_##name(struct extent_buffer *eb, \ + type *s, u##bits val) \ +{ \ + unsigned long offset = (unsigned long)s + \ + offsetof(type, member); \ + __le##bits *tmp; \ + /* ugly, but we want the fast path here */ \ + if (eb->map_token && offset >= eb->map_start && \ + offset + sizeof(((type *)0)->member) <= eb->map_start + \ + eb->map_len) { \ + tmp = (__le##bits *)(eb->kaddr + offset - \ + eb->map_start); \ + *tmp = cpu_to_le##bits(val); \ + return; \ + } \ + { \ + int err; \ + char *map_token; \ + char *kaddr; \ + int unmap_on_exit = (eb->map_token == NULL); \ + unsigned long map_start; \ + unsigned long map_len; \ + err = map_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &map_token, &kaddr, \ + &map_start, &map_len, KM_USER1); \ + if (err) { \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, s, type, member, &val); \ + return; \ + } \ + tmp = (__le##bits *)(kaddr + offset - map_start); \ + *tmp = cpu_to_le##bits(val); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ + } \ +} + +#include "ctree.h" + diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index d23216aa4ab5..0d1a1be0995c 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -239,7 +239,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { free_extent_buffer(path->nodes[i]); - path->nodes[i] = 0; + path->nodes[i] = NULL; } } out: From 19c00ddcc31ad4bdfb86b57085e06d6135b9b1d7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:19:22 -0400 Subject: [PATCH 0257/2985] Btrfs: Add back metadata checksumming Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 + fs/btrfs/disk-io.c | 213 +++++++++++++++++++++++++++++------------ fs/btrfs/extent-tree.c | 4 + fs/btrfs/extent_map.c | 83 +++++++++++++--- fs/btrfs/extent_map.h | 18 +++- fs/btrfs/inode.c | 4 - 6 files changed, 244 insertions(+), 80 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d1c6f023a302..1af0a966f85b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -310,9 +310,11 @@ struct btrfs_fs_info { struct extent_buffer *sb_buffer; struct super_block *sb; struct inode *btree_inode; + spinlock_t hash_lock; struct mutex trans_mutex; struct mutex fs_mutex; struct list_head trans_list; + struct list_head hashers; struct list_head dead_roots; struct delayed_work trans_work; struct kobject super_kobj; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 16f0260fca66..1176e5420c56 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -60,7 +60,7 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct extent_buffer *eb; eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - bytenr, blocksize, GFP_NOFS); + bytenr, blocksize, NULL, GFP_NOFS); return eb; } @@ -99,10 +99,102 @@ out: return em; } +u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) +{ + return crc32c(seed, data, len); +} + +void btrfs_csum_final(u32 crc, char *result) +{ + *(__le32 *)result = ~cpu_to_le32(crc); +} + +static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, + int verify) +{ + char result[BTRFS_CRC32_SIZE]; + unsigned long len; + unsigned long cur_len; + unsigned long offset = BTRFS_CSUM_SIZE; + char *map_token = NULL; + char *kaddr; + unsigned long map_start; + unsigned long map_len; + int err; + u32 crc = ~(u32)0; + + len = buf->len - offset; + while(len > 0) { + err = map_private_extent_buffer(buf, offset, 32, + &map_token, &kaddr, + &map_start, &map_len, KM_USER0); + if (err) { + printk("failed to map extent buffer! %lu\n", + offset); + return 1; + } + cur_len = min(len, map_len - (offset - map_start)); + crc = btrfs_csum_data(root, kaddr + offset - map_start, + crc, cur_len); + len -= cur_len; + offset += cur_len; + unmap_extent_buffer(buf, map_token, KM_USER0); + } + btrfs_csum_final(crc, result); + + if (verify) { + if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { + printk("btrfs: %s checksum verify failed on %llu\n", + root->fs_info->sb->s_id, + buf->start); + return 1; + } + } else { + write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE); + } + return 0; +} + + +int csum_dirty_buffer(struct btrfs_root *root, struct page *page) +{ + struct extent_map_tree *tree; + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 found_start; + int found_level; + unsigned long len; + struct extent_buffer *eb; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + if (!page->private) + goto out; + len = page->private >> 2; + if (len == 0) { + WARN_ON(1); + } + eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); + read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1); + found_start = btrfs_header_bytenr(eb); + if (found_start != start) { + printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", + start, found_start, len); + } + found_level = btrfs_header_level(eb); + csum_tree_block(root, eb, 0); + free_extent_buffer(eb); +out: + return 0; +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_map_tree *tree; + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; tree = &BTRFS_I(page->mapping->host)->extent_tree; + + csum_dirty_buffer(root, page); return extent_write_full_page(tree, page, btree_get_extent, wbc); } int btree_readpage(struct file *file, struct page *page) @@ -117,7 +209,6 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) struct extent_map_tree *tree; int ret; - BUG_ON(page->private != 1); tree = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(tree, page); if (ret == 1) { @@ -136,46 +227,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) btree_releasepage(page, GFP_NOFS); } -int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, - char *result) -{ - return 0; -#if 0 - u32 crc; - crc = crc32c(0, data, len); - memcpy(result, &crc, BTRFS_CRC32_SIZE); - return 0; -#endif -} - -#if 0 -static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, - int verify) -{ - return 0; - char result[BTRFS_CRC32_SIZE]; - int ret; - struct btrfs_node *node; - - ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, - bh->b_size - BTRFS_CSUM_SIZE, result); - if (ret) - return ret; - if (verify) { - if (memcmp(bh->b_data, result, BTRFS_CRC32_SIZE)) { - printk("btrfs: %s checksum verify failed on %llu\n", - root->fs_info->sb->s_id, - (unsigned long long)bh_blocknr(bh)); - return 1; - } - } else { - node = btrfs_buffer_node(bh); - memcpy(node->header.csum, result, BTRFS_CRC32_SIZE); - } - return 0; -} -#endif - #if 0 static int btree_writepage(struct page *page, struct writeback_control *wbc) { @@ -215,7 +266,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 0); + buf, 0, 0); free_extent_buffer(buf); return ret; } @@ -225,12 +276,29 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_map_tree *extent_tree; + int ret; + + extent_tree = &BTRFS_I(btree_inode)->extent_tree; buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 1); + buf, 0, 1); + if (buf->flags & EXTENT_CSUM) { + return buf; + } + if (test_range_bit(extent_tree, buf->start, buf->start + buf->len - 1, + EXTENT_CSUM, 1)) { + buf->flags |= EXTENT_CSUM; + return buf; + } + ret = csum_tree_block(root, buf, 1); + set_extent_bits(extent_tree, buf->start, + buf->start + buf->len - 1, + EXTENT_CSUM, GFP_NOFS); + buf->flags |= EXTENT_CSUM; return buf; } @@ -251,13 +319,6 @@ int wait_on_tree_block_writeback(struct btrfs_root *root, return 0; } -int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); - return 0; -} - static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, @@ -416,7 +477,24 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, return root; } +#if 0 +static int add_hasher(struct btrfs_fs_info *info, char *type) { + struct btrfs_hasher *hasher; + hasher = kmalloc(sizeof(*hasher), GFP_NOFS); + if (!hasher) + return -ENOMEM; + hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC); + if (!hasher->hash_tfm) { + kfree(hasher); + return -EINVAL; + } + spin_lock(&info->hash_lock); + list_add(&hasher->list, &info->hashers); + spin_unlock(&info->hash_lock); + return 0; +} +#endif struct btrfs_root *open_ctree(struct super_block *sb) { u32 sectorsize; @@ -440,6 +518,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); + INIT_LIST_HEAD(&fs_info->hashers); + spin_lock_init(&fs_info->hash_lock); + memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); init_completion(&fs_info->kobj_unregister); sb_set_blocksize(sb, 4096); @@ -479,6 +560,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); +#if 0 + ret = add_hasher(fs_info, "crc32c"); + if (ret) { + printk("btrfs: failed hash setup, modprobe cryptomgr?\n"); + err = -ENOMEM; + goto fail_iput; + } +#endif __setup_root(512, 512, 512, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); @@ -509,25 +598,21 @@ struct btrfs_root *open_ctree(struct super_block *sb) i_size_write(fs_info->btree_inode, btrfs_super_total_bytes(disk_super)); - if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { printk("btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } + blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), blocksize); if (!tree_root->node) goto fail_sb_buffer; -#if 0 - btrfs_print_leaf(tree_root, tree_root->node); - err = -EIO; - goto fail_tree_root; -#endif mutex_lock(&fs_info->fs_mutex); ret = find_and_setup_root(tree_root, fs_info, @@ -634,9 +719,19 @@ int close_ctree(struct btrfs_root *root) btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); - extent_map_tree_cleanup(&BTRFS_I(fs_info->btree_inode)->extent_tree); + extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); +#if 0 + while(!list_empty(&fs_info->hashers)) { + struct btrfs_hasher *hasher; + hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher, + hashers); + list_del(&hasher->hashers); + crypto_free_hash(&fs_info->hash_tfm); + kfree(hasher); + } +#endif kfree(fs_info->extent_root); kfree(fs_info->tree_root); return 0; @@ -733,5 +828,5 @@ int btrfs_read_buffer(struct extent_buffer *buf) struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 1); + buf, 0, 1); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e4e68ea96483..6b2f2b414fa1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1178,6 +1178,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_set_buffer_uptodate(buf); set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); + set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, + EXTENT_CSUM, GFP_NOFS); + buf->flags |= EXTENT_CSUM; btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f8aaba8a30a2..2a8bc4bd43a9 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -81,7 +81,7 @@ void extent_map_tree_init(struct extent_map_tree *tree, } EXPORT_SYMBOL(extent_map_tree_init); -void extent_map_tree_cleanup(struct extent_map_tree *tree) +void extent_map_tree_empty_lru(struct extent_map_tree *tree) { struct extent_buffer *eb; while(!list_empty(&tree->buffer_lru)) { @@ -91,7 +91,7 @@ void extent_map_tree_cleanup(struct extent_map_tree *tree) free_extent_buffer(eb); } } -EXPORT_SYMBOL(extent_map_tree_cleanup); +EXPORT_SYMBOL(extent_map_tree_empty_lru); struct extent_map *alloc_extent_map(gfp_t mask) { @@ -1464,7 +1464,7 @@ void set_page_extent_mapped(struct page *page) if (!PagePrivate(page)) { SetPagePrivate(page); WARN_ON(!page->mapping->a_ops->invalidatepage); - set_page_private(page, 1); + set_page_private(page, EXTENT_PAGE_PRIVATE); page_cache_get(page); } } @@ -1979,8 +1979,9 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree, spin_lock(&tree->lru_lock); eb = find_lru(tree, start, len); - if (eb) + if (eb) { goto lru_add; + } spin_unlock(&tree->lru_lock); if (eb) { @@ -2007,6 +2008,7 @@ static void __free_extent_buffer(struct extent_buffer *eb) struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, + struct page *page0, gfp_t mask) { unsigned long num_pages = num_extent_pages(start, len); @@ -2024,7 +2026,18 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, if (eb->flags & EXTENT_BUFFER_FILLED) return eb; - for (i = 0; i < num_pages; i++, index++) { + if (page0) { + eb->first_page = page0; + i = 1; + index++; + page_cache_get(page0); + set_page_extent_mapped(page0); + set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE | + len << 2); + } else { + i = 0; + } + for (; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); if (!p) { WARN_ON(1); @@ -2036,8 +2049,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i == 0) + if (i == 0) { eb->first_page = p; + set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | + len << 2); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2057,8 +2075,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, gfp_t mask) { unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; struct page *p; struct address_space *mapping = tree->mapping; @@ -2082,8 +2099,15 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); - if (i == 0) + + if (i == 0) { eb->first_page = p; + set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | + len << 2); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } + if (!PageUptodate(p)) uptodate = 0; unlock_page(p); @@ -2174,7 +2198,21 @@ int set_extent_buffer_dirty(struct extent_map_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { + struct page *page = extent_buffer_page(eb, i); + /* writepage may need to do something special for the + * first page, we have to make sure page->private is + * properly set. releasepage may drop page->private + * on us if the page isn't already dirty. + */ + if (i == 0) { + lock_page(page); + set_page_private(page, + EXTENT_PAGE_PRIVATE_FIRST_PAGE | + eb->len << 2); + } __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); + if (i == 0) + unlock_page(page); } return set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); @@ -2217,9 +2255,12 @@ int extent_buffer_uptodate(struct extent_map_tree *tree, EXPORT_SYMBOL(extent_buffer_uptodate); int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, int wait) + struct extent_buffer *eb, + u64 start, + int wait) { unsigned long i; + unsigned long start_i; struct page *page; int err; int ret = 0; @@ -2232,9 +2273,16 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, EXTENT_UPTODATE, 1)) { return 0; } + if (start) { + WARN_ON(start < eb->start); + start_i = (start >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT); + } else { + start_i = 0; + } num_pages = num_extent_pages(eb->start, eb->len); - for (i = 0; i < num_pages; i++) { + for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (PageUptodate(page)) { continue; @@ -2260,7 +2308,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, return ret; } - for (i = 0; i < num_pages; i++) { + for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); wait_on_page_locked(page); if (!PageUptodate(page)) { @@ -2314,7 +2362,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } EXPORT_SYMBOL(read_extent_buffer); -static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start, +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, char **token, char **map, unsigned long *map_start, unsigned long *map_len, int km) @@ -2337,6 +2385,10 @@ static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start, offset = 0; *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; } + if (start + min_len >= eb->len) { +printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); + WARN_ON(1); + } p = extent_buffer_page(eb, i); WARN_ON(!PageUptodate(p)); @@ -2346,6 +2398,7 @@ static int __map_extent_buffer(struct extent_buffer *eb, unsigned long start, *map_len = PAGE_CACHE_SIZE - offset; return 0; } +EXPORT_SYMBOL(map_private_extent_buffer); int map_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, @@ -2360,8 +2413,8 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, eb->map_token = NULL; save = 1; } - err = __map_extent_buffer(eb, start, min_len, token, map, - map_start, map_len, km); + err = map_private_extent_buffer(eb, start, min_len, token, map, + map_start, map_len, km); if (!err && save) { eb->map_token = *token; eb->kaddr = *map; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index f1dc28d260eb..39d78d31b6a2 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -17,8 +17,17 @@ #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) #define EXTENT_BUFFER_FILLED (1 << 8) +#define EXTENT_CSUM (1 << 9) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) +/* + * page->private values. Every page that is controlled by the extent + * map has page->private set to one. + */ + +#define EXTENT_PAGE_PRIVATE 1 +#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 + struct extent_map_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); @@ -89,7 +98,7 @@ typedef struct extent_map *(get_extent_t)(struct inode *inode, void extent_map_tree_init(struct extent_map_tree *tree, struct address_space *mapping, gfp_t mask); -void extent_map_tree_cleanup(struct extent_map_tree *tree); +void extent_map_tree_empty_lru(struct extent_map_tree *tree); struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 start, u64 end); int add_extent_mapping(struct extent_map_tree *tree, @@ -143,13 +152,14 @@ void set_page_extent_mapped(struct page *page); struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, + struct page *page0, gfp_t mask); struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask); void free_extent_buffer(struct extent_buffer *eb); int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, int wait); + struct extent_buffer *eb, u64 start, int wait); static inline void extent_buffer_get(struct extent_buffer *eb) { @@ -187,5 +197,9 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long min_len, char **token, char **map, unsigned long *map_start, unsigned long *map_len, int km); +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 84f496c838c9..80f84b48a70f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1713,10 +1713,6 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) struct extent_map_tree *tree; int ret; - if (page->private != 1) { - WARN_ON(1); - return try_to_free_buffers(page); - } tree = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(tree, page); if (ret == 1) { From 0f1ebbd159baade0417f8f62f0cd6810cc950832 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:19:46 -0400 Subject: [PATCH 0258/2985] Btrfs: Large block related defrag optimizations Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 4 +--- fs/btrfs/tree-defrag.c | 23 ++++++++++++++++++++++- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0c6ed17ac1bc..e1557e573d74 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -217,11 +217,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, root->fs_info->generation); WARN_ON(1); } - if (btrfs_buffer_defrag_done(parent)) - return 0; + parent_level = btrfs_header_level(parent); parent_nritems = btrfs_header_nritems(parent); - parent_level = btrfs_header_level(parent); blocksize = btrfs_level_size(root, parent_level - 1); start_slot = 0; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 0d1a1be0995c..d31c21ac6f23 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -118,7 +118,6 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, WARN_ON(*level >= BTRFS_MAX_LEVEL); btrfs_clear_buffer_defrag(path->nodes[*level]); - btrfs_clear_buffer_defrag_done(path->nodes[*level]); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; @@ -148,6 +147,26 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, root->defrag_level = i; return 0; } else { + if (*level > 1 && path->nodes[*level] != root->node) { + struct extent_buffer *next; + u64 last; + int ret; + ret = btrfs_cow_block(trans, root, + path->nodes[*level], + path->nodes[*level + 1], + path->slots[*level + 1], + &next); + BUG_ON(ret); + path->nodes[*level] = next; + last = next->start; + ret = btrfs_realloc_node(trans, root, next, + cache_only, &last); + BUG_ON(ret); + + if (root == root->fs_info->extent_root) + btrfs_extent_post_op(trans, root); + } + btrfs_clear_buffer_defrag(path->nodes[*level]); btrfs_clear_buffer_defrag_done(path->nodes[*level]); free_extent_buffer(path->nodes[*level]); @@ -183,9 +202,11 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, level = btrfs_header_level(root->node); orig_level = level; + if (level == 0) { goto out; } + if (root->defrag_progress.objectid == 0) { extent_buffer_get(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); From 304fced668079ce222200d5742657d34cc44af09 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:21:17 -0400 Subject: [PATCH 0259/2985] Btrfs: balance_dirty_pages_ratelimited is causing problems, use nr == 1 always Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1176e5420c56..3878772fcffd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -768,7 +768,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { balance_dirty_pages_ratelimited_nr( - root->fs_info->btree_inode->i_mapping, nr); + root->fs_info->btree_inode->i_mapping, 1); } void btrfs_set_buffer_defrag(struct extent_buffer *buf) From cf786e79e3f96f4bc037c69e1a7afb5fb85c6111 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:22:11 -0400 Subject: [PATCH 0260/2985] Btrfs: Defrag: only walk into nodes with the defrag bit set Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 1 - fs/btrfs/tree-defrag.c | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e1557e573d74..984f4745440e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -280,7 +280,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, *last_ret = search_start; if (parent_level == 1) btrfs_clear_buffer_defrag(tmp); - btrfs_set_buffer_defrag_done(tmp); free_extent_buffer(tmp); } return err; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index d31c21ac6f23..f86eccf511b9 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -87,8 +87,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, if (cache_only) { next = btrfs_find_tree_block(root, bytenr, btrfs_level_size(root, *level - 1)); - /* FIXME, test for defrag */ - if (!next || !btrfs_buffer_uptodate(next)) { + if (!next || !btrfs_buffer_uptodate(next) || + !btrfs_buffer_defrag(next)) { free_extent_buffer(next); path->slots[*level]++; continue; @@ -147,7 +147,8 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, root->defrag_level = i; return 0; } else { - if (*level > 1 && path->nodes[*level] != root->node) { + if (*level > 1 && path->nodes[*level] != root->node && + btrfs_buffer_defrag(path->nodes[*level])) { struct extent_buffer *next; u64 last; int ret; @@ -168,7 +169,6 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, } btrfs_clear_buffer_defrag(path->nodes[*level]); - btrfs_clear_buffer_defrag_done(path->nodes[*level]); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; From ff79f8190b6e955ff7a71faf804a3017d526e657 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:22:25 -0400 Subject: [PATCH 0261/2985] Btrfs: Add back file data checksumming Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + fs/btrfs/disk-io.h | 2 ++ fs/btrfs/extent_map.c | 3 +++ fs/btrfs/file-item.c | 17 +++++++++-------- fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 21 ++++++++++----------- 6 files changed, 26 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3878772fcffd..5accced2858a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -594,6 +594,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) tree_root->nodesize = nodesize; tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; + sb_set_blocksize(sb, sectorsize); i_size_write(fs_info->btree_inode, btrfs_super_total_bytes(disk_super)); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 190b07bc642b..33d2ccfd74d8 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -58,4 +58,6 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf); int btrfs_clear_buffer_defrag(struct extent_buffer *buf); int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf); int btrfs_read_buffer(struct extent_buffer *buf); +u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); +void btrfs_csum_final(u32 crc, char *result); #endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2a8bc4bd43a9..e87e476dca92 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2031,6 +2031,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, i = 1; index++; page_cache_get(page0); + mark_page_accessed(page0); set_page_extent_mapped(page0); set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); @@ -2049,6 +2050,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); + mark_page_accessed(p); if (i == 0) { eb->first_page = p; set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | @@ -2099,6 +2101,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, goto fail; } set_page_extent_mapped(p); + mark_page_accessed(p); if (i == 0) { eb->first_page = p; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 9a65e97a4e28..10a4c7408600 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -136,27 +136,27 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 objectid, u64 offset, char *data, size_t len) { - return 0; -#if 0 int ret; struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_path *path; struct btrfs_csum_item *item; - struct extent_buffer *leaf; + struct extent_buffer *leaf = NULL; u64 csum_offset; + u32 csum_result = ~(u32)0; path = btrfs_alloc_path(); BUG_ON(!path); file_key.objectid = objectid; file_key.offset = offset; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1); - if (!IS_ERR(item)) + if (!IS_ERR(item)) { + leaf = path->nodes[0]; goto found; + } ret = PTR_ERR(item); if (ret == -EFBIG) { u32 item_size; @@ -226,14 +226,15 @@ csum: item = (struct btrfs_csum_item *)((unsigned char *)item + csum_offset * BTRFS_CRC32_SIZE); found: - /* FIXME!!!!!!!!!!!! */ - ret = btrfs_csum_data(root, data, len, &item->csum); + csum_result = btrfs_csum_data(root, data, csum_result, len); + btrfs_csum_final(csum_result, (char *)&csum_result); + write_extent_buffer(leaf, &csum_result, (unsigned long)item, + BTRFS_CRC32_SIZE); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); btrfs_free_path(path); return ret; -#endif } int btrfs_csum_truncate(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fe28404ae7f4..1af2b6534dad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -289,6 +289,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || + inline_size > 16384 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 80f84b48a70f..ac69f6926afc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -135,7 +135,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct btrfs_csum_item *item; struct btrfs_path *path = NULL; - u64 private; + u32 csum; mutex_lock(&root->fs_info->fs_mutex); path = btrfs_alloc_path(); @@ -145,11 +145,12 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) /* a csum that isn't present is a preallocated region. */ if (ret == -ENOENT || ret == -EFBIG) ret = 0; - private = 0; + csum = 0; goto out; } - memcpy((char *)&private, &item->csum, BTRFS_CRC32_SIZE); - set_state_private(em_tree, start, private); + read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, + BTRFS_CRC32_SIZE); + set_state_private(em_tree, start, csum); out: if (path) btrfs_free_path(path); @@ -165,21 +166,19 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) char *kaddr; u64 private; int ret; + struct btrfs_root *root = BTRFS_I(inode)->root; + u32 csum = ~(u32)0; ret = get_state_private(em_tree, start, &private); kaddr = kmap_atomic(page, KM_IRQ0); if (ret) { goto zeroit; } - /* - struct btrfs_root *root = BTRFS_I(inode)->root; - char csum[BTRFS_CRC32_SIZE]; - ret = btrfs_csum_data(root, kaddr + offset, end - start + 1, csum); - BUG_ON(ret); - if (memcmp(csum, &private, BTRFS_CRC32_SIZE)) { + csum = btrfs_csum_data(root, kaddr + offset, csum, end - start + 1); + btrfs_csum_final(csum, (char *)&csum); + if (csum != private) { goto zeroit; } - */ kunmap_atomic(kaddr, KM_IRQ0); return 0; From a6b6e75e096f436f0cc56edf5bca96301e194491 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:22:39 -0400 Subject: [PATCH 0262/2985] Btrfs: Defrag only leaves, and only when the parent node has a single objectid This allows us to defrag huge directories, but skip the expensive defrag case in more common usage, where it does not help as much. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 22 ++++++++++++++++------ fs/btrfs/ctree.h | 3 ++- fs/btrfs/transaction.c | 2 -- fs/btrfs/tree-defrag.c | 36 +++++------------------------------- 4 files changed, 23 insertions(+), 40 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 984f4745440e..74fec6b83a8b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -190,7 +191,8 @@ static int should_defrag_leaf(struct extent_buffer *leaf) int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, - int cache_only, u64 *last_ret) + int start_slot, int cache_only, u64 *last_ret, + struct btrfs_key *progress) { struct extent_buffer *cur; struct extent_buffer *tmp; @@ -199,7 +201,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, u64 last_block = 0; u64 other; u32 parent_nritems; - int start_slot; int end_slot; int i; int err = 0; @@ -221,15 +222,24 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, parent_nritems = btrfs_header_nritems(parent); blocksize = btrfs_level_size(root, parent_level - 1); - - start_slot = 0; end_slot = parent_nritems; if (parent_nritems == 1) return 0; + if (root != root->fs_info->extent_root) { + struct btrfs_key first_key; + struct btrfs_key last_key; + + btrfs_node_key_to_cpu(parent, &first_key, 0); + btrfs_node_key_to_cpu(parent, &last_key, parent_nritems - 1); + if (first_key.objectid != last_key.objectid) + return 0; + } + for (i = start_slot; i < end_slot; i++) { int close = 1; + blocknr = btrfs_node_blockptr(parent, i); if (last_block == 0) last_block = blocknr; @@ -898,7 +908,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, u32 blocksize; u32 nscan = 0; - if (level == 0) + if (level != 1) return; if (!path->nodes[level]) @@ -2370,7 +2380,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, } /* delete the leaf if it is mostly empty */ - if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { + if (0 && used < BTRFS_LEAF_DATA_SIZE(root) / 3) { /* push_leaf_left fixes the path. * make sure the path still points to our leaf * for possible call to del_ptr below diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1af0a966f85b..37bccb1a9a75 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -913,7 +913,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow); int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, - int cache_only, u64 *last_ret); + int start_slot, int cache_only, u64 *last_ret, + struct btrfs_key *progress); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 67e4aca36a62..bdfe05cf26c6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -353,7 +353,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) if (root->defrag_running) return 0; - trans = btrfs_start_transaction(root, 1); while (1) { root->defrag_running = 1; @@ -361,7 +360,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) nr = trans->blocks_used; btrfs_end_transaction(trans, root); mutex_unlock(&info->fs_mutex); - btrfs_btree_balance_dirty(info->tree_root, nr); cond_resched(); diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index f86eccf511b9..b02355a7b143 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -76,7 +76,9 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, if (*level == 1) { ret = btrfs_realloc_node(trans, root, path->nodes[*level], - cache_only, last_ret); + path->slots[*level], + cache_only, last_ret, + &root->defrag_progress); if (is_extent) btrfs_extent_post_op(trans, root); @@ -100,10 +102,6 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); BUG_ON(ret); - ret = btrfs_realloc_node(trans, root, next, cache_only, - last_ret); - BUG_ON(ret); - if (is_extent) btrfs_extent_post_op(trans, root); @@ -122,8 +120,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; - WARN_ON(ret); - return 0; + WARN_ON(ret && ret != -EAGAIN); + return ret; } static int defrag_walk_up(struct btrfs_trans_handle *trans, @@ -147,27 +145,6 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, root->defrag_level = i; return 0; } else { - if (*level > 1 && path->nodes[*level] != root->node && - btrfs_buffer_defrag(path->nodes[*level])) { - struct extent_buffer *next; - u64 last; - int ret; - ret = btrfs_cow_block(trans, root, - path->nodes[*level], - path->nodes[*level + 1], - path->slots[*level + 1], - &next); - BUG_ON(ret); - path->nodes[*level] = next; - last = next->start; - ret = btrfs_realloc_node(trans, root, next, - cache_only, &last); - BUG_ON(ret); - - if (root == root->fs_info->extent_root) - btrfs_extent_post_op(trans, root); - } - btrfs_clear_buffer_defrag(path->nodes[*level]); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; @@ -211,9 +188,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, extent_buffer_get(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); BUG_ON(ret); - ret = btrfs_realloc_node(trans, root, root->node, cache_only, - &last_ret); - BUG_ON(ret); path->nodes[level] = root->node; path->slots[level] = 0; if (is_extent) From 7936ca3883e5fef8ce5cc367a4356ad5fed67180 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 19 Oct 2007 09:22:41 -0400 Subject: [PATCH 0263/2985] Btrfs: Default to 8k max packed tails Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- fs/btrfs/file.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 74fec6b83a8b..54967099a8eb 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2380,7 +2380,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, } /* delete the leaf if it is mostly empty */ - if (0 && used < BTRFS_LEAF_DATA_SIZE(root) / 3) { + if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { /* push_leaf_left fixes the path. * make sure the path still points to our leaf * for possible call to del_ptr below diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1af2b6534dad..4aacf99bd97f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -289,7 +289,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > 16384 || + inline_size > 8192 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; From 6d7231f7d33fc14f2d41abc3b9cb28dcb208735d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 19 Oct 2007 09:22:49 -0400 Subject: [PATCH 0264/2985] Btrfs: Fix typo: owner is a 64 bit field Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 37bccb1a9a75..1d2b54150075 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -522,12 +522,12 @@ BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32); /* struct btrfs_extent_item */ BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); -BTRFS_SETGET_FUNCS(extent_owner, struct btrfs_extent_item, owner, 32); +BTRFS_SETGET_FUNCS(extent_owner, struct btrfs_extent_item, owner, 64); BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 32); BTRFS_SETGET_STACK_FUNCS(stack_extent_owner, struct btrfs_extent_item, - owner, 32); + owner, 64); /* struct btrfs_node */ BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); From 44b36eb20bb60d7da1375c1fb45784740a4dc358 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Fri, 19 Oct 2007 09:22:56 -0400 Subject: [PATCH 0265/2985] Btrfs: Properly setup root key while reading the root Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5accced2858a..5a7e60d7f443 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -410,6 +410,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, read_extent_buffer(l, &root->root_item, btrfs_item_ptr_offset(l, path->slots[0]), sizeof(root->root_item)); + memcpy(&root->root_key, location, sizeof(*location)); ret = 0; out: btrfs_release_path(root, path); From ae2f5411c4ce7180cca8418853db50c8e52d40db Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 19 Oct 2007 09:22:59 -0400 Subject: [PATCH 0266/2985] btrfs: 32-bit type problems An assorted set of casts to get rid of the warnings on 32-bit archs. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- fs/btrfs/extent-tree.c | 8 ++++---- fs/btrfs/extent_map.c | 6 +++--- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 54967099a8eb..17a322663f3b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1389,7 +1389,7 @@ int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); if (ret < 0) { printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n", - ret, BTRFS_LEAF_DATA_SIZE(root), + ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), leaf_space_used(leaf, 0, nritems), nritems); } return ret; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6b2f2b414fa1..7de7707d939c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -149,7 +149,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct if (ret) return NULL; - block_group = (struct btrfs_block_group_cache *)ptr; + block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; if (block_group->key.objectid <= bytenr && bytenr <= @@ -279,7 +279,7 @@ again: if (ret) break; - cache = (struct btrfs_block_group_cache *)ptr; + cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); @@ -537,7 +537,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (ret) break; - cache = (struct btrfs_block_group_cache *)ptr; + cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; err = write_one_cache_group(trans, root, path, cache); /* @@ -1541,7 +1541,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) found_key.objectid + found_key.offset - 1, bit | EXTENT_LOCKED, GFP_NOFS); set_state_private(block_group_cache, found_key.objectid, - (u64)cache); + (unsigned long)cache); if (key.objectid >= btrfs_super_total_bytes(&info->super_copy)) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index e87e476dca92..89d0ebd0ea83 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2654,8 +2654,8 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - src_off_in_page)); - cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE - - dst_off_in_page)); + cur = min_t(unsigned long, cur, + (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); copy_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), @@ -2707,7 +2707,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, if (dst_i == 0) dst_off_in_page += start_offset; - cur = min(len, src_off_in_page + 1); + cur = min_t(unsigned long, len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); move_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4aacf99bd97f..96df1b10cb60 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -188,7 +188,7 @@ insert: while (size > 0) { page = pages[i]; kaddr = kmap_atomic(page, KM_USER0); - cur_size = min(PAGE_CACHE_SIZE - page_offset, size); + cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size); write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size); kunmap_atomic(kaddr, KM_USER0); page_offset = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ac69f6926afc..0e1ca423e22d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1606,7 +1606,7 @@ again: extent_start; ptr = btrfs_file_extent_inline_start(item) + extent_offset; map = kmap(page); - copy_size = min(PAGE_CACHE_SIZE - page_offset, + copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, size - extent_offset); em->block_start = EXTENT_MAP_INLINE; From 0a2118dfd4c328e8823cdcfebf839dff73ac21e9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 19 Oct 2007 09:23:05 -0400 Subject: [PATCH 0267/2985] Btrfs: Fix bi_end_io() functions on > 2.6.23 kernels It now returns void and it is never called for partial completions, so the bio->bi_size check must go. Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 89d0ebd0ea83..c0f834563ced 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -1274,8 +1275,12 @@ static int check_page_writeback(struct extent_map_tree *tree, * Scheduling is not allowed, so the extent state tree is expected * to have one and only one object corresponding to this IO. */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_writepage(struct bio *bio, int err) +#else static int end_bio_extent_writepage(struct bio *bio, unsigned int bytes_done, int err) +#endif { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -1284,8 +1289,10 @@ static int end_bio_extent_writepage(struct bio *bio, u64 end; int whole_page; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) if (bio->bi_size) return 1; +#endif do { struct page *page = bvec->bv_page; @@ -1316,7 +1323,9 @@ static int end_bio_extent_writepage(struct bio *bio, } while (bvec >= bio->bi_io_vec); bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; +#endif } /* @@ -1330,8 +1339,12 @@ static int end_bio_extent_writepage(struct bio *bio, * Scheduling is not allowed, so the extent state tree is expected * to have one and only one object corresponding to this IO. */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_readpage(struct bio *bio, int err) +#else static int end_bio_extent_readpage(struct bio *bio, unsigned int bytes_done, int err) +#endif { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -1341,8 +1354,10 @@ static int end_bio_extent_readpage(struct bio *bio, int whole_page; int ret; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) if (bio->bi_size) return 1; +#endif do { struct page *page = bvec->bv_page; @@ -1382,7 +1397,9 @@ static int end_bio_extent_readpage(struct bio *bio, } while (bvec >= bio->bi_io_vec); bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; +#endif } /* @@ -1390,8 +1407,12 @@ static int end_bio_extent_readpage(struct bio *bio, * the structs in the extent tree when done, and set the uptodate bits * as appropriate. */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_preparewrite(struct bio *bio, int err) +#else static int end_bio_extent_preparewrite(struct bio *bio, unsigned int bytes_done, int err) +#endif { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -1399,8 +1420,10 @@ static int end_bio_extent_preparewrite(struct bio *bio, u64 start; u64 end; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) if (bio->bi_size) return 1; +#endif do { struct page *page = bvec->bv_page; @@ -1422,7 +1445,9 @@ static int end_bio_extent_preparewrite(struct bio *bio, } while (bvec >= bio->bi_io_vec); bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; +#endif } static int submit_extent_page(int rw, struct extent_map_tree *tree, From bbf0d0062da2b3e1e239e381f8ac20971ef1b2ed Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 19 Oct 2007 09:23:07 -0400 Subject: [PATCH 0268/2985] Btrfs: KM_IRQ0 usage in end_io handling endio handling is typically called with interrupts disabled, but can also be called with it enabled. So save interrupts before using KM_IRQ0 to be completely safe. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0e1ca423e22d..02d802cdd59b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -168,8 +168,10 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) int ret; struct btrfs_root *root = BTRFS_I(inode)->root; u32 csum = ~(u32)0; + unsigned long flags; ret = get_state_private(em_tree, start, &private); + local_irq_save(flags); kaddr = kmap_atomic(page, KM_IRQ0); if (ret) { goto zeroit; @@ -180,6 +182,7 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) goto zeroit; } kunmap_atomic(kaddr, KM_IRQ0); + local_irq_restore(flags); return 0; zeroit: @@ -188,6 +191,7 @@ zeroit: memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); kunmap_atomic(kaddr, KM_IRQ0); + local_irq_restore(flags); return 0; } From b4a2a0edfda5a721bb4c5fa0068d2ad7ea67f98b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 19 Oct 2007 09:23:08 -0400 Subject: [PATCH 0269/2985] Btrfs: sysfs compile fixup Just use kobject_set_name(), that works in all kernels (I think...). Kernels newer than 2.6.23 currently fail with: /home/axboe/git/btrfs/btrfs-unstable/sysfs.c:188: error: unknown field 'name' specified in initializer Signed-off-by: Chris Mason --- fs/btrfs/sysfs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 0bd1fd3d29df..1671c731800e 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -184,9 +184,7 @@ static struct kobj_type btrfs_super_ktype = { .release = btrfs_super_release, }; -static struct kset btrfs_kset = { - .kobj = {.name = "btrfs"}, -}; +static struct kset btrfs_kset; int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) { @@ -249,6 +247,7 @@ void btrfs_sysfs_del_super(struct btrfs_fs_info *fs) int btrfs_init_sysfs() { kobj_set_kset_s(&btrfs_kset, fs_subsys); + kobject_set_name(&btrfs_kset.kobj, "btrfs"); return kset_register(&btrfs_kset); } From 59d169e2b3d69ba0beca0fa0936789080c8d61a9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 19 Oct 2007 09:23:09 -0400 Subject: [PATCH 0270/2985] Btrfs: Fix read/write_extent_buffer to use KM_USER1 instead of KM_USER0 This avoids recursive use of KM_USER0 during btrfs_file_write Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index c0f834563ced..3c81f5eab155 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2378,9 +2378,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page, KM_USER1); memcpy(dst, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER1); dst += cur; len -= cur; @@ -2523,9 +2523,9 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(!PageUptodate(page)); cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page, KM_USER1); memcpy(kaddr + offset, src, cur); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER1); src += cur; len -= cur; From 3685f791659c9f21b763ee1702ac8ca58bc20f81 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 19 Oct 2007 09:23:27 -0400 Subject: [PATCH 0271/2985] Btrfs: CPU usage optimizations in push and the extent_map code Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 44 +++++++++++++++++++++++------------------- fs/btrfs/extent_map.c | 45 +++++++++++++++---------------------------- 2 files changed, 39 insertions(+), 50 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 17a322663f3b..e8466940fa30 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1588,6 +1588,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root if (!path->nodes[1]) return 1; + right_nritems = btrfs_header_nritems(right); + if (right_nritems == 0) { + return 1; + } + left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1], slot - 1), root->leafsize); free_space = btrfs_leaf_free_space(root, left); @@ -1604,18 +1609,13 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root free_extent_buffer(left); return 1; } + free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(left); return 1; } - right_nritems = btrfs_header_nritems(right); - if (right_nritems == 0) { - free_extent_buffer(left); - return 1; - } - for (i = 0; i < right_nritems - 1; i++) { item = btrfs_item_nr(right, i); if (!right->map_token) { @@ -1772,21 +1772,25 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_disk_key disk_key; /* first try to make some room by pushing left and right */ - wret = push_leaf_left(trans, root, path, data_size); - if (wret < 0) { - return wret; - } - if (wret) { + if (ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size); - if (wret < 0) + if (wret < 0) { return wret; - } - l = path->nodes[0]; + } + if (wret) { + wret = push_leaf_left(trans, root, path, data_size); + if (wret < 0) + return wret; + } + l = path->nodes[0]; - /* did the pushes work? */ - if (btrfs_leaf_free_space(root, l) >= - sizeof(struct btrfs_item) + data_size) { - return 0; + /* did the pushes work? */ + if (btrfs_leaf_free_space(root, l) >= + sizeof(struct btrfs_item) + data_size) { + return 0; + } + } else { + l = path->nodes[0]; } if (!path->nodes[1]) { @@ -2388,13 +2392,13 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, slot = path->slots[1]; extent_buffer_get(leaf); - wret = push_leaf_left(trans, root, path, 1); + wret = push_leaf_right(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; if (path->nodes[0] == leaf && btrfs_header_nritems(leaf)) { - wret = push_leaf_right(trans, root, path, 1); + wret = push_leaf_left(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 3c81f5eab155..caaf0bf0e059 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1986,12 +1986,15 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i) { struct page *p; + struct address_space *mapping; if (i == 0) return eb->first_page; i += eb->start >> PAGE_CACHE_SHIFT; - p = find_get_page(eb->first_page->mapping, i); - page_cache_release(p); + mapping = eb->first_page->mapping; + read_lock_irq(&mapping->tree_lock); + p = radix_tree_lookup(&mapping->page_tree, i); + read_unlock_irq(&mapping->tree_lock); return p; } @@ -2365,9 +2368,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (i == 0) - offset += start_offset; + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); while(len > 0) { page = extent_buffer_page(eb, i); @@ -2475,9 +2476,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (i == 0) - offset += start_offset; + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); while(len > 0) { page = extent_buffer_page(eb, i); @@ -2514,9 +2513,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (i == 0) - offset += start_offset; + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); while(len > 0) { page = extent_buffer_page(eb, i); @@ -2548,9 +2545,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (i == 0) - offset += start_offset; + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); while(len > 0) { page = extent_buffer_page(eb, i); @@ -2582,9 +2577,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, WARN_ON(src->len != dst_len); - offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (i == 0) - offset += start_offset; + offset = (start_offset + dst_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); while(len > 0) { page = extent_buffer_page(dst, i); @@ -2664,19 +2658,14 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, } while(len > 0) { - dst_off_in_page = dst_offset & + dst_off_in_page = (start_offset + dst_offset) & ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = src_offset & + src_off_in_page = (start_offset + src_offset) & ((unsigned long)PAGE_CACHE_SIZE - 1); dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; - if (src_i == 0) - src_off_in_page += start_offset; - if (dst_i == 0) - dst_off_in_page += start_offset; - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - src_off_in_page)); cur = min_t(unsigned long, cur, @@ -2723,14 +2712,10 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; - dst_off_in_page = dst_end & + dst_off_in_page = (start_offset + dst_end) & ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = src_end & + src_off_in_page = (start_offset + src_end) & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (src_i == 0) - src_off_in_page += start_offset; - if (dst_i == 0) - dst_off_in_page += start_offset; cur = min_t(unsigned long, len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); From ff190c0c004d8e51195c7bcf5a8490aeefccbce0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 19 Oct 2007 10:39:41 -0400 Subject: [PATCH 0272/2985] Btrfs: Avoid recursive KM_USER1 mappings in copy_extent_buffer Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index caaf0bf0e059..a61379230dd2 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2586,9 +2586,9 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER1); + kaddr = kmap_atomic(page, KM_USER0); read_extent_buffer(src, kaddr + offset, src_offset, cur); - kunmap_atomic(kaddr, KM_USER1); + kunmap_atomic(kaddr, KM_USER0); src_offset += cur; len -= cur; From 5ee78ac70f6c01f2a5acf662c2af975665710edc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 19 Oct 2007 14:01:21 -0400 Subject: [PATCH 0273/2985] Btrfs: Fix split_leaf to avoid incorrect double splits Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e8466940fa30..53e40b5c084d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1865,7 +1865,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } mid = slot; - double_split = 1; + if (mid != nritems && + leaf_space_used(l, mid, nritems - mid) + + space_needed > BTRFS_LEAF_DATA_SIZE(root)) { + double_split = 1; + } } } nritems = nritems - mid; From f578d4bd7e141dd03ca7e8695c1cc118c326e69e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 25 Oct 2007 15:42:56 -0400 Subject: [PATCH 0274/2985] Btrfs: Optimize csum insertion to create larger items when possible This reduces the number of calls to btrfs_extend_item and greatly lowers the cpu usage while writing large files. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/file-item.c | 37 ++++++++++++++++++++++++++++++++++++- fs/btrfs/inode.c | 2 +- 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1d2b54150075..70e143b13577 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -991,6 +991,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, u64 bytenr, int mod); int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct inode *inode, u64 objectid, u64 offset, char *data, size_t len); struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 10a4c7408600..482a2b615327 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -133,17 +133,22 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct inode *inode, u64 objectid, u64 offset, char *data, size_t len) { int ret; struct btrfs_key file_key; struct btrfs_key found_key; + u64 next_offset = (u64)-1; + int found_next = 0; struct btrfs_path *path; struct btrfs_csum_item *item; struct extent_buffer *leaf = NULL; u64 csum_offset; u32 csum_result = ~(u32)0; + u32 nritems; + u32 ins_size; path = btrfs_alloc_path(); BUG_ON(!path); @@ -168,7 +173,27 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, goto insert; } } else { + int slot = path->slots[0] + 1; /* we didn't find a csum item, insert one */ + nritems = btrfs_header_nritems(path->nodes[0]); + if (path->slots[0] >= nritems - 1) { + ret = btrfs_next_leaf(root, path); + if (ret == 1) { + found_next = 1; + } else if (ret == 0) { + slot = 0; + } else { + goto insert; + } + } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); + if (found_key.objectid != objectid || + found_key.type != BTRFS_CSUM_ITEM_KEY) { + found_next = 1; + goto insert; + } + next_offset = found_key.offset; + found_next = 1; goto insert; } @@ -211,8 +236,18 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, insert: btrfs_release_path(root, path); csum_offset = 0; + if (found_next) { + u64 tmp = min((u64)i_size_read(inode), next_offset); + tmp -= offset + root->sectorsize - 1; + tmp >>= root->fs_info->sb->s_blocksize_bits; + tmp = max((u64)1, tmp); + tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root)); + ins_size = BTRFS_CRC32_SIZE * tmp; + } else { + ins_size = BTRFS_CRC32_SIZE; + } ret = btrfs_insert_empty_item(trans, root, path, &file_key, - BTRFS_CRC32_SIZE); + ins_size); if (ret < 0) goto fail; if (ret != 0) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 02d802cdd59b..7efc0470abf1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -118,7 +118,7 @@ int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); kaddr = kmap(page); - btrfs_csum_file_block(trans, root, inode->i_ino, + btrfs_csum_file_block(trans, root, inode, inode->i_ino, start, kaddr + offset, end - start + 1); kunmap(page); ret = btrfs_end_transaction(trans, root); From 65555a06b4d1ae116ce223dc4b82d6068b36df96 Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 25 Oct 2007 15:42:57 -0400 Subject: [PATCH 0275/2985] Btrfs: Off by one fixes in extent_map.c Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 10 +++++----- fs/btrfs/inode.c | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index a61379230dd2..c44989a1e520 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2045,7 +2045,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, struct extent_buffer *eb; struct page *p; struct address_space *mapping = tree->mapping; - int uptodate = 0; + int uptodate = 1; eb = __alloc_extent_buffer(tree, start, len, mask); if (!eb || IS_ERR(eb)) @@ -2197,7 +2197,7 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, */ if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || ((i == num_pages - 1) && - ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { start = page->index << PAGE_CACHE_SHIFT; end = start + PAGE_CACHE_SIZE - 1; if (test_range_bit(tree, start, end, @@ -2265,7 +2265,7 @@ int set_extent_buffer_uptodate(struct extent_map_tree *tree, page = extent_buffer_page(eb, i); if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || ((i == num_pages - 1) && - ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { check_page_uptodate(tree, page); continue; } @@ -2401,7 +2401,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, struct page *p; size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long end_i = (start_offset + start + min_len) >> + unsigned long end_i = (start_offset + start + min_len - 1) >> PAGE_CACHE_SHIFT; if (i != end_i) @@ -2414,7 +2414,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, offset = 0; *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; } - if (start + min_len >= eb->len) { + if (start + min_len > eb->len) { printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); WARN_ON(1); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7efc0470abf1..c07433072322 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -616,6 +616,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, break; } btrfs_set_key_type(&key, found_type); + btrfs_release_path(root, path); continue; } if (found_key.offset >= inode->i_size) From cc0c55384796b422133ff1f21646835b31590f88 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 25 Oct 2007 15:42:57 -0400 Subject: [PATCH 0276/2985] Btrfs: Fix split_leaf to detect when it is extending an item When making room for a new item, it is ok to create an empty leaf, but when making room to extend an item, split_leaf needs to make sure it keeps the item we're extending in the path and make sure we don't end up with an empty leaf. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 78 +++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 53e40b5c084d..3eb5a9f30d14 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -26,7 +26,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, - struct btrfs_path *path, int data_size); + struct btrfs_path *path, int data_size, int extend); static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *dst, struct extent_buffer *src); @@ -1049,7 +1049,7 @@ again: if (ins_len > 0 && btrfs_leaf_free_space(root, b) < sizeof(struct btrfs_item) + ins_len) { int sret = split_leaf(trans, root, key, - p, ins_len); + p, ins_len, ret == 0); BUG_ON(sret > 0); if (sret) return sret; @@ -1755,7 +1755,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root */ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, - struct btrfs_path *path, int data_size) + struct btrfs_path *path, int data_size, int extend) { struct extent_buffer *l; u32 nritems; @@ -1768,9 +1768,13 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root int i; int ret = 0; int wret; - int double_split = 0; + int double_split; + int num_doubles = 0; struct btrfs_disk_key disk_key; + if (extend) + space_needed = data_size; + /* first try to make some room by pushing left and right */ if (ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size); @@ -1785,12 +1789,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root l = path->nodes[0]; /* did the pushes work? */ - if (btrfs_leaf_free_space(root, l) >= - sizeof(struct btrfs_item) + data_size) { + if (btrfs_leaf_free_space(root, l) >= space_needed) return 0; - } - } else { - l = path->nodes[0]; } if (!path->nodes[1]) { @@ -1798,6 +1798,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) return ret; } +again: + double_split = 0; + l = path->nodes[0]; slot = path->slots[0]; nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; @@ -1815,7 +1818,6 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root write_extent_buffer(right, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(right), BTRFS_FSID_SIZE); - if (mid <= slot) { if (nritems == 1 || leaf_space_used(l, mid, nritems - mid) + space_needed > @@ -1844,7 +1846,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root } else { if (leaf_space_used(l, 0, mid + 1) + space_needed > BTRFS_LEAF_DATA_SIZE(root)) { - if (slot == 0) { + if (!extend && slot == 0) { btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, @@ -1863,12 +1865,15 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; } return ret; - } - mid = slot; - if (mid != nritems && - leaf_space_used(l, mid, nritems - mid) + - space_needed > BTRFS_LEAF_DATA_SIZE(root)) { - double_split = 1; + } else if (extend && slot == 0) { + mid = 1; + } else { + mid = slot; + if (mid != nritems && + leaf_space_used(l, mid, nritems - mid) + + space_needed > BTRFS_LEAF_DATA_SIZE(root)) { + double_split = 1; + } } } } @@ -1931,39 +1936,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->slots[0] < 0); - if (!double_split) { - return ret; + if (double_split) { + BUG_ON(num_doubles != 0); + num_doubles++; + goto again; } - - right = btrfs_alloc_free_block(trans, root, root->leafsize, - l->start, 0); - if (IS_ERR(right)) - return PTR_ERR(right); - - memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); - btrfs_set_header_bytenr(right, right->start); - btrfs_set_header_generation(right, trans->transid); - btrfs_set_header_owner(right, root->root_key.objectid); - btrfs_set_header_level(right, 0); - write_extent_buffer(right, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(right), - BTRFS_FSID_SIZE); - - btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(right, 0); - wret = insert_ptr(trans, root, path, - &disk_key, right->start, - path->slots[1], 1); - if (wret) - ret = wret; - if (path->slots[1] == 0) { - wret = fixup_low_keys(trans, root, path, &disk_key, 1); - if (wret) - ret = wret; - } - free_extent_buffer(path->nodes[0]); - path->nodes[0] = right; - path->slots[0] = 0; return ret; } @@ -1992,8 +1969,7 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, slot = path->slots[0]; old_data_start = btrfs_item_offset_nr(leaf, slot); - old_size = btrfs_item_size_nr(leaf, slot); - BUG_ON(old_size <= new_size); + old_size = btrfs_item_size_nr(leaf, slot); BUG_ON(old_size <= new_size); size_diff = old_size - new_size; BUG_ON(slot < 0); From 18f16f7ba62a01c29e09b40ac6ad6d92a8955859 Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 25 Oct 2007 15:42:57 -0400 Subject: [PATCH 0277/2985] Btrfs: Fix for insert_inline_extent to handle offset != 0 This modifies inline extent size calculation, so that insert_inline_extent can handle the case that parameter 'offset' is not zero; it also a few codes to zero uninitialized area in inline extent. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 96df1b10cb60..9260d3478aad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -108,7 +108,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = offset; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - datasize = btrfs_file_extent_calc_inline_size(offset + size); ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret < 0) { @@ -130,7 +129,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, } if (ret == 0) { u32 found_size; - u64 found_start; + u64 found_end; leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], @@ -144,19 +143,17 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, offset, inode->i_ino); goto fail; } - found_start = key.offset; found_size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); + found_end = key.offset + found_size; - if (found_size < offset + size) { + if (found_end < offset + size) { btrfs_release_path(root, path); ret = btrfs_search_slot(trans, root, &key, path, - offset + size - found_size - - found_start, 1); + offset + size - found_end, 1); BUG_ON(ret != 0); ret = btrfs_extend_item(trans, root, path, - offset + size - found_size - - found_start); + offset + size - found_end); if (ret) { err = ret; goto fail; @@ -165,9 +162,15 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); } + if (found_end < offset) { + ptr = btrfs_file_extent_inline_start(ei) + found_size; + memset_extent_buffer(leaf, 0, ptr, offset - found_end); + } } else { insert: btrfs_release_path(root, path); + datasize = offset + size - key.offset; + datasize = btrfs_file_extent_calc_inline_size(datasize); ret = btrfs_insert_empty_item(trans, root, path, &key, datasize); if (ret) { @@ -181,7 +184,7 @@ insert: btrfs_set_file_extent_generation(leaf, ei, trans->transid); btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); } - ptr = btrfs_file_extent_inline_start(ei) + offset; + ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset; cur_size = size; i = 0; From 5708b9591617486bf1aa5b1a97f2c0549ec87933 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 25 Oct 2007 15:43:18 -0400 Subject: [PATCH 0278/2985] Btrfs: Tune the automatic defrag code 1) Forced defrag wasn't working properly (btrfsctl -d) because some cache only checks were incorrect. 2) Defrag only the leaves unless in forced defrag mode. 3) Don't use complex logic to figure out if a leaf is needs defrag Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 69 ++++++++++++++++-------------------------- fs/btrfs/tree-defrag.c | 1 - 2 files changed, 26 insertions(+), 44 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3eb5a9f30d14..516b90224a1b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -161,34 +161,6 @@ static int close_blocks(u64 blocknr, u64 other, u32 blocksize) return 0; } -static int should_defrag_leaf(struct extent_buffer *leaf) -{ - struct btrfs_key key; - u32 nritems; - - if (btrfs_buffer_defrag(leaf)) - return 1; - - nritems = btrfs_header_nritems(leaf); - if (nritems == 0) - return 0; - - btrfs_item_key_to_cpu(leaf, &key, 0); - if (key.type == BTRFS_DIR_ITEM_KEY) - return 1; - - - btrfs_item_key_to_cpu(leaf, &key, nritems - 1); - if (key.type == BTRFS_DIR_ITEM_KEY) - return 1; - if (nritems > 4) { - btrfs_item_key_to_cpu(leaf, &key, nritems / 2); - if (key.type == BTRFS_DIR_ITEM_KEY) - return 1; - } - return 0; -} - int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int start_slot, int cache_only, u64 *last_ret, @@ -208,6 +180,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, int uptodate; u32 blocksize; + parent_level = btrfs_header_level(parent); + if (cache_only && parent_level != 1) + return 0; + if (trans->transaction != root->fs_info->running_transaction) { printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, root->fs_info->running_transaction->transid); @@ -218,7 +194,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, root->fs_info->generation); WARN_ON(1); } - parent_level = btrfs_header_level(parent); parent_nritems = btrfs_header_nritems(parent); blocksize = btrfs_level_size(root, parent_level - 1); @@ -227,27 +202,26 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (parent_nritems == 1) return 0; - if (root != root->fs_info->extent_root) { - struct btrfs_key first_key; - struct btrfs_key last_key; - - btrfs_node_key_to_cpu(parent, &first_key, 0); - btrfs_node_key_to_cpu(parent, &last_key, parent_nritems - 1); - if (first_key.objectid != last_key.objectid) - return 0; - } - for (i = start_slot; i < end_slot; i++) { int close = 1; + if (!parent->map_token) { + map_extent_buffer(parent, + btrfs_node_key_ptr_offset(i), + sizeof(struct btrfs_key_ptr), + &parent->map_token, &parent->kaddr, + &parent->map_start, &parent->map_len, + KM_USER1); + } blocknr = btrfs_node_blockptr(parent, i); if (last_block == 0) last_block = blocknr; + if (i > 0) { other = btrfs_node_blockptr(parent, i - 1); close = close_blocks(blocknr, other, blocksize); } - if (close && i < end_slot - 1) { + if (close && i < end_slot - 2) { other = btrfs_node_blockptr(parent, i + 1); close = close_blocks(blocknr, other, blocksize); } @@ -255,15 +229,18 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, last_block = blocknr; continue; } + if (parent->map_token) { + unmap_extent_buffer(parent, parent->map_token, + KM_USER1); + parent->map_token = NULL; + } cur = btrfs_find_tree_block(root, blocknr, blocksize); if (cur) uptodate = btrfs_buffer_uptodate(cur); else uptodate = 0; - if (!cur || !uptodate || - (parent_level != 1 && !btrfs_buffer_defrag(cur)) || - (parent_level == 1 && !should_defrag_leaf(cur))) { + if (!cur || !uptodate) { if (cache_only) { free_extent_buffer(cur); continue; @@ -287,11 +264,17 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, break; } search_start = tmp->start; + last_block = tmp->start; *last_ret = search_start; if (parent_level == 1) btrfs_clear_buffer_defrag(tmp); free_extent_buffer(tmp); } + if (parent->map_token) { + unmap_extent_buffer(parent, parent->map_token, + KM_USER1); + parent->map_token = NULL; + } return err; } diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b02355a7b143..65ef12351566 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -183,7 +183,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (level == 0) { goto out; } - if (root->defrag_progress.objectid == 0) { extent_buffer_get(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); From 0d9f7f3e27a92b07212090a1772fdbb3a92fef81 Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 25 Oct 2007 15:48:28 -0400 Subject: [PATCH 0279/2985] btrfs_inode_by_name return random value. When inode is found, the return value is from the uninitialized variable 'ret'. -- Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c07433072322..b74a831fab19 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -847,15 +847,16 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, struct btrfs_dir_item *di; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(dir)->root; - int ret; + int ret = 0; path = btrfs_alloc_path(); BUG_ON(!path); di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, namelen, 0); + if (IS_ERR(di)) + ret = PTR_ERR(di); if (!di || IS_ERR(di)) { location->objectid = 0; - ret = 0; goto out; } btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); From 134d45120164141c0847daf5222aaec77e04724a Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 25 Oct 2007 15:49:25 -0400 Subject: [PATCH 0280/2985] Fix ENOTEMPTY check in btrfs_rmdir The ENOTEMPTY check in btrfs_rmdir isn't reliable. It's possible that the backward search finds . or .. at first, then some other directory entry. In that case, btrfs_rmdir delete . or .. improperly. The patch also fixes a fs_mutex unlock issue in btrfs_rmdir. -- Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b74a831fab19..7e4b2c1b4a56 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -448,6 +448,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) char *goodnames = ".."; unsigned long nr; + if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) + return -ENOTEMPTY; + path = btrfs_alloc_path(); BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); @@ -501,9 +504,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) out: btrfs_release_path(root, path); btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); nr = trans->blocks_used; ret = btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); if (ret && !err) err = ret; From 44ec0b7179b9bce7df1080370f51314a24a00216 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 29 Oct 2007 10:55:05 -0400 Subject: [PATCH 0281/2985] Btrfs: Compile fixes for 2.6.24-rc1 Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7e4b2c1b4a56..c88f13a88555 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2176,8 +2176,12 @@ void btrfs_destroy_inode(struct inode *inode) kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void init_once(struct kmem_cache * cachep, void *foo) +#else static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) +#endif { struct btrfs_inode *ei = (struct btrfs_inode *) foo; @@ -2200,8 +2204,13 @@ void btrfs_destroy_cachep(void) struct kmem_cache *btrfs_cache_create(const char *name, size_t size, unsigned long extra_flags, +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) + void (*ctor)(struct kmem_cache *, void *) +#else void (*ctor)(void *, struct kmem_cache *, - unsigned long)) + unsigned long) +#endif + ) { return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | extra_flags), ctor From c67cda1758130f3e437744c4c2ec4a7cc25564e3 Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 29 Oct 2007 11:41:05 -0400 Subject: [PATCH 0282/2985] Btrfs: Fix extent_map leak in extent_bmap Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index c44989a1e520..e4ee980cc138 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1924,6 +1924,7 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, struct inode *inode = mapping->host; u64 start = iblock << inode->i_blkbits; u64 end = start + (1 << inode->i_blkbits) - 1; + sector_t sector = 0; struct extent_map *em; em = get_extent(inode, NULL, 0, start, end, 0); @@ -1932,9 +1933,12 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, if (em->block_start == EXTENT_MAP_INLINE || em->block_start == EXTENT_MAP_HOLE) - return 0; + goto out; - return (em->block_start + start - em->start) >> inode->i_blkbits; + sector = (em->block_start + start - em->start) >> inode->i_blkbits; +out: + free_extent_map(em); + return sector; } static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb) From 689f9346612d96ab93890160cf2fe0df5217ab73 Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 29 Oct 2007 11:41:07 -0400 Subject: [PATCH 0283/2985] Fix inline extent handling in btrfs_get_extent 1. Reorder kmap and the test for 'page != NULL' 2. Zero-fill rest area of a block when inline extent isn't big enough. 3. Do not insert extent_map into the map tree when page == NULL. (If insert the extent_map into the map tree, subsequent read requests will find it in the map tree directly and the corresponding inline extent data aren't copied into page by the the get_extent function. extent_read_full_page can't handle that case) Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c88f13a88555..0ba1cdbdf112 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1596,8 +1596,7 @@ again: size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); - - extent_end = (extent_start + size) | + extent_end = (extent_start + size - 1) | ((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; @@ -1610,29 +1609,32 @@ again: } goto not_found_em; } - - extent_offset = (page->index << PAGE_CACHE_SHIFT) - - extent_start; - ptr = btrfs_file_extent_inline_start(item) + extent_offset; - map = kmap(page); - copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, - size - extent_offset); - em->block_start = EXTENT_MAP_INLINE; em->block_end = EXTENT_MAP_INLINE; + + if (!page) { + em->start = extent_start; + em->end = extent_start + size - 1; + goto out; + } + + extent_offset = (page->index << PAGE_CACHE_SHIFT) - + extent_start + page_offset; + copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, + size - extent_offset); em->start = extent_start + extent_offset; em->end = (em->start + copy_size -1) | ((u64)root->sectorsize -1); - - if (!page) { - goto insert; + map = kmap(page); + ptr = btrfs_file_extent_inline_start(item) + extent_offset; + read_extent_buffer(leaf, map + page_offset, ptr, copy_size); + + if (em->start + copy_size <= em->end) { + size = min_t(u64, em->end + 1 - em->start, + PAGE_CACHE_SIZE - page_offset) - copy_size; + memset(map + page_offset + copy_size, 0, size); } - read_extent_buffer(leaf, map + page_offset, ptr, copy_size); - /* - memset(map + page_offset + copy_size, 0, - PAGE_CACHE_SIZE - copy_size - page_offset); - */ flush_dcache_page(page); kunmap(page); set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS); From b56baf5bedccd3258643b09289f17ceab3ddea52 Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 29 Oct 2007 12:01:05 -0400 Subject: [PATCH 0284/2985] Minor fix for btrfs_csum_file_block. Execution should goto label 'insert' when 'btrfs_next_leaf' return a non-zero value, otherwise the parameter 'slot' for 'btrfs_item_key_to_cpu' may be out of bounds. The original codes jump to label 'insert' only when 'btrfs_next_leaf' return a negative value. Signed-off-by: Chris Mason --- fs/btrfs/file-item.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 482a2b615327..7eb9a5412e2f 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -178,13 +178,11 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, nritems = btrfs_header_nritems(path->nodes[0]); if (path->slots[0] >= nritems - 1) { ret = btrfs_next_leaf(root, path); - if (ret == 1) { + if (ret == 1) found_next = 1; - } else if (ret == 0) { - slot = 0; - } else { + if (ret != 0) goto insert; - } + slot = 0; } btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); if (found_key.objectid != objectid || @@ -238,7 +236,7 @@ insert: csum_offset = 0; if (found_next) { u64 tmp = min((u64)i_size_read(inode), next_offset); - tmp -= offset + root->sectorsize - 1; + tmp -= offset & ~((u64)root->sectorsize -1); tmp >>= root->fs_info->sb->s_blocksize_bits; tmp = max((u64)1, tmp); tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root)); From 2ff3e9b61d02b03e3157f7d43ba20ee1452814de Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 29 Oct 2007 14:36:41 -0400 Subject: [PATCH 0285/2985] Add O_SYNC support to btrfs_file_write Signed-off-by: Chris Mason --- fs/btrfs/file.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9260d3478aad..843e920388ed 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -635,8 +635,9 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { loff_t pos; - size_t num_written = 0; - int err = 0; + loff_t start_pos; + ssize_t num_written = 0; + ssize_t err = 0; int ret = 0; struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -652,7 +653,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pinned[1] = NULL; if (file->f_flags & O_DIRECT) return -EINVAL; + pos = *ppos; + start_pos = pos; + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); @@ -743,6 +747,13 @@ out: if (pinned[1]) page_cache_release(pinned[1]); *ppos = pos; + + if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { + err = sync_page_range(inode, inode->i_mapping, + start_pos, num_written); + if (err < 0) + num_written = err; + } current->backing_dev_info = NULL; return num_written ? num_written : err; } From 35ebb934bd7fcc7ca991b155b7980c3c4ff9f1a5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 30 Oct 2007 16:56:53 -0400 Subject: [PATCH 0286/2985] Btrfs: Fix PAGE_CACHE_SHIFT shifts on 32 bit machines Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent_map.c | 27 +++++++++++++++------------ fs/btrfs/file.c | 3 ++- fs/btrfs/inode.c | 14 +++++++------- fs/btrfs/transaction.c | 2 +- 5 files changed, 26 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5a7e60d7f443..8935eec31ee2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -159,7 +159,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { struct extent_map_tree *tree; - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 found_start; int found_level; unsigned long len; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index e4ee980cc138..238cb1d81d56 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1229,7 +1229,7 @@ EXPORT_SYMBOL(test_range_bit); static int check_page_uptodate(struct extent_map_tree *tree, struct page *page) { - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) SetPageUptodate(page); @@ -1243,7 +1243,7 @@ static int check_page_uptodate(struct extent_map_tree *tree, static int check_page_locked(struct extent_map_tree *tree, struct page *page) { - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) unlock_page(page); @@ -1257,7 +1257,7 @@ static int check_page_locked(struct extent_map_tree *tree, static int check_page_writeback(struct extent_map_tree *tree, struct page *page) { - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) end_page_writeback(page); @@ -1296,7 +1296,8 @@ static int end_bio_extent_writepage(struct bio *bio, do { struct page *page = bvec->bv_page; - start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; end = start + bvec->bv_len - 1; if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) @@ -1361,7 +1362,8 @@ static int end_bio_extent_readpage(struct bio *bio, do { struct page *page = bvec->bv_page; - start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; end = start + bvec->bv_len - 1; if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) @@ -1427,7 +1429,8 @@ static int end_bio_extent_preparewrite(struct bio *bio, do { struct page *page = bvec->bv_page; - start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; end = start + bvec->bv_len - 1; if (--bvec >= bio->bi_io_vec) @@ -1503,7 +1506,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent) { struct inode *inode = page->mapping->host; - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = start + PAGE_CACHE_SIZE - 1; u64 end; u64 cur = start; @@ -1608,7 +1611,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = start + PAGE_CACHE_SIZE - 1; u64 end; u64 cur = start; @@ -1750,7 +1753,7 @@ EXPORT_SYMBOL(extent_write_full_page); int extent_invalidatepage(struct extent_map_tree *tree, struct page *page, unsigned long offset) { - u64 start = (page->index << PAGE_CACHE_SHIFT); + u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); u64 end = start + PAGE_CACHE_SIZE - 1; size_t blocksize = page->mapping->host->i_sb->s_blocksize; @@ -1792,7 +1795,7 @@ int extent_prepare_write(struct extent_map_tree *tree, struct inode *inode, struct page *page, unsigned from, unsigned to, get_extent_t *get_extent) { - u64 page_start = page->index << PAGE_CACHE_SHIFT; + u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; u64 block_start; u64 orig_block_start; @@ -1890,7 +1893,7 @@ EXPORT_SYMBOL(extent_prepare_write); int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) { struct extent_map *em; - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; u64 orig_start = start; int ret = 1; @@ -2202,7 +2205,7 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || ((i == num_pages - 1) && ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { - start = page->index << PAGE_CACHE_SHIFT; + start = (u64)page->index << PAGE_CACHE_SHIFT; end = start + PAGE_CACHE_SIZE - 1; if (test_range_bit(tree, start, end, EXTENT_DIRTY, 0)) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 843e920388ed..abdd9caad94e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -301,7 +301,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, SetPageUptodate(p); set_page_dirty(p); } - last_end = pages[num_pages -1]->index << PAGE_CACHE_SHIFT; + last_end = (u64)(pages[num_pages -1]->index) << + PAGE_CACHE_SHIFT; last_end += PAGE_CACHE_SIZE - 1; set_extent_delalloc(em_tree, start_pos, end_of_last_block, GFP_NOFS); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0ba1cdbdf112..5d10b64e42df 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -111,7 +111,7 @@ int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) struct btrfs_trans_handle *trans; char *kaddr; int ret; - u64 page_start = page->index << PAGE_CACHE_SHIFT; + u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; size_t offset = start - page_start; mutex_lock(&root->fs_info->fs_mutex); @@ -160,7 +160,7 @@ out: int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) { - size_t offset = start - (page->index << PAGE_CACHE_SHIFT); + size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); struct inode *inode = page->mapping->host; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; char *kaddr; @@ -688,7 +688,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, char *kaddr; int ret = 0; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - u64 page_start = page->index << PAGE_CACHE_SHIFT; + u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; set_page_extent_mapped(page); @@ -739,7 +739,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) goto out; } } - page_start = page->index << PAGE_CACHE_SHIFT; + page_start = (u64)page->index << PAGE_CACHE_SHIFT; ret = btrfs_cow_one_page(inode, page, offset); @@ -1618,7 +1618,7 @@ again: goto out; } - extent_offset = (page->index << PAGE_CACHE_SHIFT) - + extent_offset = ((u64)page->index << PAGE_CACHE_SHIFT) - extent_start + page_offset; copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, size - extent_offset); @@ -1769,7 +1769,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) lock_page(page); wait_on_page_writeback(page); size = i_size_read(inode); - page_start = page->index << PAGE_CACHE_SHIFT; + page_start = (u64)page->index << PAGE_CACHE_SHIFT; if ((page->mapping != inode->i_mapping) || (page_start > size)) { @@ -2065,7 +2065,7 @@ int btrfs_defrag_file(struct file *file) { goto out_unlock; } } - page_start = page->index << PAGE_CACHE_SHIFT; + page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; lock_extent(em_tree, page_start, page_end, GFP_NOFS); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bdfe05cf26c6..4af1c0dab7f8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -169,7 +169,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); while(start <= end) { index = start >> PAGE_CACHE_SHIFT; - start = (index + 1) << PAGE_CACHE_SHIFT; + start = (u64)(index + 1) << PAGE_CACHE_SHIFT; page = find_lock_page(btree_inode->i_mapping, index); if (!page) continue; From 179e29e488cc74f1e9bd67bc45f70b832740e9ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 1 Nov 2007 11:28:41 -0400 Subject: [PATCH 0287/2985] Btrfs: Fix a number of inline extent problems that Yan Zheng reported. The fixes do a number of things: 1) Most btrfs_drop_extent callers will try to leave the inline extents in place. It can truncate bytes off the beginning of the inline extent if required. 2) writepage can now update the inline extent, allowing mmap writes to go directly into the inline extent. 3) btrfs_truncate_in_transaction truncates inline extents 4) extent_map.c fixed to not merge inline extent mappings and hole mappings together Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 52 +++++++++++++++++++++++++++++---- fs/btrfs/ctree.h | 2 +- fs/btrfs/dir-item.c | 2 +- fs/btrfs/extent_map.c | 11 +++++-- fs/btrfs/extent_map.h | 1 - fs/btrfs/file-item.c | 2 +- fs/btrfs/file.c | 36 +++++++++++++++++++---- fs/btrfs/inode.c | 67 +++++++++++++++++++++++++++++++------------ 8 files changed, 135 insertions(+), 38 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 516b90224a1b..eef9c92f86d5 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1930,7 +1930,7 @@ again: int btrfs_truncate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - u32 new_size) + u32 new_size, int from_end) { int ret = 0; int slot; @@ -1946,13 +1946,17 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, slot_orig = path->slots[0]; leaf = path->nodes[0]; + slot = path->slots[0]; + + old_size = btrfs_item_size_nr(leaf, slot); + if (old_size == new_size) + return 0; nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); - slot = path->slots[0]; old_data_start = btrfs_item_offset_nr(leaf, slot); - old_size = btrfs_item_size_nr(leaf, slot); BUG_ON(old_size <= new_size); + size_diff = old_size - new_size; BUG_ON(slot < 0); @@ -1984,9 +1988,45 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, } /* shift the data */ - memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + - data_end + size_diff, btrfs_leaf_data(leaf) + - data_end, old_data_start + new_size - data_end); + if (from_end) { + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + + data_end + size_diff, btrfs_leaf_data(leaf) + + data_end, old_data_start + new_size - data_end); + } else { + struct btrfs_disk_key disk_key; + u64 offset; + + btrfs_item_key(leaf, &disk_key, slot); + + if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) { + unsigned long ptr; + struct btrfs_file_extent_item *fi; + + fi = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + fi = (struct btrfs_file_extent_item *)( + (unsigned long)fi - size_diff); + + if (btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) { + ptr = btrfs_item_ptr_offset(leaf, slot); + memmove_extent_buffer(leaf, ptr, + (unsigned long)fi, + offsetof(struct btrfs_file_extent_item, + disk_bytenr)); + } + } + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + + data_end + size_diff, btrfs_leaf_data(leaf) + + data_end, old_data_start - data_end); + + offset = btrfs_disk_key_offset(&disk_key); + btrfs_set_disk_key_offset(&disk_key, offset + size_diff); + btrfs_set_item_key(leaf, &disk_key, slot); + if (slot == 0) + fixup_low_keys(trans, root, path, &disk_key, 1); + } item = btrfs_item_nr(leaf, slot); btrfs_set_item_size(leaf, item, new_size); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 70e143b13577..d82afb618bf1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -907,7 +907,7 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_truncate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - u32 new_size); + u32 new_size, int from_end); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 6f19de41b878..514a1dc337a8 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -249,7 +249,7 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_len - (ptr + sub_item_len - start)); ret = btrfs_truncate_item(trans, root, path, - item_len - sub_item_len); + item_len - sub_item_len, 1); } return 0; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 238cb1d81d56..44be9cfd30ee 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -263,7 +263,12 @@ int add_extent_mapping(struct extent_map_tree *tree, if (prev && prev->end + 1 == em->start && ((em->block_start == EXTENT_MAP_HOLE && prev->block_start == EXTENT_MAP_HOLE) || - (em->block_start == prev->block_end + 1))) { + (em->block_start == EXTENT_MAP_INLINE && + prev->block_start == EXTENT_MAP_INLINE) || + (em->block_start == EXTENT_MAP_DELALLOC && + prev->block_start == EXTENT_MAP_DELALLOC) || + (em->block_start < EXTENT_MAP_DELALLOC - 1 && + em->block_start == prev->block_end + 1))) { em->start = prev->start; em->block_start = prev->block_start; rb_erase(&prev->rb_node, &tree->map); @@ -1618,13 +1623,13 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, u64 extent_offset; u64 last_byte = i_size_read(inode); u64 block_start; + u64 iosize; sector_t sector; struct extent_map *em; struct block_device *bdev; int ret; int nr = 0; size_t page_offset = 0; - size_t iosize; size_t blocksize; loff_t i_size = i_size_read(inode); unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; @@ -1684,7 +1689,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, clear_extent_dirty(tree, cur, page_end, GFP_NOFS); break; } - em = get_extent(inode, page, page_offset, cur, end, 0); + em = get_extent(inode, page, page_offset, cur, end, 1); if (IS_ERR(em) || !em) { SetPageError(page); break; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 39d78d31b6a2..b6f6519f9ba3 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -24,7 +24,6 @@ * page->private values. Every page that is controlled by the extent * map has page->private set to one. */ - #define EXTENT_PAGE_PRIVATE 1 #define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 7eb9a5412e2f..614176e52851 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -291,7 +291,7 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, new_item_size = blocks * BTRFS_CRC32_SIZE; if (new_item_size >= btrfs_item_size_nr(leaf, slot)) return 0; - ret = btrfs_truncate_item(trans, root, path, new_item_size); + ret = btrfs_truncate_item(trans, root, path, new_item_size, 1); BUG_ON(ret); return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index abdd9caad94e..4e52f7ec1cbe 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -115,8 +115,20 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, goto fail; } if (ret == 1) { + struct btrfs_key found_key; + + if (path->slots[0] == 0) + goto insert; + path->slots[0]--; leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != inode->i_ino) + goto insert; + + if (found_key.type != BTRFS_EXTENT_DATA_KEY) + goto insert; ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); @@ -152,6 +164,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, offset + size - found_end, 1); BUG_ON(ret != 0); + ret = btrfs_extend_item(trans, root, path, offset + size - found_end); if (ret) { @@ -292,7 +305,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > 8192 || + inline_size > 32768 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; @@ -312,7 +325,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, aligned_end = (pos + write_bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); err = btrfs_drop_extents(trans, root, inode, start_pos, - aligned_end, end_pos, &hint_byte); + aligned_end, aligned_end, &hint_byte); if (err) goto failed; err = insert_inline_extent(trans, root, inode, start_pos, @@ -456,13 +469,15 @@ next_slot: goto next_slot; } - /* FIXME, there's only one inline extent allowed right now */ if (found_inline) { u64 mask = root->sectorsize - 1; search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; + if (end <= extent_end && start >= key.offset && found_inline) { + *hint_byte = EXTENT_MAP_INLINE; + } if (end < extent_end && end >= key.offset) { if (found_extent) { u64 disk_bytenr = @@ -479,8 +494,10 @@ next_slot: BUG_ON(ret); } } - if (!found_inline) - bookend = 1; + bookend = 1; + if (found_inline && start <= key.offset && + inline_end < extent_end) + keep = 1; } /* truncate existing extent */ if (start > key.offset) { @@ -510,7 +527,7 @@ next_slot: new_size = btrfs_file_extent_calc_inline_size( inline_end - key.offset); btrfs_truncate_item(trans, root, path, - new_size); + new_size, 1); } } /* delete the entire extent */ @@ -551,6 +568,13 @@ next_slot: if (!bookend) continue; } + if (bookend && found_inline && start <= key.offset && + inline_end < extent_end) { + u32 new_size; + new_size = btrfs_file_extent_calc_inline_size( + extent_end - inline_end); + btrfs_truncate_item(trans, root, path, new_size, 0); + } /* create bookend, splitting the extent in two */ if (bookend && found_extent) { struct btrfs_key ins; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5d10b64e42df..0c65141b9993 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -89,6 +89,9 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) ret = btrfs_drop_extents(trans, root, inode, start, start + num_bytes, start, &alloc_hint); + if (alloc_hint == EXTENT_MAP_INLINE) + goto out; + ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_bytes, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { @@ -558,6 +561,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, u64 item_end = 0; int found_extent; int del_item; + int extent_type = -1; btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1); path = btrfs_alloc_path(); @@ -597,10 +601,15 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (found_type == BTRFS_EXTENT_DATA_KEY) { fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, fi) != - BTRFS_FILE_EXTENT_INLINE) { + extent_type = btrfs_file_extent_type(leaf, fi); + if (extent_type != BTRFS_FILE_EXTENT_INLINE) { item_end += btrfs_file_extent_num_bytes(leaf, fi); + } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { + struct btrfs_item *item = btrfs_item_nr(leaf, + path->slots[0]); + item_end += btrfs_file_extent_inline_len(leaf, + item); } } if (found_type == BTRFS_CSUM_ITEM_KEY) { @@ -608,7 +617,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, inode->i_size); BUG_ON(ret); } - if (item_end < inode->i_size) { + if (item_end <= inode->i_size) { if (found_type == BTRFS_DIR_ITEM_KEY) { found_type = BTRFS_INODE_ITEM_KEY; } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { @@ -629,9 +638,10 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, found_extent = 0; /* FIXME, shrink the extent if the ref count is only 1 */ - if (found_type == BTRFS_EXTENT_DATA_KEY && - btrfs_file_extent_type(leaf, fi) != - BTRFS_FILE_EXTENT_INLINE) { + if (found_type != BTRFS_EXTENT_DATA_KEY) + goto delete; + + if (extent_type != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); if (!del_item) { @@ -659,7 +669,15 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, inode->i_blocks -= num_dec; } } + } else if (extent_type == BTRFS_FILE_EXTENT_INLINE && + !del_item) { + u32 newsize = inode->i_size - found_key.offset; + newsize = btrfs_file_extent_calc_inline_size(newsize); + ret = btrfs_truncate_item(trans, root, path, + newsize, 1); + BUG_ON(ret); } +delete: if (del_item) { ret = btrfs_del_item(trans, root, path); if (ret) @@ -769,7 +787,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) u64 pos = (inode->i_size + mask) & ~mask; u64 block_end = attr->ia_size | mask; u64 hole_size; - u64 alloc_hint; + u64 alloc_hint = 0; if (attr->ia_size <= pos) goto out; @@ -786,8 +804,11 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) pos, pos + hole_size, pos, &alloc_hint); - err = btrfs_insert_file_extent(trans, root, inode->i_ino, - pos, 0, 0, hole_size); + if (alloc_hint != EXTENT_MAP_INLINE) { + err = btrfs_insert_file_extent(trans, root, + inode->i_ino, + pos, 0, 0, hole_size); + } btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); unlock_extent(em_tree, pos, block_end, GFP_NOFS); @@ -1531,8 +1552,8 @@ again: em->end = EXTENT_MAP_HOLE; } em->bdev = inode->i_sb->s_bdev; - ret = btrfs_lookup_file_extent(NULL, root, path, - objectid, start, 0); + ret = btrfs_lookup_file_extent(trans, root, path, + objectid, start, trans != NULL); if (ret < 0) { err = ret; goto out; @@ -1627,15 +1648,23 @@ again: ((u64)root->sectorsize -1); map = kmap(page); ptr = btrfs_file_extent_inline_start(item) + extent_offset; - read_extent_buffer(leaf, map + page_offset, ptr, copy_size); - - if (em->start + copy_size <= em->end) { - size = min_t(u64, em->end + 1 - em->start, - PAGE_CACHE_SIZE - page_offset) - copy_size; - memset(map + page_offset + copy_size, 0, size); + if (create == 0 && !PageUptodate(page)) { + read_extent_buffer(leaf, map + page_offset, ptr, + copy_size); + flush_dcache_page(page); + } else if (create && PageUptodate(page)) { + if (!trans) { + kunmap(page); + free_extent_map(em); + em = NULL; + btrfs_release_path(root, path); + trans = btrfs_start_transaction(root, 1); + goto again; + } + write_extent_buffer(leaf, map + page_offset, ptr, + copy_size); + btrfs_mark_buffer_dirty(leaf); } - - flush_dcache_page(page); kunmap(page); set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS); goto insert; From b97f9203b4d672c06eca50a1b1b99e4f69e1daf8 Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 1 Nov 2007 11:28:41 -0400 Subject: [PATCH 0288/2985] Btrfs: Fix typo and memory leak in extent-tree.c This patch fixes a typo in update_block_group and memory leak in btrfs_free_block_groups. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7de7707d939c..e7192ea8f96c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -588,16 +588,15 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val < (cache->key.offset >> 1)) { int bit_to_clear; int bit_to_set; - cache->data = data; if (data) { - bit_to_clear = BLOCK_GROUP_DATA; - bit_to_set = BLOCK_GROUP_METADATA; + bit_to_clear = BLOCK_GROUP_METADATA; + bit_to_set = BLOCK_GROUP_DATA; cache->item.flags |= BTRFS_BLOCK_GROUP_DATA; } else { - bit_to_clear = BLOCK_GROUP_METADATA; - bit_to_set = BLOCK_GROUP_DATA; + bit_to_clear = BLOCK_GROUP_DATA; + bit_to_set = BLOCK_GROUP_METADATA; cache->item.flags &= ~BTRFS_BLOCK_GROUP_DATA; } @@ -1459,13 +1458,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) { u64 start; u64 end; + u64 ptr; int ret; - while(1) { ret = find_first_extent_bit(&info->block_group_cache, 0, &start, &end, (unsigned int)-1); if (ret) break; + ret = get_state_private(&info->block_group_cache, start, &ptr); + if (!ret) + kfree((void *)(unsigned long)ptr); clear_extent_bits(&info->block_group_cache, start, end, (unsigned int)-1, GFP_NOFS); } From 944746ec7575258475958d858cdf3a0e4c8f7b9e Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 1 Nov 2007 11:28:42 -0400 Subject: [PATCH 0289/2985] Btrfs: small fixes for find_lock_delalloc_range. There is a 'finish_wait', but no 'prepare_to_wait' . So I think that the 'prepare_to_wait' is missing. The second change is according to the name of variable. Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 44be9cfd30ee..ff8881fb56d6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1044,6 +1044,8 @@ search_again: if (state->state & EXTENT_LOCKED) { DEFINE_WAIT(wait); atomic_inc(&state->refs); + prepare_to_wait(&state->wq, &wait, + TASK_UNINTERRUPTIBLE); write_unlock_irq(&tree->lock); schedule(); write_lock_irq(&tree->lock); @@ -1059,7 +1061,7 @@ search_again: node = rb_next(node); if (!node) break; - total_bytes = state->end - state->start + 1; + total_bytes += state->end - state->start + 1; if (total_bytes >= max_bytes) break; } From b293f02e1423f2099744f3ade23ddd83b65321fc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 1 Nov 2007 19:45:34 -0400 Subject: [PATCH 0290/2985] Btrfs: Add writepages support Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 135 ++++++++++++++++++++++++++++++++++-------- fs/btrfs/extent_map.h | 4 ++ fs/btrfs/inode.c | 10 ++++ 3 files changed, 124 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index ff8881fb56d6..b0677c84bb75 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -35,6 +36,12 @@ struct tree_entry { struct rb_node rb_node; }; +struct extent_page_data { + struct bio *bio; + struct extent_map_tree *tree; + get_extent_t *get_extent; +}; + void __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", @@ -1460,37 +1467,73 @@ static int end_bio_extent_preparewrite(struct bio *bio, #endif } +static struct bio * +extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, + gfp_t gfp_flags) +{ + struct bio *bio; + + bio = bio_alloc(gfp_flags, nr_vecs); + + if (bio == NULL && (current->flags & PF_MEMALLOC)) { + while (!bio && (nr_vecs /= 2)) + bio = bio_alloc(gfp_flags, nr_vecs); + } + + if (bio) { + bio->bi_bdev = bdev; + bio->bi_sector = first_sector; + } + return bio; +} + +static int submit_one_bio(int rw, struct bio *bio) +{ + int ret = 0; + bio_get(bio); + submit_bio(rw, bio); + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + bio_put(bio); + return ret; +} + static int submit_extent_page(int rw, struct extent_map_tree *tree, struct page *page, sector_t sector, size_t size, unsigned long offset, struct block_device *bdev, + struct bio **bio_ret, + int max_pages, bio_end_io_t end_io_func) { - struct bio *bio; int ret = 0; + struct bio *bio; + int nr; - bio = bio_alloc(GFP_NOIO, 1); - - bio->bi_sector = sector; - bio->bi_bdev = bdev; - bio->bi_io_vec[0].bv_page = page; - bio->bi_io_vec[0].bv_len = size; - bio->bi_io_vec[0].bv_offset = offset; - - bio->bi_vcnt = 1; - bio->bi_idx = 0; - bio->bi_size = size; - + if (bio_ret && *bio_ret) { + bio = *bio_ret; + if (bio->bi_sector + (bio->bi_size >> 9) != sector || + bio_add_page(bio, page, size, offset) < size) { + ret = submit_one_bio(rw, bio); + bio = NULL; + } else { + return 0; + } + } + nr = min(max_pages, bio_get_nr_vecs(bdev)); + bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); + if (!bio) { + printk("failed to allocate bio nr %d\n", nr); + } + bio_add_page(bio, page, size, offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; + if (bio_ret) { + *bio_ret = bio; + } else { + ret = submit_one_bio(rw, bio); + } - bio_get(bio); - submit_bio(rw, bio); - - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; - - bio_put(bio); return ret; } @@ -1590,7 +1633,8 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, if (!ret) { ret = submit_extent_page(READ, tree, page, sector, iosize, page_offset, - bdev, end_bio_extent_readpage); + bdev, NULL, 1, + end_bio_extent_readpage); } if (ret) SetPageError(page); @@ -1613,11 +1657,12 @@ EXPORT_SYMBOL(extent_read_full_page); * are found, they are marked writeback. Then the lock bits are removed * and the end_io handler clears the writeback ranges */ -int extent_write_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent, - struct writeback_control *wbc) +static int __extent_writepage(struct page *page, struct writeback_control *wbc, + void *data) { struct inode *inode = page->mapping->host; + struct extent_page_data *epd = data; + struct extent_map_tree *tree = epd->tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = start + PAGE_CACHE_SIZE - 1; u64 end; @@ -1691,7 +1736,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, clear_extent_dirty(tree, cur, page_end, GFP_NOFS); break; } - em = get_extent(inode, page, page_offset, cur, end, 1); + em = epd->get_extent(inode, page, page_offset, cur, end, 1); if (IS_ERR(em) || !em) { SetPageError(page); break; @@ -1734,9 +1779,12 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, if (ret) SetPageError(page); else { + unsigned long nr = end_index + 1; set_range_writeback(tree, cur, cur + iosize - 1); + ret = submit_extent_page(WRITE, tree, page, sector, iosize, page_offset, bdev, + &epd->bio, nr, end_bio_extent_writepage); if (ret) SetPageError(page); @@ -1750,8 +1798,44 @@ done: unlock_page(page); return 0; } + +int extent_write_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + int ret; + struct extent_page_data epd = { + .bio = NULL, + .tree = tree, + .get_extent = get_extent, + }; + + ret = __extent_writepage(page, wbc, &epd); + if (epd.bio) + submit_one_bio(WRITE, epd.bio); + return ret; +} EXPORT_SYMBOL(extent_write_full_page); +int extent_writepages(struct extent_map_tree *tree, + struct address_space *mapping, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + int ret; + struct extent_page_data epd = { + .bio = NULL, + .tree = tree, + .get_extent = get_extent, + }; + + ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); + if (epd.bio) + submit_one_bio(WRITE, epd.bio); + return ret; +} +EXPORT_SYMBOL(extent_writepages); + /* * basic invalidatepage code, this waits on any locked or writeback * ranges corresponding to the page, and then deletes any extent state @@ -1869,6 +1953,7 @@ int extent_prepare_write(struct extent_map_tree *tree, EXTENT_LOCKED, 0, NULL, GFP_NOFS); ret = submit_extent_page(READ, tree, page, sector, iosize, page_offset, em->bdev, + NULL, 1, end_bio_extent_preparewrite); iocount++; block_start = block_start + iosize; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index b6f6519f9ba3..0b0dcf19fe45 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -136,6 +136,10 @@ int extent_invalidatepage(struct extent_map_tree *tree, int extent_write_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent, struct writeback_control *wbc); +int extent_writepages(struct extent_map_tree *tree, + struct address_space *mapping, + get_extent_t *get_extent, + struct writeback_control *wbc); int extent_prepare_write(struct extent_map_tree *tree, struct inode *inode, struct page *page, unsigned from, unsigned to, get_extent_t *get_extent); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0c65141b9993..25f32d7c7ee8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1747,6 +1747,15 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } + +static int btrfs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(mapping->host)->extent_tree; + return extent_writepages(tree, mapping, btrfs_get_extent, wbc); +} + static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) { struct extent_map_tree *tree; @@ -2526,6 +2535,7 @@ static struct extent_map_ops btrfs_extent_map_ops = { static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, + .writepages = btrfs_writepages, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, .commit_write = btrfs_commit_write, From c549228ff63498025bf0b065c5a5909a06f21647 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 6 Nov 2007 10:25:25 -0500 Subject: [PATCH 0291/2985] Btrfs: Properly update free space cache in __free_extent When pin_down_bytes decides not to pin a block because it was from the current transaction, make sure the in memory cache of free extents is updated Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e7192ea8f96c..9eb2ee02f643 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -714,7 +714,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, root->fs_info->running_transaction->transid; if (btrfs_header_generation(buf) == transid) { free_extent_buffer(buf); - return 0; + return 1; } } free_extent_buffer(buf); @@ -774,7 +774,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (pin) { ret = pin_down_bytes(root, bytenr, num_bytes, 0); - BUG_ON(ret); + if (ret > 0) + mark_free = 1; + BUG_ON(ret < 0); } /* block accounting for super block */ From 081e95736d9118a96e9f6dcc5cec02dc75c9e1cb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 6 Nov 2007 10:26:24 -0500 Subject: [PATCH 0292/2985] Btrfs: Make defrag check nodes against the progress key to prevent repeating work Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 56 ++++++++++++++++++++++++------------------ fs/btrfs/tree-defrag.c | 3 ++- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index eef9c92f86d5..ea9b46699349 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -161,6 +161,31 @@ static int close_blocks(u64 blocknr, u64 other, u32 blocksize) return 0; } +/* + * compare two keys in a memcmp fashion + */ +static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) +{ + struct btrfs_key k1; + + btrfs_disk_key_to_cpu(&k1, disk); + + if (k1.objectid > k2->objectid) + return 1; + if (k1.objectid < k2->objectid) + return -1; + if (k1.type > k2->type) + return 1; + if (k1.type < k2->type) + return -1; + if (k1.offset > k2->offset) + return 1; + if (k1.offset < k2->offset) + return -1; + return 0; +} + + int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int start_slot, int cache_only, u64 *last_ret, @@ -179,6 +204,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, int parent_level; int uptodate; u32 blocksize; + int progress_passed = 0; + struct btrfs_disk_key disk_key; parent_level = btrfs_header_level(parent); if (cache_only && parent_level != 1) @@ -213,6 +240,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, &parent->map_start, &parent->map_len, KM_USER1); } + btrfs_node_key(parent, &disk_key, i); + if (!progress_passed && comp_keys(&disk_key, progress) < 0) + continue; + + progress_passed = 1; blocknr = btrfs_node_blockptr(parent, i); if (last_block == 0) last_block = blocknr; @@ -292,30 +324,6 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root, return btrfs_item_offset_nr(leaf, nr - 1); } -/* - * compare two keys in a memcmp fashion - */ -static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) -{ - struct btrfs_key k1; - - btrfs_disk_key_to_cpu(&k1, disk); - - if (k1.objectid > k2->objectid) - return 1; - if (k1.objectid < k2->objectid) - return -1; - if (k1.type > k2->type) - return 1; - if (k1.type < k2->type) - return -1; - if (k1.offset > k2->offset) - return 1; - if (k1.offset < k2->offset) - return -1; - return 0; -} - static int check_node(struct btrfs_root *root, struct btrfs_path *path, int level) { diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 65ef12351566..6ef1ba5f9c2f 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -227,7 +227,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, break; if (wret < 0) ret = wret; - ret = -EAGAIN; + else + ret = -EAGAIN; break; } for (i = 0; i <= orig_level; i++) { From dcfec0dcb1b1a037fb26177789e8f108bc429cb3 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 6 Nov 2007 10:26:26 -0500 Subject: [PATCH 0293/2985] Btrfs: Fix u32 overflow in dirty_and_release_pages. When calculating the size of inline extent, inode->i_size should also be take into consideration, otherwise sys_write may drop some data silently. You can test this bug by: #dd if=/dev/zero bs=4k count=1 of=test_file #dd if=/dev/zero bs=2k count=1 of=test_file conv=notrunc Signed-off-by: Chris Mason --- fs/btrfs/file.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4e52f7ec1cbe..bb98f52f4ea4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -239,9 +239,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 start_pos; u64 end_of_last_block; u64 end_pos = pos + write_bytes; - u32 inline_size; + u64 inline_size; loff_t isize = i_size_read(inode); - em = alloc_extent_map(GFP_NOFS); if (!em) return -ENOMEM; @@ -328,9 +327,11 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, aligned_end, aligned_end, &hint_byte); if (err) goto failed; + if (isize > inline_size) + inline_size = min_t(u64, isize, aligned_end); + inline_size -= start_pos; err = insert_inline_extent(trans, root, inode, start_pos, - end_pos - start_pos, pages, 0, - num_pages); + inline_size, pages, 0, num_pages); BUG_ON(err); } if (end_pos > isize) { From a273208edd55463b3bcd8b77a6fe8ba54afc6940 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 6 Nov 2007 10:26:28 -0500 Subject: [PATCH 0294/2985] Fix EXTENT_MAP_INLINE off by one in btrfs_drop_extents Don't set hint_byte to EXTENT_MAP_INLINE when 'end == extent_end' or 'start == key.offset' . The inline extent will be truncated in these cases. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bb98f52f4ea4..5ceaed25dce1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -475,8 +475,7 @@ next_slot: search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; - - if (end <= extent_end && start >= key.offset && found_inline) { + if (end < extent_end && start > key.offset && found_inline) { *hint_byte = EXTENT_MAP_INLINE; } if (end < extent_end && end >= key.offset) { From f84a8b362d9785ca1fa0598d8a90f35184bd8750 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 6 Nov 2007 10:26:29 -0500 Subject: [PATCH 0295/2985] Btrfs: Optimize allocations as we need to mix data and metadata into one group Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/extent-tree.c | 42 ++++++++++++++++++++++++++++++++++++------ fs/btrfs/tree-defrag.c | 6 +++++- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d82afb618bf1..f7907b02fa77 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -279,6 +279,8 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_DATA 1 +#define BTRFS_BLOCK_GROUP_MIXED 2 + struct btrfs_block_group_item { __le64 used; u8 flags; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9eb2ee02f643..d54ab8ef06d5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -168,11 +168,13 @@ static u64 find_search_start(struct btrfs_root *root, u64 last; u64 start = 0; u64 end = 0; + int wrapped = 0; again: ret = cache_block_group(root, cache); if (ret) goto out; + last = max(search_start, cache->key.objectid); while(1) { @@ -195,8 +197,15 @@ out: new_group: last = cache->key.objectid + cache->key.offset; +wrapped: cache = btrfs_lookup_block_group(root->fs_info, last); if (!cache) { + if (!wrapped) { + wrapped = 1; + last = search_start; + data = BTRFS_BLOCK_GROUP_MIXED; + goto wrapped; + } return search_start; } cache = btrfs_find_block_group(root, cache, last, data, 0); @@ -236,9 +245,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, block_group_cache = &info->block_group_cache; if (!owner) - factor = 5; + factor = 8; - if (data) + if (data == BTRFS_BLOCK_GROUP_MIXED) + bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; + else if (data) bit = BLOCK_GROUP_DATA; else bit = BLOCK_GROUP_METADATA; @@ -246,14 +257,16 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (search_start) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_block_group(info, search_start); - if (shint && shint->data == data) { + if (shint && (shint->data == data || + shint->data == BTRFS_BLOCK_GROUP_MIXED)) { used = btrfs_block_group_used(&shint->item); if (used < div_factor(shint->key.offset, factor)) { return shint; } } } - if (hint && hint->data == data) { + if (hint && (hint->data == data || + hint->data == BTRFS_BLOCK_GROUP_MIXED)) { used = btrfs_block_group_used(&hint->item); if (used < div_factor(hint->key.offset, factor)) { return hint; @@ -592,11 +605,15 @@ static int update_block_group(struct btrfs_trans_handle *trans, if (data) { bit_to_clear = BLOCK_GROUP_METADATA; bit_to_set = BLOCK_GROUP_DATA; + cache->item.flags &= + ~BTRFS_BLOCK_GROUP_MIXED; cache->item.flags |= BTRFS_BLOCK_GROUP_DATA; } else { bit_to_clear = BLOCK_GROUP_DATA; bit_to_set = BLOCK_GROUP_METADATA; + cache->item.flags &= + ~BTRFS_BLOCK_GROUP_MIXED; cache->item.flags &= ~BTRFS_BLOCK_GROUP_DATA; } @@ -606,6 +623,14 @@ static int update_block_group(struct btrfs_trans_handle *trans, set_extent_bits(&info->block_group_cache, start, end, bit_to_set, GFP_NOFS); + } else if (cache->data != data && + cache->data != BTRFS_BLOCK_GROUP_MIXED) { + cache->data = BTRFS_BLOCK_GROUP_MIXED; + set_extent_bits(&info->block_group_cache, + start, end, + BLOCK_GROUP_DATA | + BLOCK_GROUP_METADATA, + GFP_NOFS); } old_val += num_bytes; } else { @@ -886,6 +911,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; + u64 cached_start; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); @@ -910,6 +936,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); + cached_start = search_start; btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; @@ -1532,9 +1559,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); - if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { + if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) { + bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; + cache->data = BTRFS_BLOCK_GROUP_MIXED; + } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { bit = BLOCK_GROUP_DATA; - cache->data = 1; + cache->data = BTRFS_BLOCK_GROUP_DATA; } else { bit = BLOCK_GROUP_METADATA; cache->data = 0; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 6ef1ba5f9c2f..3994795edfeb 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -58,6 +58,10 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, if (root->fs_info->extent_root == root) is_extent = 1; + if (*level == 1 && cache_only && path->nodes[1] && + !btrfs_buffer_defrag(path->nodes[1])) { + goto out; + } while(*level > 0) { WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); @@ -116,7 +120,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, WARN_ON(*level >= BTRFS_MAX_LEVEL); btrfs_clear_buffer_defrag(path->nodes[*level]); - +out: free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; From e644d021e328d3902559e5db687383f2da85993c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 6 Nov 2007 15:09:29 -0500 Subject: [PATCH 0296/2985] Fix recursive KM_USER1 usage in btrfs_realloc_node Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 11 +++-------- fs/btrfs/struct-funcs.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f7907b02fa77..030d21d7f98c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -557,14 +557,9 @@ static inline unsigned long btrfs_node_key_ptr_offset(int nr) sizeof(struct btrfs_key_ptr) * nr; } -static inline void btrfs_node_key(struct extent_buffer *eb, - struct btrfs_disk_key *disk_key, int nr) -{ - unsigned long ptr; - ptr = btrfs_node_key_ptr_offset(nr); - read_eb_member(eb, (struct btrfs_key_ptr *)ptr, - struct btrfs_key_ptr, key, disk_key); -} +void btrfs_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr); + static inline void btrfs_set_node_key(struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index cf68fcf9b552..c5715a60554c 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -95,3 +95,19 @@ void btrfs_set_##name(struct extent_buffer *eb, \ #include "ctree.h" +void btrfs_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) +{ + unsigned long ptr = btrfs_node_key_ptr_offset(nr); + if (eb->map_token && ptr >= eb->map_start && + ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) { + memcpy(disk_key, eb->kaddr + ptr - eb->map_start, + sizeof(*disk_key)); + return; + } else if (eb->map_token) { + unmap_extent_buffer(eb, eb->map_token, KM_USER1); + eb->map_token = NULL; + } + read_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); +} From 34a3821873aeabff2607c8093bce82cd1fbcfd60 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 Nov 2007 13:31:03 -0500 Subject: [PATCH 0297/2985] Btrfs: Change push_leaf_{leaf,right} to empty the src leave during item deletion Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 72 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ea9b46699349..1b47fe71e0b4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1394,19 +1394,21 @@ int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) * room, 0 if everything worked out and < 0 if there were major errors. */ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size) + *root, struct btrfs_path *path, int data_size, + int empty) { struct extent_buffer *left = path->nodes[0]; struct extent_buffer *right; struct extent_buffer *upper; struct btrfs_disk_key disk_key; int slot; - int i; + u32 i; int free_space; int push_space = 0; int push_items = 0; struct btrfs_item *item; u32 left_nritems; + u32 nr; u32 right_nritems; u32 data_end; u32 this_item_size; @@ -1447,7 +1449,13 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } - for (i = left_nritems - 1; i >= 1; i--) { + if (empty) + nr = 0; + else + nr = 1; + + i = left_nritems - 1; + while (i >= nr) { item = btrfs_item_nr(left, i); if (path->slots[0] == i) @@ -1466,6 +1474,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root break; push_items++; push_space += this_item_size + sizeof(*item); + if (i == 0) + break; + i--; } if (left->map_token) { unmap_extent_buffer(left, left->map_token, KM_USER1); @@ -1477,11 +1488,12 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } - if (push_items == left_nritems) + if (!empty && push_items == left_nritems) WARN_ON(1); /* push left to right */ right_nritems = btrfs_header_nritems(right); + push_space = btrfs_item_end_nr(left, left_nritems - push_items); push_space -= leaf_data_end(root, left); @@ -1511,7 +1523,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root right_nritems += push_items; btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); - for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); if (!right->map_token) { @@ -1532,7 +1543,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root left_nritems -= push_items; btrfs_set_header_nritems(left, left_nritems); - btrfs_mark_buffer_dirty(left); + if (left_nritems) + btrfs_mark_buffer_dirty(left); btrfs_mark_buffer_dirty(right); btrfs_item_key(right, &disk_key, 0); @@ -1555,7 +1567,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root * least data_size bytes. returns zero if the push worked, nonzero otherwise */ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size) + *root, struct btrfs_path *path, int data_size, + int empty) { struct btrfs_disk_key disk_key; struct extent_buffer *right = path->nodes[0]; @@ -1568,6 +1581,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_item *item; u32 old_left_nritems; u32 right_nritems; + u32 nr; int ret = 0; int wret; u32 this_item_size; @@ -1607,7 +1621,12 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } - for (i = 0; i < right_nritems - 1; i++) { + if (empty) + nr = right_nritems; + else + nr = right_nritems - 1; + + for (i = 0; i < nr; i++) { item = btrfs_item_nr(right, i); if (!right->map_token) { map_extent_buffer(right, (unsigned long)item, @@ -1637,7 +1656,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root free_extent_buffer(left); return 1; } - if (push_items == btrfs_header_nritems(right)) + if (!empty && push_items == btrfs_header_nritems(right)) WARN_ON(1); /* push data from right to left */ @@ -1681,20 +1700,26 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root } /* fixup right node */ - push_space = btrfs_item_offset_nr(right, push_items - 1) - - leaf_data_end(root, right); - memmove_extent_buffer(right, btrfs_leaf_data(right) + - BTRFS_LEAF_DATA_SIZE(root) - push_space, - btrfs_leaf_data(right) + - leaf_data_end(root, right), push_space); + if (push_items > right_nritems) { + printk("push items %d nr %u\n", push_items, right_nritems); + WARN_ON(1); + } - memmove_extent_buffer(right, btrfs_item_nr_offset(0), + if (push_items < right_nritems) { + push_space = btrfs_item_offset_nr(right, push_items - 1) - + leaf_data_end(root, right); + memmove_extent_buffer(right, btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(right) + + leaf_data_end(root, right), push_space); + + memmove_extent_buffer(right, btrfs_item_nr_offset(0), btrfs_item_nr_offset(push_items), (btrfs_header_nritems(right) - push_items) * sizeof(struct btrfs_item)); - right_nritems = btrfs_header_nritems(right) - push_items; - btrfs_set_header_nritems(right, right_nritems); + } + btrfs_set_header_nritems(right, right_nritems - push_items); push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { @@ -1717,7 +1742,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root } btrfs_mark_buffer_dirty(left); - btrfs_mark_buffer_dirty(right); + if (right_nritems) + btrfs_mark_buffer_dirty(right); btrfs_item_key(right, &disk_key, 0); wret = fixup_low_keys(trans, root, path, &disk_key, 1); @@ -1768,12 +1794,12 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root /* first try to make some room by pushing left and right */ if (ins_key->type != BTRFS_DIR_ITEM_KEY) { - wret = push_leaf_right(trans, root, path, data_size); + wret = push_leaf_right(trans, root, path, data_size, 0); if (wret < 0) { return wret; } if (wret) { - wret = push_leaf_left(trans, root, path, data_size); + wret = push_leaf_left(trans, root, path, data_size, 0); if (wret < 0) return wret; } @@ -2403,13 +2429,13 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, slot = path->slots[1]; extent_buffer_get(leaf); - wret = push_leaf_right(trans, root, path, 1); + wret = push_leaf_right(trans, root, path, 1, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; if (path->nodes[0] == leaf && btrfs_header_nritems(leaf)) { - wret = push_leaf_left(trans, root, path, 1); + wret = push_leaf_left(trans, root, path, 1, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; } From 008630c17cc5654e141c7fd24a3e737de643fec1 Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 7 Nov 2007 13:31:09 -0500 Subject: [PATCH 0298/2985] Properly delete csum item in btrfs_truncate_in_trans. When 'item_end' is equal to 'inode->i_size', 'found_type' is updated and current item is skipped. This behavior is correct for extent item, but incorrect for csum item. For example, there is a csum item with 'offset == 0'. When deleting the inode, 'inode->i_size' is set to 0, so the csum item isn't deleted. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 25f32d7c7ee8..90cb71d68294 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -611,13 +611,14 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, item_end += btrfs_file_extent_inline_len(leaf, item); } + item_end--; } if (found_type == BTRFS_CSUM_ITEM_KEY) { ret = btrfs_csum_truncate(trans, root, path, inode->i_size); BUG_ON(ret); } - if (item_end <= inode->i_size) { + if (item_end < inode->i_size) { if (found_type == BTRFS_DIR_ITEM_KEY) { found_type = BTRFS_INODE_ITEM_KEY; } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { From 0da5468f4724a59e745d938395beffe830c424ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 Nov 2007 21:08:01 -0500 Subject: [PATCH 0299/2985] Btrfs: Use writepages for the metadata too Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8935eec31ee2..6c8533fba7c9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -43,6 +43,8 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) } #endif +static struct extent_map_ops btree_extent_map_ops; + struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -188,15 +190,29 @@ out: return 0; } +static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) +{ + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + + csum_dirty_buffer(root, page); + return 0; +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_map_tree *tree; - struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; tree = &BTRFS_I(page->mapping->host)->extent_tree; - - csum_dirty_buffer(root, page); return extent_write_full_page(tree, page, btree_get_extent, wbc); } + +static int btree_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(mapping->host)->extent_tree; + return extent_writepages(tree, mapping, btree_get_extent, wbc); +} + int btree_readpage(struct file *file, struct page *page) { struct extent_map_tree *tree; @@ -251,6 +267,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) static struct address_space_operations btree_aops = { .readpage = btree_readpage, .writepage = btree_writepage, + .writepages = btree_writepages, .releasepage = btree_releasepage, .invalidatepage = btree_invalidatepage, .sync_page = block_sync_page, @@ -538,6 +555,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); + BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops; + extent_map_tree_init(&fs_info->free_space_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); extent_map_tree_init(&fs_info->block_group_cache, @@ -832,3 +851,7 @@ int btrfs_read_buffer(struct extent_buffer *buf) return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 0, 1); } + +static struct extent_map_ops btree_extent_map_ops = { + .writepage_io_hook = btree_writepage_io_hook, +}; From 09be207d1ba224531a61de9afdc07a125e45318c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 Nov 2007 21:08:16 -0500 Subject: [PATCH 0300/2985] Btrfs: Fix failure cleanups when allocating extent buffers fail Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 56 +++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index b0677c84bb75..c976615dcda3 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2106,25 +2106,17 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree, spin_lock(&tree->lru_lock); eb = find_lru(tree, start, len); - if (eb) { - goto lru_add; - } spin_unlock(&tree->lru_lock); - if (eb) { - memset(eb, 0, sizeof(*eb)); - } else { - eb = kmem_cache_zalloc(extent_buffer_cache, mask); + return eb; } + + eb = kmem_cache_zalloc(extent_buffer_cache, mask); INIT_LIST_HEAD(&eb->lru); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); - spin_lock(&tree->lru_lock); -lru_add: - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); return eb; } @@ -2151,7 +2143,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, return NULL; if (eb->flags & EXTENT_BUFFER_FILLED) - return eb; + goto lru_add; if (page0) { eb->first_page = page0; @@ -2169,11 +2161,6 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); if (!p) { WARN_ON(1); - /* make sure the free only frees the pages we've - * grabbed a reference on - */ - eb->len = i << PAGE_CACHE_SHIFT; - eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; } set_page_extent_mapped(p); @@ -2192,9 +2179,20 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, if (uptodate) eb->flags |= EXTENT_UPTODATE; eb->flags |= EXTENT_BUFFER_FILLED; + +lru_add: + spin_lock(&tree->lru_lock); + add_lru(tree, eb); + spin_unlock(&tree->lru_lock); return eb; + fail: - free_extent_buffer(eb); + if (!atomic_dec_and_test(&eb->refs)) + return NULL; + for (index = 0; index < i; index++) { + page_cache_release(extent_buffer_page(eb, index)); + } + __free_extent_buffer(eb); return NULL; } EXPORT_SYMBOL(alloc_extent_buffer); @@ -2204,7 +2202,8 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, gfp_t mask) { unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; struct page *p; struct address_space *mapping = tree->mapping; @@ -2215,16 +2214,11 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, return NULL; if (eb->flags & EXTENT_BUFFER_FILLED) - return eb; + goto lru_add; for (i = 0; i < num_pages; i++, index++) { p = find_lock_page(mapping, index); if (!p) { - /* make sure the free only frees the pages we've - * grabbed a reference on - */ - eb->len = i << PAGE_CACHE_SHIFT; - eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); goto fail; } set_page_extent_mapped(p); @@ -2245,9 +2239,19 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, if (uptodate) eb->flags |= EXTENT_UPTODATE; eb->flags |= EXTENT_BUFFER_FILLED; + +lru_add: + spin_lock(&tree->lru_lock); + add_lru(tree, eb); + spin_unlock(&tree->lru_lock); return eb; fail: - free_extent_buffer(eb); + if (!atomic_dec_and_test(&eb->refs)) + return NULL; + for (index = 0; index < i; index++) { + page_cache_release(extent_buffer_page(eb, index)); + } + __free_extent_buffer(eb); return NULL; } EXPORT_SYMBOL(find_extent_buffer); From 257d0ce36f215937602eb5c1b328b69afaf3bcd8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 Nov 2007 21:08:16 -0500 Subject: [PATCH 0301/2985] Btrfs: Allow large data extents in a single file to span into metadata block groups Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 36 ++++++++++++++++++++++++++++++------ fs/btrfs/file.c | 6 ++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d54ab8ef06d5..7405bd5301af 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -168,6 +168,7 @@ static u64 find_search_start(struct btrfs_root *root, u64 last; u64 start = 0; u64 end = 0; + u64 cache_miss = 0; int wrapped = 0; again: @@ -181,14 +182,20 @@ again: ret = find_first_extent_bit(&root->fs_info->free_space_cache, last, &start, &end, EXTENT_DIRTY); if (ret) { + if (!cache_miss) + cache_miss = last; goto new_group; } start = max(last, start); last = end + 1; - if (end + 1 - start < num) + if (last - start < num) { + if (last == cache->key.objectid + cache->key.offset) + cache_miss = start; continue; - if (start + num >= cache->key.objectid + cache->key.offset) + } + if (data != BTRFS_BLOCK_GROUP_MIXED && + start + num >= cache->key.objectid + cache->key.offset) goto new_group; return start; } @@ -208,13 +215,22 @@ wrapped: } return search_start; } + if (cache_miss && !cache->cached) { + cache_block_group(root, cache); + last = cache_miss; + + cache = btrfs_lookup_block_group(root->fs_info, last); + } cache = btrfs_find_block_group(root, cache, last, data, 0); *cache_ret = cache; + cache_miss = 0; goto again; } static u64 div_factor(u64 num, int factor) { + if (factor == 10) + return num; num *= factor; do_div(num, 10); return num; @@ -247,9 +263,10 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (!owner) factor = 8; - if (data == BTRFS_BLOCK_GROUP_MIXED) + if (data == BTRFS_BLOCK_GROUP_MIXED) { bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; - else if (data) + factor = 10; + } else if (data) bit = BLOCK_GROUP_DATA; else bit = BLOCK_GROUP_METADATA; @@ -918,6 +935,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root level = btrfs_header_level(root->node); + if (num_bytes >= 96 * 1024 * 1024 && hint_byte) { + data = BTRFS_BLOCK_GROUP_MIXED; + } + if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); if (hint_byte) { @@ -937,6 +958,7 @@ check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); cached_start = search_start; + btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; @@ -1021,7 +1043,8 @@ check_failed: start_found = 1; last_byte = key.objectid + key.offset; - if (!full_scan && last_byte >= block_group->key.objectid + + if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && + last_byte >= block_group->key.objectid + block_group->key.offset) { btrfs_release_path(root, path); search_start = block_group->key.objectid + @@ -1042,7 +1065,8 @@ check_pending: if (ins->objectid + num_bytes >= search_end) goto enospc; - if (!full_scan && ins->objectid + num_bytes >= block_group-> + if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && + ins->objectid + num_bytes >= block_group-> key.objectid + block_group->key.offset) { search_start = block_group->key.objectid + block_group->key.offset; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5ceaed25dce1..9dd2c5c23b56 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -439,6 +439,12 @@ next_slot: struct btrfs_file_extent_item); found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = + btrfs_file_extent_disk_bytenr(leaf, + extent); + if (extent_end) + *hint_byte = extent_end; + extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, extent); found_extent = 1; From 856bf3e592f917e7d663cb7fa93e83fe795e8f4e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 8 Nov 2007 10:59:05 -0500 Subject: [PATCH 0302/2985] Btrfs: Avoid extent_buffer lru corruption Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index c976615dcda3..754bc42c162a 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2047,7 +2047,7 @@ static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb) rm = list_entry(tree->buffer_lru.prev, struct extent_buffer, lru); tree->lru_size--; - list_del(&rm->lru); + list_del_init(&rm->lru); free_extent_buffer(rm); } } else @@ -2187,6 +2187,9 @@ lru_add: return eb; fail: + spin_lock(&tree->lru_lock); + list_del_init(&eb->lru); + spin_unlock(&tree->lru_lock); if (!atomic_dec_and_test(&eb->refs)) return NULL; for (index = 0; index < i; index++) { @@ -2246,6 +2249,9 @@ lru_add: spin_unlock(&tree->lru_lock); return eb; fail: + spin_lock(&tree->lru_lock); + list_del_init(&eb->lru); + spin_unlock(&tree->lru_lock); if (!atomic_dec_and_test(&eb->refs)) return NULL; for (index = 0; index < i; index++) { From 3ab2fb5a8cb003897016b6eb38ddad916226c1b2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 8 Nov 2007 10:59:22 -0500 Subject: [PATCH 0303/2985] Btrfs: Add readpages support Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 70 ++++++++++++++++++++++++++++++++++++++----- fs/btrfs/extent_map.h | 4 +++ fs/btrfs/inode.c | 12 +++++++- 3 files changed, 78 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 754bc42c162a..0077c6c1d9f9 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -1503,7 +1504,7 @@ static int submit_extent_page(int rw, struct extent_map_tree *tree, size_t size, unsigned long offset, struct block_device *bdev, struct bio **bio_ret, - int max_pages, + unsigned long max_pages, bio_end_io_t end_io_func) { int ret = 0; @@ -1520,7 +1521,7 @@ static int submit_extent_page(int rw, struct extent_map_tree *tree, return 0; } } - nr = min(max_pages, bio_get_nr_vecs(bdev)); + nr = min_t(int, max_pages, bio_get_nr_vecs(bdev)); bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); if (!bio) { printk("failed to allocate bio nr %d\n", nr); @@ -1552,8 +1553,10 @@ void set_page_extent_mapped(struct page *page) * into the tree that are removed when the IO is done (by the end_io * handlers) */ -int extent_read_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent) +static int __extent_read_full_page(struct extent_map_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct bio **bio) { struct inode *inode = page->mapping->host; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; @@ -1631,10 +1634,12 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, cur + iosize - 1); } if (!ret) { + unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + nr -= page->index; ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, - bdev, NULL, 1, - end_bio_extent_readpage); + sector, iosize, page_offset, + bdev, bio, nr, + end_bio_extent_readpage); } if (ret) SetPageError(page); @@ -1649,6 +1654,18 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, } return 0; } + +int extent_read_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent) +{ + struct bio *bio = NULL; + int ret; + + ret = __extent_read_full_page(tree, page, get_extent, &bio); + if (bio) + submit_one_bio(READ, bio); + return ret; +} EXPORT_SYMBOL(extent_read_full_page); /* @@ -1836,6 +1853,45 @@ int extent_writepages(struct extent_map_tree *tree, } EXPORT_SYMBOL(extent_writepages); +int extent_readpages(struct extent_map_tree *tree, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, + get_extent_t get_extent) +{ + struct bio *bio = NULL; + unsigned page_idx; + struct pagevec pvec; + + pagevec_init(&pvec, 0); + for (page_idx = 0; page_idx < nr_pages; page_idx++) { + struct page *page = list_entry(pages->prev, struct page, lru); + + prefetchw(&page->flags); + list_del(&page->lru); + /* + * what we want to do here is call add_to_page_cache_lru, + * but that isn't exported, so we reproduce it here + */ + if (!add_to_page_cache(page, mapping, + page->index, GFP_KERNEL)) { + + /* open coding of lru_cache_add, also not exported */ + page_cache_get(page); + if (!pagevec_add(&pvec, page)) + __pagevec_lru_add(&pvec); + __extent_read_full_page(tree, page, get_extent, &bio); + } + page_cache_release(page); + } + if (pagevec_count(&pvec)) + __pagevec_lru_add(&pvec); + BUG_ON(!list_empty(pages)); + if (bio) + submit_one_bio(READ, bio); + return 0; +} +EXPORT_SYMBOL(extent_readpages); + /* * basic invalidatepage code, this waits on any locked or writeback * ranges corresponding to the page, and then deletes any extent state diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 0b0dcf19fe45..fbd23a72f53b 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -140,6 +140,10 @@ int extent_writepages(struct extent_map_tree *tree, struct address_space *mapping, get_extent_t *get_extent, struct writeback_control *wbc); +int extent_readpages(struct extent_map_tree *tree, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, + get_extent_t get_extent); int extent_prepare_write(struct extent_map_tree *tree, struct inode *inode, struct page *page, unsigned from, unsigned to, get_extent_t *get_extent); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 90cb71d68294..d6d32465bc9a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1748,7 +1748,6 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } - static int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -1757,6 +1756,16 @@ static int btrfs_writepages(struct address_space *mapping, return extent_writepages(tree, mapping, btrfs_get_extent, wbc); } +static int +btrfs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(mapping->host)->extent_tree; + return extent_readpages(tree, mapping, pages, nr_pages, + btrfs_get_extent); +} + static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) { struct extent_map_tree *tree; @@ -2537,6 +2546,7 @@ static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, .writepages = btrfs_writepages, + .readpages = btrfs_readpages, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, .commit_write = btrfs_commit_write, From 0591fb56fb732c7bf500d1446d0bf927046d7002 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 11 Nov 2007 08:22:00 -0500 Subject: [PATCH 0304/2985] Btrfs: Fix extent bit range testing It could return the bit as set when there was actually a hole at the very end of the range. Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 0077c6c1d9f9..b3ff63c4bf2a 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -96,7 +96,7 @@ void extent_map_tree_empty_lru(struct extent_map_tree *tree) while(!list_empty(&tree->buffer_lru)) { eb = list_entry(tree->buffer_lru.next, struct extent_buffer, lru); - list_del(&eb->lru); + list_del_init(&eb->lru); free_extent_buffer(eb); } } @@ -1212,13 +1212,15 @@ int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, node = tree_search(&tree->state, start); while (node && start <= end) { state = rb_entry(node, struct extent_state, rb_node); - if (state->start > end) - break; if (filled && state->start > start) { bitset = 0; break; } + + if (state->start > end) + break; + if (state->state & bits) { bitset = 1; if (!filled) @@ -2208,6 +2210,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, page_cache_get(page0); mark_page_accessed(page0); set_page_extent_mapped(page0); + WARN_ON(!PageUptodate(page0)); set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); } else { @@ -2248,9 +2251,11 @@ fail: spin_unlock(&tree->lru_lock); if (!atomic_dec_and_test(&eb->refs)) return NULL; - for (index = 0; index < i; index++) { + for (index = 1; index < i; index++) { page_cache_release(extent_buffer_page(eb, index)); } + if (i > 0) + page_cache_release(extent_buffer_page(eb, 0)); __free_extent_buffer(eb); return NULL; } @@ -2310,9 +2315,11 @@ fail: spin_unlock(&tree->lru_lock); if (!atomic_dec_and_test(&eb->refs)) return NULL; - for (index = 0; index < i; index++) { + for (index = 1; index < i; index++) { page_cache_release(extent_buffer_page(eb, index)); } + if (i > 0) + page_cache_release(extent_buffer_page(eb, 0)); __free_extent_buffer(eb); return NULL; } @@ -2329,11 +2336,13 @@ void free_extent_buffer(struct extent_buffer *eb) if (!atomic_dec_and_test(&eb->refs)) return; + WARN_ON(!list_empty(&eb->lru)); num_pages = num_extent_pages(eb->start, eb->len); - for (i = 0; i < num_pages; i++) { + for (i = 1; i < num_pages; i++) { page_cache_release(extent_buffer_page(eb, i)); } + page_cache_release(extent_buffer_page(eb, 0)); __free_extent_buffer(eb); } EXPORT_SYMBOL(free_extent_buffer); @@ -2469,6 +2478,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, EXTENT_UPTODATE, 1)) { return 0; } + if (start) { WARN_ON(start < eb->start); start_i = (start >> PAGE_CACHE_SHIFT) - @@ -2577,7 +2587,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, *map_start = 0; } else { offset = 0; - *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; + *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; } if (start + min_len > eb->len) { printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); From 5103e947b9b7ac18ddb21a04ee3486e94c6504d7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 16 Nov 2007 11:45:54 -0500 Subject: [PATCH 0305/2985] xattr support for btrfs Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/acl.c | 66 ++++++ fs/btrfs/ctree.h | 22 +- fs/btrfs/dir-item.c | 100 ++++++++- fs/btrfs/inode.c | 15 +- fs/btrfs/print-tree.c | 3 +- fs/btrfs/super.c | 2 + fs/btrfs/xattr.c | 497 ++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/xattr.h | 58 +++++ 9 files changed, 751 insertions(+), 14 deletions(-) create mode 100644 fs/btrfs/acl.c create mode 100644 fs/btrfs/xattr.c create mode 100644 fs/btrfs/xattr.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 551743be5f03..d5804c5ca075 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,7 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o sysfs.o struct-funcs.o + extent_map.o sysfs.o struct-funcs.o xattr.o acl.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c new file mode 100644 index 000000000000..aee9f0657c35 --- /dev/null +++ b/fs/btrfs/acl.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2007 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include +#include +#include +#include "ctree.h" +#include "xattr.h" + +static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name, + void *value, size_t size) +{ + return btrfs_xattr_get(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, name, + value, size); +} + +static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + return btrfs_xattr_set(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, name, + value, size, flags); +} + +static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name, + void *value, size_t size) +{ + return btrfs_xattr_get(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, + name, value, size); +} + +static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + return btrfs_xattr_set(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, + name, value, size, flags); +} + +struct xattr_handler btrfs_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .list = btrfs_xattr_generic_list, + .get = btrfs_xattr_acl_default_get, + .set = btrfs_xattr_acl_default_set, +}; + +struct xattr_handler btrfs_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .list = btrfs_xattr_generic_list, + .get = btrfs_xattr_acl_access_get, + .set = btrfs_xattr_acl_access_set, +}; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 030d21d7f98c..27cadae1af63 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -63,7 +63,8 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_FT_FIFO 5 #define BTRFS_FT_SOCK 6 #define BTRFS_FT_SYMLINK 7 -#define BTRFS_FT_MAX 8 +#define BTRFS_FT_XATTR 8 +#define BTRFS_FT_MAX 9 /* * the key defines the order in the tree, and so it also defines (optimal) @@ -226,7 +227,7 @@ struct btrfs_inode_item { struct btrfs_dir_item { struct btrfs_disk_key location; - __le16 flags; + __le16 data_len; __le16 name_len; u8 type; } __attribute__ ((__packed__)); @@ -367,7 +368,7 @@ struct btrfs_root { * the FS */ #define BTRFS_INODE_ITEM_KEY 1 - +#define BTRFS_XATTR_ITEM_KEY 2 /* reserve 2-15 close to the inode for later flexibility */ /* @@ -621,7 +622,7 @@ static inline void btrfs_set_item_key(struct extent_buffer *eb, } /* struct btrfs_dir_item */ -BTRFS_SETGET_FUNCS(dir_flags, struct btrfs_dir_item, flags, 16); +BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); @@ -962,6 +963,15 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_dir_item *di); +int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, const char *name, + u16 name_len, const void *data, u16 data_len, + u64 dir); +struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + const char *name, u16 name_len, + int mod); /* inode-map.c */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, @@ -1039,4 +1049,8 @@ int btrfs_sysfs_add_root(struct btrfs_root *root); void btrfs_sysfs_del_root(struct btrfs_root *root); void btrfs_sysfs_del_super(struct btrfs_fs_info *root); +/* xattr.c */ +ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); +int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode); #endif diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 514a1dc337a8..ddbe12ae0d63 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -43,8 +43,6 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle return ERR_PTR(-EEXIST); ret = btrfs_extend_item(trans, root, path, data_size); WARN_ON(ret > 0); - if (ret) - return ERR_PTR(ret); } if (ret < 0) return ERR_PTR(ret); @@ -57,6 +55,57 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle return (struct btrfs_dir_item *)ptr; } +int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, const char *name, + u16 name_len, const void *data, u16 data_len, + u64 dir) +{ + int ret = 0; + struct btrfs_path *path; + struct btrfs_dir_item *dir_item; + unsigned long name_ptr, data_ptr; + struct btrfs_key key, location; + struct btrfs_disk_key disk_key; + struct extent_buffer *leaf; + u32 data_size; + + key.objectid = dir; + btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); + ret = btrfs_name_hash(name, name_len, &key.offset); + BUG_ON(ret); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + data_size = sizeof(*dir_item) + name_len + data_len; + dir_item = insert_with_overflow(trans, root, path, &key, data_size, + name, name_len); + /* + * FIXME: at some point we should handle xattr's that are larger than + * what we can fit in our leaf. We set location to NULL b/c we arent + * pointing at anything else, that will change if we store the xattr + * data in a separate inode. + */ + BUG_ON(IS_ERR(dir_item)); + memset(&location, 0, sizeof(location)); + + leaf = path->nodes[0]; + btrfs_cpu_key_to_disk(&disk_key, &location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + btrfs_set_dir_data_len(leaf, dir_item, data_len); + name_ptr = (unsigned long)(dir_item + 1); + data_ptr = (unsigned long)((char *)name_ptr + name_len); + + write_extent_buffer(leaf, name, name_ptr, name_len); + write_extent_buffer(leaf, data, data_ptr, data_len); + btrfs_mark_buffer_dirty(path->nodes[0]); + + btrfs_free_path(path); + return ret; +} + int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, struct btrfs_key *location, u8 type) @@ -90,7 +139,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_cpu_key_to_disk(&disk_key, location); btrfs_set_dir_item_key(leaf, dir_item, &disk_key); btrfs_set_dir_type(leaf, dir_item, type); - btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_data_len(leaf, dir_item, 0); btrfs_set_dir_name_len(leaf, dir_item, name_len); name_ptr = (unsigned long)(dir_item + 1); @@ -117,7 +166,7 @@ second_insert: btrfs_cpu_key_to_disk(&disk_key, location); btrfs_set_dir_item_key(leaf, dir_item, &disk_key); btrfs_set_dir_type(leaf, dir_item, type); - btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_data_len(leaf, dir_item, 0); btrfs_set_dir_name_len(leaf, dir_item, name_len); name_ptr = (unsigned long)(dir_item + 1); write_extent_buffer(leaf, name, name_ptr, name_len); @@ -194,6 +243,43 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, return btrfs_match_dir_item_name(root, path, name, name_len); } +struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + const char *name, u16 name_len, + int mod) +{ + int ret; + struct btrfs_key key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + struct btrfs_key found_key; + struct extent_buffer *leaf; + + key.objectid = dir; + btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); + ret = btrfs_name_hash(name, name_len, &key.offset); + BUG_ON(ret); + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) { + if (path->slots[0] == 0) + return NULL; + path->slots[0]--; + } + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != dir || + btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY || + found_key.offset != key.offset) + return NULL; + + return btrfs_match_dir_item_name(root, path, name, name_len); +} + struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) @@ -210,7 +296,8 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, total_len = btrfs_item_size_nr(leaf, path->slots[0]); while(cur < total_len) { this_len = sizeof(*dir_item) + - btrfs_dir_name_len(leaf, dir_item); + btrfs_dir_name_len(leaf, dir_item) + + btrfs_dir_data_len(leaf, dir_item); name_ptr = (unsigned long)(dir_item + 1); if (btrfs_dir_name_len(leaf, dir_item) == name_len && @@ -236,7 +323,8 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, int ret = 0; leaf = path->nodes[0]; - sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di); + sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di) + + btrfs_dir_data_len(leaf, di); item_len = btrfs_item_size_nr(leaf, path->slots[0]); if (sub_item_len == item_len) { ret = btrfs_del_item(trans, root, path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d6d32465bc9a..37f8e0321af8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -839,6 +840,9 @@ void btrfs_delete_inode(struct inode *inode) btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); + if (ret) + goto no_delete_lock; + ret = btrfs_delete_xattrs(trans, root, inode); if (ret) goto no_delete_lock; ret = btrfs_free_inode(trans, root, inode); @@ -1110,7 +1114,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (over) goto nopos; - di_len = btrfs_dir_name_len(leaf, di) + sizeof(*di); + di_len = btrfs_dir_name_len(leaf, di) + + btrfs_dir_data_len(leaf, di) +sizeof(*di); di_cur += di_len; di = (struct btrfs_dir_item *)((char *)di + di_len); } @@ -2519,6 +2524,10 @@ static struct inode_operations btrfs_dir_inode_operations = { .symlink = btrfs_symlink, .setattr = btrfs_setattr, .mknod = btrfs_mknod, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = btrfs_listxattr, + .removexattr = generic_removexattr, }; static struct inode_operations btrfs_dir_ro_inode_operations = { @@ -2567,6 +2576,10 @@ static struct inode_operations btrfs_file_inode_operations = { .truncate = btrfs_truncate, .getattr = btrfs_getattr, .setattr = btrfs_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = btrfs_listxattr, + .removexattr = generic_removexattr, }; static struct inode_operations btrfs_special_inode_operations = { diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 9f8696c8a8e8..030324febf6c 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -58,9 +58,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(l, di, &found_key); - printk("\t\tdir oid %llu flags %u type %u\n", + printk("\t\tdir oid %llu type %u\n", (unsigned long long)found_key.objectid, - btrfs_dir_flags(l, di), btrfs_dir_type(l, di)); break; case BTRFS_ROOT_ITEM_KEY: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f94aa1f97a0a..c46bc3911798 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -40,6 +40,7 @@ #include "btrfs_inode.h" #include "ioctl.h" #include "print-tree.h" +#include "xattr.h" #define BTRFS_SUPER_MAGIC 0x9123683E @@ -106,6 +107,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = BTRFS_SUPER_MAGIC; sb->s_op = &btrfs_super_ops; + sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; tree_root = open_ctree(sb); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c new file mode 100644 index 000000000000..f4ac5e0bbad1 --- /dev/null +++ b/fs/btrfs/xattr.c @@ -0,0 +1,497 @@ +/* + * Copyright (C) 2007 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include +#include +#include +#include +#include "ctree.h" +#include "btrfs_inode.h" +#include "transaction.h" +#include "xattr.h" +#include "disk-io.h" + +static struct xattr_handler *btrfs_xattr_handler_map[] = { + [BTRFS_XATTR_INDEX_USER] = &btrfs_xattr_user_handler, + [BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &btrfs_xattr_acl_access_handler, + [BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &btrfs_xattr_acl_default_handler, + [BTRFS_XATTR_INDEX_TRUSTED] = &btrfs_xattr_trusted_handler, + [BTRFS_XATTR_INDEX_SECURITY] = &btrfs_xattr_security_handler, + [BTRFS_XATTR_INDEX_SYSTEM] = &btrfs_xattr_system_handler, +}; + +struct xattr_handler *btrfs_xattr_handlers[] = { + &btrfs_xattr_user_handler, + &btrfs_xattr_acl_access_handler, + &btrfs_xattr_acl_default_handler, + &btrfs_xattr_trusted_handler, + &btrfs_xattr_security_handler, + &btrfs_xattr_system_handler, + NULL, +}; + +/* + * @param name - the xattr name + * @return - the xattr_handler for the xattr, NULL if its not found + * + * use this with listxattr where we don't already know the type of xattr we + * have + */ +static struct xattr_handler *find_btrfs_xattr_handler(struct extent_buffer *l, + unsigned long name_ptr, + u16 name_len) +{ + struct xattr_handler *handler = NULL; + int i = 0; + + for (handler = btrfs_xattr_handlers[i]; handler != NULL; i++, + handler = btrfs_xattr_handlers[i]) { + u16 prefix_len = strlen(handler->prefix); + + if (name_len < prefix_len) + continue; + + if (memcmp_extent_buffer(l, handler->prefix, name_ptr, + prefix_len) == 0) + break; + } + + return handler; +} + +/* + * @param name_index - the index for the xattr handler + * @return the xattr_handler if we found it, NULL otherwise + * + * use this if we know the type of the xattr already + */ +static struct xattr_handler *btrfs_xattr_handler(int name_index) +{ + struct xattr_handler *handler = NULL; + + if (name_index >= 0 && + name_index < ARRAY_SIZE(btrfs_xattr_handler_map)) + handler = btrfs_xattr_handler_map[name_index]; + + return handler; +} + +static inline char *get_name(const char *name, int name_index) +{ + char *ret = NULL; + struct xattr_handler *handler = btrfs_xattr_handler(name_index); + int prefix_len; + + if (!handler) + return ret; + + prefix_len = strlen(handler->prefix); + + ret = kmalloc(strlen(name) + prefix_len + 1, GFP_KERNEL); + if (!ret) + return ret; + + memcpy(ret, handler->prefix, prefix_len); + memcpy(ret+prefix_len, name, strlen(name)); + ret[prefix_len + strlen(name)] = '\0'; + + return ret; +} + +size_t btrfs_xattr_generic_list(struct inode *inode, char *list, + size_t list_size, const char *name, + size_t name_len) +{ + if (list && (name_len+1) <= list_size) { + memcpy(list, name, name_len); + list[name_len] = '\0'; + } else + return -ERANGE; + + return name_len+1; +} + +ssize_t btrfs_xattr_get(struct inode *inode, int name_index, + const char *attr_name, void *buffer, size_t size) +{ + struct btrfs_dir_item *di; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct xattr_handler *handler = btrfs_xattr_handler(name_index); + int ret = 0; + unsigned long data_ptr; + char *name; + + if (!handler) + return -EOPNOTSUPP; + + /* just in case... */ + if (*attr_name == '\0') + return -EINVAL; + + name = get_name(attr_name, name_index); + if (!name) + return -ENOMEM; + + path = btrfs_alloc_path(); + if (!path) { + kfree(name); + return -ENOMEM; + } + + mutex_lock(&root->fs_info->fs_mutex); + /* lookup the xattr by name */ + di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, + strlen(name), 0); + if (!di || IS_ERR(di)) { + ret = -ENODATA; + goto out; + } + + leaf = path->nodes[0]; + /* if size is 0, that means we want the size of the attr */ + if (!size) { + ret = btrfs_dir_data_len(leaf, di); + goto out; + } + + /* now get the data out of our dir_item */ + if (btrfs_dir_data_len(leaf, di) > size) { + ret = -ERANGE; + goto out; + } + data_ptr = (unsigned long)((char *)(di + 1) + + btrfs_dir_name_len(leaf, di)); + read_extent_buffer(leaf, buffer, data_ptr, + btrfs_dir_name_len(leaf, di)); + ret = btrfs_dir_data_len(leaf, di); + +out: + mutex_unlock(&root->fs_info->fs_mutex); + kfree(name); + btrfs_free_path(path); + return ret; +} + +int btrfs_xattr_set(struct inode *inode, int name_index, + const char *attr_name, const void *value, size_t size, + int flags) +{ + struct btrfs_dir_item *di; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + struct btrfs_path *path; + struct xattr_handler *handler = btrfs_xattr_handler(name_index); + char *name; + int ret = 0, mod = 0; + + if (!handler) + return -EOPNOTSUPP; + + /* just in case... */ + if (*attr_name == '\0') + return -EINVAL; + + name = get_name(attr_name, name_index); + if (!name) + return -ENOMEM; + + path = btrfs_alloc_path(); + if (!path) { + kfree(name); + return -ENOMEM; + } + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + /* first lets see if we already have this xattr */ + di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, + strlen(name), -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; + } + + /* ok we already have this xattr, lets remove it */ + if (di) { + /* if we want create only exit */ + if (flags & XATTR_CREATE) { + ret = -EEXIST; + goto out; + } + + ret = btrfs_delete_one_dir_name(trans, root, path, di); + if (ret) + goto out; + btrfs_release_path(root, path); + + /* if we don't have a value then we are removing the xattr */ + if (!value) { + mod = 1; + goto out; + } + } else if (flags & XATTR_REPLACE) { + /* we couldn't find the attr to replace, so error out */ + ret = -ENODATA; + goto out; + } + + /* ok we have to create a completely new xattr */ + ret = btrfs_insert_xattr_item(trans, root, name, strlen(name), + value, size, inode->i_ino); + if (ret) + goto out; + mod = 1; + +out: + if (mod) { + inode->i_ctime = CURRENT_TIME; + ret = btrfs_update_inode(trans, root, inode); + } + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + kfree(name); + btrfs_free_path(path); + + return ret; +} + +ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + struct btrfs_key key, found_key; + struct inode *inode = dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_path *path; + struct btrfs_item *item; + struct extent_buffer *leaf; + struct btrfs_dir_item *di; + struct xattr_handler *handler; + int ret = 0, slot, advance; + size_t total_size = 0, size_left = size, written; + unsigned long name_ptr; + char *name; + u32 nritems; + + /* + * ok we want all objects associated with this id. + * NOTE: we set key.offset = 0; because we want to start with the + * first xattr that we find and walk forward + */ + key.objectid = inode->i_ino; + btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); + key.offset = 0; + + path = btrfs_alloc_path(); + path->reada = 2; + if (!path) + return -ENOMEM; + + mutex_lock(&root->fs_info->fs_mutex); + + /* search for our xattrs */ + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto err; + ret = 0; + advance = 0; + while (1) { + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + slot = path->slots[0]; + + /* this is where we start walking through the path */ + if (advance || slot >= nritems) { + /* + * if we've reached the last slot in this leaf we need + * to go to the next leaf and reset everything + */ + if (slot >= nritems-1) { + ret = btrfs_next_leaf(root, path); + if (ret) + break; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + slot = path->slots[0]; + } else { + /* + * just walking through the slots on this leaf + */ + slot++; + path->slots[0]++; + } + } + advance = 1; + + item = btrfs_item_nr(leaf, slot); + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + /* check to make sure this item is what we want */ + if (found_key.objectid != key.objectid) + break; + if (btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY) + break; + + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); + + total_size += btrfs_dir_name_len(leaf, di)+1; + + /* we are just looking for how big our buffer needs to be */ + if (!size) + continue; + + /* find our handler for this xattr */ + name_ptr = (unsigned long)(di + 1); + handler = find_btrfs_xattr_handler(leaf, name_ptr, + btrfs_dir_name_len(leaf, di)); + if (!handler) { + printk(KERN_ERR "btrfs: unsupported xattr found\n"); + continue; + } + + name = kmalloc(btrfs_dir_name_len(leaf, di), GFP_KERNEL); + read_extent_buffer(leaf, name, name_ptr, + btrfs_dir_name_len(leaf, di)); + + /* call the list function associated with this xattr */ + written = handler->list(inode, buffer, size_left, name, + btrfs_dir_name_len(leaf, di)); + kfree(name); + + if (written < 0) { + ret = -ERANGE; + break; + } + + size_left -= written; + buffer += written; + } + ret = total_size; + +err: + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_free_path(path); + + return ret; +} + +/* + * delete all the xattrs associated with the inode. fs_mutex should be + * held when we come into here + */ +int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode) +{ + struct btrfs_path *path; + struct btrfs_key key, found_key; + struct btrfs_item *item; + struct extent_buffer *leaf; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = inode->i_ino; + btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); + key.offset = (u64)-1; + + while(1) { + /* look for our next xattr */ + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; + BUG_ON(ret == 0); + + if (path->slots[0] == 0) + break; + + path->slots[0]--; + leaf = path->nodes[0]; + item = btrfs_item_nr(leaf, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != key.objectid) + break; + if (btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY) + break; + + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + btrfs_release_path(root, path); + } + ret = 0; +out: + btrfs_free_path(path); + + return ret; +} + +/* + * Handler functions + */ +#define BTRFS_XATTR_SETGET_FUNCS(name, index) \ +static int btrfs_xattr_##name##_get(struct inode *inode, \ + const char *name, void *value, \ + size_t size) \ +{ \ + return btrfs_xattr_get(inode, index, name, value, size); \ +} \ +static int btrfs_xattr_##name##_set(struct inode *inode, \ + const char *name, const void *value,\ + size_t size, int flags) \ +{ \ + return btrfs_xattr_set(inode, index, name, value, size, flags); \ +} \ + +BTRFS_XATTR_SETGET_FUNCS(security, BTRFS_XATTR_INDEX_SECURITY); +BTRFS_XATTR_SETGET_FUNCS(system, BTRFS_XATTR_INDEX_SYSTEM); +BTRFS_XATTR_SETGET_FUNCS(user, BTRFS_XATTR_INDEX_USER); +BTRFS_XATTR_SETGET_FUNCS(trusted, BTRFS_XATTR_INDEX_TRUSTED); + +struct xattr_handler btrfs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .list = btrfs_xattr_generic_list, + .get = btrfs_xattr_security_get, + .set = btrfs_xattr_security_set, +}; + +struct xattr_handler btrfs_xattr_system_handler = { + .prefix = XATTR_SYSTEM_PREFIX, + .list = btrfs_xattr_generic_list, + .get = btrfs_xattr_system_get, + .set = btrfs_xattr_system_set, +}; + +struct xattr_handler btrfs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .list = btrfs_xattr_generic_list, + .get = btrfs_xattr_user_get, + .set = btrfs_xattr_user_set, +}; + +struct xattr_handler btrfs_xattr_trusted_handler = { + .prefix = XATTR_USER_PREFIX, + .list = btrfs_xattr_generic_list, + .get = btrfs_xattr_trusted_get, + .set = btrfs_xattr_trusted_set, +}; diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h new file mode 100644 index 000000000000..b2e47e3f2442 --- /dev/null +++ b/fs/btrfs/xattr.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2007 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __XATTR__ +#define __XATTR__ + +#include +#include "ctree.h" + +/* Name indexes */ +enum { + BTRFS_XATTR_INDEX_USER, + BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, + BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, + BTRFS_XATTR_INDEX_TRUSTED, + BTRFS_XATTR_INDEX_SECURITY, + BTRFS_XATTR_INDEX_SYSTEM, + BTRFS_XATTR_INDEX_END, +}; + +extern struct xattr_handler btrfs_xattr_user_handler; +extern struct xattr_handler btrfs_xattr_trusted_handler; +extern struct xattr_handler btrfs_xattr_acl_access_handler; +extern struct xattr_handler btrfs_xattr_acl_default_handler; +extern struct xattr_handler btrfs_xattr_security_handler; +extern struct xattr_handler btrfs_xattr_system_handler; + +extern struct xattr_handler *btrfs_xattr_handlers[]; + +ssize_t btrfs_xattr_get(struct inode *inode, int name_index, const char *name, + void *buffer, size_t size); +int btrfs_xattr_set(struct inode *inode, int name_index, const char *name, + const void *value, size_t size, int flags); + +/* + * the only reason this is public is for acl.c. There may be a point where + * acl.c doesn't need it, and if thats the case we need to remove it and make + * it static in xattr.c + */ +size_t btrfs_xattr_generic_list(struct inode *inode, char *list, + size_t list_size, const char *name, + size_t name_len); +#endif /* __XATTR__ */ From 324ae4df00fdc1a6a179bf584d8addf027bb75fb Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 16 Nov 2007 14:57:08 -0500 Subject: [PATCH 0306/2985] Btrfs: Add block group pinned accounting back This patch adds a helper function 'update_pinned_extents' to extent-tree.c. The usage of the helper function is similar to 'update_block_group', the last parameter of the function indicates pin vs unpin. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 5 ++-- fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 54 +++++++++++++++++++++++++++++++----------- 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 27cadae1af63..56b977ffe918 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -292,8 +292,8 @@ struct btrfs_block_group_cache { struct btrfs_block_group_item item; int data; int cached; + u64 pinned; }; - struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *extent_root; @@ -324,8 +324,9 @@ struct btrfs_fs_info { struct completion kobj_unregister; int do_barriers; int closing; -}; + u64 total_pinned; +}; /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c8533fba7c9..3e16cca72b49 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -569,6 +569,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; + fs_info->total_pinned = 0; INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); BTRFS_I(fs_info->btree_inode)->root = tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7405bd5301af..4ef6dc3d7d32 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -277,7 +277,8 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (shint && (shint->data == data || shint->data == BTRFS_BLOCK_GROUP_MIXED)) { used = btrfs_block_group_used(&shint->item); - if (used < div_factor(shint->key.offset, factor)) { + if (used + shint->pinned < + div_factor(shint->key.offset, factor)) { return shint; } } @@ -285,7 +286,8 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (hint && (hint->data == data || hint->data == BTRFS_BLOCK_GROUP_MIXED)) { used = btrfs_block_group_used(&hint->item); - if (used < div_factor(hint->key.offset, factor)) { + if (used + hint->pinned < + div_factor(hint->key.offset, factor)) { return hint; } last = hint->key.objectid + hint->key.offset; @@ -317,8 +319,7 @@ again: free_check = cache->key.offset; else free_check = div_factor(cache->key.offset, factor); - - if (used < free_check) { + if (used + cache->pinned < free_check) { found_group = cache; goto found; } @@ -664,6 +665,37 @@ static int update_block_group(struct btrfs_trans_handle *trans, } return 0; } +static int update_pinned_extents(struct btrfs_root *root, + u64 bytenr, u64 num, int pin) +{ + u64 len; + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *fs_info = root->fs_info; + + if (pin) { + set_extent_dirty(&fs_info->pinned_extents, + bytenr, bytenr + num - 1, GFP_NOFS); + } else { + clear_extent_dirty(&fs_info->pinned_extents, + bytenr, bytenr + num - 1, GFP_NOFS); + } + while (num > 0) { + cache = btrfs_lookup_block_group(fs_info, bytenr); + WARN_ON(!cache); + len = min(num, cache->key.offset - + (bytenr - cache->key.objectid)); + if (pin) { + cache->pinned += len; + fs_info->total_pinned += len; + } else { + cache->pinned -= len; + fs_info->total_pinned -= len; + } + bytenr += len; + num -= len; + } + return 0; +} int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) { @@ -691,9 +723,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, u64 start; u64 end; int ret; - struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents; struct extent_map_tree *free_space_cache; - free_space_cache = &root->fs_info->free_space_cache; while(1) { @@ -701,9 +731,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, EXTENT_DIRTY); if (ret) break; - - clear_extent_dirty(pinned_extents, start, end, - GFP_NOFS); + update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); set_extent_dirty(free_space_cache, start, end, GFP_NOFS); } @@ -761,8 +789,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, } free_extent_buffer(buf); } - set_extent_dirty(&root->fs_info->pinned_extents, - bytenr, bytenr + num_bytes - 1, GFP_NOFS); + update_pinned_extents(root, bytenr, num_bytes, 1); } else { set_extent_bits(&root->fs_info->pending_del, bytenr, bytenr + num_bytes - 1, @@ -866,8 +893,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct EXTENT_LOCKED); if (ret) break; - - set_extent_dirty(pinned_extents, start, end, GFP_NOFS); + update_pinned_extents(extent_root, start, end + 1 - start, 1); clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, GFP_NOFS); ret = __free_extent(trans, extent_root, @@ -1579,7 +1605,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) sizeof(cache->item)); memcpy(&cache->key, &found_key, sizeof(found_key)); cache->cached = 0; - + cache->pinned = 0; key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); From 5e5745dcaf73c5860a16c09572d59383ecee6d28 Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 16 Nov 2007 14:57:09 -0500 Subject: [PATCH 0307/2985] Btrfs: Add full_scan parameter to find_search_start This patch adds a new parameter 'full_scan' to 'find_search_start', thereby 'find_search_start' can know whether 'find_free_extent' is in full scan phrase. I feel that 'find_search_start' should skip calling 'btrfs_find_block_group' when 'find_free_extent' is in full scan phrase. In my test on a 2GB volume, Oops occurs when space usage is about 76%. After apply the patch, Oops occurs when space usage is near 100%. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4ef6dc3d7d32..e5340677d6c4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -158,10 +158,10 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return NULL; } - static u64 find_search_start(struct btrfs_root *root, struct btrfs_block_group_cache **cache_ret, - u64 search_start, int num, int data) + u64 search_start, int num, + int data, int full_scan) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; @@ -218,10 +218,10 @@ wrapped: if (cache_miss && !cache->cached) { cache_block_group(root, cache); last = cache_miss; - cache = btrfs_lookup_block_group(root->fs_info, last); } - cache = btrfs_find_block_group(root, cache, last, data, 0); + if (!full_scan) + cache = btrfs_find_block_group(root, cache, last, data, 0); *cache_ret = cache; cache_miss = 0; goto again; @@ -979,12 +979,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root total_needed += empty_size; path = btrfs_alloc_path(); - check_failed: - search_start = find_search_start(root, &block_group, - search_start, total_needed, data); + search_start = find_search_start(root, &block_group, search_start, + total_needed, data, full_scan); cached_start = search_start; - btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; From 5cf664263b05beb080bf95037e4c3bd50c80b096 Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 16 Nov 2007 14:57:09 -0500 Subject: [PATCH 0308/2985] Btrfs: Off by one fixes for extent-tree.c Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e5340677d6c4..6a7a9ba75fd9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -150,12 +150,9 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return NULL; block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; - - - if (block_group->key.objectid <= bytenr && bytenr <= + if (block_group->key.objectid <= bytenr && bytenr < block_group->key.objectid + block_group->key.offset) return block_group; - return NULL; } static u64 find_search_start(struct btrfs_root *root, @@ -195,7 +192,7 @@ again: continue; } if (data != BTRFS_BLOCK_GROUP_MIXED && - start + num >= cache->key.objectid + cache->key.offset) + start + num > cache->key.objectid + cache->key.offset) goto new_group; return start; } @@ -1088,9 +1085,8 @@ check_pending: if (ins->objectid + num_bytes >= search_end) goto enospc; - if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && - ins->objectid + num_bytes >= block_group-> + ins->objectid + num_bytes > block_group-> key.objectid + block_group->key.offset) { search_start = block_group->key.objectid + block_group->key.offset; From 1caf9342fc71d020e788a59598a44d4ea0136b36 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 19 Nov 2007 10:18:17 -0500 Subject: [PATCH 0309/2985] Btrfs: Make ACLs return EOPNOTSUPP for now There was a slight problem with ACL's returning EINVAL when you tried to set an ACL. This isn't correct, we should be returning EOPNOTSUPP, so I did a very ugly thing and just commented everybody out and made them return EOPNOTSUPP. This is only temporary, I'm going back to implement ACL's, but Chris wants to push out a release so this will suffice for now. Also Yan suggested setting reada to -1 in the delete case to enable backwards readahead, and in the listxattr case I moved path->reada = 2; to after the if (!path) check so we can avoid a possible null dereference. Thank you, Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 18 ++++++++++++++++++ fs/btrfs/xattr.c | 4 ++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index aee9f0657c35..889ba89d3f9f 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -23,32 +23,50 @@ #include "ctree.h" #include "xattr.h" +/* + * FIXME: At this point this is all place holder stuff, we just return + * -EOPNOTSUPP so cp won't complain when it tries to copy over a file with an + * acl on it. + */ + static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name, void *value, size_t size) { + /* return btrfs_xattr_get(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, name, value, size); + */ + return -EOPNOTSUPP; } static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { + /* return btrfs_xattr_set(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, name, value, size, flags); + */ + return -EOPNOTSUPP; } static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name, void *value, size_t size) { + /* return btrfs_xattr_get(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, name, value, size); + */ + return -EOPNOTSUPP; } static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { + /* return btrfs_xattr_set(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, name, value, size, flags); + */ + return -EOPNOTSUPP; } struct xattr_handler btrfs_xattr_acl_default_handler = { diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index f4ac5e0bbad1..984616cca254 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -302,9 +302,9 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) key.offset = 0; path = btrfs_alloc_path(); - path->reada = 2; if (!path) return -ENOMEM; + path->reada = 2; mutex_lock(&root->fs_info->fs_mutex); @@ -410,7 +410,7 @@ int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - + path->reada = -1; key.objectid = inode->i_ino; btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); key.offset = (u64)-1; From 3acd7ee876c508973eb4722de5b00b3dff4c7372 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 19 Nov 2007 10:18:19 -0500 Subject: [PATCH 0310/2985] Btrfs: xattr fixes This fixes two typos that Yan pointed out to me. Thanks much, Signed-off-by: Chris Mason --- fs/btrfs/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 984616cca254..33caaf5b4b0c 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -180,7 +180,7 @@ ssize_t btrfs_xattr_get(struct inode *inode, int name_index, data_ptr = (unsigned long)((char *)(di + 1) + btrfs_dir_name_len(leaf, di)); read_extent_buffer(leaf, buffer, data_ptr, - btrfs_dir_name_len(leaf, di)); + btrfs_dir_data_len(leaf, di)); ret = btrfs_dir_data_len(leaf, di); out: @@ -490,7 +490,7 @@ struct xattr_handler btrfs_xattr_user_handler = { }; struct xattr_handler btrfs_xattr_trusted_handler = { - .prefix = XATTR_USER_PREFIX, + .prefix = XATTR_TRUSTED_PREFIX, .list = btrfs_xattr_generic_list, .get = btrfs_xattr_trusted_get, .set = btrfs_xattr_trusted_set, From 2f4cbe6442d3be7b5e4cf1607a5ab33995fe2d25 Mon Sep 17 00:00:00 2001 From: Wyatt Banks Date: Mon, 19 Nov 2007 10:22:33 -0500 Subject: [PATCH 0311/2985] Btrfs: Return value checking in module init Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 16 ++++++++++++++-- fs/btrfs/extent_map.h | 2 +- fs/btrfs/super.c | 21 ++++++++++++++++++--- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index b3ff63c4bf2a..f91f28efdb59 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -42,18 +42,30 @@ struct extent_page_data { struct extent_map_tree *tree; get_extent_t *get_extent; }; - -void __init extent_map_init(void) +int __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", sizeof(struct extent_map), 0, NULL); + if (!extent_map_cache) + return -ENOMEM; extent_state_cache = btrfs_cache_create("extent_state", sizeof(struct extent_state), 0, NULL); + if (!extent_state_cache) + goto free_map_cache; extent_buffer_cache = btrfs_cache_create("extent_buffers", sizeof(struct extent_buffer), 0, NULL); + if (!extent_buffer_cache) + goto free_state_cache; + return 0; + +free_state_cache: + kmem_cache_destroy(extent_state_cache); +free_map_cache: + kmem_cache_destroy(extent_map_cache); + return -ENOMEM; } void __exit extent_map_exit(void) diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index fbd23a72f53b..24ddc8c93461 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -110,7 +110,7 @@ struct extent_map *alloc_extent_map(gfp_t mask); void free_extent_map(struct extent_map *em); int extent_read_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent); -void __init extent_map_init(void); +int __init extent_map_init(void); void __exit extent_map_exit(void); int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c46bc3911798..ffa02872c4d6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -347,9 +347,24 @@ static int __init init_btrfs_fs(void) btrfs_init_transaction_sys(); err = btrfs_init_cachep(); if (err) - return err; - extent_map_init(); - return register_filesystem(&btrfs_fs_type); + goto free_transaction_sys; + err = extent_map_init(); + if (err) + goto free_cachep; + + err = register_filesystem(&btrfs_fs_type); + if (err) + goto free_extent_map; + return 0; + +free_extent_map: + extent_map_exit(); +free_cachep: + btrfs_destroy_cachep(); +free_transaction_sys: + btrfs_exit_transaction_sys(); + btrfs_exit_sysfs(); + return err; } static void __exit exit_btrfs_fs(void) From 3e9fd94ff0028a044d55690eb0a801fd1472e3c6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Nov 2007 10:47:25 -0500 Subject: [PATCH 0312/2985] Btrfs: Avoid fragmentation from parallel delalloc filling Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 83 +++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f91f28efdb59..7fd4eb7a8f03 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1033,11 +1033,11 @@ out: EXPORT_SYMBOL(find_first_extent_bit); u64 find_lock_delalloc_range(struct extent_map_tree *tree, - u64 start, u64 lock_start, u64 *end, u64 max_bytes) + u64 *start, u64 *end, u64 max_bytes) { struct rb_node *node; struct extent_state *state; - u64 cur_start = start; + u64 cur_start = *start; u64 found = 0; u64 total_bytes = 0; @@ -1054,27 +1054,43 @@ search_again: while(1) { state = rb_entry(node, struct extent_state, rb_node); - if (state->start != cur_start) { + if (found && state->start != cur_start) { goto out; } if (!(state->state & EXTENT_DELALLOC)) { goto out; } - if (state->start >= lock_start) { - if (state->state & EXTENT_LOCKED) { - DEFINE_WAIT(wait); - atomic_inc(&state->refs); - prepare_to_wait(&state->wq, &wait, - TASK_UNINTERRUPTIBLE); - write_unlock_irq(&tree->lock); - schedule(); - write_lock_irq(&tree->lock); - finish_wait(&state->wq, &wait); - free_extent_state(state); - goto search_again; + if (!found) { + struct extent_state *prev_state; + struct rb_node *prev_node = node; + while(1) { + prev_node = rb_prev(prev_node); + if (!prev_node) + break; + prev_state = rb_entry(prev_node, + struct extent_state, + rb_node); + if (!(prev_state->state & EXTENT_DELALLOC)) + break; + state = prev_state; + node = prev_node; } - state->state |= EXTENT_LOCKED; } + if (state->state & EXTENT_LOCKED) { + DEFINE_WAIT(wait); + atomic_inc(&state->refs); + prepare_to_wait(&state->wq, &wait, + TASK_UNINTERRUPTIBLE); + write_unlock_irq(&tree->lock); + schedule(); + write_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + free_extent_state(state); + goto search_again; + } + state->state |= EXTENT_LOCKED; + if (!found) + *start = state->start; found++; *end = state->end; cur_start = state->end + 1; @@ -1695,6 +1711,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, struct extent_page_data *epd = data; struct extent_map_tree *tree = epd->tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 delalloc_start; u64 page_end = start + PAGE_CACHE_SIZE - 1; u64 end; u64 cur = start; @@ -1729,25 +1746,23 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, set_page_extent_mapped(page); - lock_extent(tree, start, page_end, GFP_NOFS); - nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1, - &delalloc_end, - 128 * 1024 * 1024); - if (nr_delalloc) { - tree->ops->fill_delalloc(inode, start, delalloc_end); - if (delalloc_end >= page_end + 1) { - clear_extent_bit(tree, page_end + 1, delalloc_end, - EXTENT_LOCKED | EXTENT_DELALLOC, - 1, 0, GFP_NOFS); - } - clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC, - 0, 0, GFP_NOFS); - if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { - printk("found delalloc bits after clear extent_bit\n"); - } - } else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { - printk("found delalloc bits after find_delalloc_range returns 0\n"); + delalloc_start = start; + delalloc_end = 0; + while(delalloc_end < page_end) { + nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, + &delalloc_end, + 128 * 1024 * 1024); + if (nr_delalloc <= 0) + break; + tree->ops->fill_delalloc(inode, delalloc_start, + delalloc_end); + clear_extent_bit(tree, delalloc_start, + delalloc_end, + EXTENT_LOCKED | EXTENT_DELALLOC, + 1, 0, GFP_NOFS); + delalloc_start = delalloc_end + 1; } + lock_extent(tree, start, page_end, GFP_NOFS); end = page_end; if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { From 7073c8e852946274e4d50fdf072438612f9dc845 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Nov 2007 13:44:45 -0500 Subject: [PATCH 0313/2985] Btrfs: Make sure page mapping dirty tag is properly cleared Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 7fd4eb7a8f03..a4e9096754fc 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1825,12 +1825,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (ret) SetPageError(page); else { - unsigned long nr = end_index + 1; + unsigned long max_nr = end_index + 1; set_range_writeback(tree, cur, cur + iosize - 1); + if (!PageWriteback(page)) { + printk("warning page %lu not writeback, " + "cur %llu end %llu\n", page->index, + (unsigned long long)cur, + (unsigned long long)end); + } ret = submit_extent_page(WRITE, tree, page, sector, iosize, page_offset, bdev, - &epd->bio, nr, + &epd->bio, max_nr, end_bio_extent_writepage); if (ret) SetPageError(page); @@ -1840,6 +1846,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, nr++; } done: + if (nr == 0) { + /* make sure the mapping tag for page dirty gets cleared */ + set_page_writeback(page); + end_page_writeback(page); + } unlock_extent(tree, start, page_end, GFP_NOFS); unlock_page(page); return 0; @@ -2408,6 +2419,13 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, } } clear_page_dirty_for_io(page); + write_lock_irq(&page->mapping->tree_lock); + if (!PageDirty(page)) { + radix_tree_tag_clear(&page->mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + } + write_unlock_irq(&page->mapping->tree_lock); unlock_page(page); } return 0; From 0e4de58432f552c0ecead965b0fad8a9fb1d31ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Nov 2007 10:55:49 -0500 Subject: [PATCH 0314/2985] Btrfs: Add check for null block group to find_search_start Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6a7a9ba75fd9..39be6baccc36 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -204,6 +204,7 @@ new_group: wrapped: cache = btrfs_lookup_block_group(root->fs_info, last); if (!cache) { +no_cache: if (!wrapped) { wrapped = 1; last = search_start; @@ -219,6 +220,8 @@ wrapped: } if (!full_scan) cache = btrfs_find_block_group(root, cache, last, data, 0); + if (!cache) + goto no_cache; *cache_ret = cache; cache_miss = 0; goto again; From eef1c494a2d4212e13c67d05e9cc3cd1e6dfed5d Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 26 Nov 2007 10:58:13 -0500 Subject: [PATCH 0315/2985] Btrfs: Properly update right_nritems in push_leaf_left The codes that fixup the right leaf and the codes that dirty the extnet buffer use the variable 'right_nritems' , both of them expect 'right_nritems' is the number of items in right leaf after the push. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1b47fe71e0b4..3b16051b121b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1717,11 +1717,10 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_item_nr_offset(push_items), (btrfs_header_nritems(right) - push_items) * sizeof(struct btrfs_item)); - } - btrfs_set_header_nritems(right, right_nritems - push_items); + right_nritems -= push_items; + btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); - for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); From 015a739c7c238768fbfa4eea8ea2ebc1a35e7bb1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Nov 2007 16:15:16 -0800 Subject: [PATCH 0316/2985] Btrfs: Handle writeback under high memory pressure better Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/extent_map.c | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 39be6baccc36..3c00f967eccb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -961,7 +961,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root level = btrfs_header_level(root->node); - if (num_bytes >= 96 * 1024 * 1024 && hint_byte) { + if (num_bytes >= 32 * 1024 * 1024 && hint_byte) { data = BTRFS_BLOCK_GROUP_MIXED; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index a4e9096754fc..55f272c335c6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1861,13 +1861,25 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, struct writeback_control *wbc) { int ret; + struct address_space *mapping = page->mapping; struct extent_page_data epd = { .bio = NULL, .tree = tree, .get_extent = get_extent, }; + struct writeback_control wbc_writepages = { + .bdi = wbc->bdi, + .sync_mode = WB_SYNC_NONE, + .older_than_this = NULL, + .nr_to_write = 64, + .range_start = page_offset(page) + PAGE_CACHE_SIZE, + .range_end = (loff_t)-1, + }; + ret = __extent_writepage(page, wbc, &epd); + + write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); if (epd.bio) submit_one_bio(WRITE, epd.bio); return ret; From 793955bca66c99defdffc857ae6eb7e8431d6bbe Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Nov 2007 16:34:41 -0800 Subject: [PATCH 0317/2985] Btrfs: Limit btree writeback to prevent seeks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 +++++++++ fs/btrfs/extent_map.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/btrfs/extent_map.h | 3 +++ 3 files changed, 51 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3e16cca72b49..28e9ef8a3253 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,6 +210,15 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree = &BTRFS_I(mapping->host)->extent_tree; + if (wbc->sync_mode == WB_SYNC_NONE) { + u64 num_dirty; + u64 start = 0; + unsigned long thresh = 96 * 1024 * 1024; + num_dirty = count_range_bits(tree, &start, thresh, EXTENT_DIRTY); + if (num_dirty < thresh) { + return 0; + } + } return extent_writepages(tree, mapping, btree_get_extent, wbc); } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 55f272c335c6..b6a4974ecc23 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1106,6 +1106,45 @@ out: return found; } +u64 count_range_bits(struct extent_map_tree *tree, + u64 *start, u64 max_bytes, unsigned long bits) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = *start; + u64 total_bytes = 0; + int found = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if ((state->state & bits)) { + total_bytes += state->end - state->start + 1; + if (total_bytes >= max_bytes) + break; + if (!found) { + *start = state->start; + found = 1; + } + } + node = rb_next(node); + if (!node) + break; + } +out: + write_unlock_irq(&tree->lock); + return total_bytes; +} + /* * helper function to lock both pages and extents in the tree. * pages must be locked first. diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 24ddc8c93461..13c562f7cb6f 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -113,6 +113,9 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, int __init extent_map_init(void); void __exit extent_map_exit(void); +u64 count_range_bits(struct extent_map_tree *tree, + u64 *start, u64 max_bytes, unsigned long bits); + int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, int filled); int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, From 8790bad7fb5ac4929dcd52c5dd8d6b81f8c05be8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Nov 2007 19:47:05 -0500 Subject: [PATCH 0318/2985] Btrfs: Only limit btree writeback for pdflush Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 28e9ef8a3253..8a74f9027d65 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,7 +210,7 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree = &BTRFS_I(mapping->host)->extent_tree; - if (wbc->sync_mode == WB_SYNC_NONE) { + if (wbc->sync_mode == WB_SYNC_NONE && current_is_pdflush()) { u64 num_dirty; u64 start = 0; unsigned long thresh = 96 * 1024 * 1024; From 448d640b668dae3928591e83f2bf4ca9f4c06e52 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Nov 2007 07:52:01 -0800 Subject: [PATCH 0319/2985] Btrfs: Fine tune the btree writeback exclusion some more Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++++ fs/btrfs/file.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8a74f9027d65..de0552532d23 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -214,6 +214,10 @@ static int btree_writepages(struct address_space *mapping, u64 num_dirty; u64 start = 0; unsigned long thresh = 96 * 1024 * 1024; + + if (wbc->for_kupdate) + return 0; + num_dirty = count_range_bits(tree, &start, thresh, EXTENT_DIRTY); if (num_dirty < thresh) { return 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9dd2c5c23b56..ba624ae16e61 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -767,7 +767,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, num_written += write_bytes; balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); - btrfs_btree_balance_dirty(root, 1); + if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) + btrfs_btree_balance_dirty(root, 1); cond_resched(); } mutex_unlock(&inode->i_mutex); From ca6646264b7dab662d84435441164bb2a8e8885a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Nov 2007 11:16:35 -0500 Subject: [PATCH 0320/2985] Btrfs: Add efficient dirty accounting to the extent_map tree Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ++++++- fs/btrfs/extent_map.c | 32 +++++++++++++++++++++++++++++--- fs/btrfs/extent_map.h | 1 + 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index de0552532d23..eef4ab56b9ca 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,7 +210,7 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree = &BTRFS_I(mapping->host)->extent_tree; - if (wbc->sync_mode == WB_SYNC_NONE && current_is_pdflush()) { + if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; unsigned long thresh = 96 * 1024 * 1024; @@ -218,6 +218,11 @@ static int btree_writepages(struct address_space *mapping, if (wbc->for_kupdate) return 0; + if (current_is_pdflush()) { + thresh = 96 * 1024 * 1024; + } else { + thresh = 8 * 1024 * 1024; + } num_dirty = count_range_bits(tree, &start, thresh, EXTENT_DIRTY); if (num_dirty < thresh) { return 0; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index b6a4974ecc23..06e437723dc3 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -42,6 +42,7 @@ struct extent_page_data { struct extent_map_tree *tree; get_extent_t *get_extent; }; + int __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", @@ -94,6 +95,7 @@ void extent_map_tree_init(struct extent_map_tree *tree, tree->map.rb_node = NULL; tree->state.rb_node = NULL; tree->ops = NULL; + tree->dirty_bytes = 0; rwlock_init(&tree->lock); spin_lock_init(&tree->lru_lock); tree->mapping = mapping; @@ -414,6 +416,8 @@ static int insert_state(struct extent_map_tree *tree, printk("end < start %Lu %Lu\n", end, start); WARN_ON(1); } + if (bits & EXTENT_DIRTY) + tree->dirty_bytes += end - start + 1; state->state |= bits; state->start = start; state->end = end; @@ -476,6 +480,12 @@ static int clear_state_bit(struct extent_map_tree *tree, int delete) { int ret = state->state & bits; + + if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + WARN_ON(range > tree->dirty_bytes); + tree->dirty_bytes -= range; + } state->state &= ~bits; if (wake) wake_up(&state->wq); @@ -668,6 +678,17 @@ out: } EXPORT_SYMBOL(wait_extent_bit); +static void set_state_bits(struct extent_map_tree *tree, + struct extent_state *state, + int bits) +{ + if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + tree->dirty_bytes += range; + } + state->state |= bits; +} + /* * set some bits on a range in the tree. This may require allocations * or sleeping, so the gfp mask is used to indicate what is allowed. @@ -727,7 +748,7 @@ again: err = -EEXIST; goto out; } - state->state |= bits; + set_state_bits(tree, state, bits); start = state->end + 1; merge_state(tree, state); goto search_again; @@ -762,7 +783,7 @@ again: if (err) goto out; if (state->end <= end) { - state->state |= bits; + set_state_bits(tree, state, bits); start = state->end + 1; merge_state(tree, state); } else { @@ -808,7 +829,7 @@ again: err = split_state(tree, state, prealloc, end + 1); BUG_ON(err == -EEXIST); - prealloc->state |= bits; + set_state_bits(tree, prealloc, bits); merge_state(tree, prealloc); prealloc = NULL; goto out; @@ -1116,6 +1137,11 @@ u64 count_range_bits(struct extent_map_tree *tree, int found = 0; write_lock_irq(&tree->lock); + if (bits == EXTENT_DIRTY) { + *start = 0; + total_bytes = tree->dirty_bytes; + goto out; + } /* * this search will find all the extents that end after * our range starts. diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 13c562f7cb6f..2ffc0c76338c 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -40,6 +40,7 @@ struct extent_map_tree { struct rb_root map; struct rb_root state; struct address_space *mapping; + u64 dirty_bytes; rwlock_t lock; struct extent_map_ops *ops; spinlock_t lru_lock; From 00f5c795fca47d038fedd3f0c9311da3be710c9f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 30 Nov 2007 10:09:33 -0500 Subject: [PATCH 0321/2985] btrfs_drop_extents: make sure the item is getting smaller before truncate Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 8 ++++++++ fs/btrfs/file.c | 32 ++++++++++++++++---------------- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 56b977ffe918..b29b911dd825 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1037,7 +1037,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end); extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 inline_end, u64 *hint_block); + u64 start, u64 end, u64 inline_limit, u64 *hint_block); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3c00f967eccb..55abdf997ca5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -45,6 +45,9 @@ static int cache_block_group(struct btrfs_root *root, u64 first_free; int found = 0; + if (!block_group) + return 0; + root = root->fs_info->extent_root; free_space_cache = &root->fs_info->free_space_cache; @@ -168,6 +171,11 @@ static u64 find_search_start(struct btrfs_root *root, u64 cache_miss = 0; int wrapped = 0; + if (!cache) { + cache = btrfs_lookup_block_group(root->fs_info, search_start); + if (!cache) + return search_start; + } again: ret = cache_block_group(root, cache); if (ret) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ba624ae16e61..b0d637787264 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -377,23 +377,23 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) */ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 inline_end, u64 *hint_byte) + u64 start, u64 end, u64 inline_limit, u64 *hint_byte) { - int ret; - struct btrfs_key key; - struct extent_buffer *leaf; - int slot; - struct btrfs_file_extent_item *extent; u64 extent_end = 0; - int keep; - struct btrfs_file_extent_item old; - struct btrfs_path *path; u64 search_start = start; + struct extent_buffer *leaf; + struct btrfs_file_extent_item *extent; + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_file_extent_item old; + int keep; + int slot; int bookend; int found_type; int found_extent; int found_inline; int recow; + int ret; btrfs_drop_extent_cache(inode, start, end - 1); @@ -502,7 +502,7 @@ next_slot: } bookend = 1; if (found_inline && start <= key.offset && - inline_end < extent_end) + inline_limit < extent_end) keep = 1; } /* truncate existing extent */ @@ -526,12 +526,12 @@ next_slot: btrfs_set_file_extent_num_bytes(leaf, extent, new_num); btrfs_mark_buffer_dirty(leaf); - } else if (end > extent_end && - key.offset < inline_end && - inline_end < extent_end) { + } else if (key.offset < inline_limit && + (end > extent_end) && + (inline_limit < extent_end)) { u32 new_size; new_size = btrfs_file_extent_calc_inline_size( - inline_end - key.offset); + inline_limit - key.offset); btrfs_truncate_item(trans, root, path, new_size, 1); } @@ -575,10 +575,10 @@ next_slot: continue; } if (bookend && found_inline && start <= key.offset && - inline_end < extent_end) { + inline_limit < extent_end && key.offset <= inline_limit) { u32 new_size; new_size = btrfs_file_extent_calc_inline_size( - extent_end - inline_end); + extent_end - inline_limit); btrfs_truncate_item(trans, root, path, new_size, 0); } /* create bookend, splitting the extent in two */ From 87ee04eb0f2f0c63314cef4a76bd1adac748425e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 30 Nov 2007 11:30:34 -0500 Subject: [PATCH 0322/2985] Btrfs: Add simple stripe size parameter Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 5 +++++ fs/btrfs/disk-io.c | 15 ++++++++++----- fs/btrfs/extent-tree.c | 36 ++++++++++++++++++++++++++++-------- 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b29b911dd825..9bc1b0a8615f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -135,6 +135,7 @@ struct btrfs_super_block { __le32 sectorsize; __le32 nodesize; __le32 leafsize; + __le32 stripesize; u8 root_level; } __attribute__ ((__packed__)); @@ -353,6 +354,8 @@ struct btrfs_root { /* leaf allocations are done in leafsize units */ u32 leafsize; + u32 stripesize; + u32 type; u64 highest_inode; u64 last_inode_alloc; @@ -776,6 +779,8 @@ BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, nodesize, 32); BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block, leafsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, + stripesize, 32); BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, root_dir_objectid, 64); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eef4ab56b9ca..60a30da6af00 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -355,7 +355,7 @@ int wait_on_tree_block_writeback(struct btrfs_root *root, } static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, - struct btrfs_root *root, + u32 stripesize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { @@ -365,6 +365,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->sectorsize = sectorsize; root->nodesize = nodesize; root->leafsize = leafsize; + root->stripesize = stripesize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -393,7 +394,8 @@ static int find_and_setup_root(struct btrfs_root *tree_root, u32 blocksize; __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, root, fs_info, objectid); + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -430,8 +432,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, } __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, root, fs_info, - location->objectid); + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, location->objectid); path = btrfs_alloc_path(); BUG_ON(!path); @@ -537,6 +539,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) u32 nodesize; u32 leafsize; u32 blocksize; + u32 stripesize; struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -607,7 +610,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) goto fail_iput; } #endif - __setup_root(512, 512, 512, tree_root, + __setup_root(512, 512, 512, 512, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); fs_info->sb_buffer = read_tree_block(tree_root, @@ -630,9 +633,11 @@ struct btrfs_root *open_ctree(struct super_block *sb) nodesize = btrfs_super_nodesize(disk_super); leafsize = btrfs_super_leafsize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); + stripesize = btrfs_super_stripesize(disk_super); tree_root->nodesize = nodesize; tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; + tree_root->stripesize = stripesize; sb_set_blocksize(sb, sectorsize); i_size_write(fs_info->btree_inode, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 55abdf997ca5..91397e989393 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -932,6 +932,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root return ret ? ret : pending_ret; } +static u64 stripe_align(struct btrfs_root *root, u64 val) +{ + u64 mask = ((u64)root->stripesize - 1); + u64 ret = (val + mask) & ~mask; + return ret; +} + /* * walks the btree of allocated extents and find a hole of a given size. * The key ins is changed to record the hole: @@ -948,8 +955,9 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root { struct btrfs_path *path; struct btrfs_key key; - int ret; u64 hole_size = 0; + u64 aligned; + int ret; int slot = 0; u64 last_byte = 0; u64 orig_search_start = search_start; @@ -990,6 +998,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data, full_scan); + search_start = stripe_align(root, search_start); cached_start = search_start; btrfs_init_path(path); ins->objectid = search_start; @@ -1039,13 +1048,23 @@ check_failed: search_start = max(search_start, block_group->key.objectid); if (!start_found) { - ins->objectid = search_start; - ins->offset = search_end - search_start; + aligned = stripe_align(root, search_start); + ins->objectid = aligned; + if (aligned >= search_end) { + ret = -ENOSPC; + goto error; + } + ins->offset = search_end - aligned; start_found = 1; goto check_pending; } - ins->objectid = last_byte > search_start ? - last_byte : search_start; + ins->objectid = stripe_align(root, + last_byte > search_start ? + last_byte : search_start); + if (search_end <= ins->objectid) { + ret = -ENOSPC; + goto error; + } ins->offset = search_end - ins->objectid; BUG_ON(ins->objectid >= search_end); goto check_pending; @@ -1056,9 +1075,10 @@ check_failed: start_found) { if (last_byte < search_start) last_byte = search_start; - hole_size = key.objectid - last_byte; - if (hole_size >= num_bytes) { - ins->objectid = last_byte; + aligned = stripe_align(root, last_byte); + hole_size = key.objectid - aligned; + if (key.objectid > aligned && hole_size >= num_bytes) { + ins->objectid = aligned; ins->offset = hole_size; goto check_pending; } From 1a2b2ac78a4151a49267a1e3f48a131f5810814e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 4 Dec 2007 13:18:24 -0500 Subject: [PATCH 0323/2985] Btrfs: Fix extent allocation for btree blocks as the disk fills Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 91397e989393..8ab4954f6ad0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -172,9 +172,7 @@ static u64 find_search_start(struct btrfs_root *root, int wrapped = 0; if (!cache) { - cache = btrfs_lookup_block_group(root->fs_info, search_start); - if (!cache) - return search_start; + goto out; } again: ret = cache_block_group(root, cache); @@ -205,6 +203,13 @@ again: return start; } out: + cache = btrfs_lookup_block_group(root->fs_info, search_start); + if (!cache) { + printk("Unable to find block group for %Lu\n", + search_start); + WARN_ON(1); + return search_start; + } return search_start; new_group: @@ -219,15 +224,14 @@ no_cache: data = BTRFS_BLOCK_GROUP_MIXED; goto wrapped; } - return search_start; + goto out; } if (cache_miss && !cache->cached) { cache_block_group(root, cache); last = cache_miss; cache = btrfs_lookup_block_group(root->fs_info, last); } - if (!full_scan) - cache = btrfs_find_block_group(root, cache, last, data, 0); + cache = btrfs_find_block_group(root, cache, last, data, 0); if (!cache) goto no_cache; *cache_ret = cache; @@ -985,12 +989,14 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root search_end = btrfs_super_total_bytes(&info->super_copy); if (hint_byte) { block_group = btrfs_lookup_block_group(info, hint_byte); + if (!block_group) + hint_byte = search_start; block_group = btrfs_find_block_group(root, block_group, hint_byte, data, 1); } else { block_group = btrfs_find_block_group(root, - trans->block_group, 0, - data, 1); + trans->block_group, + search_start, data, 1); } total_needed += empty_size; @@ -1159,14 +1165,14 @@ enospc: if (!full_scan) total_needed -= empty_size; full_scan = 1; + data = BTRFS_BLOCK_GROUP_MIXED; } else wrapped = 1; } block_group = btrfs_lookup_block_group(info, search_start); cond_resched(); - if (!full_scan) - block_group = btrfs_find_block_group(root, block_group, - search_start, data, 0); + block_group = btrfs_find_block_group(root, block_group, + search_start, data, 0); goto check_failed; error: From 9691975dd663b255fa52404dc7de4b18892bf244 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 4 Dec 2007 13:20:20 -0500 Subject: [PATCH 0324/2985] Btrfs: Fix buffer get/release issue in create_snapshot btrfs_cow_block expects a reference to be held on the buffer being cow'd. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 37f8e0321af8..d0af0807d9f8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2034,11 +2034,11 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - + extent_buffer_get(root->node); btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); + free_extent_buffer(tmp); btrfs_set_root_bytenr(&new_root_item, root->node->start); btrfs_set_root_level(&new_root_item, btrfs_header_level(root->node)); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); if (ret) From 17636e03f42a1a42fed3834859de4702bd655fd1 Mon Sep 17 00:00:00 2001 From: Christian Hesse Date: Tue, 11 Dec 2007 09:25:06 -0500 Subject: [PATCH 0325/2985] Btrfs: section mismatch warnings --Boundary-00=_CcOWHFYK4T+JwSj Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: 7bit Content-Disposition: inline Hello everybody, compiling btrfs into the kernel results in section mismatch warnings. __exit functions are called where they are not allowed to. The attached patch fixes this for me. Not sure if it is correct though. Signed-off-by: Christian Hesse -- Regards, Chris --Boundary-00=_CcOWHFYK4T+JwSj Content-Type: text/x-diff; charset="iso-8859-1"; name="btrfs-section_mismatches.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="btrfs-section_mismatches.patch" Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 2 +- fs/btrfs/extent_map.h | 2 +- fs/btrfs/transaction.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 06e437723dc3..8326a18413ff 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -69,7 +69,7 @@ free_map_cache: return -ENOMEM; } -void __exit extent_map_exit(void) +void extent_map_exit(void) { struct extent_state *state; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 2ffc0c76338c..6e572d3e8924 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -112,7 +112,7 @@ void free_extent_map(struct extent_map *em); int extent_read_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent); int __init extent_map_init(void); -void __exit extent_map_exit(void); +void extent_map_exit(void); u64 count_range_bits(struct extent_map_tree *tree, u64 *start, u64 max_bytes, unsigned long bits); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4af1c0dab7f8..0229e69bd2ff 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -666,7 +666,7 @@ void __init btrfs_init_transaction_sys(void) trans_wq = create_workqueue("btrfs"); } -void __exit btrfs_exit_transaction_sys(void) +void btrfs_exit_transaction_sys(void) { destroy_workqueue(trans_wq); } From 74493f7a59bfd4d1c7029c74ab2cd0e400612c6b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 11 Dec 2007 09:25:06 -0500 Subject: [PATCH 0326/2985] Btrfs: Implement generation numbers in block pointers Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 16 +++++++++++ fs/btrfs/ctree.h | 47 ++++++++++++++++++++++++++++----- fs/btrfs/extent-tree.c | 60 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 115 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3b16051b121b..5697705f7530 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -114,6 +114,9 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, } else { btrfs_set_node_blockptr(parent, parent_slot, cow->start); + WARN_ON(trans->transid == 0); + btrfs_set_node_ptr_generation(parent, parent_slot, + trans->transid); btrfs_mark_buffer_dirty(parent); WARN_ON(btrfs_header_generation(parent) != trans->transid); btrfs_free_extent(trans, root, buf->start, buf->len, 1); @@ -967,6 +970,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root { struct extent_buffer *b; u64 bytenr; + u64 ptr_gen; int slot; int ret; int level; @@ -1031,10 +1035,18 @@ again: if (level == lowest_level) break; bytenr = btrfs_node_blockptr(b, slot); + ptr_gen = btrfs_node_ptr_generation(b, slot); if (should_reada) reada_for_search(root, p, level, slot); b = read_tree_block(root, bytenr, btrfs_level_size(root, level - 1)); + if (ptr_gen != btrfs_header_generation(b)) { + printk("block %llu bad gen wanted %llu " + "found %llu\n", + (unsigned long long)b->start, + (unsigned long long)ptr_gen, + (unsigned long long)btrfs_header_generation(b)); + } } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -1218,6 +1230,8 @@ static int insert_new_root(struct btrfs_trans_handle *trans, btrfs_node_key(lower, &lower_key, 0); btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); + WARN_ON(btrfs_header_generation(lower) == 0); + btrfs_set_node_ptr_generation(c, 0, btrfs_header_generation(lower)); btrfs_mark_buffer_dirty(c); @@ -1261,6 +1275,8 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root } btrfs_set_node_key(lower, key, slot); btrfs_set_node_blockptr(lower, slot, bytenr); + WARN_ON(trans->transid == 0); + btrfs_set_node_ptr_generation(lower, slot, trans->transid); btrfs_set_header_nritems(lower, nritems + 1); btrfs_mark_buffer_dirty(lower); return 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9bc1b0a8615f..fd58dd846e61 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -110,7 +110,7 @@ struct btrfs_header { #define BTRFS_MAX_LEVEL 8 #define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ sizeof(struct btrfs_header)) / \ - (sizeof(struct btrfs_disk_key) + sizeof(u64))) + sizeof(struct btrfs_key_ptr)) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ @@ -168,6 +168,7 @@ struct btrfs_leaf { struct btrfs_key_ptr { struct btrfs_disk_key key; __le64 blockptr; + __le64 generation; } __attribute__ ((__packed__)); struct btrfs_node { @@ -196,7 +197,13 @@ struct btrfs_path { */ struct btrfs_extent_item { __le32 refs; - __le64 owner; +} __attribute__ ((__packed__)); + +struct btrfs_extent_ref { + __le64 root; + __le64 generation; + __le64 objectid; + __le64 offset; } __attribute__ ((__packed__)); struct btrfs_inode_timespec { @@ -402,12 +409,13 @@ struct btrfs_root { * are used, and how many references there are to each block */ #define BTRFS_EXTENT_ITEM_KEY 33 +#define BTRFS_EXTENT_REF_KEY 34 /* * block groups give us hints into the extent allocation trees. Which * blocks are free etc etc */ -#define BTRFS_BLOCK_GROUP_ITEM_KEY 34 +#define BTRFS_BLOCK_GROUP_ITEM_KEY 50 /* * string items are for debugging. They just store a short string of @@ -529,15 +537,25 @@ BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32); /* struct btrfs_extent_item */ BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); -BTRFS_SETGET_FUNCS(extent_owner, struct btrfs_extent_item, owner, 64); + +/* struct btrfs_extent_ref */ +BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); +BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); +BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); +BTRFS_SETGET_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64); + +BTRFS_SETGET_STACK_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); +BTRFS_SETGET_STACK_FUNCS(ref_generation, struct btrfs_extent_ref, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); +BTRFS_SETGET_STACK_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64); BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 32); -BTRFS_SETGET_STACK_FUNCS(stack_extent_owner, struct btrfs_extent_item, - owner, 64); /* struct btrfs_node */ BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); +BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) { @@ -556,6 +574,23 @@ static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); } +static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr) +{ + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr); +} + +static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb, + int nr, u64 val) +{ + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val); +} + static inline unsigned long btrfs_node_key_ptr_offset(int nr) { return offsetof(struct btrfs_node, ptrs) + diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8ab4954f6ad0..0f1ebdd4e925 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -17,6 +17,7 @@ */ #include +#include "hash.h" #include "ctree.h" #include "disk-io.h" #include "print-tree.h" @@ -352,9 +353,63 @@ found: return found_group; } +static u64 hash_extent_ref(u64 root_objectid, u64 root_generation, + u64 owner, u64 owner_offset) +{ + u32 high_crc = ~(u32)0; + u32 low_crc = ~(u32)0; + __le64 lenum; + + lenum = cpu_to_le64(root_objectid); + high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); + lenum = cpu_to_le64(root_generation); + high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); + + lenum = cpu_to_le64(owner); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + + lenum = cpu_to_le64(owner_offset); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + + return ((u64)high_crc << 32) | (u64)low_crc; +} + +int insert_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, + u64 root_objectid, u64 root_generation, + u64 owner, u64 owner_offset) +{ + u64 hash; + struct btrfs_key key; + struct btrfs_extent_ref ref; + struct extent_buffer *l; + struct btrfs_extent_item *item; + int ret; + + btrfs_set_stack_ref_root(&ref, root_objectid); + btrfs_set_stack_ref_generation(&ref, root_generation); + btrfs_set_stack_ref_objectid(&ref, owner); + btrfs_set_stack_ref_offset(&ref, owner_offset); + + ret = btrfs_name_hash(&ref, sizeof(ref), &hash); + key.offset = hash; + key.objectid = bytenr; + key.type = BTRFS_EXTENT_REF_KEY; + + ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref)); + while (ret == -EEXIST) { + + } + +} + int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u64 num_bytes) + u64 bytenr, u64 num_bytes, + u64 root_objectid, u64 root_generation, + u64 owner, u64 owner_offset) { struct btrfs_path *path; int ret; @@ -386,9 +441,10 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root->fs_info->extent_root, path); - btrfs_free_path(path); finish_current_insert(trans, root->fs_info->extent_root); del_pending_extents(trans, root->fs_info->extent_root); + + btrfs_free_path(path); return 0; } From 7bb86316c3961d1bc401ef184fd996f999556c7f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 11 Dec 2007 09:25:06 -0500 Subject: [PATCH 0327/2985] Btrfs: Add back pointers from extents to the btree or file referencing them Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 196 +++++++++++++++++++--- fs/btrfs/ctree.h | 41 ++++- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 373 ++++++++++++++++++++++++++++++++++------- fs/btrfs/file.c | 20 ++- fs/btrfs/inode.c | 38 +++-- fs/btrfs/print-tree.c | 10 ++ fs/btrfs/tree-defrag.c | 2 + 8 files changed, 577 insertions(+), 105 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5697705f7530..fd8233e05cf4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -77,13 +77,37 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer **cow_ret, u64 search_start, u64 empty_size) { + u64 root_gen; struct extent_buffer *cow; + u32 nritems; int ret = 0; int different_trans = 0; + int level; + struct btrfs_key first_key; + if (root->ref_cows) { + root_gen = trans->transid; + } else { + root_gen = 0; + } + + WARN_ON(root->ref_cows && trans->transid != + root->fs_info->running_transaction->transid); WARN_ON(root->ref_cows && trans->transid != root->last_trans); - cow = btrfs_alloc_free_block(trans, root, buf->len, + level = btrfs_header_level(buf); + nritems = btrfs_header_nritems(buf); + if (nritems) { + if (level == 0) + btrfs_item_key_to_cpu(buf, &first_key, 0); + else + btrfs_node_key_to_cpu(buf, &first_key, 0); + } else { + first_key.objectid = 0; + } + cow = __btrfs_alloc_free_block(trans, root, buf->len, + root->root_key.objectid, + root_gen, first_key.objectid, level, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -104,14 +128,17 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, } if (buf == root->node) { + root_gen = btrfs_header_generation(buf); root->node = cow; extent_buffer_get(cow); if (buf != root->commit_root) { btrfs_free_extent(trans, root, buf->start, - buf->len, 1); + buf->len, root->root_key.objectid, + root_gen, 0, 0, 1); } free_extent_buffer(buf); } else { + root_gen = btrfs_header_generation(parent); btrfs_set_node_blockptr(parent, parent_slot, cow->start); WARN_ON(trans->transid == 0); @@ -119,7 +146,9 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, trans->transid); btrfs_mark_buffer_dirty(parent); WARN_ON(btrfs_header_generation(parent) != trans->transid); - btrfs_free_extent(trans, root, buf->start, buf->len, 1); + btrfs_free_extent(trans, root, buf->start, buf->len, + btrfs_header_owner(parent), root_gen, + 0, 0, 1); } free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); @@ -606,6 +635,8 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root return 0; mid = path->nodes[level]; + WARN_ON(btrfs_header_generation(mid) != trans->transid); + orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) @@ -631,7 +662,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root wait_on_tree_block_writeback(root, mid); /* once for the path */ free_extent_buffer(mid); - ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1); + ret = btrfs_free_extent(trans, root, mid->start, mid->len, + root->root_key.objectid, + btrfs_header_generation(mid), 0, 0, 1); /* once for the root ptr */ free_extent_buffer(mid); return ret; @@ -681,6 +714,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; if (btrfs_header_nritems(right) == 0) { u64 bytenr = right->start; + u64 generation = btrfs_header_generation(parent); u32 blocksize = right->len; clean_tree_block(trans, root, right); @@ -692,7 +726,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; wret = btrfs_free_extent(trans, root, bytenr, - blocksize, 1); + blocksize, + btrfs_header_owner(parent), + generation, 0, 0, 1); if (wret) ret = wret; } else { @@ -722,6 +758,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } if (btrfs_header_nritems(mid) == 0) { /* we've managed to empty the middle node, drop it */ + u64 root_gen = btrfs_header_generation(parent); u64 bytenr = mid->start; u32 blocksize = mid->len; clean_tree_block(trans, root, mid); @@ -731,7 +768,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) ret = wret; - wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1); + wret = btrfs_free_extent(trans, root, bytenr, blocksize, + btrfs_header_owner(parent), + root_gen, 0, 0, 1); if (wret) ret = wret; } else { @@ -788,6 +827,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, return 1; mid = path->nodes[level]; + WARN_ON(btrfs_header_generation(mid) != trans->transid); orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) @@ -1113,6 +1153,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root src_nritems = btrfs_header_nritems(src); dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; + WARN_ON(btrfs_header_generation(src) != trans->transid); + WARN_ON(btrfs_header_generation(dst) != trans->transid); if (push_items <= 0) { return 1; @@ -1159,6 +1201,9 @@ static int balance_node_right(struct btrfs_trans_handle *trans, int dst_nritems; int ret = 0; + WARN_ON(btrfs_header_generation(src) != trans->transid); + WARN_ON(btrfs_header_generation(dst) != trans->transid); + src_nritems = btrfs_header_nritems(src); dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; @@ -1202,6 +1247,8 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { + u64 root_gen; + u64 lower_gen; struct extent_buffer *lower; struct extent_buffer *c; struct btrfs_disk_key lower_key; @@ -1209,7 +1256,20 @@ static int insert_new_root(struct btrfs_trans_handle *trans, BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - c = btrfs_alloc_free_block(trans, root, root->nodesize, + if (root->ref_cows) + root_gen = trans->transid; + else + root_gen = 0; + + lower = path->nodes[level-1]; + if (level == 1) + btrfs_item_key(lower, &lower_key, 0); + else + btrfs_node_key(lower, &lower_key, 0); + + c = __btrfs_alloc_free_block(trans, root, root->nodesize, + root->root_key.objectid, + root_gen, lower_key.objectid, level, root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); @@ -1219,19 +1279,16 @@ static int insert_new_root(struct btrfs_trans_handle *trans, btrfs_set_header_bytenr(c, c->start); btrfs_set_header_generation(c, trans->transid); btrfs_set_header_owner(c, root->root_key.objectid); - lower = path->nodes[level-1]; write_extent_buffer(c, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(c), BTRFS_FSID_SIZE); - if (level == 1) - btrfs_item_key(lower, &lower_key, 0); - else - btrfs_node_key(lower, &lower_key, 0); btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); - WARN_ON(btrfs_header_generation(lower) == 0); - btrfs_set_node_ptr_generation(c, 0, btrfs_header_generation(lower)); + lower_gen = btrfs_header_generation(lower); + WARN_ON(lower_gen == 0); + + btrfs_set_node_ptr_generation(c, 0, lower_gen); btrfs_mark_buffer_dirty(c); @@ -1241,6 +1298,18 @@ static int insert_new_root(struct btrfs_trans_handle *trans, extent_buffer_get(c); path->nodes[level] = c; path->slots[level] = 0; + + if (root->ref_cows && lower_gen != trans->transid) { + struct btrfs_path *back_path = btrfs_alloc_path(); + int ret; + ret = btrfs_insert_extent_backref(trans, + root->fs_info->extent_root, + path, lower->start, + root->root_key.objectid, + trans->transid, 0, 0); + BUG_ON(ret); + btrfs_free_path(back_path); + } return 0; } @@ -1294,6 +1363,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { + u64 root_gen; struct extent_buffer *c; struct extent_buffer *split; struct btrfs_disk_key disk_key; @@ -1303,6 +1373,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root u32 c_nritems; c = path->nodes[level]; + WARN_ON(btrfs_header_generation(c) != trans->transid); if (c == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); @@ -1319,8 +1390,17 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(c); - split = btrfs_alloc_free_block(trans, root, root->nodesize, - c->start, 0); + if (root->ref_cows) + root_gen = trans->transid; + else + root_gen = 0; + + btrfs_node_key(c, &disk_key, 0); + split = __btrfs_alloc_free_block(trans, root, root->nodesize, + root->root_key.objectid, + root_gen, + btrfs_disk_key_objectid(&disk_key), + level, c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); @@ -1789,6 +1869,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, struct btrfs_path *path, int data_size, int extend) { + u64 root_gen; struct extent_buffer *l; u32 nritems; int mid; @@ -1807,6 +1888,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (extend) space_needed = data_size; + if (root->ref_cows) + root_gen = trans->transid; + else + root_gen = 0; + /* first try to make some room by pushing left and right */ if (ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size, 0); @@ -1837,8 +1923,12 @@ again: nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; - right = btrfs_alloc_free_block(trans, root, root->leafsize, - l->start, 0); + btrfs_item_key(l, &disk_key, 0); + + right = __btrfs_alloc_free_block(trans, root, root->leafsize, + root->root_key.objectid, + root_gen, disk_key.objectid, 0, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); @@ -2413,13 +2503,16 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (leaf == root->node) { btrfs_set_header_level(leaf, 0); } else { + u64 root_gen = btrfs_header_generation(path->nodes[1]); clean_tree_block(trans, root, leaf); wait_on_tree_block_writeback(root, leaf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - leaf->start, leaf->len, 1); + leaf->start, leaf->len, + btrfs_header_owner(path->nodes[1]), + root_gen, 0, 0, 1); if (wret) ret = wret; } @@ -2456,9 +2549,13 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, } if (btrfs_header_nritems(leaf) == 0) { + u64 root_gen; u64 bytenr = leaf->start; u32 blocksize = leaf->len; + root_gen = btrfs_header_generation( + path->nodes[1]); + clean_tree_block(trans, root, leaf); wait_on_tree_block_writeback(root, leaf); @@ -2468,7 +2565,9 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, free_extent_buffer(leaf); wret = btrfs_free_extent(trans, root, bytenr, - blocksize, 1); + blocksize, + btrfs_header_owner(path->nodes[1]), + root_gen, 0, 0, 1); if (wret) ret = wret; } else { @@ -2482,6 +2581,61 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, return ret; } +/* + * walk up the tree as far as required to find the previous leaf. + * returns 0 if it found something or 1 if there are no lesser leaves. + * returns < 0 on io errors. + */ +int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) +{ + int slot; + int level = 1; + u64 bytenr; + struct extent_buffer *c; + struct extent_buffer *next = NULL; + + while(level < BTRFS_MAX_LEVEL) { + if (!path->nodes[level]) + return 1; + + slot = path->slots[level]; + c = path->nodes[level]; + if (slot == 0) { + level++; + if (level == BTRFS_MAX_LEVEL) + return 1; + continue; + } + slot--; + + bytenr = btrfs_node_blockptr(c, slot); + if (next) + free_extent_buffer(next); + + if (path->reada < 0) + reada_for_search(root, path, level, slot); + + next = read_tree_block(root, bytenr, + btrfs_level_size(root, level - 1)); + break; + } + path->slots[level] = slot; + while(1) { + level--; + c = path->nodes[level]; + free_extent_buffer(c); + path->nodes[level] = next; + path->slots[level] = 0; + if (!level) + break; + if (path->reada) + reada_for_search(root, path, level, 0); + next = read_tree_block(root, btrfs_node_blockptr(next, 0), + btrfs_level_size(root, level - 1)); + } + return 0; +} + /* * walk up the tree as far as required to find the next leaf. * returns 0 if it found something or 1 if there are no greater leaves. @@ -2503,6 +2657,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) c = path->nodes[level]; if (slot >= btrfs_header_nritems(c)) { level++; + if (level == BTRFS_MAX_LEVEL) + return 1; continue; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fd58dd846e61..cb1b156d954e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -544,11 +544,12 @@ BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); BTRFS_SETGET_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64); -BTRFS_SETGET_STACK_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); -BTRFS_SETGET_STACK_FUNCS(ref_generation, struct btrfs_extent_ref, +BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64); +BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref, generation, 64); -BTRFS_SETGET_STACK_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); -BTRFS_SETGET_STACK_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref, + objectid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_ref_offset, struct btrfs_extent_ref, offset, 64); BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 32); @@ -914,24 +915,45 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, *hint, u64 search_start, int data, int owner); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, u64 owner_objectid); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 size, + u64 root_objectid, u64 hint, u64 empty_size); +struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u32 blocksize, + u64 root_objectid, + u64 ref_generation, + u64 first_objectid, + int level, + u64 hint, + u64 empty_size); +int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 owner, - u64 num_bytes, u64 empty_size, u64 search_start, + struct btrfs_root *root, + u64 num_bytes, u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, int pin); + *root, u64 bytenr, u64 num_bytes, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid, u64 owner_offset, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_map_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u64 num_bytes); + u64 bytenr, u64 num_bytes, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); @@ -966,6 +988,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *cpu_key, u32 data_size); int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); +int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60a30da6af00..0ac21e3aac87 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,7 +210,7 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree = &BTRFS_I(mapping->host)->extent_tree; - if (wbc->sync_mode == WB_SYNC_NONE) { + if (0 && wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; unsigned long thresh = 96 * 1024 * 1024; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0f1ebdd4e925..32991f73e9db 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -17,6 +17,7 @@ */ #include +#include #include "hash.h" #include "ctree.h" #include "disk-io.h" @@ -89,7 +90,8 @@ static int cache_block_group(struct btrfs_root *root, btrfs_item_key_to_cpu(leaf, &key, slot); if (key.objectid < block_group->key.objectid) { - if (key.objectid + key.offset > first_free) + if (btrfs_key_type(&key) != BTRFS_EXTENT_REF_KEY && + key.objectid + key.offset > first_free) first_free = key.objectid + key.offset; goto next; } @@ -353,7 +355,7 @@ found: return found_group; } -static u64 hash_extent_ref(u64 root_objectid, u64 root_generation, +static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset) { u32 high_crc = ~(u32)0; @@ -362,53 +364,149 @@ static u64 hash_extent_ref(u64 root_objectid, u64 root_generation, lenum = cpu_to_le64(root_objectid); high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); - lenum = cpu_to_le64(root_generation); - high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); + lenum = cpu_to_le64(ref_generation); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); +#if 0 lenum = cpu_to_le64(owner); low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); - lenum = cpu_to_le64(owner_offset); low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); - +#endif return ((u64)high_crc << 32) | (u64)low_crc; } -int insert_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - u64 bytenr, - u64 root_objectid, u64 root_generation, - u64 owner, u64 owner_offset) +static int match_extent_ref(struct extent_buffer *leaf, + struct btrfs_extent_ref *disk_ref, + struct btrfs_extent_ref *cpu_ref) +{ + int ret; + int len; + + if (cpu_ref->objectid) + len = sizeof(*cpu_ref); + else + len = 2 * sizeof(u64); + ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref, + len); + return ret == 0; +} + +static int lookup_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, int del) +{ + u64 hash; + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_extent_ref ref; + struct extent_buffer *leaf; + struct btrfs_extent_ref *disk_ref; + int ret; + int ret2; + + btrfs_set_stack_ref_root(&ref, root_objectid); + btrfs_set_stack_ref_generation(&ref, ref_generation); + btrfs_set_stack_ref_objectid(&ref, owner); + btrfs_set_stack_ref_offset(&ref, owner_offset); + + hash = hash_extent_ref(root_objectid, ref_generation, owner, + owner_offset); + key.offset = hash; + key.objectid = bytenr; + key.type = BTRFS_EXTENT_REF_KEY; + + while (1) { + ret = btrfs_search_slot(trans, root, &key, path, + del ? -1 : 0, del); + if (ret < 0) + goto out; + leaf = path->nodes[0]; + if (ret != 0) { + u32 nritems = btrfs_header_nritems(leaf); + if (path->slots[0] >= nritems) { + ret2 = btrfs_next_leaf(root, path); + if (ret2) + goto out; + leaf = path->nodes[0]; + } + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid != bytenr || + found_key.type != BTRFS_EXTENT_REF_KEY) + goto out; + key.offset = found_key.offset; + if (del) { + btrfs_release_path(root, path); + continue; + } + } + disk_ref = btrfs_item_ptr(path->nodes[0], + path->slots[0], + struct btrfs_extent_ref); + if (match_extent_ref(path->nodes[0], disk_ref, &ref)) { + ret = 0; + goto out; + } + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + key.offset = found_key.offset + 1; + btrfs_release_path(root, path); + } +out: + return ret; +} + +int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset) { u64 hash; struct btrfs_key key; struct btrfs_extent_ref ref; - struct extent_buffer *l; - struct btrfs_extent_item *item; + struct btrfs_extent_ref *disk_ref; int ret; btrfs_set_stack_ref_root(&ref, root_objectid); - btrfs_set_stack_ref_generation(&ref, root_generation); + btrfs_set_stack_ref_generation(&ref, ref_generation); btrfs_set_stack_ref_objectid(&ref, owner); btrfs_set_stack_ref_offset(&ref, owner_offset); - ret = btrfs_name_hash(&ref, sizeof(ref), &hash); + hash = hash_extent_ref(root_objectid, ref_generation, owner, + owner_offset); key.offset = hash; key.objectid = bytenr; key.type = BTRFS_EXTENT_REF_KEY; ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref)); while (ret == -EEXIST) { - + disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_ref); + if (match_extent_ref(path->nodes[0], disk_ref, &ref)) + goto out; + key.offset++; + btrfs_release_path(root, path); + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(ref)); } - + if (ret) + goto out; + disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_ref); + write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref, + sizeof(ref)); + btrfs_mark_buffer_dirty(path->nodes[0]); +out: + btrfs_release_path(root, path); + return ret; } int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, - u64 root_objectid, u64 root_generation, + u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset) { struct btrfs_path *path; @@ -441,6 +539,11 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root->fs_info->extent_root, path); + + ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, + path, bytenr, root_objectid, + ref_generation, owner, owner_offset); + BUG_ON(ret); finish_current_insert(trans, root->fs_info->extent_root); del_pending_extents(trans, root->fs_info->extent_root); @@ -489,10 +592,29 @@ out: } int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root, u64 owner_objectid) { + u64 generation; + u64 key_objectid; + u64 level; + u32 nritems; + struct btrfs_disk_key disk_key; + + level = btrfs_header_level(root->node); + generation = trans->transid; + nritems = btrfs_header_nritems(root->node); + if (nritems > 0) { + if (level == 0) + btrfs_item_key(root->node, &disk_key, 0); + else + btrfs_node_key(root->node, &disk_key, 0); + key_objectid = btrfs_disk_key_objectid(&disk_key); + } else { + key_objectid = 0; + } return btrfs_inc_extent_ref(trans, root, root->node->start, - root->node->len); + root->node->len, owner_objectid, + generation, 0, 0); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -506,7 +628,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int level; int ret; int faili; - int err; if (!root->ref_cows) return 0; @@ -528,7 +649,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (disk_bytenr == 0) continue; ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, - btrfs_file_extent_disk_num_bytes(buf, fi)); + btrfs_file_extent_disk_num_bytes(buf, fi), + root->root_key.objectid, trans->transid, + key.objectid, key.offset); if (ret) { faili = i; goto fail; @@ -536,7 +659,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } else { bytenr = btrfs_node_blockptr(buf, i); ret = btrfs_inc_extent_ref(trans, root, bytenr, - btrfs_level_size(root, level - 1)); + btrfs_level_size(root, level - 1), + root->root_key.objectid, + trans->transid, 0, 0); if (ret) { faili = i; goto fail; @@ -546,6 +671,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; fail: WARN_ON(1); +#if 0 for (i =0; i < faili; i++) { if (level == 0) { u64 disk_bytenr; @@ -571,6 +697,7 @@ fail: BUG_ON(err); } } +#endif return ret; } @@ -809,18 +936,18 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root) { + u64 start; + u64 end; + struct btrfs_fs_info *info = extent_root->fs_info; + struct btrfs_path *path; struct btrfs_key ins; struct btrfs_extent_item extent_item; int ret; int err = 0; - u64 start; - u64 end; - struct btrfs_fs_info *info = extent_root->fs_info; btrfs_set_stack_extent_refs(&extent_item, 1); btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); - btrfs_set_stack_extent_owner(&extent_item, - extent_root->root_key.objectid); + path = btrfs_alloc_path(); while(1) { ret = find_first_extent_bit(&info->extent_ins, 0, &start, @@ -834,7 +961,12 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct &extent_item, sizeof(extent_item)); clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, GFP_NOFS); + err = btrfs_insert_extent_backref(trans, extent_root, path, + start, extent_root->root_key.objectid, + 0, 0, 0); + BUG_ON(err); } + btrfs_free_path(path); return 0; } @@ -871,7 +1003,9 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, int pin, + *root, u64 bytenr, u64 num_bytes, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid, u64 owner_offset, int pin, int mark_free) { struct btrfs_path *path; @@ -891,6 +1025,24 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; + if (ref_generation && owner_objectid == 0 && root_objectid == 3) { +//printk("drop backref root %Lu gen %Lu byte %Lu\n", root_objectid, ref_generation, bytenr ); + } + ret = lookup_extent_backref(trans, extent_root, path, + bytenr, root_objectid, + ref_generation, + owner_objectid, owner_offset, 1); + if (ret == 0) { + ret = btrfs_del_item(trans, extent_root, path); + } else { + btrfs_print_leaf(extent_root, path->nodes[0]); + WARN_ON(1); + printk("Unable to find ref byte nr %Lu root %Lu " + " gen %Lu owner %Lu offset %Lu\n", bytenr, + root_objectid, ref_generation, owner_objectid, + owner_offset); + } + btrfs_release_path(extent_root, path); ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret < 0) return ret; @@ -965,7 +1117,9 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, GFP_NOFS); ret = __free_extent(trans, extent_root, - start, end + 1 - start, 0, 0); + start, end + 1 - start, + extent_root->root_key.objectid, + 0, 0, 0, 0, 0); if (ret) err = ret; } @@ -976,18 +1130,25 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct * remove an extent from the root, returns 0 on success */ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, int pin) + *root, u64 bytenr, u64 num_bytes, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid, u64 owner_offset, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; int pending_ret; int ret; WARN_ON(num_bytes < root->sectorsize); + if (!root->ref_cows) + ref_generation = 0; + if (root == extent_root) { pin_down_bytes(root, bytenr, num_bytes, 1); return 0; } - ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0); + ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid, + ref_generation, owner_objectid, owner_offset, + pin, pin == 0); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } @@ -1080,23 +1241,26 @@ check_failed: btrfs_item_key_to_cpu(l, &key, path->slots[0]); /* - * a rare case, go back one key if we hit a block group item - * instead of an extent item + * walk backwards to find the first extent item key */ - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY && - key.objectid + key.offset >= search_start) { - ins->objectid = key.objectid; - ins->offset = key.offset - 1; - btrfs_release_path(root, path); - ret = btrfs_search_slot(trans, root, ins, path, 0, 0); - if (ret < 0) - goto error; - - if (path->slots[0] > 0) { + while(btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret != 0) { + ret = btrfs_search_slot(trans, root, ins, + path, 0, 0); + if (ret < 0) + goto error; + if (path->slots[0] > 0) + path->slots[0]--; + break; + } + } else { path->slots[0]--; } + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &key, path->slots[0]); } - while (1) { l = path->nodes[0]; slot = path->slots[0]; @@ -1146,7 +1310,8 @@ check_failed: } } if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { - if (!start_found) { + if (!start_found && btrfs_key_type(&key) == + BTRFS_BLOCK_GROUP_ITEM_KEY) { last_byte = key.objectid; start_found = 1; } @@ -1244,8 +1409,10 @@ error: * returns 0 if everything worked, non-zero otherwise. */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 owner, - u64 num_bytes, u64 empty_size, u64 hint_byte, + struct btrfs_root *root, + u64 num_bytes, u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, int data) { int ret; @@ -1255,9 +1422,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; + struct btrfs_path *path; btrfs_set_stack_extent_refs(&extent_item, 1); - btrfs_set_stack_extent_owner(&extent_item, owner); WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, @@ -1296,8 +1463,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, trans->alloc_exclude_start = 0; trans->alloc_exclude_nr = 0; + BUG_ON(ret); + + path = btrfs_alloc_path(); + BUG_ON(!path); + ret = btrfs_insert_extent_backref(trans, extent_root, path, + ins->objectid, root_objectid, + ref_generation, owner, owner_offset); BUG_ON(ret); + btrfs_free_path(path); finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); @@ -1321,15 +1496,43 @@ update_block: */ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u32 blocksize, u64 hint, + u32 blocksize, + u64 root_objectid, u64 hint, + u64 empty_size) +{ + u64 ref_generation; + + if (root->ref_cows) + ref_generation = trans->transid; + else + ref_generation = 0; + + + return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid, + ref_generation, 0, 0, hint, empty_size); +} + +/* + * helper function to allocate a block for a given tree + * returns the tree buffer or NULL. + */ +struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u32 blocksize, + u64 root_objectid, + u64 ref_generation, + u64 first_objectid, + int level, + u64 hint, u64 empty_size) { struct btrfs_key ins; int ret; struct extent_buffer *buf; - ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - blocksize, empty_size, hint, + ret = btrfs_alloc_extent(trans, root, blocksize, + root_objectid, ref_generation, + first_objectid, level, empty_size, hint, (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); @@ -1337,7 +1540,9 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, } buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize); if (!buf) { - btrfs_free_extent(trans, root, ins.objectid, blocksize, 0); + btrfs_free_extent(trans, root, ins.objectid, blocksize, + root->root_key.objectid, ref_generation, + 0, 0, 0); return ERR_PTR(-ENOMEM); } btrfs_set_buffer_uptodate(buf); @@ -1355,6 +1560,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, static int drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf) { + u64 leaf_owner; + u64 leaf_generation; struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; @@ -1363,6 +1570,9 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, BUG_ON(!btrfs_is_leaf(leaf)); nritems = btrfs_header_nritems(leaf); + leaf_owner = btrfs_header_owner(leaf); + leaf_generation = btrfs_header_generation(leaf); + for (i = 0; i < nritems; i++) { u64 disk_bytenr; @@ -1381,7 +1591,9 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, if (disk_bytenr == 0) continue; ret = btrfs_free_extent(trans, root, disk_bytenr, - btrfs_file_extent_disk_num_bytes(leaf, fi), 0); + btrfs_file_extent_disk_num_bytes(leaf, fi), + leaf_owner, leaf_generation, + key.objectid, key.offset, 0); BUG_ON(ret); } return 0; @@ -1423,9 +1635,12 @@ static void reada_walk_down(struct btrfs_root *root, static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { + u64 root_owner; + u64 root_gen; + u64 bytenr; struct extent_buffer *next; struct extent_buffer *cur; - u64 bytenr; + struct extent_buffer *parent; u32 blocksize; int ret; u32 refs; @@ -1466,9 +1681,13 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) { + parent = path->nodes[*level]; + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); path->slots[*level]++; ret = btrfs_free_extent(trans, root, bytenr, - blocksize, 1); + blocksize, root_owner, + root_gen, 0, 0, 1); BUG_ON(ret); continue; } @@ -1484,10 +1703,16 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root blocksize, &refs); BUG_ON(ret); if (refs != 1) { + parent = path->nodes[*level]; + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); + path->slots[*level]++; free_extent_buffer(next); - ret = btrfs_free_extent(trans, root, - bytenr, blocksize, 1); + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, + root_owner, + root_gen, 0, 0, 1); BUG_ON(ret); continue; } @@ -1502,8 +1727,19 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); + + if (path->nodes[*level] == root->node) { + root_owner = root->root_key.objectid; + parent = path->nodes[*level]; + } else { + parent = path->nodes[*level + 1]; + root_owner = btrfs_header_owner(parent); + } + + root_gen = btrfs_header_generation(parent); ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, - path->nodes[*level]->len, 1); + path->nodes[*level]->len, + root_owner, root_gen, 0, 0, 1); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -1519,10 +1755,12 @@ out: static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { + u64 root_owner; + u64 root_gen; + struct btrfs_root_item *root_item = &root->root_item; int i; int slot; int ret; - struct btrfs_root_item *root_item = &root->root_item; for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; @@ -1539,9 +1777,20 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root root_item->drop_level = i; return 0; } else { + if (path->nodes[*level] == root->node) { + root_owner = root->root_key.objectid; + root_gen = + btrfs_header_generation(path->nodes[*level]); + } else { + struct extent_buffer *node; + node = path->nodes[*level + 1]; + root_owner = btrfs_header_owner(node); + root_gen = btrfs_header_generation(node); + } ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, - path->nodes[*level]->len, 1); + path->nodes[*level]->len, + root_owner, root_gen, 0, 0, 1); BUG_ON(ret); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b0d637787264..5b1f90f06e03 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -496,7 +496,10 @@ next_slot: sizeof(old)); if (disk_bytenr != 0) { ret = btrfs_inc_extent_ref(trans, root, - disk_bytenr, disk_num_bytes); + disk_bytenr, disk_num_bytes, + root->root_key.objectid, + trans->transid, + key.objectid, end); BUG_ON(ret); } } @@ -541,6 +544,14 @@ next_slot: u64 disk_bytenr = 0; u64 disk_num_bytes = 0; u64 extent_num_bytes = 0; + u64 root_gen; + + if (leaf != root->node) { + root_gen = + btrfs_header_generation(path->nodes[1]); + } else { + root_gen = btrfs_header_generation(leaf); + } if (found_extent) { disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, @@ -562,8 +573,11 @@ next_slot: if (found_extent && disk_bytenr != 0) { inode->i_blocks -= extent_num_bytes >> 9; ret = btrfs_free_extent(trans, root, - disk_bytenr, - disk_num_bytes, 0); + disk_bytenr, + disk_num_bytes, + root->root_key.objectid, + root_gen, inode->i_ino, + key.offset, 0); } BUG_ON(ret); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d0af0807d9f8..bb70db0c9df4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -93,7 +93,9 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) if (alloc_hint == EXTENT_MAP_INLINE) goto out; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_bytes, 0, + ret = btrfs_alloc_extent(trans, root, num_bytes, + root->root_key.objectid, trans->transid, + inode->i_ino, start, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); @@ -560,6 +562,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, u64 extent_start = 0; u64 extent_num_bytes = 0; u64 item_end = 0; + u64 root_gen = 0; int found_extent; int del_item; int extent_type = -1; @@ -670,6 +673,15 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, found_extent = 1; inode->i_blocks -= num_dec; } + if (leaf == root->node) { + root_gen = + btrfs_header_generation(leaf); + } else { + struct extent_buffer *parent; + parent = path->nodes[1]; + root_gen = + btrfs_header_generation(parent); + } } } else if (extent_type == BTRFS_FILE_EXTENT_INLINE && !del_item) { @@ -690,7 +702,10 @@ delete: btrfs_release_path(root, path); if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, - extent_num_bytes, 0); + extent_num_bytes, + root->root_key.objectid, + root_gen, inode->i_ino, + found_key.offset, 0); BUG_ON(ret); } } @@ -1900,7 +1915,14 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 0); + ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, + 0, &objectid); + if (ret) + goto fail; + + leaf = __btrfs_alloc_free_block(trans, root, root->leafsize, + objectid, trans->transid, 0, 0, + 0, 0); if (IS_ERR(leaf)) return PTR_ERR(leaf); @@ -1908,7 +1930,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_header_level(leaf, 0); btrfs_set_header_bytenr(leaf, leaf->start); btrfs_set_header_generation(leaf, trans->transid); - btrfs_set_header_owner(leaf, root->root_key.objectid); + btrfs_set_header_owner(leaf, objectid); + write_extent_buffer(leaf, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); @@ -1933,11 +1956,6 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) free_extent_buffer(leaf); leaf = NULL; - ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, - 0, &objectid); - if (ret) - goto fail; - btrfs_set_root_dirid(&root_item, new_dirid); key.objectid = objectid; @@ -2056,7 +2074,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail; - ret = btrfs_inc_root_ref(trans, root); + ret = btrfs_inc_root_ref(trans, root, objectid); if (ret) goto fail; fail: diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 030324febf6c..da0b4dcf3617 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -33,6 +33,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_file_extent_item *fi; struct btrfs_key key; struct btrfs_key found_key; + struct btrfs_extent_ref *ref; u32 type; printk("leaf %llu total ptrs %d free space %d\n", @@ -73,6 +74,15 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) printk("\t\textent data refs %u\n", btrfs_extent_refs(l, ei)); break; + case BTRFS_EXTENT_REF_KEY: + ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref); + printk("\t\textent back ref root %llu gen %llu " + "owner %llu offset %llu\n", + (unsigned long long)btrfs_ref_root(l, ref), + (unsigned long long)btrfs_ref_generation(l, ref), + (unsigned long long)btrfs_ref_objectid(l, ref), + (unsigned long long)btrfs_ref_offset(l, ref)); + break; case BTRFS_EXTENT_DATA_KEY: fi = btrfs_item_ptr(l, i, diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 3994795edfeb..5c58630dce03 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -78,6 +78,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; if (*level == 1) { + WARN_ON(btrfs_header_generation(path->nodes[*level]) != + trans->transid); ret = btrfs_realloc_node(trans, root, path->nodes[*level], path->slots[*level], From d8d5f3e16d1ae4fe9b93312e083f2c04a95520f0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 11 Dec 2007 12:42:00 -0500 Subject: [PATCH 0328/2985] Btrfs: Add lowest key information to back refs for extent tree blocks as well. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 102 ++++++++++++++++++++++++++++++++++++++++- fs/btrfs/file.c | 11 ++--- fs/btrfs/inode.c | 14 ++---- 4 files changed, 110 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0ac21e3aac87..60a30da6af00 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,7 +210,7 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree = &BTRFS_I(mapping->host)->extent_tree; - if (0 && wbc->sync_mode == WB_SYNC_NONE) { + if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; unsigned long thresh = 96 * 1024 * 1024; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32991f73e9db..187be4012474 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -457,6 +457,94 @@ out: return ret; } +/* + * Back reference rules. Back refs have three main goals: + * + * 1) differentiate between all holders of references to an extent so that + * when a reference is dropped we can make sure it was a valid reference + * before freeing the extent. + * + * 2) Provide enough information to quickly find the holders of an extent + * if we notice a given block is corrupted or bad. + * + * 3) Make it easy to migrate blocks for FS shrinking or storage pool + * maintenance. This is actually the same as #2, but with a slightly + * different use case. + * + * File extents can be referenced by: + * + * - multiple snapshots, subvolumes, or different generations in one subvol + * - different files inside a single subvolume (in theory, not implemented yet) + * - different offsets inside a file (bookend extents in file.c) + * + * The extent ref structure has fields for: + * + * - Objectid of the subvolume root + * - Generation number of the tree holding the reference + * - objectid of the file holding the reference + * - offset in the file corresponding to the key holding the reference + * + * When a file extent is allocated the fields are filled in: + * (root_key.objectid, trans->transid, inode objectid, offset in file) + * + * When a leaf is cow'd new references are added for every file extent found + * in the leaf. It looks the same as the create case, but trans->transid + * will be different when the block is cow'd. + * + * (root_key.objectid, trans->transid, inode objectid, offset in file) + * + * When a file extent is removed either during snapshot deletion or file + * truncation, the corresponding back reference is found + * by searching for: + * + * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), + * inode objectid, offset in file) + * + * Btree extents can be referenced by: + * + * - Different subvolumes + * - Different generations of the same subvolume + * + * Storing sufficient information for a full reverse mapping of a btree + * block would require storing the lowest key of the block in the backref, + * and it would require updating that lowest key either before write out or + * every time it changed. Instead, the objectid of the lowest key is stored + * along with the level of the tree block. This provides a hint + * about where in the btree the block can be found. Searches through the + * btree only need to look for a pointer to that block, so they stop one + * level higher than the level recorded in the backref. + * + * Some btrees do not do reference counting on their extents. These + * include the extent tree and the tree of tree roots. Backrefs for these + * trees always have a generation of zero. + * + * When a tree block is created, back references are inserted: + * + * (root->root_key.objectid, trans->transid or zero, lowest_key_objectid, level) + * + * When a tree block is cow'd in a reference counted root, + * new back references are added for all the blocks it points to. + * These are of the form (trans->transid will have increased since creation): + * + * (root->root_key.objectid, trans->transid, lowest_key_objectid, level) + * + * Because the lowest_key_objectid and the level are just hints + * they are not used when backrefs are deleted. When a backref is deleted: + * + * if backref was for a tree root: + * root_objectid = root->root_key.objectid + * else + * root_objectid = btrfs_header_owner(parent) + * + * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0) + * + * Back Reference Key hashing: + * + * Back references have four fields, each 64 bits long. Unfortunately, + * This is hashed into a single 64 bit number and placed into the key offset. + * The key objectid corresponds to the first byte in the extent, and the + * key type is set to BTRFS_EXTENT_REF_KEY + */ int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, @@ -939,10 +1027,13 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct u64 start; u64 end; struct btrfs_fs_info *info = extent_root->fs_info; + struct extent_buffer *eb; struct btrfs_path *path; struct btrfs_key ins; + struct btrfs_disk_key first; struct btrfs_extent_item extent_item; int ret; + int level; int err = 0; btrfs_set_stack_extent_refs(&extent_item, 1); @@ -961,10 +1052,19 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct &extent_item, sizeof(extent_item)); clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, GFP_NOFS); + eb = read_tree_block(extent_root, ins.objectid, ins.offset); + level = btrfs_header_level(eb); + if (level == 0) { + btrfs_item_key(eb, &first, 0); + } else { + btrfs_node_key(eb, &first, 0); + } err = btrfs_insert_extent_backref(trans, extent_root, path, start, extent_root->root_key.objectid, - 0, 0, 0); + 0, btrfs_disk_key_objectid(&first), + level); BUG_ON(err); + free_extent_buffer(eb); } btrfs_free_path(path); return 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5b1f90f06e03..1cc4d285951c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -545,13 +545,10 @@ next_slot: u64 disk_num_bytes = 0; u64 extent_num_bytes = 0; u64 root_gen; + u64 root_owner; - if (leaf != root->node) { - root_gen = - btrfs_header_generation(path->nodes[1]); - } else { - root_gen = btrfs_header_generation(leaf); - } + root_gen = btrfs_header_generation(leaf); + root_owner = btrfs_header_owner(leaf); if (found_extent) { disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, @@ -575,7 +572,7 @@ next_slot: ret = btrfs_free_extent(trans, root, disk_bytenr, disk_num_bytes, - root->root_key.objectid, + root_owner, root_gen, inode->i_ino, key.offset, 0); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bb70db0c9df4..03fea037667e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -563,6 +563,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, u64 extent_num_bytes = 0; u64 item_end = 0; u64 root_gen = 0; + u64 root_owner = 0; int found_extent; int del_item; int extent_type = -1; @@ -673,15 +674,8 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, found_extent = 1; inode->i_blocks -= num_dec; } - if (leaf == root->node) { - root_gen = - btrfs_header_generation(leaf); - } else { - struct extent_buffer *parent; - parent = path->nodes[1]; - root_gen = - btrfs_header_generation(parent); - } + root_gen = btrfs_header_generation(leaf); + root_owner = btrfs_header_owner(leaf); } } else if (extent_type == BTRFS_FILE_EXTENT_INLINE && !del_item) { @@ -703,7 +697,7 @@ delete: if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, - root->root_key.objectid, + root_owner, root_gen, inode->i_ino, found_key.offset, 0); BUG_ON(ret); From 3954401fa6013bb2f2c8758b903e9bffcf25b64b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 12 Dec 2007 14:38:19 -0500 Subject: [PATCH 0329/2985] Btrfs: Add back pointers from the inode to the directory that references it Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 21 +++- fs/btrfs/inode-item.c | 136 ++++++++++++++++++++++ fs/btrfs/inode.c | 258 +++++++++++++++++++----------------------- 3 files changed, 272 insertions(+), 143 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cb1b156d954e..035fa1550343 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -53,7 +53,7 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_CSUM_SIZE 32 /* four bytes for CRC32 */ #define BTRFS_CRC32_SIZE 4 -#define BTRFS_EMPTY_DIR_SIZE 6 +#define BTRFS_EMPTY_DIR_SIZE 0 #define BTRFS_FT_UNKNOWN 0 #define BTRFS_FT_REG_FILE 1 @@ -206,6 +206,11 @@ struct btrfs_extent_ref { __le64 offset; } __attribute__ ((__packed__)); +struct btrfs_inode_ref { + __le16 name_len; + /* name goes here */ +} __attribute__ ((__packed__)); + struct btrfs_inode_timespec { __le64 sec; __le32 nsec; @@ -379,7 +384,8 @@ struct btrfs_root { * the FS */ #define BTRFS_INODE_ITEM_KEY 1 -#define BTRFS_XATTR_ITEM_KEY 2 +#define BTRFS_INODE_REF_KEY 2 +#define BTRFS_XATTR_ITEM_KEY 8 /* reserve 2-15 close to the inode for later flexibility */ /* @@ -486,6 +492,9 @@ BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, used, 64); +/* struct btrfs_inode_ref */ +BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); + /* struct btrfs_inode_item */ BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); @@ -1043,6 +1052,14 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid); /* inode-item.c */ +int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + u64 inode_objectid, u64 ref_objectid); +int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + u64 inode_objectid, u64 ref_objectid); int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 35d2608f8918..cba30b6cc6fe 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -20,6 +20,142 @@ #include "disk-io.h" #include "transaction.h" +int find_name_in_backref(struct btrfs_path *path, const char * name, + int name_len, struct btrfs_inode_ref **ref_ret) +{ + struct extent_buffer *leaf; + struct btrfs_inode_ref *ref; + unsigned long ptr; + unsigned long name_ptr; + u32 item_size; + u32 cur_offset = 0; + int len; + + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + while (cur_offset < item_size) { + ref = (struct btrfs_inode_ref *)(ptr + cur_offset); + len = btrfs_inode_ref_name_len(leaf, ref); + name_ptr = (unsigned long)(ref + 1); + cur_offset += len + sizeof(*ref); + if (len != name_len) + continue; + if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) { + *ref_ret = ref; + return 1; + } + } + return 0; +} + +int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + u64 inode_objectid, u64 ref_objectid) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_inode_ref *ref; + struct extent_buffer *leaf; + unsigned long ptr; + unsigned long item_start; + u32 item_size; + u32 sub_item_len; + int ret; + int del_len = name_len + sizeof(*ref); + + key.objectid = inode_objectid; + key.offset = ref_objectid; + btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + ret = -ENOENT; + goto out; + } else if (ret < 0) { + goto out; + } + if (!find_name_in_backref(path, name, name_len, &ref)) { + ret = -ENOENT; + goto out; + } + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + if (del_len == item_size) { + ret = btrfs_del_item(trans, root, path); + goto out; + } + ptr = (unsigned long)ref; + sub_item_len = name_len + sizeof(*ref); + item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); + memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, + item_size - (ptr + sub_item_len - item_start)); + ret = btrfs_truncate_item(trans, root, path, + item_size - sub_item_len, 1); + BUG_ON(ret); +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + u64 inode_objectid, u64 ref_objectid) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_inode_ref *ref; + unsigned long ptr; + int ret; + int ins_len = name_len + sizeof(*ref); + + key.objectid = inode_objectid; + key.offset = ref_objectid; + btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_insert_empty_item(trans, root, path, &key, + ins_len); + if (ret == -EEXIST) { + u32 old_size; + + if (find_name_in_backref(path, name, name_len, &ref)) + goto out; + + old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); + ret = btrfs_extend_item(trans, root, path, ins_len); + BUG_ON(ret); + ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_ref); + ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); + btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); + ptr = (unsigned long)(ref + 1); + ret = 0; + } else if (ret < 0) { + goto out; + } else { + ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_ref); + btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); + ptr = (unsigned long)(ref + 1); + } + write_extent_buffer(path->nodes[0], name, ptr, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); + +out: + btrfs_free_path(path); + return ret; +} + int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 03fea037667e..cefe740b6c79 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -404,6 +404,17 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, ret = btrfs_delete_one_dir_name(trans, root, path, di); dentry->d_inode->i_ctime = dir->i_ctime; + if (!S_ISLNK(dentry->d_inode->i_mode)) { + ret = btrfs_del_inode_ref(trans, root, name, name_len, + dentry->d_inode->i_ino, + dentry->d_parent->d_inode->i_ino); + if (ret) { + printk("failed to delete reference to %.*s, " + "inode %lu parent %lu\n", name_len, name, + dentry->d_inode->i_ino, + dentry->d_parent->d_inode->i_ino); + } + } err: btrfs_free_path(path); if (!ret) { @@ -445,75 +456,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) int err; int ret; struct btrfs_root *root = BTRFS_I(dir)->root; - struct btrfs_path *path; - struct btrfs_key key; struct btrfs_trans_handle *trans; - struct btrfs_key found_key; - int found_type; - struct extent_buffer *leaf; - char *goodnames = ".."; unsigned long nr; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; - path = btrfs_alloc_path(); - BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - key.objectid = inode->i_ino; - key.offset = (u64)-1; - key.type = (u8)-1; - while(1) { - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { - err = ret; - goto out; - } - BUG_ON(ret == 0); - if (path->slots[0] == 0) { - err = -ENOENT; - goto out; - } - path->slots[0]--; - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - found_type = btrfs_key_type(&found_key); - if (found_key.objectid != inode->i_ino) { - err = -ENOENT; - goto out; - } - if ((found_type != BTRFS_DIR_ITEM_KEY && - found_type != BTRFS_DIR_INDEX_KEY) || - (!btrfs_match_dir_item_name(root, path, goodnames, 2) && - !btrfs_match_dir_item_name(root, path, goodnames, 1))) { - err = -ENOTEMPTY; - goto out; - } - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); - - if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1) - break; - btrfs_release_path(root, path); - } - ret = 0; - btrfs_release_path(root, path); /* now the directory is empty */ err = btrfs_unlink_trans(trans, root, dir, dentry); if (!err) { inode->i_size = 0; } -out: - btrfs_release_path(root, path); - btrfs_free_path(path); + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); + if (ret && !err) err = ret; return err; @@ -887,21 +850,59 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, struct btrfs_root *root = BTRFS_I(dir)->root; int ret = 0; + if (namelen == 1 && strcmp(name, ".") == 0) { + location->objectid = dir->i_ino; + location->type = BTRFS_INODE_ITEM_KEY; + location->offset = 0; + return 0; + } path = btrfs_alloc_path(); BUG_ON(!path); + + if (namelen == 1 && strcmp(name, "..") == 0) { + struct btrfs_key key; + struct extent_buffer *leaf; + u32 nritems; + int slot; + + key.objectid = dir->i_ino; + btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + BUG_ON(ret == 0); + ret = 0; + + leaf = path->nodes[0]; + slot = path->slots[0]; + nritems = btrfs_header_nritems(leaf); + if (slot >= nritems) + goto out_err; + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != dir->i_ino || + key.type != BTRFS_INODE_REF_KEY) { + goto out_err; + } + location->objectid = key.offset; + location->type = BTRFS_INODE_ITEM_KEY; + location->offset = 0; + goto out; + } + di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, namelen, 0); if (IS_ERR(di)) ret = PTR_ERR(di); if (!di || IS_ERR(di)) { - location->objectid = 0; - goto out; + goto out_err; } btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); out: - btrfs_release_path(root, path); btrfs_free_path(path); return ret; +out_err: + location->objectid = 0; + goto out; } /* @@ -1053,13 +1054,50 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (root->fs_info->tree_root == root) key_type = BTRFS_DIR_ITEM_KEY; + /* special case for "." */ + if (filp->f_pos == 0) { + over = filldir(dirent, ".", 1, + 1, inode->i_ino, + DT_DIR); + if (over) + return 0; + filp->f_pos = 1; + } + mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; + path = btrfs_alloc_path(); + path->reada = 2; + + /* special case for .., just use the back ref */ + if (filp->f_pos == 1) { + btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + BUG_ON(ret == 0); + leaf = path->nodes[0]; + slot = path->slots[0]; + nritems = btrfs_header_nritems(leaf); + if (slot >= nritems) { + btrfs_release_path(root, path); + goto read_dir_items; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + btrfs_release_path(root, path); + if (found_key.objectid != key.objectid || + found_key.type != BTRFS_INODE_REF_KEY) + goto read_dir_items; + over = filldir(dirent, "..", 2, + 2, found_key.offset, DT_DIR); + if (over) + goto nopos; + filp->f_pos = 2; + } + +read_dir_items: btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; - path = btrfs_alloc_path(); - path->reada = 2; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; @@ -1255,6 +1293,13 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_parent->d_inode->i_ino, &key, btrfs_inode_type(inode)); if (ret == 0) { + if (!S_ISLNK(inode->i_mode)) { + ret = btrfs_insert_inode_ref(trans, root, + dentry->d_name.name, + dentry->d_name.len, + inode->i_ino, + dentry->d_parent->d_inode->i_ino); + } parent_inode = dentry->d_parent->d_inode; parent_inode->i_size += dentry->d_name.len * 2; parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; @@ -1430,35 +1475,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, return err; } -static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, u64 dirid) -{ - int ret; - char buf[2]; - struct btrfs_key key; - - buf[0] = '.'; - buf[1] = '.'; - - key.objectid = objectid; - key.offset = 0; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - - ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, - &key, BTRFS_FT_DIR); - if (ret) - goto error; - - key.objectid = dirid; - ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, - &key, BTRFS_FT_DIR); - if (ret) - goto error; -error: - return ret; -} - static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { struct inode *inode; @@ -1496,11 +1512,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_fop = &btrfs_dir_file_operations; btrfs_set_trans_block_group(trans, inode); - err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino); - if (err) - goto out_fail; - - inode->i_size = 6; + inode->i_size = 0; err = btrfs_update_inode(trans, root, inode); if (err) goto out_fail; @@ -1971,6 +1983,12 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail; + ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, + name, namelen, objectid, + root->fs_info->sb->s_root->d_inode->i_ino); + if (ret) + goto fail; + ret = btrfs_commit_transaction(trans, root); if (ret) goto fail_commit; @@ -1989,12 +2007,10 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) inode->i_fop = &btrfs_dir_file_operations; new_root->inode = inode; - ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid); - if (ret) - goto fail; - + ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid, + new_dirid); inode->i_nlink = 1; - inode->i_size = 6; + inode->i_size = 0; ret = btrfs_update_inode(trans, new_root, inode); if (ret) goto fail; @@ -2068,6 +2084,13 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail; + ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, + name, namelen, objectid, + root->fs_info->sb->s_root->d_inode->i_ino); + + if (ret) + goto fail; + ret = btrfs_inc_root_ref(trans, root, objectid); if (ret) goto fail; @@ -2338,7 +2361,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode *old_inode = old_dentry->d_inode; struct timespec ctime = CURRENT_TIME; struct btrfs_path *path; - struct btrfs_dir_item *di; int ret; if (S_ISDIR(old_inode->i_mode) && new_inode && @@ -2361,52 +2383,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; - if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { - struct btrfs_key *location = &BTRFS_I(new_dir)->location; - struct btrfs_key old_parent_key; - di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino, - "..", 2, -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out_fail; - } - if (!di) { - ret = -ENOENT; - goto out_fail; - } - btrfs_dir_item_key_to_cpu(path->nodes[0], di, &old_parent_key); - ret = btrfs_del_item(trans, root, path); - if (ret) { - goto out_fail; - } - btrfs_release_path(root, path); - - di = btrfs_lookup_dir_index_item(trans, root, path, - old_inode->i_ino, - old_parent_key.objectid, - "..", 2, -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out_fail; - } - if (!di) { - ret = -ENOENT; - goto out_fail; - } - ret = btrfs_del_item(trans, root, path); - if (ret) { - goto out_fail; - } - btrfs_release_path(root, path); - - ret = btrfs_insert_dir_item(trans, root, "..", 2, - old_inode->i_ino, location, - BTRFS_FT_DIR); - if (ret) - goto out_fail; - } - - ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry); if (ret) goto out_fail; From 70b043f0c7d7c78d8a0a01e65067c7c8fe338485 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 13 Dec 2007 09:02:46 -0500 Subject: [PATCH 0330/2985] Btrfs: Extra NULL block group checks in find_free_extent Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 187be4012474..7c953b2ecf15 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1319,6 +1319,12 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root total_needed += empty_size; path = btrfs_alloc_path(); check_failed: + if (!block_group) { + block_group = btrfs_lookup_block_group(info, search_start); + if (!block_group) + block_group = btrfs_lookup_block_group(info, + orig_search_start); + } search_start = find_search_start(root, &block_group, search_start, total_needed, data, full_scan); search_start = stripe_align(root, search_start); From 76fea00a0522185658b733d441d6ec049344456b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 13 Dec 2007 09:06:01 -0500 Subject: [PATCH 0331/2985] Btrfs: Add backrefs for symbolic link inodes Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cefe740b6c79..06d92be16805 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -404,16 +404,14 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, ret = btrfs_delete_one_dir_name(trans, root, path, di); dentry->d_inode->i_ctime = dir->i_ctime; - if (!S_ISLNK(dentry->d_inode->i_mode)) { - ret = btrfs_del_inode_ref(trans, root, name, name_len, - dentry->d_inode->i_ino, - dentry->d_parent->d_inode->i_ino); - if (ret) { - printk("failed to delete reference to %.*s, " - "inode %lu parent %lu\n", name_len, name, - dentry->d_inode->i_ino, - dentry->d_parent->d_inode->i_ino); - } + ret = btrfs_del_inode_ref(trans, root, name, name_len, + dentry->d_inode->i_ino, + dentry->d_parent->d_inode->i_ino); + if (ret) { + printk("failed to delete reference to %.*s, " + "inode %lu parent %lu\n", name_len, name, + dentry->d_inode->i_ino, + dentry->d_parent->d_inode->i_ino); } err: btrfs_free_path(path); @@ -1293,13 +1291,11 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_parent->d_inode->i_ino, &key, btrfs_inode_type(inode)); if (ret == 0) { - if (!S_ISLNK(inode->i_mode)) { - ret = btrfs_insert_inode_ref(trans, root, - dentry->d_name.name, - dentry->d_name.len, - inode->i_ino, - dentry->d_parent->d_inode->i_ino); - } + ret = btrfs_insert_inode_ref(trans, root, + dentry->d_name.name, + dentry->d_name.len, + inode->i_ino, + dentry->d_parent->d_inode->i_ino); parent_inode = dentry->d_parent->d_inode; parent_inode->i_size += dentry->d_name.len * 2; parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; From 7a7205367dfd9adf129f0ebc7d040a557fe0d1ba Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 13 Dec 2007 09:06:59 -0500 Subject: [PATCH 0332/2985] Btrfs: Fix typo in .. check (thanks Yan) Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 06d92be16805..41b0c7de7a59 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -857,7 +857,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, path = btrfs_alloc_path(); BUG_ON(!path); - if (namelen == 1 && strcmp(name, "..") == 0) { + if (namelen == 2 && strcmp(name, "..") == 0) { struct btrfs_key key; struct extent_buffer *leaf; u32 nritems; From 6caab489c5866788ae64a72986520b1f0e40c706 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 13 Dec 2007 09:48:07 -0500 Subject: [PATCH 0333/2985] Fix btrfs_inc_ref to add backref hints Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7c953b2ecf15..057cdd49c09b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -702,7 +702,7 @@ int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, } return btrfs_inc_extent_ref(trans, root, root->node->start, root->node->len, owner_objectid, - generation, 0, 0); + generation, key_objectid, level); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -746,10 +746,12 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } } else { bytenr = btrfs_node_blockptr(buf, i); + btrfs_node_key_to_cpu(buf, &key, i); ret = btrfs_inc_extent_ref(trans, root, bytenr, btrfs_level_size(root, level - 1), root->root_key.objectid, - trans->transid, 0, 0); + trans->transid, key.objectid, + level - 1); if (ret) { faili = i; goto fail; From f6dbff55d77dee363c22873481db54d3bada3ea6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 13 Dec 2007 11:13:32 -0500 Subject: [PATCH 0334/2985] Btrfs: Reorder extent back refs to differentiate btree blocks from file data Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/extent-tree.c | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 035fa1550343..052555ced44d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -37,11 +37,12 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_B2RfS_M" +#define BTRFS_MAX_LEVEL 8 #define BTRFS_ROOT_TREE_OBJECTID 1ULL #define BTRFS_EXTENT_TREE_OBJECTID 2ULL #define BTRFS_FS_TREE_OBJECTID 3ULL #define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL -#define BTRFS_FIRST_FREE_OBJECTID 5ULL +#define BTRFS_FIRST_FREE_OBJECTID 256ULL /* * we can actually store much bigger names, but lets not confuse the rest @@ -107,7 +108,6 @@ struct btrfs_header { u8 level; } __attribute__ ((__packed__)); -#define BTRFS_MAX_LEVEL 8 #define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ sizeof(struct btrfs_header)) / \ sizeof(struct btrfs_key_ptr)) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 057cdd49c09b..00414836e963 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -520,13 +520,13 @@ out: * * When a tree block is created, back references are inserted: * - * (root->root_key.objectid, trans->transid or zero, lowest_key_objectid, level) + * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid) * * When a tree block is cow'd in a reference counted root, * new back references are added for all the blocks it points to. * These are of the form (trans->transid will have increased since creation): * - * (root->root_key.objectid, trans->transid, lowest_key_objectid, level) + * (root->root_key.objectid, trans->transid, level, lowest_key_objectid) * * Because the lowest_key_objectid and the level are just hints * they are not used when backrefs are deleted. When a backref is deleted: @@ -702,7 +702,7 @@ int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, } return btrfs_inc_extent_ref(trans, root, root->node->start, root->node->len, owner_objectid, - generation, key_objectid, level); + generation, level, key_objectid); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -750,8 +750,8 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_inc_extent_ref(trans, root, bytenr, btrfs_level_size(root, level - 1), root->root_key.objectid, - trans->transid, key.objectid, - level - 1); + trans->transid, + level - 1, key.objectid); if (ret) { faili = i; goto fail; @@ -1063,8 +1063,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct } err = btrfs_insert_extent_backref(trans, extent_root, path, start, extent_root->root_key.objectid, - 0, btrfs_disk_key_objectid(&first), - level); + 0, level, + btrfs_disk_key_objectid(&first)); BUG_ON(err); free_extent_buffer(eb); } @@ -1640,7 +1640,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, ret = btrfs_alloc_extent(trans, root, blocksize, root_objectid, ref_generation, - first_objectid, level, empty_size, hint, + level, first_objectid, empty_size, hint, (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); From 6e3b96668eb34da6060a5189e6a73e9d62a3b178 Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 14 Dec 2007 11:14:42 -0500 Subject: [PATCH 0335/2985] Off by one fix for btrfs_drop_extents One of my old patches introduces a new bug to btrfs_drop_extents(changeset 275). Inline extents are not truncated properly when "extent_end == end", it can trigger the BUG_ON at file.c:600. I hope I don't introduce new bug this time. --- Signed-off-by: Chris Mason --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1cc4d285951c..a280b9562221 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -481,8 +481,9 @@ next_slot: search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; - if (end < extent_end && start > key.offset && found_inline) { + if (end <= extent_end && start >= key.offset && found_inline) { *hint_byte = EXTENT_MAP_INLINE; + continue; } if (end < extent_end && end >= key.offset) { if (found_extent) { From e9906a98497c26a3c1a7532b51b81cc1fbdf9e1d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 14 Dec 2007 12:56:58 -0500 Subject: [PATCH 0336/2985] Fixes for loopback files in btrfs Signed-off-by: Chris Mason --- fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a280b9562221..62fcd79d8ab3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -861,6 +861,7 @@ struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .aio_read = generic_file_aio_read, + .splice_read = generic_file_splice_read, .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 41b0c7de7a59..e535c50e669b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1893,8 +1893,19 @@ static void btrfs_truncate(struct inode *inode) int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { - return extent_commit_write(&BTRFS_I(page->mapping->host)->extent_tree, - page->mapping->host, page, from, to); + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + struct inode *inode = page->mapping->host; + + btrfs_cow_one_page(inode, page, PAGE_CACHE_SIZE); + + set_page_extent_mapped(page); + set_page_dirty(page); + + if (pos > inode->i_size) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } + return 0; } static int create_subvol(struct btrfs_root *root, char *name, int namelen) From b6cda9bcb4df7544c67fc3548a53bc1607d59f46 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 14 Dec 2007 15:30:32 -0500 Subject: [PATCH 0337/2985] Btrfs: Add mount -o nodatasum to turn of file data checksumming Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 14 ++++++++------ fs/btrfs/disk-io.c | 1 + fs/btrfs/inode.c | 11 ++++++++++- fs/btrfs/super.c | 17 +++++++++++++++-- 4 files changed, 34 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 052555ced44d..5e255cabfd10 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -321,6 +321,7 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; + unsigned long mount_opt; struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; struct extent_buffer *sb_buffer; @@ -429,6 +430,13 @@ struct btrfs_root { */ #define BTRFS_STRING_ITEM_KEY 253 +#define BTRFS_MOUNT_NODATASUM 0x1 + +#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) +#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) +#define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ + BTRFS_MOUNT_##opt) + /* some macros to generate set/get funcs for the struct fields. This * assumes there is a lefoo_to_cpu for every type, so lets make a simple * one for u8: @@ -906,12 +914,6 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { ((unsigned long)(btrfs_leaf_data(leaf) + \ btrfs_item_offset_nr(leaf, slot))) -/* mount option defines and helpers */ -#define BTRFS_MOUNT_SUBVOL 0x000001 -#define btrfs_clear_opt(o, opt) o &= ~BTRFS_MOUNT_##opt -#define btrfs_set_opt(o, opt) o |= BTRFS_MOUNT_##opt -#define btrfs_test_opt(sb, opt) (BTRFS_SB(sb)->s_mount_opt & \ - BTRFS_MOUNT_##opt) /* extent-tree.c */ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60a30da6af00..4338b00c2a25 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -568,6 +568,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->sb = sb; + fs_info->mount_opt = 0; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e535c50e669b..11885cb114e2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -116,10 +116,13 @@ int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; char *kaddr; - int ret; + int ret = 0; u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; size_t offset = start - page_start; + if (btrfs_test_opt(root, NODATASUM)) + return 0; + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -143,6 +146,9 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) struct btrfs_path *path = NULL; u32 csum; + if (btrfs_test_opt(root, NODATASUM)) + return 0; + mutex_lock(&root->fs_info->fs_mutex); path = btrfs_alloc_path(); item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); @@ -176,6 +182,9 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) u32 csum = ~(u32)0; unsigned long flags; + if (btrfs_test_opt(root, NODATASUM)) + return 0; + ret = get_state_private(em_tree, start, &private); local_irq_save(flags); kaddr = kmap_atomic(page, KM_IRQ0); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ffa02872c4d6..ad4f280ec777 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -61,11 +61,12 @@ static void btrfs_put_super (struct super_block * sb) } enum { - Opt_subvol, Opt_err, + Opt_subvol, Opt_nodatasum, Opt_err, }; static match_table_t tokens = { {Opt_subvol, "subvol=%s"}, + {Opt_nodatasum, "nodatasum"}, {Opt_err, NULL} }; @@ -74,7 +75,12 @@ static int parse_options (char * options, char **subvol_name) { char * p; + struct btrfs_fs_info *info = NULL; substring_t args[MAX_OPT_ARGS]; + + if (root) + info = root->fs_info; + if (!options) return 1; @@ -86,7 +92,12 @@ static int parse_options (char * options, token = match_token(p, tokens, args); switch (token) { case Opt_subvol: - *subvol_name = match_strdup(&args[0]); + if (subvol_name) + *subvol_name = match_strdup(&args[0]); + break; + case Opt_nodatasum: + if (root) + btrfs_set_opt(info->mount_opt, NODATASUM); break; default: return 0; @@ -143,6 +154,8 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) goto fail_close; } + parse_options((char *)data, tree_root, NULL); + /* this does the super kobj at the same time */ err = btrfs_sysfs_add_super(tree_root->fs_info); if (err) From be20aa9dbadc8c06283784ee12bbc0d97dea3418 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Dec 2007 20:14:01 -0500 Subject: [PATCH 0338/2985] Btrfs: Add mount option to turn off data cow A number of workloads do not require copy on write data or checksumming. mount -o nodatasum to disable checksums and -o nodatacow to disable both copy on write and checksumming. In nodatacow mode, copy on write is still performed when a given extent is under snapshot. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 53 +++++++++++++++++- fs/btrfs/ctree.h | 8 +++ fs/btrfs/extent-tree.c | 101 +++++++++++++++++++++++++++++++++- fs/btrfs/inode.c | 122 ++++++++++++++++++++++++++++++++++++----- fs/btrfs/super.c | 34 +++++++++--- 5 files changed, 293 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index fd8233e05cf4..585f279d1112 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -70,7 +70,58 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } -static int __btrfs_cow_block(struct btrfs_trans_handle *trans, +int btrfs_copy_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer **cow_ret, u64 new_root_objectid) +{ + struct extent_buffer *cow; + u32 nritems; + int ret = 0; + int level; + struct btrfs_key first_key; + struct btrfs_root new_root; + + memcpy(&new_root, root, sizeof(new_root)); + new_root.root_key.objectid = new_root_objectid; + + WARN_ON(root->ref_cows && trans->transid != + root->fs_info->running_transaction->transid); + WARN_ON(root->ref_cows && trans->transid != root->last_trans); + + level = btrfs_header_level(buf); + nritems = btrfs_header_nritems(buf); + if (nritems) { + if (level == 0) + btrfs_item_key_to_cpu(buf, &first_key, 0); + else + btrfs_node_key_to_cpu(buf, &first_key, 0); + } else { + first_key.objectid = 0; + } + cow = __btrfs_alloc_free_block(trans, &new_root, buf->len, + new_root_objectid, + trans->transid, first_key.objectid, + level, buf->start, 0); + if (IS_ERR(cow)) + return PTR_ERR(cow); + + copy_extent_buffer(cow, buf, 0, 0, cow->len); + btrfs_set_header_bytenr(cow, cow->start); + btrfs_set_header_generation(cow, trans->transid); + btrfs_set_header_owner(cow, new_root_objectid); + + WARN_ON(btrfs_header_generation(buf) > trans->transid); + ret = btrfs_inc_ref(trans, &new_root, buf); + if (ret) + return ret; + + btrfs_mark_buffer_dirty(cow); + *cow_ret = cow; + return 0; +} + +int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5e255cabfd10..b51b021fff85 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -431,6 +431,7 @@ struct btrfs_root { #define BTRFS_STRING_ITEM_KEY 253 #define BTRFS_MOUNT_NODATASUM 0x1 +#define BTRFS_MOUNT_NODATACOW 0x2 #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -915,6 +916,9 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { btrfs_item_offset_nr(leaf, slot))) /* extent-tree.c */ +u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, + struct btrfs_path *count_path, + u64 first_extent); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); @@ -974,6 +978,10 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret); +int btrfs_copy_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer **cow_ret, u64 new_root_objectid); int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); int btrfs_truncate_item(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 00414836e963..1412d556313f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -679,6 +679,104 @@ out: return 0; } +u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, + struct btrfs_path *count_path, + u64 first_extent) +{ + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_path *path; + u64 bytenr; + u64 found_objectid; + u64 root_objectid = 0; + u32 total_count = 0; + u32 cur_count; + u32 refs; + u32 nritems; + int ret; + struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *l; + struct btrfs_extent_item *item; + struct btrfs_extent_ref *ref_item; + int level = -1; + + path = btrfs_alloc_path(); +again: + if (level == -1) + bytenr = first_extent; + else + bytenr = count_path->nodes[level]->start; + + cur_count = 0; + key.objectid = bytenr; + key.offset = 0; + + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); + if (ret < 0) + goto out; + BUG_ON(ret == 0); + + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); + + if (found_key.objectid != bytenr || + found_key.type != BTRFS_EXTENT_ITEM_KEY) { + goto out; + } + + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); + refs = btrfs_extent_refs(l, item); + while (1) { + nritems = btrfs_header_nritems(l); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(extent_root, path); + if (ret == 0) + continue; + break; + } + btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); + if (found_key.objectid != bytenr) + break; + if (found_key.type != BTRFS_EXTENT_REF_KEY) { + path->slots[0]++; + continue; + } + + cur_count++; + ref_item = btrfs_item_ptr(l, path->slots[0], + struct btrfs_extent_ref); + found_objectid = btrfs_ref_root(l, ref_item); + + if (found_objectid != root_objectid) + total_count++; + + if (total_count > 1) + goto out; + + if (root_objectid == 0) + root_objectid = found_objectid; + + path->slots[0]++; + } + if (cur_count == 0) { + total_count = 0; + goto out; + } + if (total_count > 1) + goto out; + if (level >= 0 && root->node == count_path->nodes[level]) + goto out; + level++; + btrfs_release_path(root, path); + goto again; + +out: + btrfs_free_path(path); + return total_count; + +} + int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner_objectid) { @@ -1127,9 +1225,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - if (ref_generation && owner_objectid == 0 && root_objectid == 3) { -//printk("drop backref root %Lu gen %Lu byte %Lu\n", root_objectid, ref_generation, bytenr ); - } ret = lookup_extent_backref(trans, extent_root, path, bytenr, root_objectid, ref_generation, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 11885cb114e2..91f3fc43e2a9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -72,21 +72,22 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; -static int run_delalloc_range(struct inode *inode, u64 start, u64 end) +static int cow_file_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - struct btrfs_key ins; u64 alloc_hint = 0; u64 num_bytes; - int ret; u64 blocksize = root->sectorsize; + struct btrfs_key ins; + int ret; - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); BUG_ON(!trans); + btrfs_set_trans_block_group(trans, inode); + num_bytes = (end - start + blocksize) & ~(blocksize - 1); + num_bytes = max(blocksize, num_bytes); ret = btrfs_drop_extents(trans, root, inode, start, start + num_bytes, start, &alloc_hint); @@ -106,6 +107,101 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) ins.offset); out: btrfs_end_transaction(trans, root); + return ret; +} + +static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) +{ + u64 extent_start; + u64 extent_end; + u64 bytenr; + u64 cow_end; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_buffer *leaf; + int found_type; + struct btrfs_path *path; + struct btrfs_file_extent_item *item; + int ret; + int err; + struct btrfs_key found_key; + + path = btrfs_alloc_path(); + BUG_ON(!path); +again: + ret = btrfs_lookup_file_extent(NULL, root, path, + inode->i_ino, start, 0); + if (ret < 0) { + btrfs_free_path(path); + return ret; + } + + cow_end = end; + if (ret != 0) { + if (path->slots[0] == 0) + goto not_found; + path->slots[0]--; + } + + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + /* are we inside the extent that was found? */ + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); + if (found_key.objectid != inode->i_ino || + found_type != BTRFS_EXTENT_DATA_KEY) { + goto not_found; + } + + found_type = btrfs_file_extent_type(leaf, item); + extent_start = found_key.offset; + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = extent_start + + btrfs_file_extent_num_bytes(leaf, item); + err = 0; + + if (start < extent_start || start >= extent_end) + goto not_found; + + cow_end = min(end, extent_end - 1); + bytenr = btrfs_file_extent_disk_bytenr(leaf, item); + if (bytenr == 0) + goto not_found; + + bytenr += btrfs_file_extent_offset(leaf, item); + if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) { + goto not_found; + } + + start = extent_end; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + goto not_found; + } +loop: + if (start > end) { + btrfs_free_path(path); + return 0; + } + btrfs_release_path(root, path); + goto again; + +not_found: + cow_file_range(inode, start, cow_end); + start = cow_end + 1; + goto loop; +} + +static int run_delalloc_range(struct inode *inode, u64 start, u64 end) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + mutex_lock(&root->fs_info->fs_mutex); + if (btrfs_test_opt(root, NODATACOW)) + ret = run_delalloc_nocow(inode, start, end); + else + ret = cow_file_range(inode, start, end); mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -1907,9 +2003,6 @@ int btrfs_commit_write(struct file *file, struct page *page, btrfs_cow_one_page(inode, page, PAGE_CACHE_SIZE); - set_page_extent_mapped(page); - set_page_dirty(page); - if (pos > inode->i_size) { i_size_write(inode, pos); mark_inode_dirty(inode); @@ -2078,13 +2171,18 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + extent_buffer_get(root->node); btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); free_extent_buffer(tmp); - btrfs_set_root_bytenr(&new_root_item, root->node->start); - btrfs_set_root_level(&new_root_item, btrfs_header_level(root->node)); + + btrfs_copy_root(trans, root, root->node, &tmp, objectid); + + btrfs_set_root_bytenr(&new_root_item, tmp->start); + btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); + free_extent_buffer(tmp); if (ret) goto fail; @@ -2106,10 +2204,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail; - - ret = btrfs_inc_root_ref(trans, root, objectid); - if (ret) - goto fail; fail: nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ad4f280ec777..2116728d1f98 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -61,12 +61,13 @@ static void btrfs_put_super (struct super_block * sb) } enum { - Opt_subvol, Opt_nodatasum, Opt_err, + Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_err, }; static match_table_t tokens = { {Opt_subvol, "subvol=%s"}, {Opt_nodatasum, "nodatasum"}, + {Opt_nodatacow, "nodatacow"}, {Opt_err, NULL} }; @@ -78,12 +79,20 @@ static int parse_options (char * options, struct btrfs_fs_info *info = NULL; substring_t args[MAX_OPT_ARGS]; - if (root) - info = root->fs_info; - if (!options) return 1; + /* + * strsep changes the string, duplicate it because parse_options + * gets called twice + */ + options = kstrdup(options, GFP_NOFS); + if (!options) + return -ENOMEM; + + if (root) + info = root->fs_info; + while ((p = strsep (&options, ",")) != NULL) { int token; if (!*p) @@ -92,17 +101,28 @@ static int parse_options (char * options, token = match_token(p, tokens, args); switch (token) { case Opt_subvol: - if (subvol_name) + if (subvol_name) { *subvol_name = match_strdup(&args[0]); + } break; case Opt_nodatasum: - if (root) + if (info) { + printk("btrfs: setting nodatacsum\n"); btrfs_set_opt(info->mount_opt, NODATASUM); + } + break; + case Opt_nodatacow: + if (info) { + printk("btrfs: setting nodatacow\n"); + btrfs_set_opt(info->mount_opt, NODATACOW); + btrfs_set_opt(info->mount_opt, NODATASUM); + } break; default: - return 0; + break; } } + kfree(options); return 1; } From d10c5f31c79c6a703974ae29d0a05781b828fe30 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Dec 2007 20:14:04 -0500 Subject: [PATCH 0339/2985] Btrfs: Fix extent_map and extent_state leaks by flushing lrus on FS unmount Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4338b00c2a25..278564178993 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -765,8 +765,18 @@ int close_ctree(struct btrfs_root *root) btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); + + filemap_write_and_wait(fs_info->btree_inode->i_mapping); + + extent_map_tree_empty_lru(&fs_info->free_space_cache); + extent_map_tree_empty_lru(&fs_info->block_group_cache); + extent_map_tree_empty_lru(&fs_info->pinned_extents); + extent_map_tree_empty_lru(&fs_info->pending_del); + extent_map_tree_empty_lru(&fs_info->extent_ins); extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree); + truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + iput(fs_info->btree_inode); #if 0 while(!list_empty(&fs_info->hashers)) { From c59f8951d48c5eb000926935f3ab063d8181d1ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Dec 2007 20:14:04 -0500 Subject: [PATCH 0340/2985] Btrfs: Add mount option to enforce a max extent size Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/inode.c | 28 ++++++++++++++++++---------- fs/btrfs/super.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 65 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b51b021fff85..32b24460ec82 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -322,6 +322,7 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; unsigned long mount_opt; + u64 max_extent; struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; struct extent_buffer *sb_buffer; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 278564178993..7213012c27d5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -569,6 +569,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->extent_root = extent_root; fs_info->sb = sb; fs_info->mount_opt = 0; + fs_info->max_extent = (u64)-1; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 91f3fc43e2a9..686dd03f34f2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -78,6 +78,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; u64 alloc_hint = 0; u64 num_bytes; + u64 cur_alloc_size; u64 blocksize = root->sectorsize; struct btrfs_key ins; int ret; @@ -94,17 +95,24 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) if (alloc_hint == EXTENT_MAP_INLINE) goto out; - ret = btrfs_alloc_extent(trans, root, num_bytes, - root->root_key.objectid, trans->transid, - inode->i_ino, start, 0, - alloc_hint, (u64)-1, &ins, 1); - if (ret) { - WARN_ON(1); - goto out; + while(num_bytes > 0) { + cur_alloc_size = min(num_bytes, root->fs_info->max_extent); + ret = btrfs_alloc_extent(trans, root, cur_alloc_size, + root->root_key.objectid, + trans->transid, + inode->i_ino, start, 0, + alloc_hint, (u64)-1, &ins, 1); + if (ret) { + WARN_ON(1); + goto out; + } + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + start, ins.objectid, ins.offset, + ins.offset); + num_bytes -= cur_alloc_size; + alloc_hint = ins.objectid + ins.offset; + start += cur_alloc_size; } - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start, ins.objectid, ins.offset, - ins.offset); out: btrfs_end_transaction(trans, root); return ret; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2116728d1f98..b7323914bf5a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -61,16 +62,42 @@ static void btrfs_put_super (struct super_block * sb) } enum { - Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_err, + Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_err, }; static match_table_t tokens = { {Opt_subvol, "subvol=%s"}, {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, + {Opt_max_extent, "max_extent=%s"}, {Opt_err, NULL} }; +static unsigned long parse_size(char *str) +{ + unsigned long res; + int mult = 1; + char *end; + char last; + + res = simple_strtoul(str, &end, 10); + + last = end[0]; + if (isalpha(last)) { + last = tolower(last); + switch (last) { + case 'g': + mult *= 1024; + case 'm': + mult *= 1024; + case 'k': + mult *= 1024; + } + res = res * mult; + } + return res; +} + static int parse_options (char * options, struct btrfs_root *root, char **subvol_name) @@ -118,6 +145,21 @@ static int parse_options (char * options, btrfs_set_opt(info->mount_opt, NODATASUM); } break; + case Opt_max_extent: + if (info) { + char *num = match_strdup(&args[0]); + if (num) { + info->max_extent = parse_size(num); + kfree(num); + + info->max_extent = max_t(u64, + info->max_extent, + root->sectorsize); + printk("btrfs: max_extent at %Lu\n", + info->max_extent); + } + } + break; default: break; } @@ -329,6 +371,8 @@ static int btrfs_get_sb(struct file_system_type *fs_type, ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, btrfs_fill_super, mnt, subvol_name ? subvol_name : "default"); + if (subvol_name) + kfree(subvol_name); return ret; } From 6da6abae027e2dbc59bca5f4168b0760f25068c7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 18 Dec 2007 16:15:09 -0500 Subject: [PATCH 0341/2985] Btrfs: Back port to 2.6.18-el kernels Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 13 ++++ fs/btrfs/disk-io.c | 5 +- fs/btrfs/extent_map.c | 168 +++++++++++++++++++++++++++++++++++++++-- fs/btrfs/file.c | 15 +++- fs/btrfs/inode.c | 21 ++++-- fs/btrfs/super.c | 1 + fs/btrfs/sysfs.c | 23 +++++- fs/btrfs/transaction.c | 8 ++ fs/btrfs/transaction.h | 5 ++ 9 files changed, 239 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 32b24460ec82..2559f5d5d00a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -19,6 +19,7 @@ #ifndef __BTRFS__ #define __BTRFS__ +#include #include #include #include @@ -334,7 +335,11 @@ struct btrfs_fs_info { struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + struct work_struct trans_work; +#else struct delayed_work trans_work; +#endif struct kobject super_kobj; struct completion kobj_unregister; int do_barriers; @@ -916,6 +921,14 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { ((unsigned long)(btrfs_leaf_data(leaf) + \ btrfs_item_offset_nr(leaf, slot))) +static inline struct dentry *fdentry(struct file *file) { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + return file->f_dentry; +#else + return file->f_path.dentry; +#endif +} + /* extent-tree.c */ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, struct btrfs_path *count_path, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7213012c27d5..ebb2db624fdd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -593,8 +593,11 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->closing = 0; fs_info->total_pinned = 0; - +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); +#else INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); +#endif BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 8326a18413ff..0d1e59a86e49 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1585,8 +1585,18 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, static int submit_one_bio(int rw, struct bio *bio) { + u64 maxsector; int ret = 0; + bio_get(bio); + + maxsector = bio->bi_bdev->bd_inode->i_size >> 9; + if (maxsector < bio->bi_sector) { + printk("sector too large max %Lu got %llu\n", maxsector, + (unsigned long long)bio->bi_sector); + WARN_ON(1); + } + submit_bio(rw, bio); if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; @@ -1678,8 +1688,12 @@ static int __extent_read_full_page(struct extent_map_tree *tree, while (cur <= end) { if (cur >= last_byte) { + char *userpage; iosize = PAGE_CACHE_SIZE - page_offset; - zero_user_page(page, page_offset, iosize, KM_USER0); + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, GFP_NOFS); unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -1707,7 +1721,12 @@ static int __extent_read_full_page(struct extent_map_tree *tree, /* we've found a hole, just zero and go on */ if (block_start == EXTENT_MAP_HOLE) { - zero_user_page(page, page_offset, iosize, KM_USER0); + char *userpage; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, GFP_NOFS); unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -1804,9 +1823,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, } if (page->index == end_index) { + char *userpage; + size_t offset = i_size & (PAGE_CACHE_SIZE - 1); - zero_user_page(page, offset, - PAGE_CACHE_SIZE - offset, KM_USER0); + + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); } set_page_extent_mapped(page); @@ -1921,6 +1945,129 @@ done: return 0; } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + +/* Taken directly from 2.6.23 for 2.6.18 back port */ +typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, + void *data); + +/** + * write_cache_pages - walk the list of dirty pages of the given address space + * and write all of them. + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * @writepage: function called for each page + * @data: data passed to writepage function + * + * If a page is already under I/O, write_cache_pages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. + */ +static int write_cache_pages(struct address_space *mapping, + struct writeback_control *wbc, writepage_t writepage, + void *data) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc, data); + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { + unlock_page(page); + ret = 0; + } + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} +#endif + int extent_write_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent, struct writeback_control *wbc) @@ -1945,18 +2092,20 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, ret = __extent_writepage(page, wbc, &epd); write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); - if (epd.bio) + if (epd.bio) { submit_one_bio(WRITE, epd.bio); + } return ret; } EXPORT_SYMBOL(extent_write_full_page); + int extent_writepages(struct extent_map_tree *tree, struct address_space *mapping, get_extent_t *get_extent, struct writeback_control *wbc) { - int ret; + int ret = 0; struct extent_page_data epd = { .bio = NULL, .tree = tree, @@ -1964,8 +2113,9 @@ int extent_writepages(struct extent_map_tree *tree, }; ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); - if (epd.bio) + if (epd.bio) { submit_one_bio(WRITE, epd.bio); + } return ret; } EXPORT_SYMBOL(extent_writepages); @@ -2106,7 +2256,9 @@ int extent_prepare_write(struct extent_map_tree *tree, flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); } - if (!isnew && !PageUptodate(page) && + if ((em->block_start != EXTENT_MAP_HOLE && + em->block_start != EXTENT_MAP_INLINE) && + !isnew && !PageUptodate(page) && (block_off_end > to || block_off_start < from) && !test_range_bit(tree, block_start, cur_end, EXTENT_UPTODATE, 1)) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 62fcd79d8ab3..461b09663fed 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -231,7 +231,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, { int err = 0; int i; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 hint_byte; @@ -652,7 +652,7 @@ static int prepare_pages(struct btrfs_root *root, { int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; int err = 0; u64 start_pos; @@ -666,7 +666,11 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; BUG_ON(1); } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(pages[i]); +#else cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); +#endif wait_on_page_writeback(pages[i]); set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); @@ -682,7 +686,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ssize_t num_written = 0; ssize_t err = 0; int ret = 0; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; int nrptrs; @@ -707,7 +711,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out; if (count == 0) goto out; - err = remove_suid(file->f_path.dentry); + err = remove_suid(fdentry(file)); if (err) goto out; file_update_time(file); @@ -862,6 +866,9 @@ struct file_operations btrfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .splice_read = generic_file_splice_read, +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + .sendfile = generic_file_sendfile, +#endif .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 686dd03f34f2..6a7d9160df27 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -532,7 +532,11 @@ err: dir->i_size -= name_len * 2; dir->i_mtime = dir->i_ctime = CURRENT_TIME; btrfs_update_inode(trans, root, dir); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + dentry->d_inode->i_nlink--; +#else drop_nlink(dentry->d_inode); +#endif ret = btrfs_update_inode(trans, root, dentry->d_inode); dir->i_sb->s_dirt = 1; } @@ -1139,7 +1143,7 @@ static unsigned char btrfs_filetype_table[] = { static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = filp->f_dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_item *item; struct btrfs_dir_item *di; @@ -1554,7 +1558,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) return -ENOENT; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + inode->i_nlink++; +#else inc_nlink(inode); +#endif mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -1825,6 +1833,9 @@ insert: if (ret == -EEXIST) { free_extent_map(em); em = NULL; + if (0 && failed_insert == 1) { + btrfs_drop_extent_cache(inode, start, end); + } failed_insert++; if (failed_insert > 5) { printk("failing to insert %Lu %Lu\n", start, end); @@ -1942,7 +1953,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) */ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = fdentry(vma->vm_file)->d_inode; unsigned long end; loff_t size; int ret = -EINVAL; @@ -2243,7 +2254,7 @@ static unsigned long force_ra(struct address_space *mapping, } int btrfs_defrag_file(struct file *file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct page *page; unsigned long last_index; @@ -2329,7 +2340,7 @@ static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) static int btrfs_ioctl_defrag(struct file *file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; switch (inode->i_mode & S_IFMT) { @@ -2350,7 +2361,7 @@ static int btrfs_ioctl_defrag(struct file *file) long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct btrfs_root *root = BTRFS_I(file->f_path.dentry->d_inode)->root; + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; switch (cmd) { case BTRFS_IOC_SNAP_CREATE: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b7323914bf5a..7cab680418a1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 1671c731800e..cd673ca9bb98 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -189,12 +189,29 @@ static struct kset btrfs_kset; int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) { int error; + char *name; + char c; + int len = strlen(fs->sb->s_id) + 1; + int i; + + name = kmalloc(len, GFP_NOFS); + if (!name) { + error = -ENOMEM; + goto fail; + } + + for (i = 0; i < len; i++) { + c = fs->sb->s_id[i]; + if (c == '/' || c == '\\') + c = '!'; + name[i] = c; + } + name[len] = '\0'; fs->super_kobj.kset = &btrfs_kset; fs->super_kobj.ktype = &btrfs_super_ktype; - error = kobject_set_name(&fs->super_kobj, "%s", - fs->sb->s_id); + error = kobject_set_name(&fs->super_kobj, "%s", name); if (error) goto fail; @@ -202,9 +219,11 @@ int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) if (error) goto fail; + kfree(name); return 0; fail: + kfree(name); printk(KERN_ERR "btrfs: sysfs creation for super failed\n"); return error; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0229e69bd2ff..1ad611b9f61b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -614,12 +614,20 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) } return 0; } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +void btrfs_transaction_cleaner(void *p) +#else void btrfs_transaction_cleaner(struct work_struct *work) +#endif { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + struct btrfs_fs_info *fs_info = p; +#else struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, trans_work.work); +#endif struct btrfs_root *root = fs_info->tree_root; struct btrfs_transaction *cur; struct btrfs_trans_handle *trans; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ae39fcfc169a..eef840bca91e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -73,7 +73,12 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +void btrfs_transaction_cleaner(void *p); +#else void btrfs_transaction_cleaner(struct work_struct *work); +#endif + void btrfs_transaction_flush_work(struct btrfs_root *root); void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); void btrfs_init_transaction_sys(void); From 4aec2b52327b3a3a20faa3f1af3102c0d97c7c92 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 18 Dec 2007 16:25:45 -0500 Subject: [PATCH 0342/2985] kmalloc a few large stack objects in the btrfs_ioctl path Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 20 +++++++++++----- fs/btrfs/inode.c | 60 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 585f279d1112..7f764455f26e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -80,10 +80,14 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, int ret = 0; int level; struct btrfs_key first_key; - struct btrfs_root new_root; + struct btrfs_root *new_root; - memcpy(&new_root, root, sizeof(new_root)); - new_root.root_key.objectid = new_root_objectid; + new_root = kmalloc(sizeof(*new_root), GFP_NOFS); + if (!new_root) + return -ENOMEM; + + memcpy(new_root, root, sizeof(*new_root)); + new_root->root_key.objectid = new_root_objectid; WARN_ON(root->ref_cows && trans->transid != root->fs_info->running_transaction->transid); @@ -99,12 +103,14 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, } else { first_key.objectid = 0; } - cow = __btrfs_alloc_free_block(trans, &new_root, buf->len, + cow = __btrfs_alloc_free_block(trans, new_root, buf->len, new_root_objectid, trans->transid, first_key.objectid, level, buf->start, 0); - if (IS_ERR(cow)) + if (IS_ERR(cow)) { + kfree(new_root); return PTR_ERR(cow); + } copy_extent_buffer(cow, buf, 0, 0, cow->len); btrfs_set_header_bytenr(cow, cow->start); @@ -112,7 +118,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, btrfs_set_header_owner(cow, new_root_objectid); WARN_ON(btrfs_header_generation(buf) > trans->transid); - ret = btrfs_inc_ref(trans, &new_root, buf); + ret = btrfs_inc_ref(trans, new_root, buf); + kfree(new_root); + if (ret) return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6a7d9160df27..1e725a48467c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2302,40 +2302,64 @@ out_unlock: static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) { - struct btrfs_ioctl_vol_args vol_args; + struct btrfs_ioctl_vol_args *vol_args; struct btrfs_dir_item *di; struct btrfs_path *path; - int namelen; u64 root_dirid; + int namelen; + int ret; - if (copy_from_user(&vol_args, arg, sizeof(vol_args))) - return -EFAULT; + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); - namelen = strlen(vol_args.name); - if (namelen > BTRFS_VOL_NAME_MAX) - return -EINVAL; - if (strchr(vol_args.name, '/')) - return -EINVAL; + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + + namelen = strlen(vol_args->name); + if (namelen > BTRFS_VOL_NAME_MAX) { + ret = -EINVAL; + goto out; + } + if (strchr(vol_args->name, '/')) { + ret = -EINVAL; + goto out; + } path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out; + } root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, mutex_lock(&root->fs_info->fs_mutex); di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, root_dirid, - vol_args.name, namelen, 0); + vol_args->name, namelen, 0); mutex_unlock(&root->fs_info->fs_mutex); btrfs_free_path(path); - if (di && !IS_ERR(di)) - return -EEXIST; - if (IS_ERR(di)) - return PTR_ERR(di); + + if (di && !IS_ERR(di)) { + ret = -EEXIST; + goto out; + } + + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; + } if (root == root->fs_info->tree_root) - return create_subvol(root, vol_args.name, namelen); - return create_snapshot(root, vol_args.name, namelen); + ret = create_subvol(root, vol_args->name, namelen); + else + ret = create_snapshot(root, vol_args->name, namelen); +out: + kfree(vol_args); + return ret; } static int btrfs_ioctl_defrag(struct file *file) From 190662b2128dd648749e197f5563e9f6bbb5e05c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 18 Dec 2007 16:25:45 -0500 Subject: [PATCH 0343/2985] Btrfs: Fix delayed allocation to avoid missing delalloc extents find_lock_delalloc_range could exit out too early Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 9 +++++++-- fs/btrfs/inode.c | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 0d1e59a86e49..a0dff34dd437 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1070,6 +1070,7 @@ u64 find_lock_delalloc_range(struct extent_map_tree *tree, search_again: node = tree_search(&tree->state, cur_start); if (!node || IS_ERR(node)) { + *end = (u64)-1; goto out; } @@ -1079,6 +1080,8 @@ search_again: goto out; } if (!(state->state & EXTENT_DELALLOC)) { + if (!found) + *end = state->end; goto out; } if (!found) { @@ -1841,8 +1844,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, &delalloc_end, 128 * 1024 * 1024); - if (nr_delalloc <= 0) - break; + if (nr_delalloc == 0) { + delalloc_start = delalloc_end + 1; + continue; + } tree->ops->fill_delalloc(inode, delalloc_start, delalloc_end); clear_extent_bit(tree, delalloc_start, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1e725a48467c..55b2e1426024 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -798,6 +798,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + WARN_ON(!PageLocked(page)); set_page_extent_mapped(page); lock_extent(em_tree, page_start, page_end, GFP_NOFS); From 879c1cfc3104b632dea85d07490f1b55e54275a9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 19 Dec 2007 09:11:48 -0500 Subject: [PATCH 0344/2985] Btrfs: Fix nodatacow extent lookup Yan Zheng noticed the offset into the extent was incorrectly being added to the extent start before trying to find it in the extent allocation tree. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 55b2e1426024..1c61c85dc3d8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -177,7 +177,6 @@ again: if (bytenr == 0) goto not_found; - bytenr += btrfs_file_extent_offset(leaf, item); if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) { goto not_found; } From 01f466580502c57001bf80fff709479fdb9e87a5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:24:26 -0500 Subject: [PATCH 0345/2985] Btrfs: Less aggressive readahead on deletes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7f764455f26e..2b3ffa707eae 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -987,9 +987,10 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, * readahead one full node of leaves */ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, - int level, int slot) + int level, int slot, u64 objectid) { struct extent_buffer *node; + struct btrfs_disk_key disk_key; u32 nritems; u64 search; u64 lowest_read; @@ -1031,6 +1032,11 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, if (nr >= nritems) break; } + if (path->reada < 0 && objectid) { + btrfs_node_key(node, &disk_key, nr); + if (btrfs_disk_key_objectid(&disk_key) != objectid) + break; + } search = btrfs_node_blockptr(node, nr); if ((search >= lowest_read && search <= highest_read) || (search < lowest_read && lowest_read - search <= 32768) || @@ -1136,7 +1142,8 @@ again: bytenr = btrfs_node_blockptr(b, slot); ptr_gen = btrfs_node_ptr_generation(b, slot); if (should_reada) - reada_for_search(root, p, level, slot); + reada_for_search(root, p, level, slot, + key->objectid); b = read_tree_block(root, bytenr, btrfs_level_size(root, level - 1)); if (ptr_gen != btrfs_header_generation(b)) { @@ -2671,9 +2678,6 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) if (next) free_extent_buffer(next); - if (path->reada < 0) - reada_for_search(root, path, level, slot); - next = read_tree_block(root, bytenr, btrfs_level_size(root, level - 1)); break; @@ -2687,8 +2691,6 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) path->slots[level] = 0; if (!level) break; - if (path->reada) - reada_for_search(root, path, level, 0); next = read_tree_block(root, btrfs_node_blockptr(next, 0), btrfs_level_size(root, level - 1)); } @@ -2726,7 +2728,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) free_extent_buffer(next); if (path->reada) - reada_for_search(root, path, level, slot); + reada_for_search(root, path, level, slot, 0); next = read_tree_block(root, bytenr, btrfs_level_size(root, level -1)); @@ -2742,7 +2744,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (!level) break; if (path->reada) - reada_for_search(root, path, level, 0); + reada_for_search(root, path, level, 0, 0); next = read_tree_block(root, btrfs_node_blockptr(next, 0), btrfs_level_size(root, level - 1)); } From 1832a6d5ee3b1af61001cadba9e10da9e91af4a4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:27:21 -0500 Subject: [PATCH 0346/2985] Btrfs: Implement basic support for -ENOSPC This is intended to prevent accidentally filling the drive. A determined user can still make things oops. It includes some accounting of the current bytes under delayed allocation, but this will change as things get optimized Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++ fs/btrfs/disk-io.c | 5 +- fs/btrfs/extent_map.c | 20 ++++-- fs/btrfs/extent_map.h | 3 +- fs/btrfs/file.c | 28 ++++++-- fs/btrfs/inode.c | 161 +++++++++++++++++++++++++++++++++++++----- 6 files changed, 193 insertions(+), 28 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2559f5d5d00a..10129cc6656f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -346,6 +346,8 @@ struct btrfs_fs_info { int closing; u64 total_pinned; + spinlock_t delalloc_lock; + u64 delalloc_bytes; }; /* * in ram representation of the tree. extent_root is used for all allocations @@ -1115,6 +1117,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, + int for_del); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ebb2db624fdd..eebb4fb65c61 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -223,7 +223,8 @@ static int btree_writepages(struct address_space *mapping, } else { thresh = 8 * 1024 * 1024; } - num_dirty = count_range_bits(tree, &start, thresh, EXTENT_DIRTY); + num_dirty = count_range_bits(tree, &start, (u64)-1, + thresh, EXTENT_DIRTY); if (num_dirty < thresh) { return 0; } @@ -559,6 +560,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); spin_lock_init(&fs_info->hash_lock); + spin_lock_init(&fs_info->delalloc_lock); memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); init_completion(&fs_info->kobj_unregister); @@ -570,6 +572,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->sb = sb; fs_info->mount_opt = 0; fs_info->max_extent = (u64)-1; + fs_info->delalloc_bytes = 0; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index a0dff34dd437..2b92f1070274 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1131,7 +1131,8 @@ out: } u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 max_bytes, unsigned long bits) + u64 *start, u64 search_end, u64 max_bytes, + unsigned long bits) { struct rb_node *node; struct extent_state *state; @@ -1139,9 +1140,14 @@ u64 count_range_bits(struct extent_map_tree *tree, u64 total_bytes = 0; int found = 0; + if (search_end <= cur_start) { + printk("search_end %Lu start %Lu\n", search_end, cur_start); + WARN_ON(1); + return 0; + } + write_lock_irq(&tree->lock); - if (bits == EXTENT_DIRTY) { - *start = 0; + if (cur_start == 0 && bits == EXTENT_DIRTY) { total_bytes = tree->dirty_bytes; goto out; } @@ -1156,8 +1162,11 @@ u64 count_range_bits(struct extent_map_tree *tree, while(1) { state = rb_entry(node, struct extent_state, rb_node); - if ((state->state & bits)) { - total_bytes += state->end - state->start + 1; + if (state->start > search_end) + break; + if (state->end >= cur_start && (state->state & bits)) { + total_bytes += min(search_end, state->end) + 1 - + max(cur_start, state->start); if (total_bytes >= max_bytes) break; if (!found) { @@ -1173,7 +1182,6 @@ out: write_unlock_irq(&tree->lock); return total_bytes; } - /* * helper function to lock both pages and extents in the tree. * pages must be locked first. diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 6e572d3e8924..ea60f5447b5b 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -115,7 +115,8 @@ int __init extent_map_init(void); void extent_map_exit(void); u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 max_bytes, unsigned long bits); + u64 *start, u64 search_end, + u64 max_bytes, unsigned long bits); int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, int filled); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 461b09663fed..71dc2d33b6c6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -307,6 +307,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size > 32768 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; + u64 existing_delalloc = 0; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; @@ -316,8 +317,19 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, last_end = (u64)(pages[num_pages -1]->index) << PAGE_CACHE_SHIFT; last_end += PAGE_CACHE_SIZE - 1; + if (start_pos < isize) { + u64 delalloc_start = start_pos; + existing_delalloc = count_range_bits(em_tree, + &delalloc_start, + end_of_last_block, (u64)-1, + EXTENT_DELALLOC); + } set_extent_delalloc(em_tree, start_pos, end_of_last_block, GFP_NOFS); + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += (end_of_last_block + 1 - + start_pos) - existing_delalloc; + spin_unlock(&root->fs_info->delalloc_lock); } else { u64 aligned_end; /* step one, delete the existing extents in this range */ @@ -708,12 +720,12 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) - goto out; + goto out_nolock; if (count == 0) - goto out; + goto out_nolock; err = remove_suid(fdentry(file)); if (err) - goto out; + goto out_nolock; file_update_time(file); pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); @@ -758,6 +770,13 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(pages)); + + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, write_bytes, 0); + mutex_unlock(&root->fs_info->fs_mutex); + if (ret) + goto out; + ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, write_bytes); @@ -787,8 +806,9 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, btrfs_btree_balance_dirty(root, 1); cond_resched(); } - mutex_unlock(&inode->i_mutex); out: + mutex_unlock(&inode->i_mutex); +out_nolock: kfree(pages); if (pinned[0]) page_cache_release(pinned[0]); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1c61c85dc3d8..a9f5d6d417f0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -72,6 +72,26 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; +int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, + int for_del) +{ + u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy); + u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy); + u64 thresh; + int ret = 0; + + if (for_del) + thresh = (total * 90) / 100; + else + thresh = (total * 85) / 100; + + spin_lock(&root->fs_info->delalloc_lock); + if (used + root->fs_info->delalloc_bytes + num_required > thresh) + ret = -ENOSPC; + spin_unlock(&root->fs_info->delalloc_lock); + return ret; +} + static int cow_file_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -124,6 +144,7 @@ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) u64 extent_end; u64 bytenr; u64 cow_end; + u64 loops = 0; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_buffer *leaf; int found_type; @@ -169,6 +190,9 @@ again: btrfs_file_extent_num_bytes(leaf, item); err = 0; + if (loops && start != extent_start) + goto not_found; + if (start < extent_start || start >= extent_end) goto not_found; @@ -191,6 +215,7 @@ loop: return 0; } btrfs_release_path(root, path); + loops++; goto again; not_found: @@ -202,6 +227,7 @@ not_found: static int run_delalloc_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; + u64 num_bytes; int ret; mutex_lock(&root->fs_info->fs_mutex); @@ -209,6 +235,17 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) ret = run_delalloc_nocow(inode, start, end); else ret = cow_file_range(inode, start, end); + + spin_lock(&root->fs_info->delalloc_lock); + num_bytes = end + 1 - start; + if (root->fs_info->delalloc_bytes < num_bytes) { + printk("delalloc accounting error total %llu sub %llu\n", + root->fs_info->delalloc_bytes, num_bytes); + } else { + root->fs_info->delalloc_bytes -= num_bytes; + } + spin_unlock(&root->fs_info->delalloc_lock); + mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -547,10 +584,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) struct btrfs_root *root; struct btrfs_trans_handle *trans; int ret; - unsigned long nr; + unsigned long nr = 0; root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); + + ret = btrfs_check_free_space(root, 1, 1); + if (ret) + goto fail; + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -558,25 +600,29 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) nr = trans->blocks_used; btrfs_end_transaction(trans, root); +fail: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); - return ret; } static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; - int err; + int err = 0; int ret; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_trans_handle *trans; - unsigned long nr; + unsigned long nr = 0; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, 1, 1); + if (ret) + goto fail; + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -588,6 +634,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) nr = trans->blocks_used; ret = btrfs_end_transaction(trans, root); +fail: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); @@ -792,17 +839,29 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, size_t zero_start) { char *kaddr; - int ret = 0; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct btrfs_root *root = BTRFS_I(inode)->root; u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + u64 existing_delalloc; + u64 delalloc_start; + int ret = 0; WARN_ON(!PageLocked(page)); set_page_extent_mapped(page); lock_extent(em_tree, page_start, page_end, GFP_NOFS); + delalloc_start = page_start; + existing_delalloc = count_range_bits(&BTRFS_I(inode)->extent_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start, page_end, GFP_NOFS); + + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - existing_delalloc; + spin_unlock(&root->fs_info->delalloc_lock); + if (zero_start != PAGE_CACHE_SIZE) { kaddr = kmap(page); memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); @@ -881,6 +940,12 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_size <= pos) goto out; + mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, 1, 0); + mutex_unlock(&root->fs_info->fs_mutex); + if (err) + goto fail; + btrfs_truncate_page(inode->i_mapping, inode->i_size); lock_extent(em_tree, pos, block_end, GFP_NOFS); @@ -906,7 +971,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) } out: err = inode_setattr(inode, attr); - +fail: return err; } void btrfs_delete_inode(struct inode *inode) @@ -1440,16 +1505,20 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; - struct inode *inode; + struct inode *inode = NULL; int err; int drop_inode = 0; u64 objectid; - unsigned long nr; + unsigned long nr = 0; if (!new_valid_dev(rdev)) return -EINVAL; mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, 1, 0); + if (err) + goto fail; + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -1480,6 +1549,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); +fail: mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { @@ -1495,13 +1565,16 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; - struct inode *inode; + struct inode *inode = NULL; int err; int drop_inode = 0; - unsigned long nr; + unsigned long nr = 0; u64 objectid; mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, 1, 0); + if (err) + goto fail; trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -1535,6 +1608,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); +fail: mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { @@ -1551,7 +1625,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = old_dentry->d_inode; - unsigned long nr; + unsigned long nr = 0; int err; int drop_inode = 0; @@ -1564,6 +1638,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, inc_nlink(inode); #endif mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, 1, 0); + if (err) + goto fail; trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -1582,6 +1659,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, nr = trans->blocks_used; btrfs_end_transaction(trans, root); +fail: mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { @@ -1603,6 +1681,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) unsigned long nr = 1; mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, 1, 0); + if (err) + goto out_unlock; + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -1869,6 +1951,15 @@ static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + int err; + + mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); + mutex_lock(&root->fs_info->fs_mutex); + if (err) + return -ENOSPC; + return extent_prepare_write(&BTRFS_I(page->mapping->host)->extent_tree, page->mapping->host, page, from, to, btrfs_get_extent); @@ -1880,6 +1971,7 @@ int btrfs_readpage(struct file *file, struct page *page) tree = &BTRFS_I(page->mapping->host)->extent_tree; return extent_read_full_page(tree, page, btrfs_get_extent); } + static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { struct extent_map_tree *tree; @@ -1954,11 +2046,20 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct inode *inode = fdentry(vma->vm_file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; unsigned long end; loff_t size; - int ret = -EINVAL; + int ret; u64 page_start; + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); + mutex_lock(&root->fs_info->fs_mutex); + if (ret) + goto out; + + ret = -EINVAL; + down_read(&BTRFS_I(inode)->root->snap_sem); lock_page(page); wait_on_page_writeback(page); @@ -1982,6 +2083,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) out_unlock: up_read(&BTRFS_I(inode)->root->snap_sem); unlock_page(page); +out: return ret; } @@ -2046,6 +2148,10 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) unsigned long nr = 1; mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, 1, 0); + if (ret) + goto fail_commit; + trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); @@ -2162,7 +2268,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) int ret; int err; u64 objectid; - unsigned long nr; + unsigned long nr = 0; if (!root->ref_cows) return -EINVAL; @@ -2172,6 +2278,10 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) thaw_bdev(root->fs_info->sb->s_bdev, root->fs_info->sb); mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, 1, 0); + if (ret) + goto fail_unlock; + trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); @@ -2229,7 +2339,7 @@ fail: if (err && !ret) ret = err; - +fail_unlock: mutex_unlock(&root->fs_info->fs_mutex); up_write(&root->snap_sem); btrfs_btree_balance_dirty(root, nr); @@ -2255,6 +2365,7 @@ static unsigned long force_ra(struct address_space *mapping, int btrfs_defrag_file(struct file *file) { struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct page *page; unsigned long last_index; @@ -2262,6 +2373,13 @@ int btrfs_defrag_file(struct file *file) { u64 page_start; u64 page_end; unsigned long i; + int ret; + + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, inode->i_size, 0); + mutex_unlock(&root->fs_info->fs_mutex); + if (ret) + return -ENOSPC; mutex_lock(&inode->i_mutex); last_index = inode->i_size >> PAGE_CACHE_SHIFT; @@ -2522,6 +2640,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, } mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, 1, 0); + if (ret) + goto out_unlock; + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, new_dir); @@ -2553,6 +2675,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, out_fail: btrfs_free_path(path); btrfs_end_transaction(trans, root); +out_unlock: mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -2564,7 +2687,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_path *path; struct btrfs_key key; - struct inode *inode; + struct inode *inode = NULL; int err; int drop_inode = 0; u64 objectid; @@ -2573,12 +2696,17 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, unsigned long ptr; struct btrfs_file_extent_item *ei; struct extent_buffer *leaf; - unsigned long nr; + unsigned long nr = 0; name_len = strlen(symname) + 1; if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; + mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_check_free_space(root, 1, 0); + if (err) + goto out_fail; + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -2645,6 +2773,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); +out_fail: mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { inode_dec_link_count(inode); From 5d4fb734b4cb89a3902d6de309af0f6f3e63fe73 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:27:23 -0500 Subject: [PATCH 0347/2985] Btrfs: Fix an off by one in the extent_map prepare write code Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 4 ++-- fs/btrfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2b92f1070274..f53d8c976219 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -2270,7 +2270,7 @@ int extent_prepare_write(struct extent_map_tree *tree, kunmap_atomic(kaddr, KM_USER0); } if ((em->block_start != EXTENT_MAP_HOLE && - em->block_start != EXTENT_MAP_INLINE) && + em->block_start != EXTENT_MAP_INLINE) && !isnew && !PageUptodate(page) && (block_off_end > to || block_off_start < from) && !test_range_bit(tree, block_start, cur_end, @@ -2279,7 +2279,7 @@ int extent_prepare_write(struct extent_map_tree *tree, u64 extent_offset = block_start - em->start; size_t iosize; sector = (em->block_start + extent_offset) >> 9; - iosize = (cur_end - block_start + blocksize - 1) & + iosize = (cur_end - block_start + blocksize) & ~((u64)blocksize - 1); /* * we've already got the extent locked, but we diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a9f5d6d417f0..5f5b7b89b144 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1956,7 +1956,7 @@ static int btrfs_prepare_write(struct file *file, struct page *page, mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); - mutex_lock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->fs_mutex); if (err) return -ENOSPC; From edbd8d4efe4ddaf29a175ae504e2c9a05a96ebee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:27:24 -0500 Subject: [PATCH 0348/2985] Btrfs: Support for online FS resize (grow and shrink) Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 10 + fs/btrfs/disk-io.c | 32 ++- fs/btrfs/disk-io.h | 2 + fs/btrfs/extent-tree.c | 480 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/inode.c | 105 ++++++++- fs/btrfs/ioctl.h | 2 + fs/btrfs/root-tree.c | 5 + fs/btrfs/super.c | 7 +- 8 files changed, 629 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 10129cc6656f..b83a1bec346f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -508,6 +508,8 @@ BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, used, 64); BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, used, 64); +BTRFS_SETGET_FUNCS(disk_block_group_flags, struct btrfs_block_group_item, + flags, 8); /* struct btrfs_inode_ref */ BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); @@ -960,6 +962,9 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int level, u64 hint, u64 empty_size); +int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 new_size); +int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, @@ -1117,6 +1122,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +unsigned long btrfs_force_ra(struct address_space *mapping, + struct file_ra_state *ra, struct file *file, + pgoff_t offset, pgoff_t last_index); int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int for_del); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); @@ -1162,4 +1170,6 @@ void btrfs_sysfs_del_super(struct btrfs_fs_info *root); ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); +/* super.c */ +u64 btrfs_parse_size(char *str); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eebb4fb65c61..de026d9d9b2b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -471,13 +471,17 @@ insert: return root; } -struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location, - const char *name, int namelen) +struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) { struct btrfs_root *root; int ret; + if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) + return fs_info->tree_root; + if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID) + return fs_info->extent_root; + root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); if (root) @@ -494,6 +498,23 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } + ret = btrfs_find_dead_roots(fs_info->tree_root, + root->root_key.objectid, root); + BUG_ON(ret); + + return root; +} + +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location, + const char *name, int namelen) +{ + struct btrfs_root *root; + int ret; + + root = btrfs_read_fs_root_no_name(fs_info, location); + if (!root) + return NULL; ret = btrfs_set_root_name(root, name, namelen); if (ret) { @@ -509,11 +530,6 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } - - ret = btrfs_find_dead_roots(fs_info->tree_root, - root->root_key.objectid, root); - BUG_ON(ret); - return root; } #if 0 diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 33d2ccfd74d8..8c3cfd02901f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -39,6 +39,8 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, const char *name, int namelen); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_key *location); +struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, + struct btrfs_key *location); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, u64 device_id, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1412d556313f..de0fb0743cf9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -18,6 +18,7 @@ #include #include +#include #include "hash.h" #include "ctree.h" #include "disk-io.h" @@ -1622,6 +1623,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, int pending_ret; u64 super_used, root_used; u64 search_start = 0; + u64 new_hint; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; @@ -1629,6 +1631,10 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_stack_extent_refs(&extent_item, 1); + new_hint = max(hint_byte, 16ULL * 1024 * 1024 * 1024); + if (new_hint < btrfs_super_total_bytes(&info->super_copy)) + hint_byte = new_hint; + WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, @@ -2100,6 +2106,480 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +static int relocate_inode_pages(struct inode *inode, u64 start, u64 len) +{ + u64 page_start; + u64 page_end; + u64 delalloc_start; + u64 existing_delalloc; + unsigned long last_index; + unsigned long first_index; + unsigned long i; + struct page *page; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct file_ra_state ra; + + mutex_lock(&inode->i_mutex); + first_index = start >> PAGE_CACHE_SHIFT; + last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; + + memset(&ra, 0, sizeof(ra)); + file_ra_state_init(&ra, inode->i_mapping); + btrfs_force_ra(inode->i_mapping, &ra, NULL, first_index, last_index); + + for (i = first_index; i <= last_index; i++) { + page = grab_cache_page(inode->i_mapping, i); + if (!page) + goto out_unlock; + if (!PageUptodate(page)) { + btrfs_readpage(NULL, page); + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + goto out_unlock; + } + } + page_start = (u64)page->index << PAGE_CACHE_SHIFT; + page_end = page_start + PAGE_CACHE_SIZE - 1; + + lock_extent(em_tree, page_start, page_end, GFP_NOFS); + + delalloc_start = page_start; + existing_delalloc = + count_range_bits(&BTRFS_I(inode)->extent_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); + + set_extent_delalloc(em_tree, page_start, + page_end, GFP_NOFS); + + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - + existing_delalloc; + spin_unlock(&root->fs_info->delalloc_lock); + + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + } + +out_unlock: + mutex_unlock(&inode->i_mutex); + return 0; +} + +static int relocate_one_reference(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key, + u64 ref_root, u64 ref_gen, u64 ref_objectid, + u64 ref_offset) +{ + struct inode *inode; + struct btrfs_root *found_root; + struct btrfs_key root_location; + int ret; + + root_location.objectid = ref_root; + if (ref_gen == 0) + root_location.offset = 0; + else + root_location.offset = (u64)-1; + root_location.type = BTRFS_ROOT_ITEM_KEY; + + found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, + &root_location); + BUG_ON(!found_root); + + if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + mutex_unlock(&extent_root->fs_info->fs_mutex); + inode = btrfs_iget_locked(extent_root->fs_info->sb, + ref_objectid, found_root); + if (inode->i_state & I_NEW) { + /* the inode and parent dir are two different roots */ + BTRFS_I(inode)->root = found_root; + BTRFS_I(inode)->location.objectid = ref_objectid; + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.offset = 0; + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + + } + /* this can happen if the reference is not against + * the latest version of the tree root + */ + if (is_bad_inode(inode)) { + mutex_lock(&extent_root->fs_info->fs_mutex); + goto out; + } + relocate_inode_pages(inode, ref_offset, extent_key->offset); + /* FIXME, data=ordered will help get rid of this */ + filemap_fdatawrite(inode->i_mapping); + iput(inode); + mutex_lock(&extent_root->fs_info->fs_mutex); + } else { + struct btrfs_trans_handle *trans; + struct btrfs_key found_key; + struct extent_buffer *eb; + int level; + int i; + + trans = btrfs_start_transaction(found_root, 1); + eb = read_tree_block(found_root, extent_key->objectid, + extent_key->offset); + level = btrfs_header_level(eb); + + if (level == 0) + btrfs_item_key_to_cpu(eb, &found_key, 0); + else + btrfs_node_key_to_cpu(eb, &found_key, 0); + + free_extent_buffer(eb); + + path->lowest_level = level; + path->reada = 0; + ret = btrfs_search_slot(trans, found_root, &found_key, path, + 0, 1); + path->lowest_level = 0; + for (i = level; i < BTRFS_MAX_LEVEL; i++) { + if (!path->nodes[i]) + break; + free_extent_buffer(path->nodes[i]); + path->nodes[i] = NULL; + } + btrfs_release_path(found_root, path); + btrfs_end_transaction(trans, found_root); + } + +out: + return 0; +} + +static int relocate_one_extent(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key) +{ + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_extent_ref *ref; + struct extent_buffer *leaf; + u64 ref_root; + u64 ref_gen; + u64 ref_objectid; + u64 ref_offset; + u32 nritems; + u32 item_size; + int ret = 0; + + key.objectid = extent_key->objectid; + key.type = BTRFS_EXTENT_REF_KEY; + key.offset = 0; + + while(1) { + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); + + BUG_ON(ret == 0); + + if (ret < 0) + goto out; + + ret = 0; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (path->slots[0] == nritems) + goto out; + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid != extent_key->objectid) + break; + + if (found_key.type != BTRFS_EXTENT_REF_KEY) + break; + + key.offset = found_key.offset + 1; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref); + ref_root = btrfs_ref_root(leaf, ref); + ref_gen = btrfs_ref_generation(leaf, ref); + ref_objectid = btrfs_ref_objectid(leaf, ref); + ref_offset = btrfs_ref_offset(leaf, ref); + btrfs_release_path(extent_root, path); + + ret = relocate_one_reference(extent_root, path, + extent_key, ref_root, ref_gen, + ref_objectid, ref_offset); + if (ret) + goto out; + } + ret = 0; +out: + btrfs_release_path(extent_root, path); + return ret; +} + +static int find_overlapping_extent(struct btrfs_root *root, + struct btrfs_path *path, u64 new_size) +{ + struct btrfs_key found_key; + struct extent_buffer *leaf; + int ret; + + while(1) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret == 1) { + return 1; + } + if (ret < 0) + return ret; + } else { + path->slots[0]--; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.type == BTRFS_EXTENT_ITEM_KEY) { + if (found_key.objectid + found_key.offset > new_size) + return 0; + else + return 1; + } + } + return 1; +} + +int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = root->fs_info->tree_root; + struct btrfs_path *path; + u64 cur_byte; + u64 total_found; + u64 ptr; + struct btrfs_fs_info *info = root->fs_info; + struct extent_map_tree *block_group_cache; + struct btrfs_key key; + struct btrfs_key found_key = { 0, 0, 0 }; + struct extent_buffer *leaf; + u32 nritems; + int ret; + int slot; + + btrfs_set_super_total_bytes(&info->super_copy, new_size); + block_group_cache = &info->block_group_cache; + path = btrfs_alloc_path(); + root = root->fs_info->extent_root; + +again: + total_found = 0; + key.objectid = new_size; + cur_byte = key.objectid; + key.offset = 0; + key.type = 0; + while(1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; +next: + leaf = path->nodes[0]; + if (key.objectid == new_size - 1) { + ret = find_overlapping_extent(root, path, new_size); + if (ret != 0) { + btrfs_release_path(root, path); + ret = btrfs_search_slot(NULL, root, &key, + path, 0, 0); + if (ret < 0) + goto out; + } + } + nritems = btrfs_header_nritems(leaf); + ret = 0; + slot = path->slots[0]; + if (slot < nritems) + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (slot == nritems || + btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY) { + path->slots[0]++; + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret == 1) { + ret = 0; + break; + } + } + goto next; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.objectid + found_key.offset <= cur_byte) + continue; + total_found++; + cur_byte = found_key.objectid + found_key.offset; + key.objectid = cur_byte; + btrfs_release_path(root, path); + ret = relocate_one_extent(root, path, &found_key); + } + + btrfs_release_path(root, path); + + if (total_found > 0) { + trans = btrfs_start_transaction(tree_root, 1); + btrfs_commit_transaction(trans, tree_root); + + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_clean_old_snapshots(tree_root); + mutex_lock(&root->fs_info->fs_mutex); + + trans = btrfs_start_transaction(tree_root, 1); + btrfs_commit_transaction(trans, tree_root); + goto again; + } + + trans = btrfs_start_transaction(root, 1); + key.objectid = new_size; + key.offset = 0; + key.type = 0; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; +bg_next: + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + ret = 0; + slot = path->slots[0]; + if (slot < nritems) + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (slot == nritems || + btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) { + if (slot < nritems) { + printk("shrinker found key %Lu %u %Lu\n", + found_key.objectid, found_key.type, + found_key.offset); + path->slots[0]++; + } + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + break; + if (ret == 1) { + ret = 0; + break; + } + } + goto bg_next; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + ret = get_state_private(&info->block_group_cache, + found_key.objectid, &ptr); + if (!ret) + kfree((void *)(unsigned long)ptr); + + clear_extent_bits(&info->block_group_cache, found_key.objectid, + found_key.objectid + found_key.offset - 1, + (unsigned int)-1, GFP_NOFS); + + key.objectid = found_key.objectid + 1; + btrfs_del_item(trans, root, path); + btrfs_release_path(root, path); + } + clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, + GFP_NOFS); + btrfs_commit_transaction(trans, root); +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 new_size) +{ + struct btrfs_path *path; + u64 nr = 0; + u64 cur_byte; + u64 old_size; + struct btrfs_block_group_cache *cache; + struct btrfs_block_group_item *item; + struct btrfs_fs_info *info = root->fs_info; + struct extent_map_tree *block_group_cache; + struct btrfs_key key; + struct extent_buffer *leaf; + int ret; + int bit; + + old_size = btrfs_super_total_bytes(&info->super_copy); + block_group_cache = &info->block_group_cache; + + root = info->extent_root; + + cache = btrfs_lookup_block_group(root->fs_info, old_size - 1); + + cur_byte = cache->key.objectid + cache->key.offset; + if (cur_byte >= new_size) + goto set_size; + + key.offset = BTRFS_BLOCK_GROUP_SIZE; + btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while(cur_byte < new_size) { + key.objectid = cur_byte; + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(struct btrfs_block_group_item)); + BUG_ON(ret); + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_block_group_item); + + btrfs_set_disk_block_group_used(leaf, item, 0); + if (nr % 3) { + btrfs_set_disk_block_group_flags(leaf, item, + BTRFS_BLOCK_GROUP_DATA); + } else { + btrfs_set_disk_block_group_flags(leaf, item, 0); + } + nr++; + + cache = kmalloc(sizeof(*cache), GFP_NOFS); + BUG_ON(!cache); + + read_extent_buffer(leaf, &cache->item, (unsigned long)item, + sizeof(cache->item)); + + memcpy(&cache->key, &key, sizeof(key)); + cache->cached = 0; + cache->pinned = 0; + cur_byte = key.objectid + key.offset; + btrfs_release_path(root, path); + + if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { + bit = BLOCK_GROUP_DATA; + cache->data = BTRFS_BLOCK_GROUP_DATA; + } else { + bit = BLOCK_GROUP_METADATA; + cache->data = 0; + } + + /* use EXTENT_LOCKED to prevent merging */ + set_extent_bits(block_group_cache, key.objectid, + key.objectid + key.offset - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, key.objectid, + (unsigned long)cache); + } + btrfs_free_path(path); +set_size: + btrfs_set_super_total_bytes(&info->super_copy, new_size); + return 0; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f5b7b89b144..f6a20112d9e7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2346,7 +2346,7 @@ fail_unlock: return ret; } -static unsigned long force_ra(struct address_space *mapping, +unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index) { @@ -2372,6 +2372,8 @@ int btrfs_defrag_file(struct file *file) { unsigned long ra_index = 0; u64 page_start; u64 page_end; + u64 delalloc_start; + u64 existing_delalloc; unsigned long i; int ret; @@ -2385,8 +2387,9 @@ int btrfs_defrag_file(struct file *file) { last_index = inode->i_size >> PAGE_CACHE_SHIFT; for (i = 0; i <= last_index; i++) { if (i == ra_index) { - ra_index = force_ra(inode->i_mapping, &file->f_ra, - file, ra_index, last_index); + ra_index = btrfs_force_ra(inode->i_mapping, + &file->f_ra, + file, ra_index, last_index); } page = grab_cache_page(inode->i_mapping, i); if (!page) @@ -2404,8 +2407,19 @@ int btrfs_defrag_file(struct file *file) { page_end = page_start + PAGE_CACHE_SIZE - 1; lock_extent(em_tree, page_start, page_end, GFP_NOFS); + delalloc_start = page_start; + existing_delalloc = + count_range_bits(&BTRFS_I(inode)->extent_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); set_extent_delalloc(em_tree, page_start, page_end, GFP_NOFS); + + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - + existing_delalloc; + spin_unlock(&root->fs_info->delalloc_lock); + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); @@ -2418,6 +2432,89 @@ out_unlock: return 0; } +static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) +{ + u64 new_size; + u64 old_size; + struct btrfs_ioctl_vol_args *vol_args; + struct btrfs_trans_handle *trans; + char *sizestr; + int ret = 0; + int namelen; + int mod = 0; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + namelen = strlen(vol_args->name); + if (namelen > BTRFS_VOL_NAME_MAX) { + ret = -EINVAL; + goto out; + } + + sizestr = vol_args->name; + if (!strcmp(sizestr, "max")) + new_size = root->fs_info->sb->s_bdev->bd_inode->i_size; + else { + if (sizestr[0] == '-') { + mod = -1; + sizestr++; + } else if (sizestr[0] == '+') { + mod = 1; + sizestr++; + } + new_size = btrfs_parse_size(sizestr); + if (new_size == 0) { + ret = -EINVAL; + goto out; + } + } + + mutex_lock(&root->fs_info->fs_mutex); + old_size = btrfs_super_total_bytes(&root->fs_info->super_copy); + + if (mod < 0) { + if (new_size > old_size) { + ret = -EINVAL; + goto out_unlock; + } + new_size = old_size - new_size; + } else if (mod > 0) { + new_size = old_size + new_size; + } + + if (new_size < 256 * 1024 * 1024) { + ret = -EINVAL; + goto out_unlock; + } + if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) { + ret = -EFBIG; + goto out_unlock; + } + new_size = (new_size / root->sectorsize) * root->sectorsize; + +printk("new size is %Lu\n", new_size); + if (new_size > old_size) { + trans = btrfs_start_transaction(root, 1); + ret = btrfs_grow_extent_tree(trans, root, new_size); + btrfs_commit_transaction(trans, root); + } else { + ret = btrfs_shrink_extent_tree(root, new_size); + } + +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); +out: + kfree(vol_args); + return ret; +} + static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) { struct btrfs_ioctl_vol_args *vol_args; @@ -2510,6 +2607,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_snap_create(root, (void __user *)arg); case BTRFS_IOC_DEFRAG: return btrfs_ioctl_defrag(file); + case BTRFS_IOC_RESIZE: + return btrfs_ioctl_resize(root, (void __user *)arg); } return -ENOTTY; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 8bc47dec2865..8c6290665d49 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -30,4 +30,6 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 88bcdd33f56e..e2b9b86af696 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -179,6 +179,11 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) goto out; + if (ret) { +btrfs_print_leaf(root, path->nodes[0]); +printk("failed to del %Lu %u %Lu\n", key->objectid, key->type, key->offset); + + } BUG_ON(ret != 0); leaf = path->nodes[0]; ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7cab680418a1..3bbd7f665694 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -74,9 +74,9 @@ static match_table_t tokens = { {Opt_err, NULL} }; -static unsigned long parse_size(char *str) +u64 btrfs_parse_size(char *str) { - unsigned long res; + u64 res; int mult = 1; char *end; char last; @@ -150,7 +150,8 @@ static int parse_options (char * options, if (info) { char *num = match_strdup(&args[0]); if (num) { - info->max_extent = parse_size(num); + info->max_extent = + btrfs_parse_size(num); kfree(num); info->max_extent = max_t(u64, From e52ec0eb62e76ea584c8438ef92933e9b815db80 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:36:24 -0500 Subject: [PATCH 0349/2985] Btrfs: Fix NULL block groups on reading the inode Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/inode.c | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index de0fb0743cf9..a421069e998b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1630,11 +1630,11 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; btrfs_set_stack_extent_refs(&extent_item, 1); - +#if 0 new_hint = max(hint_byte, 16ULL * 1024 * 1024 * 1024); if (new_hint < btrfs_super_total_bytes(&info->super_copy)) hint_byte = new_hint; - +#endif WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f6a20112d9e7..3c7bf3bddcad 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -402,6 +402,10 @@ void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, alloc_group_block); + if (!BTRFS_I(inode)->block_group) { + BTRFS_I(inode)->block_group = btrfs_find_block_group(root, + NULL, 0, 0, 0); + } btrfs_free_path(path); inode_item = NULL; From 8f662a76c6af8eb367fa519e9bb9766040d9cea8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 2 Jan 2008 10:01:11 -0500 Subject: [PATCH 0350/2985] Btrfs: Add readahead to the online shrinker, and a mount -o alloc_start= for testing Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 9 ++++++--- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 9 +++++---- fs/btrfs/inode.c | 2 +- fs/btrfs/super.c | 16 +++++++++++++++- 5 files changed, 28 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2b3ffa707eae..8fa92a2d9819 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2654,9 +2654,9 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, */ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { + u64 bytenr; int slot; int level = 1; - u64 bytenr; struct extent_buffer *c; struct extent_buffer *next = NULL; @@ -2687,11 +2687,14 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) level--; c = path->nodes[level]; free_extent_buffer(c); + slot = btrfs_header_nritems(next); + if (slot != 0) + slot--; path->nodes[level] = next; - path->slots[level] = 0; + path->slots[level] = slot; if (!level) break; - next = read_tree_block(root, btrfs_node_blockptr(next, 0), + next = read_tree_block(root, btrfs_node_blockptr(next, slot), btrfs_level_size(root, level - 1)); } return 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b83a1bec346f..d6e5c19969b0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -324,6 +324,7 @@ struct btrfs_fs_info { u64 last_trans_committed; unsigned long mount_opt; u64 max_extent; + u64 alloc_start; struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; struct extent_buffer *sb_buffer; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a421069e998b..d6243c2afc73 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1630,11 +1630,11 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; btrfs_set_stack_extent_refs(&extent_item, 1); -#if 0 - new_hint = max(hint_byte, 16ULL * 1024 * 1024 * 1024); + + new_hint = max(hint_byte, root->fs_info->alloc_start); if (new_hint < btrfs_super_total_bytes(&info->super_copy)) hint_byte = new_hint; -#endif + WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, @@ -2239,7 +2239,7 @@ static int relocate_one_reference(struct btrfs_root *extent_root, free_extent_buffer(eb); path->lowest_level = level; - path->reada = 0; + path->reada = 2; ret = btrfs_search_slot(trans, found_root, &found_key, path, 0, 1); path->lowest_level = 0; @@ -2372,6 +2372,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) block_group_cache = &info->block_group_cache; path = btrfs_alloc_path(); root = root->fs_info->extent_root; + path->reada = 2; again: total_found = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3c7bf3bddcad..d6de46961887 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2058,7 +2058,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); - mutex_lock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->fs_mutex); if (ret) goto out; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3bbd7f665694..f1b56eb77b1d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -63,7 +63,8 @@ static void btrfs_put_super (struct super_block * sb) } enum { - Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_err, + Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, + Opt_alloc_start, Opt_err, }; static match_table_t tokens = { @@ -71,6 +72,7 @@ static match_table_t tokens = { {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, {Opt_max_extent, "max_extent=%s"}, + {Opt_alloc_start, "alloc_start=%s"}, {Opt_err, NULL} }; @@ -162,6 +164,18 @@ static int parse_options (char * options, } } break; + case Opt_alloc_start: + if (info) { + char *num = match_strdup(&args[0]); + if (num) { + info->alloc_start = + btrfs_parse_size(num); + kfree(num); + printk("btrfs: allocations start at " + "%Lu\n", info->alloc_start); + } + } + break; default: break; } From 56b453c92fdf51fd3283a2dc2dfbedf36f516031 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 09:08:27 -0500 Subject: [PATCH 0351/2985] Btrfs: Explicitly send a root objectid to count_snapshots_in_path Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 16 ++++++---------- fs/btrfs/inode.c | 39 +++++---------------------------------- 2 files changed, 11 insertions(+), 44 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d6243c2afc73..91c2b27d9689 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -688,7 +688,7 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, struct btrfs_path *path; u64 bytenr; u64 found_objectid; - u64 root_objectid = 0; + u64 root_objectid = root->root_key.objectid; u32 total_count = 0; u32 cur_count; u32 refs; @@ -749,15 +749,11 @@ again: struct btrfs_extent_ref); found_objectid = btrfs_ref_root(l, ref_item); - if (found_objectid != root_objectid) - total_count++; - - if (total_count > 1) - goto out; - - if (root_objectid == 0) - root_objectid = found_objectid; - + if (found_objectid != root_objectid) { + total_count = 2; + break; + } + total_count = 1; path->slots[0]++; } if (cur_count == 0) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d6de46961887..5003a86510a1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1770,6 +1770,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, again: em = lookup_extent_mapping(em_tree, start, end); if (em) { + if (em->start > start) { + printk("get_extent start %Lu em start %Lu\n", + start, em->start); + WARN_ON(1); + } goto out; } if (!em) { @@ -1952,23 +1957,6 @@ static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) return extent_bmap(mapping, iblock, btrfs_get_extent); } -static int btrfs_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; - int err; - - mutex_lock(&root->fs_info->fs_mutex); - err = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); - mutex_unlock(&root->fs_info->fs_mutex); - if (err) - return -ENOSPC; - - return extent_prepare_write(&BTRFS_I(page->mapping->host)->extent_tree, - page->mapping->host, page, from, to, - btrfs_get_extent); -} - int btrfs_readpage(struct file *file, struct page *page) { struct extent_map_tree *tree; @@ -2120,21 +2108,6 @@ static void btrfs_truncate(struct inode *inode) btrfs_btree_balance_dirty(root, nr); } -int btrfs_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - struct inode *inode = page->mapping->host; - - btrfs_cow_one_page(inode, page, PAGE_CACHE_SIZE); - - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} - static int create_subvol(struct btrfs_root *root, char *name, int namelen) { struct btrfs_trans_handle *trans; @@ -2930,8 +2903,6 @@ static struct address_space_operations btrfs_aops = { .writepages = btrfs_writepages, .readpages = btrfs_readpages, .sync_page = block_sync_page, - .prepare_write = btrfs_prepare_write, - .commit_write = btrfs_commit_write, .bmap = btrfs_bmap, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, From 4313b3994d719fcdeb7e661473019ca3d62e829b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 09:08:48 -0500 Subject: [PATCH 0352/2985] Btrfs: Reduce stack usage in the resizer, fix 32 bit compiles Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 5 +++ fs/btrfs/extent-tree.c | 74 ++++++++++++++++++++++-------------------- fs/btrfs/inode.c | 6 ++-- fs/btrfs/transaction.c | 20 +++++++++--- 5 files changed, 63 insertions(+), 43 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d6e5c19969b0..9873975ce0ee 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -386,6 +386,7 @@ struct btrfs_root { int defrag_running; int defrag_level; char *name; + int in_sysfs; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index de026d9d9b2b..67d9fd728868 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -374,6 +374,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->highest_inode = 0; root->last_inode_alloc = 0; root->name = NULL; + root->in_sysfs = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -516,6 +517,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, if (!root) return NULL; + if (root->in_sysfs) + return root; + ret = btrfs_set_root_name(root, name, namelen); if (ret) { free_extent_buffer(root->node); @@ -530,6 +534,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } + root->in_sysfs = 1; return root; } #if 0 diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 91c2b27d9689..6137f06091e8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -751,7 +751,7 @@ again: if (found_objectid != root_objectid) { total_count = 2; - break; + goto out; } total_count = 1; path->slots[0]++; @@ -760,8 +760,6 @@ again: total_count = 0; goto out; } - if (total_count > 1) - goto out; if (level >= 0 && root->node == count_path->nodes[level]) goto out; level++; @@ -2109,22 +2107,23 @@ static int relocate_inode_pages(struct inode *inode, u64 start, u64 len) u64 delalloc_start; u64 existing_delalloc; unsigned long last_index; - unsigned long first_index; unsigned long i; struct page *page; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct file_ra_state ra; + struct file_ra_state *ra; + + ra = kzalloc(sizeof(*ra), GFP_NOFS); mutex_lock(&inode->i_mutex); - first_index = start >> PAGE_CACHE_SHIFT; + i = start >> PAGE_CACHE_SHIFT; last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; - memset(&ra, 0, sizeof(ra)); - file_ra_state_init(&ra, inode->i_mapping); - btrfs_force_ra(inode->i_mapping, &ra, NULL, first_index, last_index); + file_ra_state_init(ra, inode->i_mapping); + btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index); + kfree(ra); - for (i = first_index; i <= last_index; i++) { + for (; i <= last_index; i++) { page = grab_cache_page(inode->i_mapping, i); if (!page) goto out_unlock; @@ -2167,27 +2166,43 @@ out_unlock: return 0; } +/* + * note, this releases the path + */ static int relocate_one_reference(struct btrfs_root *extent_root, struct btrfs_path *path, - struct btrfs_key *extent_key, - u64 ref_root, u64 ref_gen, u64 ref_objectid, - u64 ref_offset) + struct btrfs_key *extent_key) { struct inode *inode; struct btrfs_root *found_root; - struct btrfs_key root_location; + struct btrfs_key *root_location; + struct btrfs_extent_ref *ref; + u64 ref_root; + u64 ref_gen; + u64 ref_objectid; + u64 ref_offset; int ret; - root_location.objectid = ref_root; + ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_ref); + ref_root = btrfs_ref_root(path->nodes[0], ref); + ref_gen = btrfs_ref_generation(path->nodes[0], ref); + ref_objectid = btrfs_ref_objectid(path->nodes[0], ref); + ref_offset = btrfs_ref_offset(path->nodes[0], ref); + btrfs_release_path(extent_root, path); + + root_location = kmalloc(sizeof(*root_location), GFP_NOFS); + root_location->objectid = ref_root; if (ref_gen == 0) - root_location.offset = 0; + root_location->offset = 0; else - root_location.offset = (u64)-1; - root_location.type = BTRFS_ROOT_ITEM_KEY; + root_location->offset = (u64)-1; + root_location->type = BTRFS_ROOT_ITEM_KEY; found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, - &root_location); + root_location); BUG_ON(!found_root); + kfree(root_location); if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { mutex_unlock(&extent_root->fs_info->fs_mutex); @@ -2259,12 +2274,7 @@ static int relocate_one_extent(struct btrfs_root *extent_root, { struct btrfs_key key; struct btrfs_key found_key; - struct btrfs_extent_ref *ref; struct extent_buffer *leaf; - u64 ref_root; - u64 ref_gen; - u64 ref_objectid; - u64 ref_offset; u32 nritems; u32 item_size; int ret = 0; @@ -2297,17 +2307,7 @@ static int relocate_one_extent(struct btrfs_root *extent_root, key.offset = found_key.offset + 1; item_size = btrfs_item_size_nr(leaf, path->slots[0]); - ref = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_ref); - ref_root = btrfs_ref_root(leaf, ref); - ref_gen = btrfs_ref_generation(leaf, ref); - ref_objectid = btrfs_ref_objectid(leaf, ref); - ref_offset = btrfs_ref_offset(leaf, ref); - btrfs_release_path(extent_root, path); - - ret = relocate_one_reference(extent_root, path, - extent_key, ref_root, ref_gen, - ref_objectid, ref_offset); + ret = relocate_one_reference(extent_root, path, extent_key); if (ret) goto out; } @@ -2354,7 +2354,6 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) struct btrfs_path *path; u64 cur_byte; u64 total_found; - u64 ptr; struct btrfs_fs_info *info = root->fs_info; struct extent_map_tree *block_group_cache; struct btrfs_key key; @@ -2377,6 +2376,7 @@ again: key.offset = 0; key.type = 0; while(1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; @@ -2441,6 +2441,8 @@ next: key.offset = 0; key.type = 0; while(1) { + u64 ptr; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5003a86510a1..b62f35e862bd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2108,7 +2108,8 @@ static void btrfs_truncate(struct inode *inode) btrfs_btree_balance_dirty(root, nr); } -static int create_subvol(struct btrfs_root *root, char *name, int namelen) +static int noinline create_subvol(struct btrfs_root *root, char *name, + int namelen) { struct btrfs_trans_handle *trans; struct btrfs_key key; @@ -2492,7 +2493,8 @@ out: return ret; } -static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) +static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root, + void __user *arg) { struct btrfs_ioctl_vol_args *vol_args; struct btrfs_dir_item *di; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1ad611b9f61b..02721eea9a7a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -481,12 +481,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; - struct extent_map_tree pinned_copy; + struct extent_map_tree *pinned_copy; DEFINE_WAIT(wait); int ret; - extent_map_tree_init(&pinned_copy, - root->fs_info->btree_inode->i_mapping, GFP_NOFS); INIT_LIST_HEAD(&dirty_fs_roots); mutex_lock(&root->fs_info->trans_mutex); @@ -507,6 +505,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->fs_mutex); return 0; } + + pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS); + if (!pinned_copy) + return -ENOMEM; + + extent_map_tree_init(pinned_copy, + root->fs_info->btree_inode->i_mapping, GFP_NOFS); + trans->transaction->in_commit = 1; cur_trans = trans->transaction; if (cur_trans->list.prev != &root->fs_info->trans_list) { @@ -568,16 +574,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, &root->fs_info->super_copy, 0, sizeof(root->fs_info->super_copy)); - btrfs_copy_pinned(root, &pinned_copy); + btrfs_copy_pinned(root, pinned_copy); mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root); + mutex_lock(&root->fs_info->fs_mutex); - btrfs_finish_extent_commit(trans, root, &pinned_copy); + btrfs_finish_extent_commit(trans, root, pinned_copy); mutex_lock(&root->fs_info->trans_mutex); + + kfree(pinned_copy); + cur_trans->commit_done = 1; root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); From f9ef6604ace23a6fcd698e08b58a883d6009157b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 09:22:38 -0500 Subject: [PATCH 0353/2985] Btrfs: 32 bit compile fixes for the resizer and enospc checks Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 +++- fs/btrfs/inode.c | 10 +++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6137f06091e8..54f2b1d8bdfd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2501,6 +2501,7 @@ int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, u64 nr = 0; u64 cur_byte; u64 old_size; + unsigned long rem; struct btrfs_block_group_cache *cache; struct btrfs_block_group_item *item; struct btrfs_fs_info *info = root->fs_info; @@ -2538,7 +2539,8 @@ int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, struct btrfs_block_group_item); btrfs_set_disk_block_group_used(leaf, item, 0); - if (nr % 3) { + div_long_long_rem(nr, 3, &rem); + if (rem) { btrfs_set_disk_block_group_flags(leaf, item, BTRFS_BLOCK_GROUP_DATA); } else { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b62f35e862bd..8a2001bbf104 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -81,9 +81,11 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int ret = 0; if (for_del) - thresh = (total * 90) / 100; + thresh = total * 90; else - thresh = (total * 85) / 100; + thresh = total * 85; + + do_div(thresh, 100); spin_lock(&root->fs_info->delalloc_lock); if (used + root->fs_info->delalloc_bytes + num_required > thresh) @@ -2475,7 +2477,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) ret = -EFBIG; goto out_unlock; } - new_size = (new_size / root->sectorsize) * root->sectorsize; + + do_div(new_size, root->sectorsize); + new_size *= root->sectorsize; printk("new size is %Lu\n", new_size); if (new_size > old_size) { From 98ed51747b63435b9987ef12692a75c223818bbe Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 10:01:48 -0500 Subject: [PATCH 0354/2985] Btrfs: Force inlining off in a few places to save stack usage Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 21 +++++++------- fs/btrfs/extent-tree.c | 66 +++++++++++++++++++++++------------------- fs/btrfs/file.c | 16 ++++------ 3 files changed, 54 insertions(+), 49 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8fa92a2d9819..35c57074a376 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -548,8 +548,8 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, return 0; } -static int check_block(struct btrfs_root *root, struct btrfs_path *path, - int level) +static int noinline check_block(struct btrfs_root *root, + struct btrfs_path *path, int level) { return 0; #if 0 @@ -676,8 +676,9 @@ static struct extent_buffer *read_node_slot(struct btrfs_root *root, btrfs_level_size(root, btrfs_header_level(parent) - 1)); } -static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int level) +static int balance_level(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) { struct extent_buffer *right = NULL; struct extent_buffer *mid; @@ -868,9 +869,9 @@ enospc: } /* returns zero if the push worked, non-zero otherwise */ -static int push_nodes_for_insert(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, int level) +static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) { struct extent_buffer *right = NULL; struct extent_buffer *mid; @@ -1207,8 +1208,8 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, * returns 0 if some ptrs were pushed left, < 0 if there was some horrible * error, and > 0 if there was no room in the left hand block. */ -static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct extent_buffer *dst, +static int push_node_left(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *dst, struct extent_buffer *src) { int push_items = 0; @@ -1309,7 +1310,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, * * returns zero on success or < 0 on failure. */ -static int insert_new_root(struct btrfs_trans_handle *trans, +static int noinline insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 54f2b1d8bdfd..4957cface9ab 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -162,10 +162,11 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return block_group; return NULL; } -static u64 find_search_start(struct btrfs_root *root, - struct btrfs_block_group_cache **cache_ret, - u64 search_start, int num, - int data, int full_scan) + +static u64 noinline find_search_start(struct btrfs_root *root, + struct btrfs_block_group_cache **cache_ret, + u64 search_start, int num, + int data, int full_scan) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; @@ -393,11 +394,12 @@ static int match_extent_ref(struct extent_buffer *leaf, return ret == 0; } -static int lookup_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 bytenr, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, int del) +static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr, + u64 root_objectid, + u64 ref_generation, u64 owner, + u64 owner_offset, int del) { u64 hash; struct btrfs_key key; @@ -1116,8 +1118,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, return 0; } -static int finish_current_insert(struct btrfs_trans_handle *trans, struct - btrfs_root *extent_root) +static int finish_current_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root) { u64 start; u64 end; @@ -1360,11 +1362,13 @@ static u64 stripe_align(struct btrfs_root *root, u64 val) * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ -static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_bytes, u64 empty_size, - u64 search_start, u64 search_end, u64 hint_byte, - struct btrfs_key *ins, u64 exclude_start, - u64 exclude_nr, int data) +static int noinline find_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *orig_root, + u64 num_bytes, u64 empty_size, + u64 search_start, u64 search_end, + u64 hint_byte, struct btrfs_key *ins, + u64 exclude_start, u64 exclude_nr, + int data) { struct btrfs_path *path; struct btrfs_key key; @@ -1760,8 +1764,9 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } -static int drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct extent_buffer *leaf) +static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *leaf) { u64 leaf_owner; u64 leaf_generation; @@ -1802,8 +1807,8 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -static void reada_walk_down(struct btrfs_root *root, - struct extent_buffer *node) +static void noinline reada_walk_down(struct btrfs_root *root, + struct extent_buffer *node) { int i; u32 nritems; @@ -1835,8 +1840,9 @@ static void reada_walk_down(struct btrfs_root *root, * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. */ -static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int *level) +static int noinline walk_down_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level) { u64 root_owner; u64 root_gen; @@ -1955,8 +1961,9 @@ out: * to find the first node higher up where we haven't yet gone through * all the slots */ -static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int *level) +static int noinline walk_up_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level) { u64 root_owner; u64 root_gen; @@ -2100,7 +2107,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } -static int relocate_inode_pages(struct inode *inode, u64 start, u64 len) +static int noinline relocate_inode_pages(struct inode *inode, u64 start, + u64 len) { u64 page_start; u64 page_end; @@ -2169,7 +2177,7 @@ out_unlock: /* * note, this releases the path */ -static int relocate_one_reference(struct btrfs_root *extent_root, +static int noinline relocate_one_reference(struct btrfs_root *extent_root, struct btrfs_path *path, struct btrfs_key *extent_key) { @@ -2268,9 +2276,9 @@ out: return 0; } -static int relocate_one_extent(struct btrfs_root *extent_root, - struct btrfs_path *path, - struct btrfs_key *extent_key) +static int noinline relocate_one_extent(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key) { struct btrfs_key key; struct btrfs_key found_key; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 71dc2d33b6c6..c9ebd4fe8f7e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -80,7 +80,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) } } -static int insert_inline_extent(struct btrfs_trans_handle *trans, +static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 offset, size_t size, struct page **pages, size_t page_offset, @@ -221,7 +221,7 @@ fail: return err; } -static int dirty_and_release_pages(struct btrfs_trans_handle *trans, +static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, struct page **pages, @@ -653,14 +653,10 @@ out: /* * this gets pages into the page cache and locks them down */ -static int prepare_pages(struct btrfs_root *root, - struct file *file, - struct page **pages, - size_t num_pages, - loff_t pos, - unsigned long first_index, - unsigned long last_index, - size_t write_bytes) +static int prepare_pages(struct btrfs_root *root, struct file *file, + struct page **pages, size_t num_pages, + loff_t pos, unsigned long first_index, + unsigned long last_index, size_t write_bytes) { int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; From bd09835d9aad9c7b664cddc8435cc37b86077971 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 13:23:19 -0500 Subject: [PATCH 0355/2985] count_snapshots: Properly update the leaf pointer after btrfs_next_leaf Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4957cface9ab..b801eb4b3eef 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -693,7 +693,6 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, u64 root_objectid = root->root_key.objectid; u32 total_count = 0; u32 cur_count; - u32 refs; u32 nritems; int ret; struct btrfs_key key; @@ -729,8 +728,8 @@ again: } item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - refs = btrfs_extent_refs(l, item); while (1) { + l = path->nodes[0]; nritems = btrfs_header_nritems(l); if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(extent_root, path); @@ -741,6 +740,7 @@ again: btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); if (found_key.objectid != bytenr) break; + if (found_key.type != BTRFS_EXTENT_REF_KEY) { path->slots[0]++; continue; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8a2001bbf104..6e2d03626fb9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -208,7 +208,7 @@ again: } start = extent_end; - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + } else { goto not_found; } loop: From 5b92ee7204a5fb6542b204831202adbc1a7a851a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 13:46:11 -0500 Subject: [PATCH 0356/2985] Btrfs: Fix lock ordering of the snapshot semaphore against the page lock Signed-off-by: Chris Mason --- fs/btrfs/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c9ebd4fe8f7e..e862292bdfc6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -251,7 +251,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, num_bytes = (write_bytes + pos - start_pos + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); - down_read(&BTRFS_I(inode)->root->snap_sem); end_of_last_block = start_pos + num_bytes - 1; lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); @@ -356,7 +355,6 @@ out_unlock: mutex_unlock(&root->fs_info->fs_mutex); unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); free_extent_map(em); - up_read(&BTRFS_I(inode)->root->snap_sem); return err; } @@ -726,6 +724,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); + down_read(&BTRFS_I(inode)->root->snap_sem); + mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; @@ -804,6 +804,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } out: mutex_unlock(&inode->i_mutex); + up_read(&BTRFS_I(inode)->root->snap_sem); + out_nolock: kfree(pages); if (pinned[0]) From d548ee5182bda42c87bdd4e28b539d5e436e206a Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 3 Jan 2008 13:56:30 -0500 Subject: [PATCH 0357/2985] Btrfs: Add a helper that finds previous extent item Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 80 +++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b801eb4b3eef..cc89df4493ab 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,6 +33,28 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static int find_previous_extent(struct btrfs_root *root, + struct btrfs_path *path) +{ + struct btrfs_key found_key; + struct extent_buffer *leaf; + int ret; + + while(1) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret != 0) + return ret; + } else { + path->slots[0]--; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.type == BTRFS_EXTENT_ITEM_KEY) + return 0; + } + return 1; +} static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) @@ -65,16 +87,19 @@ static int cache_block_group(struct btrfs_root *root, first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; key.offset = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) return ret; - - if (ret && path->slots[0] > 0) - path->slots[0]--; - + ret = find_previous_extent(root, path); + if (ret < 0) + return ret; + if (ret == 0) { + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid + key.offset > first_free) + first_free = key.objectid + key.offset; + } while(1) { leaf = path->nodes[0]; slot = path->slots[0]; @@ -88,15 +113,10 @@ static int cache_block_group(struct btrfs_root *root, break; } } - btrfs_item_key_to_cpu(leaf, &key, slot); if (key.objectid < block_group->key.objectid) { - if (btrfs_key_type(&key) != BTRFS_EXTENT_REF_KEY && - key.objectid + key.offset > first_free) - first_free = key.objectid + key.offset; goto next; } - if (key.objectid >= block_group->key.objectid + block_group->key.offset) { break; @@ -162,11 +182,9 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return block_group; return NULL; } - static u64 noinline find_search_start(struct btrfs_root *root, struct btrfs_block_group_cache **cache_ret, - u64 search_start, int num, - int data, int full_scan) + u64 search_start, int num, int data) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; @@ -771,9 +789,7 @@ again: out: btrfs_free_path(path); return total_count; - } - int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner_objectid) { @@ -1422,7 +1438,7 @@ check_failed: orig_search_start); } search_start = find_search_start(root, &block_group, search_start, - total_needed, data, full_scan); + total_needed, data); search_start = stripe_align(root, search_start); cached_start = search_start; btrfs_init_path(path); @@ -1434,35 +1450,11 @@ check_failed: ret = btrfs_search_slot(trans, root, ins, path, 0, 0); if (ret < 0) goto error; - - if (path->slots[0] > 0) { - path->slots[0]--; - } - + ret = find_previous_extent(root, path); + if (ret < 0) + goto error; l = path->nodes[0]; btrfs_item_key_to_cpu(l, &key, path->slots[0]); - - /* - * walk backwards to find the first extent item key - */ - while(btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { - if (path->slots[0] == 0) { - ret = btrfs_prev_leaf(root, path); - if (ret != 0) { - ret = btrfs_search_slot(trans, root, ins, - path, 0, 0); - if (ret < 0) - goto error; - if (path->slots[0] > 0) - path->slots[0]--; - break; - } - } else { - path->slots[0]--; - } - l = path->nodes[0]; - btrfs_item_key_to_cpu(l, &key, path->slots[0]); - } while (1) { l = path->nodes[0]; slot = path->slots[0]; From 73e48b277a3fcd647f1936c71aea2f8d450ecc74 Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 3 Jan 2008 14:14:39 -0500 Subject: [PATCH 0358/2985] Btrfs: Properly handle overlapping extent in shrink_extent_tree The patch fixes the overlapping extent issue in shrink_extent_tree. It checks whether there is an overlapping extent by using find_previous_extent. If there is an overlapping extent, it setups key.objectid and cur_byte properly. --- Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 142 +++++++++++++++++------------------------ 1 file changed, 57 insertions(+), 85 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cc89df4493ab..426a0222e12f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2317,36 +2317,6 @@ out: return ret; } -static int find_overlapping_extent(struct btrfs_root *root, - struct btrfs_path *path, u64 new_size) -{ - struct btrfs_key found_key; - struct extent_buffer *leaf; - int ret; - - while(1) { - if (path->slots[0] == 0) { - ret = btrfs_prev_leaf(root, path); - if (ret == 1) { - return 1; - } - if (ret < 0) - return ret; - } else { - path->slots[0]--; - } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.type == BTRFS_EXTENT_ITEM_KEY) { - if (found_key.objectid + found_key.offset > new_size) - return 0; - else - return 1; - } - } - return 1; -} - int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) { struct btrfs_trans_handle *trans; @@ -2357,11 +2327,10 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) struct btrfs_fs_info *info = root->fs_info; struct extent_map_tree *block_group_cache; struct btrfs_key key; - struct btrfs_key found_key = { 0, 0, 0 }; + struct btrfs_key found_key; struct extent_buffer *leaf; u32 nritems; int ret; - int slot; btrfs_set_super_total_bytes(&info->super_copy, new_size); block_group_cache = &info->block_group_cache; @@ -2372,48 +2341,54 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) again: total_found = 0; key.objectid = new_size; - cur_byte = key.objectid; key.offset = 0; key.type = 0; - while(1) { + cur_byte = key.objectid; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + ret = find_previous_extent(root, path); + if (ret < 0) + goto out; + if (ret == 0) { + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid + found_key.offset > new_size) { + cur_byte = found_key.objectid; + key.objectid = cur_byte; + } + } + btrfs_release_path(root, path); + + while(1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; -next: + leaf = path->nodes[0]; - if (key.objectid == new_size - 1) { - ret = find_overlapping_extent(root, path, new_size); - if (ret != 0) { - btrfs_release_path(root, path); - ret = btrfs_search_slot(NULL, root, &key, - path, 0, 0); - if (ret < 0) - goto out; - } - } nritems = btrfs_header_nritems(leaf); - ret = 0; - slot = path->slots[0]; - if (slot < nritems) - btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (slot == nritems || - btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY) { - path->slots[0]++; - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - goto out; - if (ret == 1) { - ret = 0; - break; - } +next: + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret == 1) { + ret = 0; + break; } + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + } + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || + found_key.objectid + found_key.offset <= cur_byte) { + path->slots[0]++; goto next; } - btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (found_key.objectid + found_key.offset <= cur_byte) - continue; + total_found++; cur_byte = found_key.objectid + found_key.offset; key.objectid = cur_byte; @@ -2446,33 +2421,30 @@ next: ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) goto out; -bg_next: + leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); - ret = 0; - slot = path->slots[0]; - if (slot < nritems) - btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (slot == nritems || - btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) { - if (slot < nritems) { - printk("shrinker found key %Lu %u %Lu\n", - found_key.objectid, found_key.type, - found_key.offset); - path->slots[0]++; - } - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - break; - if (ret == 1) { - ret = 0; - break; - } +bg_next: + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + break; + if (ret == 1) { + ret = 0; + break; } + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + } + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) { + printk("shrinker found key %Lu %u %Lu\n", + found_key.objectid, found_key.type, + found_key.offset); + path->slots[0]++; goto bg_next; } - btrfs_item_key_to_cpu(leaf, &found_key, slot); ret = get_state_private(&info->block_group_cache, found_key.objectid, &ptr); if (!ret) From d666746207a01546e55bdaa4b721d1890faaf6e8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 14:51:00 -0500 Subject: [PATCH 0359/2985] Btrfs: Change st_blocksize to 4k Some programs (python) do rwm cycles at the granularity returned by stat. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- fs/btrfs/root-tree.c | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6e2d03626fb9..6d0cd9a7a615 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2701,7 +2701,7 @@ static int btrfs_getattr(struct vfsmount *mnt, { struct inode *inode = dentry->d_inode; generic_fillattr(inode, stat); - stat->blksize = 256 * 1024; + stat->blksize = PAGE_CACHE_SIZE; return 0; } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index e2b9b86af696..08f19ec88092 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -75,7 +75,14 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_search_slot(trans, root, key, path, 0, 1); if (ret < 0) goto out; - BUG_ON(ret != 0); + + if (ret != 0) { + btrfs_print_leaf(root, path->nodes[0]); + printk("unable to update root key %Lu %u %Lu\n", + key->objectid, key->type, key->offset); + BUG_ON(1); + } + l = path->nodes[0]; slot = path->slots[0]; ptr = btrfs_item_ptr_offset(l, slot); From 2552488345ea0933711010ee2c844792130a4252 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 15:44:57 -0500 Subject: [PATCH 0360/2985] Btrfs: Lower the max inline size to 8k Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e862292bdfc6..94c93373cb7d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -303,7 +303,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > 32768 || + inline_size > 8192 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; u64 existing_delalloc = 0; From 1372f8e609d260c2352bf74e125614eef7aee8ef Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 4 Jan 2008 09:34:54 -0500 Subject: [PATCH 0361/2985] Properly call btrfs_search_slot while shrinking The shrinking code used btrfs_next_leaf to find the next item, but this does not cow the blocks it touches. This fix calls search_slot after finding the next item to do appropriate cow and balancing. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 426a0222e12f..c390fd847a27 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2434,7 +2434,15 @@ bg_next: break; } leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + /* + * btrfs_next_leaf doesn't cow buffers, we have to + * do the search again + */ + memcpy(&key, &found_key, sizeof(key)); + btrfs_release_path(root, path); + continue; } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); From 725c8463ea9bcfc2b56cff09b93a90e5bcca59c4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 4 Jan 2008 16:47:16 -0500 Subject: [PATCH 0362/2985] Btrfs: resizer: don't hold the fs_mutex for long periods of time Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c390fd847a27..c906bb19b211 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2331,6 +2331,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) struct extent_buffer *leaf; u32 nritems; int ret; + int progress = 0; btrfs_set_super_total_bytes(&info->super_copy, new_size); block_group_cache = &info->block_group_cache; @@ -2383,6 +2384,19 @@ next: } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (progress && need_resched()) { + memcpy(&key, &found_key, sizeof(key)); + mutex_unlock(&root->fs_info->fs_mutex); + cond_resched(); + mutex_lock(&root->fs_info->fs_mutex); + btrfs_release_path(root, path); + btrfs_search_slot(NULL, root, &key, path, 0, 0); + progress = 0; + goto next; + } + progress = 1; + if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || found_key.objectid + found_key.offset <= cur_byte) { path->slots[0]++; @@ -2442,7 +2456,7 @@ bg_next: */ memcpy(&key, &found_key, sizeof(key)); btrfs_release_path(root, path); - continue; + goto resched_check; } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); @@ -2465,6 +2479,12 @@ bg_next: key.objectid = found_key.objectid + 1; btrfs_del_item(trans, root, path); btrfs_release_path(root, path); +resched_check: + if (need_resched()) { + mutex_unlock(&root->fs_info->fs_mutex); + cond_resched(); + mutex_lock(&root->fs_info->fs_mutex); + } } clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, GFP_NOFS); From e4204dedbbaa3a614605cb83cc0ac5161af6b4e6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:27 -0500 Subject: [PATCH 0363/2985] Btrfs: Change tree block csum tagging to avoid false error messages Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 67d9fd728868..a6170ff19e7f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -145,10 +145,25 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, btrfs_csum_final(crc, result); if (verify) { - if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { - printk("btrfs: %s checksum verify failed on %llu\n", + int from_this_trans = 0; + + if (root->fs_info->running_transaction && + btrfs_header_generation(buf) == + root->fs_info->running_transaction->transid) + from_this_trans = 1; + + /* FIXME, this is not good */ + if (from_this_trans == 0 && + memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { + u32 val; + u32 found = 0; + memcpy(&found, result, BTRFS_CRC32_SIZE); + + read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE); + printk("btrfs: %s checksum verify failed on %llu " + "wanted %X found %X from_this_trans %d\n", root->fs_info->sb->s_id, - buf->start); + buf->start, val, found, from_this_trans); return 1; } } else { @@ -313,6 +328,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; struct extent_map_tree *extent_tree; + u64 end; int ret; extent_tree = &BTRFS_I(btree_inode)->extent_tree; @@ -322,19 +338,29 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 0, 1); - if (buf->flags & EXTENT_CSUM) { + + if (buf->flags & EXTENT_CSUM) return buf; - } - if (test_range_bit(extent_tree, buf->start, buf->start + buf->len - 1, - EXTENT_CSUM, 1)) { + + end = buf->start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; return buf; } + + lock_extent(extent_tree, buf->start, end, GFP_NOFS); + + if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + buf->flags |= EXTENT_CSUM; + goto out_unlock; + } + ret = csum_tree_block(root, buf, 1); - set_extent_bits(extent_tree, buf->start, - buf->start + buf->len - 1, - EXTENT_CSUM, GFP_NOFS); + set_extent_bits(extent_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; + +out_unlock: + unlock_extent(extent_tree, buf->start, end, GFP_NOFS); return buf; } From dc17ff8f11d129db9e83ab7244769e4eae05e14d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:30 -0500 Subject: [PATCH 0364/2985] Btrfs: Add data=ordered support This forces file data extents down the disk along with the metadata that references them. The current implementation is fairly simple, and just writes out all of the dirty pages in an inode before the commit. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/ctree.c | 6 +- fs/btrfs/ctree.h | 7 +- fs/btrfs/disk-io.c | 16 ++- fs/btrfs/disk-io.h | 2 + fs/btrfs/extent-tree.c | 4 +- fs/btrfs/file.c | 5 +- fs/btrfs/inode.c | 46 ++++++--- fs/btrfs/ordered-data.c | 221 ++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ordered-data.h | 39 +++++++ fs/btrfs/transaction.c | 58 +++++++++++ fs/btrfs/transaction.h | 8 +- 13 files changed, 387 insertions(+), 28 deletions(-) create mode 100644 fs/btrfs/ordered-data.c create mode 100644 fs/btrfs/ordered-data.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d5804c5ca075..ab9a9f8edbf8 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,7 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o sysfs.o struct-funcs.o xattr.o acl.o + extent_map.o sysfs.o struct-funcs.o xattr.o acl.o ordered-data.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index d1d5af471c3c..f27e633f1742 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -29,6 +29,7 @@ struct btrfs_inode { struct extent_map_tree extent_tree; struct inode vfs_inode; + u64 ordered_trans; /* * transid of the trans_handle that last modified this inode */ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 35c57074a376..43d23148a4fe 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -221,7 +221,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer **cow_ret) { u64 search_start; + u64 header_trans; int ret; + if (trans->transaction != root->fs_info->running_transaction) { printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, root->fs_info->running_transaction->transid); @@ -232,7 +234,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, root->fs_info->generation); WARN_ON(1); } - if (btrfs_header_generation(buf) == trans->transid) { + + header_trans = btrfs_header_generation(buf); + if (header_trans == trans->transid) { *cow_ret = buf; return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9873975ce0ee..b55dba58dfaa 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -16,8 +16,8 @@ * Boston, MA 021110-1307, USA. */ -#ifndef __BTRFS__ -#define __BTRFS__ +#ifndef __BTRFS_CTREE__ +#define __BTRFS_CTREE__ #include #include @@ -363,7 +363,6 @@ struct btrfs_root { struct inode *inode; struct kobject root_kobj; struct completion kobj_unregister; - struct rw_semaphore snap_sem; u64 objectid; u64 last_trans; @@ -1142,6 +1141,8 @@ void btrfs_destroy_cachep(void); long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); +struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, + u64 root_objectid); int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to); struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a6170ff19e7f..34cf1f1f47be 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -406,7 +406,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); init_completion(&root->kobj_unregister); - init_rwsem(&root->snap_sem); root->defrag_running = 0; root->defrag_level = 0; root->root_key.objectid = objectid; @@ -498,6 +497,21 @@ insert: return root; } +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_objectid) +{ + struct btrfs_root *root; + + if (root_objectid == BTRFS_ROOT_TREE_OBJECTID) + return fs_info->tree_root; + if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) + return fs_info->extent_root; + + root = radix_tree_lookup(&fs_info->fs_roots_radix, + (unsigned long)root_objectid); + return root; +} + struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, struct btrfs_key *location) { diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 8c3cfd02901f..dae9fba8efcd 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -34,6 +34,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_objectid); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c906bb19b211..68137cd8506a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1195,7 +1195,9 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, if (btrfs_buffer_uptodate(buf)) { u64 transid = root->fs_info->running_transaction->transid; - if (btrfs_header_generation(buf) == transid) { + u64 header_transid = + btrfs_header_generation(buf); + if (header_transid == transid) { free_extent_buffer(buf); return 1; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 94c93373cb7d..0a5f4defe59b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -34,6 +34,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "ordered-data.h" #include "ioctl.h" #include "print-tree.h" @@ -329,6 +330,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, root->fs_info->delalloc_bytes += (end_of_last_block + 1 - start_pos) - existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); + btrfs_add_ordered_inode(inode); } else { u64 aligned_end; /* step one, delete the existing extents in this range */ @@ -724,8 +726,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); - down_read(&BTRFS_I(inode)->root->snap_sem); - mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; @@ -804,7 +804,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } out: mutex_unlock(&inode->i_mutex); - up_read(&BTRFS_I(inode)->root->snap_sem); out_nolock: kfree(pages); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6d0cd9a7a615..6d6e1ac0a9a0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -135,6 +135,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } + btrfs_add_ordered_inode(inode); out: btrfs_end_transaction(trans, root); return ret; @@ -367,8 +368,8 @@ void btrfs_read_locked_inode(struct inode *inode) path = btrfs_alloc_path(); BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); - memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); + ret = btrfs_lookup_inode(NULL, root, path, &location, 0); if (ret) goto make_bad; @@ -898,7 +899,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if ((offset & (blocksize - 1)) == 0) goto out; - down_read(&root->snap_sem); ret = -ENOMEM; page = grab_cache_page(mapping, index); if (!page) @@ -917,7 +917,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) unlock_page(page); page_cache_release(page); - up_read(&BTRFS_I(inode)->root->snap_sem); out: return ret; } @@ -1146,6 +1145,19 @@ static int btrfs_find_actor(struct inode *inode, void *opaque) args->root == BTRFS_I(inode)->root); } +struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, + u64 root_objectid) +{ + struct btrfs_iget_args args; + args.ino = objectid; + args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid); + + if (!args.root) + return NULL; + + return ilookup5(s, objectid, btrfs_find_actor, (void *)&args); +} + struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root) { @@ -1336,7 +1348,6 @@ read_dir_items: d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; btrfs_dir_item_key_to_cpu(leaf, di, &location); - over = filldir(dirent, name_ptr, name_len, found_key.offset, location.objectid, @@ -2054,7 +2065,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) ret = -EINVAL; - down_read(&BTRFS_I(inode)->root->snap_sem); lock_page(page); wait_on_page_writeback(page); size = i_size_read(inode); @@ -2075,7 +2085,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) ret = btrfs_cow_one_page(inode, page, end); out_unlock: - up_read(&BTRFS_I(inode)->root->snap_sem); unlock_page(page); out: return ret; @@ -2118,7 +2127,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name, struct btrfs_root_item root_item; struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; - struct btrfs_root *new_root; + struct btrfs_root *new_root = root; struct inode *inode; struct inode *dir; int ret; @@ -2230,7 +2239,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name, goto fail; fail: nr = trans->blocks_used; - err = btrfs_commit_transaction(trans, root); + err = btrfs_commit_transaction(trans, new_root); if (err && !ret) ret = err; fail_commit: @@ -2253,10 +2262,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (!root->ref_cows) return -EINVAL; - down_write(&root->snap_sem); - freeze_bdev(root->fs_info->sb->s_bdev); - thaw_bdev(root->fs_info->sb->s_bdev, root->fs_info->sb); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 0); if (ret) @@ -2264,6 +2269,9 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); + err = btrfs_commit_transaction(trans, root); + + trans = btrfs_start_transaction(root, 1); ret = btrfs_update_inode(trans, root, root->inode); if (ret) @@ -2272,9 +2280,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); if (ret) - goto fail; - - memcpy(&new_root_item, &root->root_item, + goto fail; memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); key.objectid = objectid; @@ -2285,12 +2291,20 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); free_extent_buffer(tmp); + /* write the ordered inodes to force all delayed allocations to + * be filled. Once this is done, we can copy the root + */ + mutex_lock(&root->fs_info->trans_mutex); + btrfs_write_ordered_inodes(trans, root); + mutex_unlock(&root->fs_info->trans_mutex); + btrfs_copy_root(trans, root, root->node, &tmp, objectid); btrfs_set_root_bytenr(&new_root_item, tmp->start); btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); +printk("new root %Lu node %Lu\n", objectid, tmp->start); free_extent_buffer(tmp); if (ret) goto fail; @@ -2321,7 +2335,6 @@ fail: ret = err; fail_unlock: mutex_unlock(&root->fs_info->fs_mutex); - up_write(&root->snap_sem); btrfs_btree_balance_dirty(root, nr); return ret; } @@ -2608,6 +2621,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->last_trans = 0; + ei->ordered_trans = 0; return &ei->vfs_inode; } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c new file mode 100644 index 000000000000..411aba84d305 --- /dev/null +++ b/fs/btrfs/ordered-data.c @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include +#include "ctree.h" +#include "transaction.h" +#include "btrfs_inode.h" + +struct tree_entry { + u64 root_objectid; + u64 objectid; + struct rb_node rb_node; +}; + +/* + * returns > 0 if entry passed (root, objectid) is > entry, + * < 0 if (root, objectid) < entry and zero if they are equal + */ +static int comp_entry(struct tree_entry *entry, u64 root_objectid, + u64 objectid) +{ + if (root_objectid < entry->root_objectid) + return -1; + if (root_objectid > entry->root_objectid) + return 1; + if (objectid < entry->objectid) + return -1; + if (objectid > entry->objectid) + return 1; + return 0; +} + +static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, + u64 objectid, struct rb_node *node) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct tree_entry *entry; + int comp; + + while(*p) { + parent = *p; + entry = rb_entry(parent, struct tree_entry, rb_node); + + comp = comp_entry(entry, root_objectid, objectid); + if (comp < 0) + p = &(*p)->rb_left; + else if (comp > 0) + p = &(*p)->rb_right; + else + return parent; + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, + u64 objectid, struct rb_node **prev_ret) +{ + struct rb_node * n = root->rb_node; + struct rb_node *prev = NULL; + struct tree_entry *entry; + struct tree_entry *prev_entry = NULL; + int comp; + + while(n) { + entry = rb_entry(n, struct tree_entry, rb_node); + prev = n; + prev_entry = entry; + comp = comp_entry(entry, root_objectid, objectid); + + if (comp < 0) + n = n->rb_left; + else if (comp > 0) + n = n->rb_right; + else + return n; + } + if (!prev_ret) + return NULL; + + while(prev && comp_entry(prev_entry, root_objectid, objectid) >= 0) { + prev = rb_next(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *prev_ret = prev; + return NULL; +} + +static inline struct rb_node *tree_search(struct rb_root *root, + u64 root_objectid, u64 objectid) +{ + struct rb_node *prev; + struct rb_node *ret; + ret = __tree_search(root, root_objectid, objectid, &prev); + if (!ret) + return prev; + return ret; +} + +int btrfs_add_ordered_inode(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 root_objectid = root->root_key.objectid; + u64 transid = root->fs_info->running_transaction->transid; + struct tree_entry *entry; + struct rb_node *node; + struct btrfs_ordered_inode_tree *tree; + + if (transid <= BTRFS_I(inode)->ordered_trans) + return 0; + + tree = &root->fs_info->running_transaction->ordered_inode_tree; + + read_lock(&tree->lock); + node = __tree_search(&tree->tree, root_objectid, inode->i_ino, NULL); + read_unlock(&tree->lock); + if (node) { + return 0; + } + + entry = kmalloc(sizeof(*entry), GFP_NOFS); + if (!entry) + return -ENOMEM; + + write_lock(&tree->lock); + entry->objectid = inode->i_ino; + entry->root_objectid = root_objectid; + + node = tree_insert(&tree->tree, root_objectid, + inode->i_ino, &entry->rb_node); + + BTRFS_I(inode)->ordered_trans = transid; + + write_unlock(&tree->lock); + if (node) + kfree(entry); + return 0; +} + +int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, + u64 *root_objectid, u64 *objectid) +{ + struct tree_entry *entry; + struct rb_node *node; + + write_lock(&tree->lock); + node = tree_search(&tree->tree, *root_objectid, *objectid); + if (!node) { + write_unlock(&tree->lock); + return 0; + } + entry = rb_entry(node, struct tree_entry, rb_node); + + while(comp_entry(entry, *root_objectid, *objectid) >= 0) { + node = rb_next(node); + if (!node) + break; + entry = rb_entry(node, struct tree_entry, rb_node); + } + if (!node) { + write_unlock(&tree->lock); + return 0; + } + + *root_objectid = entry->root_objectid; + *objectid = entry->objectid; + write_unlock(&tree->lock); + return 1; +} + +int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, + u64 *root_objectid, u64 *objectid) +{ + struct tree_entry *entry; + struct rb_node *node; + + write_lock(&tree->lock); + node = tree_search(&tree->tree, *root_objectid, *objectid); + if (!node) { + write_unlock(&tree->lock); + return 0; + } + + entry = rb_entry(node, struct tree_entry, rb_node); + while(comp_entry(entry, *root_objectid, *objectid) >= 0) { + node = rb_next(node); + if (!node) + break; + entry = rb_entry(node, struct tree_entry, rb_node); + } + if (!node) { + write_unlock(&tree->lock); + return 0; + } + + *root_objectid = entry->root_objectid; + *objectid = entry->objectid; + rb_erase(node, &tree->tree); + write_unlock(&tree->lock); + kfree(entry); + return 1; +} diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h new file mode 100644 index 000000000000..aaf9eb142719 --- /dev/null +++ b/fs/btrfs/ordered-data.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_ORDERED_DATA__ +#define __BTRFS_ORDERED_DATA__ + +struct btrfs_ordered_inode_tree { + rwlock_t lock; + struct rb_root tree; +}; + +static inline void +btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) +{ + rwlock_init(&t->lock); + t->tree.rb_node = NULL; +} + +int btrfs_add_ordered_inode(struct inode *inode); +int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, + u64 *root_objectid, u64 *objectid); +int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, + u64 *root_objectid, u64 *objectid); +#endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 02721eea9a7a..3ed5868e7c0f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -67,6 +67,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); + btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); extent_map_tree_init(&cur_trans->dirty_pages, root->fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -473,6 +474,60 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, return ret; } +int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_transaction *cur_trans = trans->transaction; + struct inode *inode; + u64 root_objectid = 0; + u64 objectid = 0; + u64 transid = trans->transid; + int ret; + +printk("write ordered trans %Lu\n", transid); + while(1) { + ret = btrfs_find_first_ordered_inode( + &cur_trans->ordered_inode_tree, + &root_objectid, &objectid); + if (!ret) + break; + + mutex_unlock(&root->fs_info->trans_mutex); + mutex_unlock(&root->fs_info->fs_mutex); + inode = btrfs_ilookup(root->fs_info->sb, objectid, + root_objectid); + if (inode) { + if (S_ISREG(inode->i_mode)) + filemap_fdatawrite(inode->i_mapping); + iput(inode); + } + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->trans_mutex); + } + while(1) { + root_objectid = 0; + objectid = 0; + ret = btrfs_find_del_first_ordered_inode( + &cur_trans->ordered_inode_tree, + &root_objectid, &objectid); + if (!ret) + break; + mutex_unlock(&root->fs_info->trans_mutex); + mutex_unlock(&root->fs_info->fs_mutex); + inode = btrfs_ilookup(root->fs_info->sb, objectid, + root_objectid); + if (inode) { + if (S_ISREG(inode->i_mode)) + filemap_write_and_wait(inode->i_mapping); + iput(inode); + } + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->trans_mutex); + } +printk("done write ordered trans %Lu\n", transid); + return 0; +} + int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -550,10 +605,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); + ret = btrfs_write_ordered_inodes(trans, root); + } while (cur_trans->num_writers > 1 || (cur_trans->num_joined != joined)); WARN_ON(cur_trans != trans->transaction); + ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); BUG_ON(ret); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index eef840bca91e..c157ddbe9d1e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -16,9 +16,10 @@ * Boston, MA 021110-1307, USA. */ -#ifndef __TRANSACTION__ -#define __TRANSACTION__ +#ifndef __BTRFS_TRANSACTION__ +#define __BTRFS_TRANSACTION__ #include "btrfs_inode.h" +#include "ordered-data.h" struct btrfs_transaction { u64 transid; @@ -30,6 +31,7 @@ struct btrfs_transaction { struct list_head list; struct extent_map_tree dirty_pages; unsigned long start_time; + struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; @@ -90,4 +92,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif From 3063d29f2a4d4a4e9fa1ec77c124514f287c6da7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:30 -0500 Subject: [PATCH 0365/2985] Btrfs: Move snapshot creation to commit time It is very difficult to create a consistent snapshot of the btree when other writers may update the btree before the commit is done. This changes the snapshot creation to happen during the commit, while no other updates are possible. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 79 ++++++++-------------------------------- fs/btrfs/transaction.c | 81 ++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/transaction.h | 7 ++++ 3 files changed, 100 insertions(+), 67 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6d6e1ac0a9a0..10cece11dbd8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2250,13 +2250,10 @@ fail_commit: static int create_snapshot(struct btrfs_root *root, char *name, int namelen) { + struct btrfs_pending_snapshot *pending_snapshot; struct btrfs_trans_handle *trans; - struct btrfs_key key; - struct btrfs_root_item new_root_item; - struct extent_buffer *tmp; int ret; int err; - u64 objectid; unsigned long nr = 0; if (!root->ref_cows) @@ -2267,72 +2264,26 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail_unlock; + pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS); + if (!pending_snapshot) { + ret = -ENOMEM; + goto fail_unlock; + } + pending_snapshot->name = kstrndup(name, namelen, GFP_NOFS); + if (!pending_snapshot->name) { + ret = -ENOMEM; + kfree(pending_snapshot); + goto fail_unlock; + } trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - err = btrfs_commit_transaction(trans, root); - - trans = btrfs_start_transaction(root, 1); + pending_snapshot->root = root; + list_add(&pending_snapshot->list, + &trans->transaction->pending_snapshots); ret = btrfs_update_inode(trans, root, root->inode); - if (ret) - goto fail; - - ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, - 0, &objectid); - if (ret) - goto fail; memcpy(&new_root_item, &root->root_item, - sizeof(new_root_item)); - - key.objectid = objectid; - key.offset = 1; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - - extent_buffer_get(root->node); - btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - free_extent_buffer(tmp); - - /* write the ordered inodes to force all delayed allocations to - * be filled. Once this is done, we can copy the root - */ - mutex_lock(&root->fs_info->trans_mutex); - btrfs_write_ordered_inodes(trans, root); - mutex_unlock(&root->fs_info->trans_mutex); - - btrfs_copy_root(trans, root, root->node, &tmp, objectid); - - btrfs_set_root_bytenr(&new_root_item, tmp->start); - btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, - &new_root_item); -printk("new root %Lu node %Lu\n", objectid, tmp->start); - free_extent_buffer(tmp); - if (ret) - goto fail; - - /* - * insert the directory item - */ - key.offset = (u64)-1; - ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, - root->fs_info->sb->s_root->d_inode->i_ino, - &key, BTRFS_FT_DIR); - - if (ret) - goto fail; - - ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, - name, namelen, objectid, - root->fs_info->sb->s_root->d_inode->i_ino); - - if (ret) - goto fail; -fail: - nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); - if (err && !ret) - ret = err; fail_unlock: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3ed5868e7c0f..dc9865323e38 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -66,6 +66,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->use_count = 1; cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); + INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); extent_map_tree_init(&cur_trans->dirty_pages, @@ -481,10 +482,8 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, struct inode *inode; u64 root_objectid = 0; u64 objectid = 0; - u64 transid = trans->transid; int ret; -printk("write ordered trans %Lu\n", transid); while(1) { ret = btrfs_find_first_ordered_inode( &cur_trans->ordered_inode_tree, @@ -524,7 +523,80 @@ printk("write ordered trans %Lu\n", transid); mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); } -printk("done write ordered trans %Lu\n", transid); + return 0; +} + +static int create_pending_snapshot(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_pending_snapshot *pending) +{ + struct btrfs_key key; + struct btrfs_root_item new_root_item; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *root = pending->root; + struct extent_buffer *tmp; + int ret; + u64 objectid; + + ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); + if (ret) + goto fail; + + memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); + + key.objectid = objectid; + key.offset = 1; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + + extent_buffer_get(root->node); + btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); + free_extent_buffer(tmp); + + btrfs_copy_root(trans, root, root->node, &tmp, objectid); + + btrfs_set_root_bytenr(&new_root_item, tmp->start); + btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, + &new_root_item); + free_extent_buffer(tmp); + if (ret) + goto fail; + + /* + * insert the directory item + */ + key.offset = (u64)-1; + ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, + pending->name, strlen(pending->name), + root->fs_info->sb->s_root->d_inode->i_ino, + &key, BTRFS_FT_DIR); + + if (ret) + goto fail; + + ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, + pending->name, strlen(pending->name), objectid, + root->fs_info->sb->s_root->d_inode->i_ino); +fail: + return ret; +} + +static int create_pending_snapshots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_pending_snapshot *pending; + struct list_head *head = &trans->transaction->pending_snapshots; + int ret; + + while(!list_empty(head)) { + pending = list_entry(head->next, + struct btrfs_pending_snapshot, list); + ret = create_pending_snapshot(trans, fs_info, pending); + BUG_ON(ret); + list_del(&pending->list); + kfree(pending->name); + kfree(pending); + } return 0; } @@ -610,6 +682,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } while (cur_trans->num_writers > 1 || (cur_trans->num_joined != joined)); + ret = create_pending_snapshots(trans, root->fs_info); + BUG_ON(ret); + WARN_ON(cur_trans != trans->transaction); ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index c157ddbe9d1e..fd52e9b23922 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -34,6 +34,7 @@ struct btrfs_transaction { struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; + struct list_head pending_snapshots; }; struct btrfs_trans_handle { @@ -46,6 +47,12 @@ struct btrfs_trans_handle { u64 alloc_exclude_nr; }; +struct btrfs_pending_snapshot { + struct btrfs_root *root; + char *name; + struct list_head list; +}; + static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, struct inode *inode) From e2008b61401ecb467a8ce1788fcd2116ae1cfbc1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:30 -0500 Subject: [PATCH 0366/2985] Btrfs: Add some simple throttling to wait for data=ordered and snapshot deletion Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 7 +++++++ fs/btrfs/disk-io.h | 1 + fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 12 ++++++++++++ fs/btrfs/transaction.c | 4 ++++ 6 files changed, 26 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b55dba58dfaa..f0fb1978553c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -345,6 +345,7 @@ struct btrfs_fs_info { struct completion kobj_unregister; int do_barriers; int closing; + unsigned long throttles; u64 total_pinned; spinlock_t delalloc_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 34cf1f1f47be..e0940a39ff07 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -631,6 +631,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->sb = sb; + fs_info->throttles = 0; fs_info->mount_opt = 0; fs_info->max_extent = (u64)-1; fs_info->delalloc_bytes = 0; @@ -889,6 +890,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); } +void btrfs_throttle(struct btrfs_root *root) +{ + if (root->fs_info->throttles) + congestion_wait(WRITE, HZ/10); +} + void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { balance_dirty_pages_ratelimited_nr( diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index dae9fba8efcd..828f3a2081b6 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -64,4 +64,5 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf); int btrfs_read_buffer(struct extent_buffer *buf); u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); +void btrfs_throttle(struct btrfs_root *root); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0a5f4defe59b..897242e87fa7 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -800,6 +800,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) btrfs_btree_balance_dirty(root, 1); + btrfs_throttle(root); cond_resched(); } out: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 10cece11dbd8..2cb2dd32407e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -610,6 +610,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) fail: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); return ret; } @@ -644,6 +645,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) fail: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); if (ret && !err) err = ret; @@ -1010,6 +1012,7 @@ void btrfs_delete_inode(struct inode *inode) btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); return; no_delete_lock: @@ -1017,6 +1020,7 @@ no_delete_lock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); no_delete: clear_inode(inode); } @@ -1574,6 +1578,7 @@ fail: iput(inode); } btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); return err; } @@ -1633,6 +1638,7 @@ fail: iput(inode); } btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); return err; } @@ -1684,6 +1690,7 @@ fail: iput(inode); } btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); return err; } @@ -1752,6 +1759,7 @@ out_unlock: if (drop_on_err) iput(inode); btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); return err; } @@ -2117,6 +2125,7 @@ static void btrfs_truncate(struct inode *inode) BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); } static int noinline create_subvol(struct btrfs_root *root, char *name, @@ -2245,6 +2254,7 @@ fail: fail_commit: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); return ret; } @@ -2287,6 +2297,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) fail_unlock: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); return ret; } @@ -2827,6 +2838,7 @@ out_fail: iput(inode); } btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); return err; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index dc9865323e38..614903f5c884 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -424,6 +424,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, num_bytes = btrfs_root_used(&dirty->root->root_item); root = dirty->latest_root; + root->fs_info->throttles++; while(1) { trans = btrfs_start_transaction(tree_root, 1); @@ -447,6 +448,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, mutex_lock(&tree_root->fs_info->fs_mutex); } BUG_ON(ret); + root->fs_info->throttles--; num_bytes -= btrfs_root_used(&dirty->root->root_item); bytes_used = btrfs_root_used(&root->root_item); @@ -484,6 +486,7 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, u64 objectid = 0; int ret; + root->fs_info->throttles++; while(1) { ret = btrfs_find_first_ordered_inode( &cur_trans->ordered_inode_tree, @@ -523,6 +526,7 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); } + root->fs_info->throttles--; return 0; } From b0331a4c4c339ba7786472b137d6ece9e7f810ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:31 -0500 Subject: [PATCH 0367/2985] Btrfs: Disable btree reada during extent backref lookups. This reada is generally not effective. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 68137cd8506a..660b05a4baf5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -630,6 +630,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; + path->reada = 0; key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_bytes; @@ -649,6 +650,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_release_path(root->fs_info->extent_root, path); + path->reada = 0; ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, path, bytenr, root_objectid, ref_generation, owner, owner_offset); @@ -680,6 +682,7 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); + path->reada = 0; key.objectid = bytenr; key.offset = num_bytes; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -1240,6 +1243,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; + path->reada = 0; ret = lookup_extent_backref(trans, extent_root, path, bytenr, root_objectid, ref_generation, From c31f8830f0dfd31e7e196b85ca1b39aef8b77d57 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:31 -0500 Subject: [PATCH 0368/2985] Btrfs: online shrinking fixes While shrinking the FS, the allocation functions need to make sure they don't try to allocate bytes past the end of the FS. nodatacow needed an extra check to force cows when the existing extents are past the end of the FS. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 32 +++++++++++++++++++++++--------- fs/btrfs/inode.c | 15 +++++++++++++-- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 660b05a4baf5..99a8b0f0d318 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -192,11 +192,13 @@ static u64 noinline find_search_start(struct btrfs_root *root, u64 start = 0; u64 end = 0; u64 cache_miss = 0; + u64 total_fs_bytes; int wrapped = 0; if (!cache) { goto out; } + total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); again: ret = cache_block_group(root, cache); if (ret) @@ -223,6 +225,8 @@ again: if (data != BTRFS_BLOCK_GROUP_MIXED && start + num > cache->key.objectid + cache->key.offset) goto new_group; + if (start + num > total_fs_bytes) + goto new_group; return start; } out: @@ -239,7 +243,7 @@ new_group: last = cache->key.objectid + cache->key.offset; wrapped: cache = btrfs_lookup_block_group(root->fs_info, last); - if (!cache) { + if (!cache || cache->key.objectid >= total_fs_bytes) { no_cache: if (!wrapped) { wrapped = 1; @@ -287,6 +291,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, u64 end; u64 free_check; u64 ptr; + u64 total_fs_bytes; int bit; int ret; int full_search = 0; @@ -294,6 +299,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data_swap = 0; block_group_cache = &info->block_group_cache; + total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); if (!owner) factor = 8; @@ -306,7 +312,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, else bit = BLOCK_GROUP_METADATA; - if (search_start) { + if (search_start && search_start < total_fs_bytes) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_block_group(info, search_start); if (shint && (shint->data == data || @@ -318,8 +324,8 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } } } - if (hint && (hint->data == data || - hint->data == BTRFS_BLOCK_GROUP_MIXED)) { + if (hint && hint->key.objectid < total_fs_bytes && + (hint->data == data || hint->data == BTRFS_BLOCK_GROUP_MIXED)) { used = btrfs_block_group_used(&hint->item); if (used + hint->pinned < div_factor(hint->key.offset, factor)) { @@ -333,6 +339,8 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, else hint_last = search_start; + if (hint_last >= total_fs_bytes) + hint_last = search_start; last = hint_last; } again: @@ -350,6 +358,9 @@ again: last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); + if (cache->key.objectid > total_fs_bytes) + break; + if (full_search) free_check = cache->key.offset; else @@ -1420,8 +1431,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_MIXED; } - if (search_end == (u64)-1) - search_end = btrfs_super_total_bytes(&info->super_copy); + search_end = min(search_end, + btrfs_super_total_bytes(&info->super_copy)); if (hint_byte) { block_group = btrfs_lookup_block_group(info, hint_byte); if (!block_group) @@ -1617,7 +1628,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, { int ret; int pending_ret; - u64 super_used, root_used; + u64 super_used; + u64 root_used; u64 search_start = 0; u64 new_hint; struct btrfs_fs_info *info = root->fs_info; @@ -1636,6 +1648,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); +if (ret) +printk("find free extent returns %d\n", ret); BUG_ON(ret); if (ret) return ret; @@ -2292,8 +2306,6 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, while(1) { ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); - BUG_ON(ret == 0); - if (ret < 0) goto out; @@ -2340,6 +2352,8 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) int progress = 0; btrfs_set_super_total_bytes(&info->super_copy, new_size); + clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, + GFP_NOFS); block_group_cache = &info->block_group_cache; path = btrfs_alloc_path(); root = root->fs_info->extent_root; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2cb2dd32407e..281757007ddb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -148,6 +148,7 @@ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) u64 bytenr; u64 cow_end; u64 loops = 0; + u64 total_fs_bytes; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_buffer *leaf; int found_type; @@ -157,6 +158,7 @@ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) int err; struct btrfs_key found_key; + total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); path = btrfs_alloc_path(); BUG_ON(!path); again: @@ -189,8 +191,10 @@ again: found_type = btrfs_file_extent_type(leaf, item); extent_start = found_key.offset; if (found_type == BTRFS_FILE_EXTENT_REG) { - extent_end = extent_start + - btrfs_file_extent_num_bytes(leaf, item); + u64 extent_num_bytes; + + extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item); + extent_end = extent_start + extent_num_bytes; err = 0; if (loops && start != extent_start) @@ -204,6 +208,13 @@ again: if (bytenr == 0) goto not_found; + /* + * we may be called by the resizer, make sure we're inside + * the limits of the FS + */ + if (bytenr + extent_num_bytes > total_fs_bytes) + goto not_found; + if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) { goto not_found; } From b98b6767a0f6f9baa1d00eff64a43d58412ecdeb Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 8 Jan 2008 15:54:37 -0500 Subject: [PATCH 0369/2985] Btrfs: Add inode flags support This patch adds NODATASUM & NODATACOW inode flags support. Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/ctree.h | 11 +++++++++++ fs/btrfs/inode.c | 24 +++++++++++------------- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index f27e633f1742..9fc99883c70a 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -34,6 +34,7 @@ struct btrfs_inode { * transid of the trans_handle that last modified this inode */ u64 last_trans; + u32 flags; }; static inline struct btrfs_inode *BTRFS_I(struct inode *inode) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f0fb1978553c..7eda51542d17 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -447,7 +447,18 @@ struct btrfs_root { #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) #define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ BTRFS_MOUNT_##opt) +/* + * Inode flags + */ +#define BTRFS_INODE_NODATASUM 0x1 +#define BTRFS_INODE_NODATACOW 0x2 +#define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \ + ~BTRFS_INODE_##flag) +#define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \ + BTRFS_INODE_##flag) +#define btrfs_test_flag(inode, flag) (BTRFS_I(inode)->flags & \ + BTRFS_INODE_##flag) /* some macros to generate set/get funcs for the struct fields. This * assumes there is a lefoo_to_cpu for every type, so lets make a simple * one for u8: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 281757007ddb..fba0478a95f4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -243,9 +243,9 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) struct btrfs_root *root = BTRFS_I(inode)->root; u64 num_bytes; int ret; - mutex_lock(&root->fs_info->fs_mutex); - if (btrfs_test_opt(root, NODATACOW)) + if (btrfs_test_opt(root, NODATACOW) || + btrfs_test_flag(inode, NODATACOW)) ret = run_delalloc_nocow(inode, start, end); else ret = cow_file_range(inode, start, end); @@ -273,10 +273,9 @@ int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) int ret = 0; u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; size_t offset = start - page_start; - - if (btrfs_test_opt(root, NODATASUM)) + if (btrfs_test_opt(root, NODATASUM) || + btrfs_test_flag(inode, NODATASUM)) return 0; - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -299,10 +298,9 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) struct btrfs_csum_item *item; struct btrfs_path *path = NULL; u32 csum; - - if (btrfs_test_opt(root, NODATASUM)) + if (btrfs_test_opt(root, NODATASUM) || + btrfs_test_flag(inode, NODATASUM)) return 0; - mutex_lock(&root->fs_info->fs_mutex); path = btrfs_alloc_path(); item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); @@ -335,10 +333,9 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) struct btrfs_root *root = BTRFS_I(inode)->root; u32 csum = ~(u32)0; unsigned long flags; - - if (btrfs_test_opt(root, NODATASUM)) + if (btrfs_test_opt(root, NODATASUM) || + btrfs_test_flag(inode, NODATASUM)) return 0; - ret = get_state_private(em_tree, start, &private); local_irq_save(flags); kaddr = kmap_atomic(page, KM_IRQ0); @@ -415,7 +412,7 @@ void btrfs_read_locked_inode(struct inode *inode) alloc_group_block = btrfs_inode_block_group(leaf, inode_item); BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, alloc_group_block); - + BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); if (!BTRFS_I(inode)->block_group) { BTRFS_I(inode)->block_group = btrfs_find_block_group(root, NULL, 0, 0, 0); @@ -484,6 +481,7 @@ static void fill_inode_item(struct extent_buffer *leaf, btrfs_set_inode_nblocks(leaf, item, inode->i_blocks); btrfs_set_inode_generation(leaf, item, inode->i_generation); btrfs_set_inode_rdev(leaf, item, inode->i_rdev); + btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group->key.objectid); } @@ -1454,7 +1452,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 1; group = btrfs_find_block_group(root, group, 0, 0, owner); BTRFS_I(inode)->block_group = group; - + BTRFS_I(inode)->flags = 0; ret = btrfs_insert_empty_inode(trans, root, path, objectid); if (ret) goto fail; From bcd987feefe8da66bc59b4e6bd51761a9820588c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Jan 2008 06:28:28 -0500 Subject: [PATCH 0370/2985] Btrfs: Remove extent_map debugging message Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f53d8c976219..9d6aefa937c4 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -273,7 +273,6 @@ int add_extent_mapping(struct extent_map_tree *tree, rb = tree_insert(&tree->map, em->end, &em->rb_node); if (rb) { prev = rb_entry(rb, struct extent_map, rb_node); - printk("found extent map %Lu %Lu on insert of %Lu %Lu\n", prev->start, prev->end, em->start, em->end); ret = -EEXIST; goto out; } From 21ad10cf3e9c1ef42e725e5c3a593c49f779a16b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Jan 2008 09:23:21 -0500 Subject: [PATCH 0371/2985] Btrfs: Add flush barriers on commit Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 5 +++-- fs/btrfs/disk-io.c | 5 +++++ fs/btrfs/super.c | 9 ++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7eda51542d17..0c0edacbc14c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -440,8 +440,9 @@ struct btrfs_root { */ #define BTRFS_STRING_ITEM_KEY 253 -#define BTRFS_MOUNT_NODATASUM 0x1 -#define BTRFS_MOUNT_NODATACOW 0x2 +#define BTRFS_MOUNT_NODATASUM (1 << 0) +#define BTRFS_MOUNT_NODATACOW (1 << 1) +#define BTRFS_MOUNT_NOBARRIER (1 << 2) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e0940a39ff07..a481b970608c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -762,10 +762,15 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root int ret; struct extent_buffer *super = root->fs_info->sb_buffer; struct inode *btree_inode = root->fs_info->btree_inode; + struct super_block *sb = root->fs_info->sb; + if (!btrfs_test_opt(root, NOBARRIER)) + blkdev_issue_flush(sb->s_bdev, NULL); set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, super->start, super->len); + if (!btrfs_test_opt(root, NOBARRIER)) + blkdev_issue_flush(sb->s_bdev, NULL); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f1b56eb77b1d..9ab4086d463c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -64,13 +64,14 @@ static void btrfs_put_super (struct super_block * sb) enum { Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, - Opt_alloc_start, Opt_err, + Opt_alloc_start, Opt_nobarrier, Opt_err, }; static match_table_t tokens = { {Opt_subvol, "subvol=%s"}, {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, + {Opt_nobarrier, "nobarrier"}, {Opt_max_extent, "max_extent=%s"}, {Opt_alloc_start, "alloc_start=%s"}, {Opt_err, NULL} @@ -148,6 +149,12 @@ static int parse_options (char * options, btrfs_set_opt(info->mount_opt, NODATASUM); } break; + case Opt_nobarrier: + if (info) { + printk("btrfs: turning off barriers\n"); + btrfs_set_opt(info->mount_opt, NOBARRIER); + } + break; case Opt_max_extent: if (info) { char *num = match_strdup(&args[0]); From 55c69072d6bd5be170a85467f64a20963cddf490 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Jan 2008 15:55:33 -0500 Subject: [PATCH 0372/2985] Btrfs: Fix extent_buffer usage when nodesize != leafsize Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 26 +++++++++++++++++++++++--- fs/btrfs/extent-tree.c | 13 ++++++++++--- fs/btrfs/extent_map.c | 30 +++++++++++++++++++++--------- 3 files changed, 54 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a481b970608c..0338f8fd382d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -197,9 +197,23 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) if (found_start != start) { printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", start, found_start, len); + WARN_ON(1); + goto err; + } + if (eb->first_page != page) { + printk("bad first page %lu %lu\n", eb->first_page->index, + page->index); + WARN_ON(1); + goto err; + } + if (!PageUptodate(page)) { + printk("csum not up to date page %lu\n", page->index); + WARN_ON(1); + goto err; } found_level = btrfs_header_level(eb); csum_tree_block(root, eb, 0); +err: free_extent_buffer(eb); out: return 0; @@ -368,7 +382,10 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) { struct inode *btree_inode = root->fs_info->btree_inode; - clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + if (btrfs_header_generation(buf) == + root->fs_info->running_transaction->transid) + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, + buf); return 0; } @@ -897,8 +914,11 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) void btrfs_throttle(struct btrfs_root *root) { - if (root->fs_info->throttles) - congestion_wait(WRITE, HZ/10); + struct backing_dev_info *bdi; + + bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + if (root->fs_info->throttles && bdi_write_congested(bdi)) + congestion_wait(WRITE, HZ/20); } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 99a8b0f0d318..2c569b4d59d4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1212,6 +1212,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, u64 header_transid = btrfs_header_generation(buf); if (header_transid == transid) { + clean_tree_block(NULL, root, buf); free_extent_buffer(buf); return 1; } @@ -1249,7 +1250,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_bytes; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1648,8 +1648,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); -if (ret) -printk("find free extent returns %d\n", ret); BUG_ON(ret); if (ret) return ret; @@ -1764,7 +1762,16 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 0, 0, 0); return ERR_PTR(-ENOMEM); } + btrfs_set_header_generation(buf, trans->transid); + clean_tree_block(trans, root, buf); + wait_on_tree_block_writeback(root, buf); btrfs_set_buffer_uptodate(buf); + + if (PageDirty(buf->first_page)) { + printk("page %lu dirty\n", buf->first_page->index); + WARN_ON(1); + } + set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 9d6aefa937c4..f3a384ed700c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1663,6 +1663,13 @@ void set_page_extent_mapped(struct page *page) } } +void set_page_extent_head(struct page *page, unsigned long len) +{ + WARN_ON(page->private && page->private == EXTENT_PAGE_PRIVATE && + PageDirty(page)); + set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); +} + /* * basic readpage implementation. Locked extent state structs are inserted * into the tree that are removed when the IO is done (by the end_io @@ -2490,8 +2497,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, mark_page_accessed(page0); set_page_extent_mapped(page0); WARN_ON(!PageUptodate(page0)); - set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE | - len << 2); + set_page_extent_head(page0, len); } else { i = 0; } @@ -2505,8 +2511,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, mark_page_accessed(p); if (i == 0) { eb->first_page = p; - set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | - len << 2); + set_page_extent_head(p, len); } else { set_page_private(p, EXTENT_PAGE_PRIVATE); } @@ -2569,8 +2574,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, if (i == 0) { eb->first_page = p; - set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE | - len << 2); + set_page_extent_head(p, len); } else { set_page_private(p, EXTENT_PAGE_PRIVATE); } @@ -2643,6 +2647,11 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); lock_page(page); + if (i == 0) + set_page_extent_head(page, eb->len); + else + set_page_private(page, EXTENT_PAGE_PRIVATE); + /* * if we're on the last page or the first page and the * block isn't aligned on a page boundary, do extra checks @@ -2697,9 +2706,12 @@ int set_extent_buffer_dirty(struct extent_map_tree *tree, */ if (i == 0) { lock_page(page); - set_page_private(page, - EXTENT_PAGE_PRIVATE_FIRST_PAGE | - eb->len << 2); + set_page_extent_head(page, eb->len); + } else if (PagePrivate(page) && + page->private != EXTENT_PAGE_PRIVATE) { + lock_page(page); + set_page_extent_mapped(page); + unlock_page(page); } __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); if (i == 0) From b3a0d8d28c607cb2531a68742afc2b967b1f6083 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Jan 2008 13:22:53 -0500 Subject: [PATCH 0373/2985] Btrfs: Remove verbose WARN_ON Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f3a384ed700c..d71aed341abd 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1665,8 +1665,6 @@ void set_page_extent_mapped(struct page *page) void set_page_extent_head(struct page *page, unsigned long len) { - WARN_ON(page->private && page->private == EXTENT_PAGE_PRIVATE && - PageDirty(page)); set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); } From 744f52f997b21de2a01931f79cf6c198bf688120 Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 14 Jan 2008 13:26:08 -0500 Subject: [PATCH 0374/2985] Btrfs: Implement ACLs setting and getting ACLs are stored but not used for permission checks (yet) Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 73 +++++++++++++++++++++++++++------------------ fs/btrfs/dir-item.c | 3 ++ fs/btrfs/xattr.c | 16 +++------- 3 files changed, 51 insertions(+), 41 deletions(-) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 889ba89d3f9f..b4f85ebe1bd6 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -22,53 +22,68 @@ #include #include "ctree.h" #include "xattr.h" +static int btrfs_xattr_set_acl(struct inode *inode, int type, + const void *value, size_t size) +{ + int ret = 0; + struct posix_acl *acl; -/* - * FIXME: At this point this is all place holder stuff, we just return - * -EOPNOTSUPP so cp won't complain when it tries to copy over a file with an - * acl on it. - */ + if (!is_owner_or_cap(inode)) + return -EPERM; + if (value) { + acl = posix_acl_from_xattr(value, size); + if (acl == NULL) { + value = NULL; + size = 0; + } else if (IS_ERR(acl)) { + ret = PTR_ERR(acl); + } else { + ret = posix_acl_valid(acl); + posix_acl_release(acl); + } + if (ret) + return ret; + } + return btrfs_xattr_set(inode, type, "", value, size, 0); +} +static int btrfs_xattr_get_acl(struct inode *inode, int type, + void *value, size_t size) +{ + return btrfs_xattr_get(inode, type, "", value, size); +} static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name, void *value, size_t size) { - /* - return btrfs_xattr_get(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, name, - value, size); - */ - return -EOPNOTSUPP; + if (*name != '\0') + return -EINVAL; + return btrfs_xattr_get_acl(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, + value, size); } - static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { - /* - return btrfs_xattr_set(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, name, - value, size, flags); - */ - return -EOPNOTSUPP; + if (*name != '\0') + return -EINVAL; + return btrfs_xattr_set_acl(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, + value, size); } - static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name, void *value, size_t size) { - /* - return btrfs_xattr_get(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, - name, value, size); - */ - return -EOPNOTSUPP; + if (*name != '\0') + return -EINVAL; + return btrfs_xattr_get_acl(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, + value, size); } - static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { - /* - return btrfs_xattr_set(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, - name, value, size, flags); - */ - return -EOPNOTSUPP; + if (*name != '\0') + return -EINVAL; + return btrfs_xattr_set_acl(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, + value, size); } - struct xattr_handler btrfs_xattr_acl_default_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, .list = btrfs_xattr_generic_list, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index ddbe12ae0d63..5247a9a41f08 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -76,6 +76,9 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + if (name_len + data_len + sizeof(struct btrfs_dir_item) > + BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item)) + return -ENOSPC; data_size = sizeof(*dir_item) + name_len + data_len; dir_item = insert_with_overflow(trans, root, path, &key, data_size, diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 33caaf5b4b0c..85ac42605f8d 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -141,11 +141,6 @@ ssize_t btrfs_xattr_get(struct inode *inode, int name_index, if (!handler) return -EOPNOTSUPP; - - /* just in case... */ - if (*attr_name == '\0') - return -EINVAL; - name = get_name(attr_name, name_index); if (!name) return -ENOMEM; @@ -201,14 +196,8 @@ int btrfs_xattr_set(struct inode *inode, int name_index, struct xattr_handler *handler = btrfs_xattr_handler(name_index); char *name; int ret = 0, mod = 0; - if (!handler) return -EOPNOTSUPP; - - /* just in case... */ - if (*attr_name == '\0') - return -EINVAL; - name = get_name(attr_name, name_index); if (!name) return -ENOMEM; @@ -454,15 +443,18 @@ static int btrfs_xattr_##name##_get(struct inode *inode, \ const char *name, void *value, \ size_t size) \ { \ + if (*name == '\0') \ + return -EINVAL; \ return btrfs_xattr_get(inode, index, name, value, size); \ } \ static int btrfs_xattr_##name##_set(struct inode *inode, \ const char *name, const void *value,\ size_t size, int flags) \ { \ + if (*name == '\0') \ + return -EINVAL; \ return btrfs_xattr_set(inode, index, name, value, size, flags); \ } \ - BTRFS_XATTR_SETGET_FUNCS(security, BTRFS_XATTR_INDEX_SECURITY); BTRFS_XATTR_SETGET_FUNCS(system, BTRFS_XATTR_INDEX_SYSTEM); BTRFS_XATTR_SETGET_FUNCS(user, BTRFS_XATTR_INDEX_USER); From fdebe2bd70047e057827cba85ba31b2545e31900 Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 14 Jan 2008 13:26:08 -0500 Subject: [PATCH 0375/2985] Btrfs: Add readonly inode flag This patch adds readonly inode flag support. A file with this flag can't be modified, but can be deleted. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 +++--- fs/btrfs/inode.c | 16 ++++++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0c0edacbc14c..245952cf7650 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -451,9 +451,9 @@ struct btrfs_root { /* * Inode flags */ -#define BTRFS_INODE_NODATASUM 0x1 -#define BTRFS_INODE_NODATACOW 0x2 - +#define BTRFS_INODE_NODATASUM (1 << 0) +#define BTRFS_INODE_NODATACOW (1 << 1) +#define BTRFS_INODE_READONLY (1 << 2) #define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \ ~BTRFS_INODE_##flag) #define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fba0478a95f4..fd0e0d80333b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2850,6 +2850,13 @@ out_fail: btrfs_throttle(root); return err; } +static int btrfs_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE)) + return -EACCES; + return generic_permission(inode, mask, NULL); +} static struct inode_operations btrfs_dir_inode_operations = { .lookup = btrfs_lookup, @@ -2866,12 +2873,12 @@ static struct inode_operations btrfs_dir_inode_operations = { .getxattr = generic_getxattr, .listxattr = btrfs_listxattr, .removexattr = generic_removexattr, + .permission = btrfs_permission, }; - static struct inode_operations btrfs_dir_ro_inode_operations = { .lookup = btrfs_lookup, + .permission = btrfs_permission, }; - static struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, @@ -2916,15 +2923,16 @@ static struct inode_operations btrfs_file_inode_operations = { .getxattr = generic_getxattr, .listxattr = btrfs_listxattr, .removexattr = generic_removexattr, + .permission = btrfs_permission, }; - static struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, .setattr = btrfs_setattr, + .permission = btrfs_permission, }; - static struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, + .permission = btrfs_permission, }; From 69a32ac5175ec9bcfb407e8619a024e5eaea87b7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Jan 2008 14:33:35 -0500 Subject: [PATCH 0376/2985] Btrfs: Change magic string to reflect new format Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/xattr.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 245952cf7650..f65b258344d4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -36,7 +36,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; -#define BTRFS_MAGIC "_B2RfS_M" +#define BTRFS_MAGIC "_B3RfS_M" #define BTRFS_MAX_LEVEL 8 #define BTRFS_ROOT_TREE_OBJECTID 1ULL diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 85ac42605f8d..4b3bd24af72b 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -454,7 +454,8 @@ static int btrfs_xattr_##name##_set(struct inode *inode, \ if (*name == '\0') \ return -EINVAL; \ return btrfs_xattr_set(inode, index, name, value, size, flags); \ -} \ +} + BTRFS_XATTR_SETGET_FUNCS(security, BTRFS_XATTR_INDEX_SECURITY); BTRFS_XATTR_SETGET_FUNCS(system, BTRFS_XATTR_INDEX_SYSTEM); BTRFS_XATTR_SETGET_FUNCS(user, BTRFS_XATTR_INDEX_USER); From b99aa6cb548ae9461c5dc0e38f88ebc618ea68d7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Jan 2008 14:41:16 -0500 Subject: [PATCH 0377/2985] Btrfs: Only delete roots from sysfs when they were added to sysfs Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0338f8fd382d..87daed632f37 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -795,7 +795,8 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); - btrfs_sysfs_del_root(root); + if (root->in_sysfs) + btrfs_sysfs_del_root(root); if (root->inode) iput(root->inode); if (root->node) From 8c416c9e0d35b0462e89b76b7581127efe03b8b2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Jan 2008 15:10:26 -0500 Subject: [PATCH 0378/2985] Btrfs: Delete any remaining extent_maps before freeing the inode Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fd0e0d80333b..5260b4708158 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2601,6 +2601,7 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); + btrfs_drop_extent_cache(inode, 0, (u64)-1); kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } From 61295eb8665e723e77af91d0a1e655a4bd28344f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Jan 2008 16:24:38 -0500 Subject: [PATCH 0379/2985] Btrfs: Add drop inode func to avoid data=ordered deadlock Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/inode.c | 19 +++++++++++++++++++ fs/btrfs/super.c | 1 + 3 files changed, 21 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f65b258344d4..a2c2d6d82c5e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1144,6 +1144,7 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); +void btrfs_drop_inode(struct inode *inode); void btrfs_read_locked_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, int wait); void btrfs_dirty_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5260b4708158..e53d20331646 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -990,6 +990,25 @@ out: fail: return err; } + +void btrfs_drop_inode(struct inode *inode) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; + + if (!BTRFS_I(inode)->ordered_trans) { + generic_drop_inode(inode); + return; + } + /* nasty, but it prevents a deadlock with data=ordered by preventing + * a commit until after this inode is done + */ + trans = btrfs_start_transaction(root, 1); + generic_drop_inode(inode); + /* note, the inode is now untouchable */ + btrfs_end_transaction(trans, root); +} + void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9ab4086d463c..4deea393ca94 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -425,6 +425,7 @@ static struct file_system_type btrfs_fs_type = { static struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, + .drop_inode = btrfs_drop_inode, .put_super = btrfs_put_super, .read_inode = btrfs_read_locked_inode, .write_super = btrfs_write_super, From cee36a03e8f7c6e14aefd497d3acf01bcd3ef153 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 15 Jan 2008 08:40:48 -0500 Subject: [PATCH 0380/2985] Rework btrfs_drop_inode to avoid scheduling Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/inode.c | 13 +++---------- fs/btrfs/ordered-data.c | 36 ++++++++++++++++++++++++++++++++++++ fs/btrfs/ordered-data.h | 1 + fs/btrfs/transaction.c | 2 ++ 6 files changed, 44 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a2c2d6d82c5e..1e19f2d86334 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -349,6 +349,7 @@ struct btrfs_fs_info { u64 total_pinned; spinlock_t delalloc_lock; + spinlock_t new_trans_lock; u64 delalloc_bytes; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 87daed632f37..cd29922d4073 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -639,6 +639,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_LIST_HEAD(&fs_info->hashers); spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->delalloc_lock); + spin_lock_init(&fs_info->new_trans_lock); memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); init_completion(&fs_info->kobj_unregister); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e53d20331646..008e3445748e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -993,20 +993,13 @@ fail: void btrfs_drop_inode(struct inode *inode) { - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(inode)->root; - - if (!BTRFS_I(inode)->ordered_trans) { + if (!BTRFS_I(inode)->ordered_trans || inode->i_nlink) { generic_drop_inode(inode); return; } - /* nasty, but it prevents a deadlock with data=ordered by preventing - * a commit until after this inode is done - */ - trans = btrfs_start_transaction(root, 1); + /* FIXME, make sure this delete actually ends up in the transaction */ + btrfs_del_ordered_inode(inode); generic_drop_inode(inode); - /* note, the inode is now untouchable */ - btrfs_end_transaction(trans, root); } void btrfs_delete_inode(struct inode *inode) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 411aba84d305..b56011baa17c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -219,3 +219,39 @@ int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, kfree(entry); return 1; } + +static int __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, + u64 root_objectid, u64 objectid) +{ + struct tree_entry *entry; + struct rb_node *node; + struct rb_node *prev; + + write_lock(&tree->lock); + node = __tree_search(&tree->tree, root_objectid, objectid, &prev); + if (!node) { + write_unlock(&tree->lock); + return 0; + } + rb_erase(node, &tree->tree); + write_unlock(&tree->lock); + entry = rb_entry(node, struct tree_entry, rb_node); + kfree(entry); + return 1; +} + +int btrfs_del_ordered_inode(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 root_objectid = root->root_key.objectid; + + spin_lock(&root->fs_info->new_trans_lock); + if (root->fs_info->running_transaction) { + struct btrfs_ordered_inode_tree *tree; + tree = &root->fs_info->running_transaction->ordered_inode_tree; + __btrfs_del_ordered_inode(tree, root_objectid, inode->i_ino); + } + spin_unlock(&root->fs_info->new_trans_lock); + return 0; +} + diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index aaf9eb142719..26b26212865b 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -36,4 +36,5 @@ int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, u64 *root_objectid, u64 *objectid); int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, u64 *root_objectid, u64 *objectid); +int btrfs_del_ordered_inode(struct inode *inode); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 614903f5c884..a3205808ab2b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -699,7 +699,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, BUG_ON(ret); cur_trans = root->fs_info->running_transaction; + spin_lock(&root->fs_info->new_trans_lock); root->fs_info->running_transaction = NULL; + spin_unlock(&root->fs_info->new_trans_lock); btrfs_set_super_generation(&root->fs_info->super_copy, cur_trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, From 9cce6c3bfca85bf92e8c9358542a18dfa6c232be Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 15 Jan 2008 08:44:06 -0500 Subject: [PATCH 0381/2985] Btrfs: Disable delalloc accounting for now Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 008e3445748e..f83f88ca8ac7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -80,6 +80,8 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, u64 thresh; int ret = 0; + return 0; + if (for_del) thresh = total * 90; else From 2da98f003f4788b0a72c5f87bc55b061f65f30fa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 16 Jan 2008 11:44:43 -0500 Subject: [PATCH 0382/2985] Btrfs: Run igrab on data=ordered inodes to prevent deadlocks during writeout Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 32 ++++++++++++++++++++++++++------ fs/btrfs/ordered-data.c | 10 ++++++++-- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.c | 1 + 5 files changed, 37 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1e19f2d86334..fa65fe027e45 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1145,7 +1145,7 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); -void btrfs_drop_inode(struct inode *inode); +void btrfs_put_inode(struct inode *inode); void btrfs_read_locked_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, int wait); void btrfs_dirty_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f83f88ca8ac7..c1ac0bcbb462 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -601,6 +601,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) { struct btrfs_root *root; struct btrfs_trans_handle *trans; + struct inode *inode = dentry->d_inode; int ret; unsigned long nr = 0; @@ -617,6 +618,18 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_trans(trans, root, dir, dentry); nr = trans->blocks_used; + if (inode->i_nlink == 0) { + int found; + /* if the inode isn't linked anywhere, + * we don't need to worry about + * data=ordered + */ + found = btrfs_del_ordered_inode(inode); + if (found == 1) { + atomic_dec(&inode->i_count); + } + } + btrfs_end_transaction(trans, root); fail: mutex_unlock(&root->fs_info->fs_mutex); @@ -993,15 +1006,22 @@ fail: return err; } -void btrfs_drop_inode(struct inode *inode) +void btrfs_put_inode(struct inode *inode) { - if (!BTRFS_I(inode)->ordered_trans || inode->i_nlink) { - generic_drop_inode(inode); + int ret; + + if (!BTRFS_I(inode)->ordered_trans) { return; } - /* FIXME, make sure this delete actually ends up in the transaction */ - btrfs_del_ordered_inode(inode); - generic_drop_inode(inode); + + if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || + mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)) + return; + + ret = btrfs_del_ordered_inode(inode); + if (ret == 1) { + atomic_dec(&inode->i_count); + } } void btrfs_delete_inode(struct inode *inode) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b56011baa17c..cba2b623d02e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -153,6 +153,8 @@ int btrfs_add_ordered_inode(struct inode *inode) write_unlock(&tree->lock); if (node) kfree(entry); + else + igrab(inode); return 0; } @@ -221,6 +223,7 @@ int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, } static int __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, + struct inode *inode, u64 root_objectid, u64 objectid) { struct tree_entry *entry; @@ -234,6 +237,7 @@ static int __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, return 0; } rb_erase(node, &tree->tree); + BTRFS_I(inode)->ordered_trans = 0; write_unlock(&tree->lock); entry = rb_entry(node, struct tree_entry, rb_node); kfree(entry); @@ -244,14 +248,16 @@ int btrfs_del_ordered_inode(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 root_objectid = root->root_key.objectid; + int ret = 0; spin_lock(&root->fs_info->new_trans_lock); if (root->fs_info->running_transaction) { struct btrfs_ordered_inode_tree *tree; tree = &root->fs_info->running_transaction->ordered_inode_tree; - __btrfs_del_ordered_inode(tree, root_objectid, inode->i_ino); + ret = __btrfs_del_ordered_inode(tree, inode, root_objectid, + inode->i_ino); } spin_unlock(&root->fs_info->new_trans_lock); - return 0; + return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4deea393ca94..e506de3168bc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -425,7 +425,7 @@ static struct file_system_type btrfs_fs_type = { static struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, - .drop_inode = btrfs_drop_inode, + .put_inode = btrfs_put_inode, .put_super = btrfs_put_super, .read_inode = btrfs_read_locked_inode, .write_super = btrfs_write_super, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a3205808ab2b..08f7a188dc3e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -521,6 +521,7 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, if (inode) { if (S_ISREG(inode->i_mode)) filemap_write_and_wait(inode->i_mapping); + atomic_dec(&inode->i_count); iput(inode); } mutex_lock(&root->fs_info->fs_mutex); From 4d5e74bc0aec3f54b7e429d77b7c35de042c507d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 16 Jan 2008 16:09:22 -0500 Subject: [PATCH 0383/2985] Btrfs: Fix data=ordered vs wait_on_inode deadlock on older kernels Using ilookup5 during data=ordered writeback could deadlock on I_LOCK. This saves a pointer to the inode instead. Signed-off-by: Chris Mason --- fs/btrfs/ordered-data.c | 12 ++++++++++-- fs/btrfs/ordered-data.h | 6 ++++-- fs/btrfs/transaction.c | 30 +++++++++++++----------------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index cba2b623d02e..3ee51e10c187 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -25,6 +25,7 @@ struct tree_entry { u64 root_objectid; u64 objectid; + struct inode *inode; struct rb_node rb_node; }; @@ -144,6 +145,7 @@ int btrfs_add_ordered_inode(struct inode *inode) write_lock(&tree->lock); entry->objectid = inode->i_ino; entry->root_objectid = root_objectid; + entry->inode = inode; node = tree_insert(&tree->tree, root_objectid, inode->i_ino, &entry->rb_node); @@ -159,7 +161,8 @@ int btrfs_add_ordered_inode(struct inode *inode) } int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid) + u64 *root_objectid, u64 *objectid, + struct inode **inode) { struct tree_entry *entry; struct rb_node *node; @@ -184,13 +187,16 @@ int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, } *root_objectid = entry->root_objectid; + *inode = entry->inode; + atomic_inc(&entry->inode->i_count); *objectid = entry->objectid; write_unlock(&tree->lock); return 1; } int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid) + u64 *root_objectid, u64 *objectid, + struct inode **inode) { struct tree_entry *entry; struct rb_node *node; @@ -216,6 +222,8 @@ int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, *root_objectid = entry->root_objectid; *objectid = entry->objectid; + *inode = entry->inode; + atomic_inc(&entry->inode->i_count); rb_erase(node, &tree->tree); write_unlock(&tree->lock); kfree(entry); diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 26b26212865b..f25c6771ec64 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -33,8 +33,10 @@ btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) int btrfs_add_ordered_inode(struct inode *inode); int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid); + u64 *root_objectid, u64 *objectid, + struct inode **inode); int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid); + u64 *root_objectid, u64 *objectid, + struct inode **inode); int btrfs_del_ordered_inode(struct inode *inode); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 08f7a188dc3e..b6bbfc179c29 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -490,19 +490,17 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, while(1) { ret = btrfs_find_first_ordered_inode( &cur_trans->ordered_inode_tree, - &root_objectid, &objectid); + &root_objectid, &objectid, &inode); if (!ret) break; mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->fs_mutex); - inode = btrfs_ilookup(root->fs_info->sb, objectid, - root_objectid); - if (inode) { - if (S_ISREG(inode->i_mode)) - filemap_fdatawrite(inode->i_mapping); - iput(inode); - } + + if (S_ISREG(inode->i_mode)) + filemap_fdatawrite(inode->i_mapping); + iput(inode); + mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); } @@ -511,19 +509,17 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, objectid = 0; ret = btrfs_find_del_first_ordered_inode( &cur_trans->ordered_inode_tree, - &root_objectid, &objectid); + &root_objectid, &objectid, &inode); if (!ret) break; mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->fs_mutex); - inode = btrfs_ilookup(root->fs_info->sb, objectid, - root_objectid); - if (inode) { - if (S_ISREG(inode->i_mode)) - filemap_write_and_wait(inode->i_mapping); - atomic_dec(&inode->i_count); - iput(inode); - } + + if (S_ISREG(inode->i_mode)) + filemap_write_and_wait(inode->i_mapping); + atomic_dec(&inode->i_count); + iput(inode); + mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); } From caaca38b8fdcf63a17647fddae2195b189e19e37 Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 17 Jan 2008 11:59:48 -0500 Subject: [PATCH 0384/2985] Btrfs: Fix compile on kernel without ACLs enabled Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 5 ++++- fs/btrfs/xattr.c | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index ab9a9f8edbf8..72cc3e16741f 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,8 +5,11 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o sysfs.o struct-funcs.o xattr.o acl.o ordered-data.o + extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o +ifeq ($(CONFIG_FS_POSIX_ACL),y) +btrfs-y += acl.o +endif #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ # inode-map.o \ diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 4b3bd24af72b..0a4950400f85 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -26,20 +26,22 @@ #include "transaction.h" #include "xattr.h" #include "disk-io.h" - static struct xattr_handler *btrfs_xattr_handler_map[] = { [BTRFS_XATTR_INDEX_USER] = &btrfs_xattr_user_handler, +#ifdef CONFIG_FS_POSIX_ACL [BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &btrfs_xattr_acl_access_handler, [BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &btrfs_xattr_acl_default_handler, +#endif [BTRFS_XATTR_INDEX_TRUSTED] = &btrfs_xattr_trusted_handler, [BTRFS_XATTR_INDEX_SECURITY] = &btrfs_xattr_security_handler, [BTRFS_XATTR_INDEX_SYSTEM] = &btrfs_xattr_system_handler, }; - struct xattr_handler *btrfs_xattr_handlers[] = { &btrfs_xattr_user_handler, +#ifdef CONFIG_FS_POSIX_ACL &btrfs_xattr_acl_access_handler, &btrfs_xattr_acl_default_handler, +#endif &btrfs_xattr_trusted_handler, &btrfs_xattr_security_handler, &btrfs_xattr_system_handler, From fb4bc1e0565a842d704846005ac0ec739e608ede Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 17 Jan 2008 11:59:51 -0500 Subject: [PATCH 0385/2985] Btrfs: Fix compile on 2.6.22 kernel This patch fixes compile error on kernel-2.6.22 Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 5 +++++ fs/btrfs/inode.c | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index b4f85ebe1bd6..27bb841fafeb 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -22,6 +22,11 @@ #include #include "ctree.h" #include "xattr.h" +#ifndef is_owner_or_cap +#define is_owner_or_cap(inode) \ + ((current->fsuid == (inode)->i_uid) || capable(CAP_FOWNER)) +#endif + static int btrfs_xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c1ac0bcbb462..51fc06dc4a5d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2322,15 +2322,16 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = -ENOMEM; goto fail_unlock; } - pending_snapshot->name = kstrndup(name, namelen, GFP_NOFS); + pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); if (!pending_snapshot->name) { ret = -ENOMEM; kfree(pending_snapshot); goto fail_unlock; } + memcpy(pending_snapshot->name, name, namelen); + pending_snapshot->name[namelen] = '\0'; trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - pending_snapshot->root = root; list_add(&pending_snapshot->list, &trans->transaction->pending_snapshots); From 04005cc7a1feef94237204566ccac38571af7991 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jan 2008 12:01:41 -0500 Subject: [PATCH 0386/2985] Btrfs: Use blk_congestion_wait on older kernels Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index cd29922d4073..731a534f81f5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -919,8 +919,13 @@ void btrfs_throttle(struct btrfs_root *root) struct backing_dev_info *bdi; bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info; - if (root->fs_info->throttles && bdi_write_congested(bdi)) + if (root->fs_info->throttles && bdi_write_congested(bdi)) { +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) congestion_wait(WRITE, HZ/20); +#else + blk_congestion_wait(WRITE, HZ/20); +#endif + } } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) From e18e4809b10e6c9efb5fe10c1ddcb4ebb690d517 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Jan 2008 10:54:22 -0500 Subject: [PATCH 0387/2985] Btrfs: Add mount -o ssd, which includes optimizations for seek free storage Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/disk-io.c | 3 +++ fs/btrfs/extent-tree.c | 31 ++++++++++++++++++++++++++++++- fs/btrfs/super.c | 9 ++++++++- fs/btrfs/transaction.c | 1 + fs/btrfs/tree-defrag.c | 3 +++ 6 files changed, 47 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fa65fe027e45..7a588ba2b747 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -351,6 +351,7 @@ struct btrfs_fs_info { spinlock_t delalloc_lock; spinlock_t new_trans_lock; u64 delalloc_bytes; + u64 last_alloc; }; /* * in ram representation of the tree. extent_root is used for all allocations @@ -444,6 +445,7 @@ struct btrfs_root { #define BTRFS_MOUNT_NODATASUM (1 << 0) #define BTRFS_MOUNT_NODATACOW (1 << 1) #define BTRFS_MOUNT_NOBARRIER (1 << 2) +#define BTRFS_MOUNT_SSD (1 << 3) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 731a534f81f5..5d1f9bca2712 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -193,6 +193,7 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1); + btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", @@ -676,6 +677,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->closing = 0; fs_info->total_pinned = 0; + fs_info->last_alloc = 0; + #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); #else diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2c569b4d59d4..b69a46691a96 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1431,6 +1431,19 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_MIXED; } + /* for SSD, cluster allocations together as much as possible */ + if (btrfs_test_opt(root, SSD)) { + if (!data) { + if (root->fs_info->last_alloc) + hint_byte = root->fs_info->last_alloc; + else { + hint_byte = hint_byte & + ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); + empty_size += 16 * 1024 * 1024; + } + } + } + search_end = min(search_end, btrfs_super_total_bytes(&info->super_copy)); if (hint_byte) { @@ -1456,6 +1469,19 @@ check_failed: } search_start = find_search_start(root, &block_group, search_start, total_needed, data); + + if (!data && btrfs_test_opt(root, SSD) && info->last_alloc && + search_start != info->last_alloc) { + info->last_alloc = 0; + if (!empty_size) { + empty_size += 16 * 1024 * 1024; + total_needed += empty_size; + } + search_start = find_search_start(root, &block_group, + search_start, total_needed, + data); + } + search_start = stripe_align(root, search_start); cached_start = search_start; btrfs_init_path(path); @@ -1610,6 +1636,8 @@ enospc: error: btrfs_release_path(root, path); btrfs_free_path(path); + if (btrfs_test_opt(root, SSD) && !ret && !data) + info->last_alloc = ins->objectid + ins->offset; return ret; } /* @@ -1778,7 +1806,8 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, buf->start, buf->start + buf->len - 1, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; - btrfs_set_buffer_defrag(buf); + if (!btrfs_test_opt(root, SSD)) + btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e506de3168bc..b6982f0133cf 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -64,7 +64,7 @@ static void btrfs_put_super (struct super_block * sb) enum { Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, - Opt_alloc_start, Opt_nobarrier, Opt_err, + Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_err, }; static match_table_t tokens = { @@ -74,6 +74,7 @@ static match_table_t tokens = { {Opt_nobarrier, "nobarrier"}, {Opt_max_extent, "max_extent=%s"}, {Opt_alloc_start, "alloc_start=%s"}, + {Opt_ssd, "ssd"}, {Opt_err, NULL} }; @@ -149,6 +150,12 @@ static int parse_options (char * options, btrfs_set_opt(info->mount_opt, NODATASUM); } break; + case Opt_ssd: + if (info) { + printk("btrfs: use ssd allocation scheme\n"); + btrfs_set_opt(info->mount_opt, SSD); + } + break; case Opt_nobarrier: if (info) { printk("btrfs: turning off barriers\n"); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b6bbfc179c29..163c01a24498 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -57,6 +57,7 @@ static int join_transaction(struct btrfs_root *root) BUG_ON(!cur_trans); root->fs_info->generation++; root->fs_info->running_transaction = cur_trans; + root->fs_info->last_alloc = 0; cur_trans->num_writers = 1; cur_trans->num_joined = 0; cur_trans->transid = root->fs_info->generation; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 5c58630dce03..5935cbd8f2b8 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -179,6 +179,9 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (root->ref_cows == 0 && !is_extent) goto out; + if (btrfs_test_opt(root, SSD)) + goto out; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; From b17854275cd297d40c6c5562fa44a472314c6c7f Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 22 Jan 2008 12:46:56 -0500 Subject: [PATCH 0388/2985] Fix for btrfs_find_free_objectid btrfs_find_free_objectid may return a used objectid due to arithmetic underflow. This bug may happen when parameter 'root' is tree root, so it may cause serious problems when creating snapshot or sub-volume. Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index ab74977adf5c..a0925eabdaa2 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -62,7 +62,6 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key key; int ret; - u64 hole_size = 0; int slot = 0; u64 last_ino = 0; int start_found; @@ -109,8 +108,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, if (start_found) { if (last_ino < search_start) last_ino = search_start; - hole_size = key.objectid - last_ino; - if (hole_size > 0) { + if (key.objectid > last_ino) { *objectid = last_ino; goto found; } From f0c5da1446cc500856a5e31c9a0e2a7bdd30e663 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 22 Jan 2008 12:46:56 -0500 Subject: [PATCH 0389/2985] Btrfs: Fix for test_range_bit test_range_bit doesn't properly handle the case: there's a hole at the end of the range and there's no other extent_state after the range. Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index d71aed341abd..485cf0719b3c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1336,6 +1336,11 @@ int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, if (start > end) break; node = rb_next(node); + if (!node) { + if (filled) + bitset = 0; + break; + } } read_unlock_irq(&tree->lock); return bitset; From ed0dab6b86cee64e9072107f65fa40629f162b38 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 22 Jan 2008 12:46:56 -0500 Subject: [PATCH 0390/2985] Btrfs: Add basic lockfs calls Signed-off-by: Chris Mason --- fs/btrfs/super.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b6982f0133cf..8b52c69fda2e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -429,6 +429,17 @@ static struct file_system_type btrfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +static void btrfs_write_super_lockfs(struct super_block *sb) +{ + struct btrfs_root *root = btrfs_sb(sb); + btrfs_transaction_flush_work(root); +} + +static void btrfs_unlockfs(struct super_block *sb) +{ + struct btrfs_root *root = btrfs_sb(sb); + btrfs_transaction_queue_work(root, HZ * 30); +} static struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, @@ -442,8 +453,9 @@ static struct super_operations btrfs_super_ops = { .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .statfs = btrfs_statfs, + .write_super_lockfs = btrfs_write_super_lockfs, + .unlockfs = btrfs_unlockfs, }; - static int __init init_btrfs_fs(void) { int err; From c1e32da616a17813f11b701a7a87775d35c12e3a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jan 2008 12:46:56 -0500 Subject: [PATCH 0391/2985] Btrfs: Include sched.h in the acl code for current (fixes compile on 2.6.23) Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 27bb841fafeb..ff0fcc72848a 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "ctree.h" #include "xattr.h" #ifndef is_owner_or_cap From 5f56406aabdf5444d040c5955effc665b1d0dbaf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jan 2008 16:47:59 -0500 Subject: [PATCH 0392/2985] Btrfs: Fix hole insertion corner cases There were a few places that could cause duplicate extent insertion, this adjusts the code that creates holes to avoid it. lookup_extent_map is changed to correctly return all of the extents in a range, even when there are none matching at the start of the range. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent_map.c | 47 ++++++++++++++++++++------ fs/btrfs/file.c | 78 ++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/inode.c | 15 +++++++-- 4 files changed, 127 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7a588ba2b747..b9f2975b55c9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1170,6 +1170,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, struct inode *inode); /* file.c */ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end); +int btrfs_check_file(struct btrfs_root *root, struct inode *inode); extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 485cf0719b3c..010a287fbd71 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -204,10 +204,12 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, } static struct rb_node *__tree_search(struct rb_root *root, u64 offset, - struct rb_node **prev_ret) + struct rb_node **prev_ret, + struct rb_node **next_ret) { struct rb_node * n = root->rb_node; struct rb_node *prev = NULL; + struct rb_node *orig_prev = NULL; struct tree_entry *entry; struct tree_entry *prev_entry = NULL; @@ -223,13 +225,25 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, else return n; } - if (!prev_ret) - return NULL; - while(prev && offset > prev_entry->end) { - prev = rb_next(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + + if (prev_ret) { + orig_prev = prev; + while(prev && offset > prev_entry->end) { + prev = rb_next(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *prev_ret = prev; + prev = orig_prev; + } + + if (next_ret) { + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + while(prev && offset < prev_entry->start) { + prev = rb_prev(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *next_ret = prev; } - *prev_ret = prev; return NULL; } @@ -237,7 +251,7 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) { struct rb_node *prev; struct rb_node *ret; - ret = __tree_search(root, offset, &prev); + ret = __tree_search(root, offset, &prev, NULL); if (!ret) return prev; return ret; @@ -248,7 +262,7 @@ static int tree_delete(struct rb_root *root, u64 offset) struct rb_node *node; struct tree_entry *entry; - node = __tree_search(root, offset, NULL); + node = __tree_search(root, offset, NULL, NULL); if (!node) return -ENOENT; entry = rb_entry(node, struct tree_entry, rb_node); @@ -314,9 +328,21 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, { struct extent_map *em; struct rb_node *rb_node; + struct rb_node *prev = NULL; + struct rb_node *next = NULL; read_lock_irq(&tree->lock); - rb_node = tree_search(&tree->map, start); + rb_node = __tree_search(&tree->map, start, &prev, &next); + if (!rb_node && prev) { + em = rb_entry(prev, struct extent_map, rb_node); + if (em->start <= end && em->end >= start) + goto found; + } + if (!rb_node && next) { + em = rb_entry(next, struct extent_map, rb_node); + if (em->start <= end && em->end >= start) + goto found; + } if (!rb_node) { em = NULL; goto out; @@ -330,6 +356,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, em = NULL; goto out; } +found: atomic_inc(&em->refs); out: read_unlock_irq(&tree->lock); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 897242e87fa7..1cd8c908811e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -278,7 +278,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 hole_size; u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; - hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size = (end_pos - last_pos_in_file + mask) & ~mask; if (last_pos_in_file < start_pos) { err = btrfs_drop_extents(trans, root, inode, @@ -293,6 +293,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_ino, last_pos_in_file, 0, 0, hole_size); + btrfs_check_file(root, inode); } if (err) goto failed; @@ -378,6 +379,80 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) return 0; } +int btrfs_check_file(struct btrfs_root *root, struct inode *inode) +{ + return 0; +#if 0 + struct btrfs_path *path; + struct btrfs_key found_key; + struct extent_buffer *leaf; + struct btrfs_file_extent_item *extent; + u64 last_offset = 0; + int nritems; + int slot; + int found_type; + int ret; + int err = 0; + u64 extent_end = 0; + + path = btrfs_alloc_path(); + ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, + last_offset, 0); + while(1) { + nritems = btrfs_header_nritems(path->nodes[0]); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + goto out; + nritems = btrfs_header_nritems(path->nodes[0]); + } + slot = path->slots[0]; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.objectid != inode->i_ino) + break; + if (found_key.type != BTRFS_EXTENT_DATA_KEY) + goto out; + + if (found_key.offset != last_offset) { + WARN_ON(1); + btrfs_print_leaf(root, leaf); + printk("inode %lu found offset %Lu expected %Lu\n", + inode->i_ino, found_key.offset, last_offset); + err = 1; + goto out; + } + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(leaf, extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = found_key.offset + + btrfs_file_extent_num_bytes(leaf, extent); + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + struct btrfs_item *item; + item = btrfs_item_nr(leaf, slot); + extent_end = found_key.offset + + btrfs_file_extent_inline_len(leaf, item); + extent_end = (extent_end + root->sectorsize - 1) & + ~((u64)root->sectorsize -1 ); + } + last_offset = extent_end; + path->slots[0]++; + } + if (last_offset < inode->i_size) { + WARN_ON(1); + btrfs_print_leaf(root, leaf); + printk("inode %lu found offset %Lu size %Lu\n", inode->i_ino, + last_offset, inode->i_size); + err = 1; + + } +out: + btrfs_free_path(path); + return err; +#endif +} + /* * this is very complex, but the basic idea is to drop all extents * in the range start - end. hint_block is filled in with a block number @@ -436,6 +511,7 @@ next_slot: slot = path->slots[0]; ret = 0; btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.offset >= end || key.objectid != inode->i_ino) { goto out; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 51fc06dc4a5d..67005480e139 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -133,6 +133,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start, ins.objectid, ins.offset, ins.offset); + btrfs_check_file(root, inode); num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; @@ -965,12 +966,18 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) u64 mask = root->sectorsize - 1; u64 pos = (inode->i_size + mask) & ~mask; u64 block_end = attr->ia_size | mask; + u64 hole_start; u64 hole_size; u64 alloc_hint = 0; if (attr->ia_size <= pos) goto out; + if (pos != inode->i_size) + hole_start = pos + root->sectorsize; + else + hole_start = pos; + mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); mutex_unlock(&root->fs_info->fs_mutex); @@ -980,19 +987,21 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); lock_extent(em_tree, pos, block_end, GFP_NOFS); - hole_size = (attr->ia_size - pos + mask) & ~mask; + hole_size = block_end - hole_start; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); err = btrfs_drop_extents(trans, root, inode, - pos, pos + hole_size, pos, + pos, block_end, pos, &alloc_hint); if (alloc_hint != EXTENT_MAP_INLINE) { err = btrfs_insert_file_extent(trans, root, inode->i_ino, - pos, 0, 0, hole_size); + hole_start, 0, 0, + hole_size); + btrfs_check_file(root, inode); } btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); From d1310b2e0cd98eb1348553e69b73827b436dca7b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jan 2008 16:13:08 -0500 Subject: [PATCH 0393/2985] Btrfs: Split the extent_map code into two parts There is now extent_map for mapping offsets in the file to disk and extent_io for state tracking, IO submission and extent_bufers. The new extent_map code shifts from [start,end] pairs to [start,len], and pushes the locking out into the caller. This allows a few performance optimizations and is easier to use. A number of extent_map usage bugs were fixed, mostly with failing to remove extent_map entries when changing the file. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/btrfs_inode.h | 2 + fs/btrfs/ctree.h | 15 +- fs/btrfs/disk-io.c | 123 +- fs/btrfs/extent-tree.c | 43 +- fs/btrfs/extent_io.c | 3089 +++++++++++++++++++++++++++++++++++++++ fs/btrfs/extent_io.h | 193 +++ fs/btrfs/extent_map.c | 3119 ++-------------------------------------- fs/btrfs/extent_map.h | 218 +-- fs/btrfs/file.c | 29 +- fs/btrfs/inode.c | 175 +-- fs/btrfs/super.c | 10 +- fs/btrfs/transaction.c | 8 +- fs/btrfs/transaction.h | 2 +- 14 files changed, 3614 insertions(+), 3415 deletions(-) create mode 100644 fs/btrfs/extent_io.c create mode 100644 fs/btrfs/extent_io.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 72cc3e16741f..9c61609f3714 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,8 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o + extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ + extent_io.o ifeq ($(CONFIG_FS_POSIX_ACL),y) btrfs-y += acl.o diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 9fc99883c70a..9a0647dc5a0b 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -20,6 +20,7 @@ #define __BTRFS_I__ #include "extent_map.h" +#include "extent_io.h" /* in memory btrfs inode */ struct btrfs_inode { @@ -27,6 +28,7 @@ struct btrfs_inode { struct btrfs_block_group_cache *block_group; struct btrfs_key location; struct extent_map_tree extent_tree; + struct extent_io_tree io_tree; struct inode vfs_inode; u64 ordered_trans; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b9f2975b55c9..6c65473e0fe3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -27,6 +27,7 @@ #include #include #include "bit-radix.h" +#include "extent_io.h" #include "extent_map.h" struct btrfs_trans_handle; @@ -314,11 +315,11 @@ struct btrfs_fs_info { struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; - struct extent_map_tree free_space_cache; - struct extent_map_tree block_group_cache; - struct extent_map_tree pinned_extents; - struct extent_map_tree pending_del; - struct extent_map_tree extent_ins; + struct extent_io_tree free_space_cache; + struct extent_io_tree block_group_cache; + struct extent_io_tree pinned_extents; + struct extent_io_tree pending_del; + struct extent_io_tree extent_ins; u64 generation; u64 last_trans_committed; @@ -956,7 +957,7 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, u64 first_extent); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr); @@ -1001,7 +1002,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 owner_objectid, u64 owner_offset, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_map_tree *unpin); + struct extent_io_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5d1f9bca2712..4c4ebea0b2a9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -43,14 +43,14 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) } #endif -static struct extent_map_ops btree_extent_map_ops; +static struct extent_io_ops btree_extent_io_ops; struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr, blocksize, GFP_NOFS); return eb; } @@ -61,13 +61,13 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr, blocksize, NULL, GFP_NOFS); return eb; } struct extent_map *btree_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 end, + size_t page_offset, u64 start, u64 len, int create) { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; @@ -75,7 +75,9 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, int ret; again: - em = lookup_extent_mapping(em_tree, start, end); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + spin_unlock(&em_tree->lock); if (em) { goto out; } @@ -85,11 +87,14 @@ again: goto out; } em->start = 0; - em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1; + em->len = i_size_read(inode); em->block_start = 0; - em->block_end = em->end; em->bdev = inode->i_sb->s_bdev; + + spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret == -EEXIST) { free_extent_map(em); em = NULL; @@ -175,13 +180,13 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 found_start; int found_level; unsigned long len; struct extent_buffer *eb; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; if (page->private == EXTENT_PAGE_PRIVATE) goto out; @@ -230,16 +235,16 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) static int btree_writepage(struct page *page, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_write_full_page(tree, page, btree_get_extent, wbc); } static int btree_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; @@ -264,18 +269,20 @@ static int btree_writepages(struct address_space *mapping, int btree_readpage(struct file *file, struct page *page) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_read_full_page(tree, page, btree_get_extent); } static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; + struct extent_map_tree *map; int ret; - tree = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(tree, page); + tree = &BTRFS_I(page->mapping->host)->io_tree; + map = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(map, tree, page); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); @@ -286,8 +293,8 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) static void btree_invalidatepage(struct page *page, unsigned long offset) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); } @@ -331,7 +338,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return 0; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 0); free_extent_buffer(buf); return ret; @@ -342,40 +349,39 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_map_tree *extent_tree; + struct extent_io_tree *io_tree; u64 end; int ret; - extent_tree = &BTRFS_I(btree_inode)->extent_tree; + io_tree = &BTRFS_I(btree_inode)->io_tree; buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 0, 1); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); if (buf->flags & EXTENT_CSUM) return buf; end = buf->start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; return buf; } - lock_extent(extent_tree, buf->start, end, GFP_NOFS); + lock_extent(io_tree, buf->start, end, GFP_NOFS); - if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; goto out_unlock; } ret = csum_tree_block(root, buf, 1); - set_extent_bits(extent_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); + set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; out_unlock: - unlock_extent(extent_tree, buf->start, end, GFP_NOFS); + unlock_extent(io_tree, buf->start, end, GFP_NOFS); return buf; } @@ -385,7 +391,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; if (btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) - clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); return 0; } @@ -394,7 +400,7 @@ int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf) { struct inode *btree_inode = root->fs_info->btree_inode; - wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree, + wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree, buf); return 0; } @@ -659,20 +665,23 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; - extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); - BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops; + extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + GFP_NOFS); - extent_map_tree_init(&fs_info->free_space_cache, + BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; + + extent_io_tree_init(&fs_info->free_space_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->block_group_cache, + extent_io_tree_init(&fs_info->block_group_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->pinned_extents, + extent_io_tree_init(&fs_info->pinned_extents, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->pending_del, + extent_io_tree_init(&fs_info->pending_del, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->extent_ins, + extent_io_tree_init(&fs_info->extent_ins, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -787,7 +796,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root if (!btrfs_test_opt(root, NOBARRIER)) blkdev_issue_flush(sb->s_bdev, NULL); - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super); ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, super->start, super->len); if (!btrfs_test_opt(root, NOBARRIER)) @@ -864,12 +873,12 @@ int close_ctree(struct btrfs_root *root) filemap_write_and_wait(fs_info->btree_inode->i_mapping); - extent_map_tree_empty_lru(&fs_info->free_space_cache); - extent_map_tree_empty_lru(&fs_info->block_group_cache); - extent_map_tree_empty_lru(&fs_info->pinned_extents); - extent_map_tree_empty_lru(&fs_info->pending_del); - extent_map_tree_empty_lru(&fs_info->extent_ins); - extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree); + extent_io_tree_empty_lru(&fs_info->free_space_cache); + extent_io_tree_empty_lru(&fs_info->block_group_cache); + extent_io_tree_empty_lru(&fs_info->pinned_extents); + extent_io_tree_empty_lru(&fs_info->pending_del); + extent_io_tree_empty_lru(&fs_info->extent_ins); + extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); @@ -892,13 +901,13 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { struct inode *btree_inode = buf->first_page->mapping->host; - return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); + return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { struct inode *btree_inode = buf->first_page->mapping->host; - return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, + return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); } @@ -914,7 +923,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) transid, root->fs_info->generation); WARN_ON(1); } - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } void btrfs_throttle(struct btrfs_root *root) @@ -941,7 +950,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); } @@ -949,7 +958,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, GFP_NOFS); } @@ -958,7 +967,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + return test_range_bit(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); } @@ -966,7 +975,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + return test_range_bit(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, 0); } @@ -975,7 +984,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, GFP_NOFS); } @@ -984,7 +993,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); } @@ -993,10 +1002,10 @@ int btrfs_read_buffer(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); } -static struct extent_map_ops btree_extent_map_ops = { +static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, }; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b69a46691a96..1cf125ab7822 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -63,7 +63,7 @@ static int cache_block_group(struct btrfs_root *root, int ret; struct btrfs_key key; struct extent_buffer *leaf; - struct extent_map_tree *free_space_cache; + struct extent_io_tree *free_space_cache; int slot; u64 last = 0; u64 hole_size; @@ -158,7 +158,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr) { - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *block_group = NULL; u64 ptr; u64 start; @@ -281,7 +281,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data, int owner) { struct btrfs_block_group_cache *cache; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; u64 used; @@ -951,7 +951,7 @@ fail: int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *cache; int ret; int err = 0; @@ -1107,12 +1107,12 @@ static int update_pinned_extents(struct btrfs_root *root, return 0; } -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) { u64 last = 0; u64 start; u64 end; - struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents; + struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; int ret; while(1) { @@ -1128,12 +1128,12 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_map_tree *unpin) + struct extent_io_tree *unpin) { u64 start; u64 end; int ret; - struct extent_map_tree *free_space_cache; + struct extent_io_tree *free_space_cache; free_space_cache = &root->fs_info->free_space_cache; while(1) { @@ -1329,8 +1329,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int err = 0; u64 start; u64 end; - struct extent_map_tree *pending_del; - struct extent_map_tree *pinned_extents; + struct extent_io_tree *pending_del; + struct extent_io_tree *pinned_extents; pending_del = &extent_root->fs_info->pending_del; pinned_extents = &extent_root->fs_info->pinned_extents; @@ -1802,7 +1802,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree, + set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; @@ -2166,7 +2166,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, unsigned long i; struct page *page; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct file_ra_state *ra; ra = kzalloc(sizeof(*ra), GFP_NOFS); @@ -2195,15 +2195,14 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; - existing_delalloc = - count_range_bits(&BTRFS_I(inode)->extent_tree, - &delalloc_start, page_end, - PAGE_CACHE_SIZE, EXTENT_DELALLOC); + existing_delalloc = count_range_bits(io_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(em_tree, page_start, + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -2211,7 +2210,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); page_cache_release(page); @@ -2379,7 +2378,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) u64 cur_byte; u64 total_found; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; @@ -2561,7 +2560,7 @@ int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *cache; struct btrfs_block_group_item *item; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct extent_buffer *leaf; int ret; @@ -2645,7 +2644,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c new file mode 100644 index 000000000000..15cc158a0498 --- /dev/null +++ b/fs/btrfs/extent_io.c @@ -0,0 +1,3089 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "extent_io.h" +#include "extent_map.h" + +/* temporary define until extent_map moves out of btrfs */ +struct kmem_cache *btrfs_cache_create(const char *name, size_t size, + unsigned long extra_flags, + void (*ctor)(void *, struct kmem_cache *, + unsigned long)); + +static struct kmem_cache *extent_state_cache; +static struct kmem_cache *extent_buffer_cache; + +static LIST_HEAD(buffers); +static LIST_HEAD(states); + +static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; +#define BUFFER_LRU_MAX 64 + +struct tree_entry { + u64 start; + u64 end; + int in_tree; + struct rb_node rb_node; +}; + +struct extent_page_data { + struct bio *bio; + struct extent_io_tree *tree; + get_extent_t *get_extent; +}; + +int __init extent_io_init(void) +{ + extent_state_cache = btrfs_cache_create("extent_state", + sizeof(struct extent_state), 0, + NULL); + if (!extent_state_cache) + return -ENOMEM; + + extent_buffer_cache = btrfs_cache_create("extent_buffers", + sizeof(struct extent_buffer), 0, + NULL); + if (!extent_buffer_cache) + goto free_state_cache; + return 0; + +free_state_cache: + kmem_cache_destroy(extent_state_cache); + return -ENOMEM; +} + +void extent_io_exit(void) +{ + struct extent_state *state; + + while (!list_empty(&states)) { + state = list_entry(states.next, struct extent_state, list); + printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); + list_del(&state->list); + kmem_cache_free(extent_state_cache, state); + + } + + if (extent_state_cache) + kmem_cache_destroy(extent_state_cache); + if (extent_buffer_cache) + kmem_cache_destroy(extent_buffer_cache); +} + +void extent_io_tree_init(struct extent_io_tree *tree, + struct address_space *mapping, gfp_t mask) +{ + tree->state.rb_node = NULL; + tree->ops = NULL; + tree->dirty_bytes = 0; + rwlock_init(&tree->lock); + spin_lock_init(&tree->lru_lock); + tree->mapping = mapping; + INIT_LIST_HEAD(&tree->buffer_lru); + tree->lru_size = 0; +} +EXPORT_SYMBOL(extent_io_tree_init); + +void extent_io_tree_empty_lru(struct extent_io_tree *tree) +{ + struct extent_buffer *eb; + while(!list_empty(&tree->buffer_lru)) { + eb = list_entry(tree->buffer_lru.next, struct extent_buffer, + lru); + list_del_init(&eb->lru); + free_extent_buffer(eb); + } +} +EXPORT_SYMBOL(extent_io_tree_empty_lru); + +struct extent_state *alloc_extent_state(gfp_t mask) +{ + struct extent_state *state; + unsigned long flags; + + state = kmem_cache_alloc(extent_state_cache, mask); + if (!state || IS_ERR(state)) + return state; + state->state = 0; + state->in_tree = 0; + state->private = 0; + + spin_lock_irqsave(&state_lock, flags); + list_add(&state->list, &states); + spin_unlock_irqrestore(&state_lock, flags); + + atomic_set(&state->refs, 1); + init_waitqueue_head(&state->wq); + return state; +} +EXPORT_SYMBOL(alloc_extent_state); + +void free_extent_state(struct extent_state *state) +{ + unsigned long flags; + if (!state) + return; + if (atomic_dec_and_test(&state->refs)) { + WARN_ON(state->in_tree); + spin_lock_irqsave(&state_lock, flags); + list_del(&state->list); + spin_unlock_irqrestore(&state_lock, flags); + kmem_cache_free(extent_state_cache, state); + } +} +EXPORT_SYMBOL(free_extent_state); + +static struct rb_node *tree_insert(struct rb_root *root, u64 offset, + struct rb_node *node) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct tree_entry *entry; + + while(*p) { + parent = *p; + entry = rb_entry(parent, struct tree_entry, rb_node); + + if (offset < entry->start) + p = &(*p)->rb_left; + else if (offset > entry->end) + p = &(*p)->rb_right; + else + return parent; + } + + entry = rb_entry(node, struct tree_entry, rb_node); + entry->in_tree = 1; + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *__tree_search(struct rb_root *root, u64 offset, + struct rb_node **prev_ret, + struct rb_node **next_ret) +{ + struct rb_node * n = root->rb_node; + struct rb_node *prev = NULL; + struct rb_node *orig_prev = NULL; + struct tree_entry *entry; + struct tree_entry *prev_entry = NULL; + + while(n) { + entry = rb_entry(n, struct tree_entry, rb_node); + prev = n; + prev_entry = entry; + + if (offset < entry->start) + n = n->rb_left; + else if (offset > entry->end) + n = n->rb_right; + else + return n; + } + + if (prev_ret) { + orig_prev = prev; + while(prev && offset > prev_entry->end) { + prev = rb_next(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *prev_ret = prev; + prev = orig_prev; + } + + if (next_ret) { + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + while(prev && offset < prev_entry->start) { + prev = rb_prev(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *next_ret = prev; + } + return NULL; +} + +static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) +{ + struct rb_node *prev; + struct rb_node *ret; + ret = __tree_search(root, offset, &prev, NULL); + if (!ret) + return prev; + return ret; +} + +/* + * utility function to look for merge candidates inside a given range. + * Any extents with matching state are merged together into a single + * extent in the tree. Extents with EXTENT_IO in their state field + * are not merged because the end_io handlers need to be able to do + * operations on them without sleeping (or doing allocations/splits). + * + * This should be called with the tree lock held. + */ +static int merge_state(struct extent_io_tree *tree, + struct extent_state *state) +{ + struct extent_state *other; + struct rb_node *other_node; + + if (state->state & EXTENT_IOBITS) + return 0; + + other_node = rb_prev(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->end == state->start - 1 && + other->state == state->state) { + state->start = other->start; + other->in_tree = 0; + rb_erase(&other->rb_node, &tree->state); + free_extent_state(other); + } + } + other_node = rb_next(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->start == state->end + 1 && + other->state == state->state) { + other->start = state->start; + state->in_tree = 0; + rb_erase(&state->rb_node, &tree->state); + free_extent_state(state); + } + } + return 0; +} + +/* + * insert an extent_state struct into the tree. 'bits' are set on the + * struct before it is inserted. + * + * This may return -EEXIST if the extent is already there, in which case the + * state struct is freed. + * + * The tree lock is not taken internally. This is a utility function and + * probably isn't what you want to call (see set/clear_extent_bit). + */ +static int insert_state(struct extent_io_tree *tree, + struct extent_state *state, u64 start, u64 end, + int bits) +{ + struct rb_node *node; + + if (end < start) { + printk("end < start %Lu %Lu\n", end, start); + WARN_ON(1); + } + if (bits & EXTENT_DIRTY) + tree->dirty_bytes += end - start + 1; + state->state |= bits; + state->start = start; + state->end = end; + node = tree_insert(&tree->state, end, &state->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); + free_extent_state(state); + return -EEXIST; + } + merge_state(tree, state); + return 0; +} + +/* + * split a given extent state struct in two, inserting the preallocated + * struct 'prealloc' as the newly created second half. 'split' indicates an + * offset inside 'orig' where it should be split. + * + * Before calling, + * the tree has 'orig' at [orig->start, orig->end]. After calling, there + * are two extent state structs in the tree: + * prealloc: [orig->start, split - 1] + * orig: [ split, orig->end ] + * + * The tree locks are not taken by this function. They need to be held + * by the caller. + */ +static int split_state(struct extent_io_tree *tree, struct extent_state *orig, + struct extent_state *prealloc, u64 split) +{ + struct rb_node *node; + prealloc->start = orig->start; + prealloc->end = split - 1; + prealloc->state = orig->state; + orig->start = split; + + node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); + free_extent_state(prealloc); + return -EEXIST; + } + return 0; +} + +/* + * utility function to clear some bits in an extent state struct. + * it will optionally wake up any one waiting on this state (wake == 1), or + * forcibly remove the state from the tree (delete == 1). + * + * If no bits are set on the state struct after clearing things, the + * struct is freed and removed from the tree + */ +static int clear_state_bit(struct extent_io_tree *tree, + struct extent_state *state, int bits, int wake, + int delete) +{ + int ret = state->state & bits; + + if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + WARN_ON(range > tree->dirty_bytes); + tree->dirty_bytes -= range; + } + state->state &= ~bits; + if (wake) + wake_up(&state->wq); + if (delete || state->state == 0) { + if (state->in_tree) { + rb_erase(&state->rb_node, &tree->state); + state->in_tree = 0; + free_extent_state(state); + } else { + WARN_ON(1); + } + } else { + merge_state(tree, state); + } + return ret; +} + +/* + * clear some bits on a range in the tree. This may require splitting + * or inserting elements in the tree, so the gfp mask is used to + * indicate which allocations or sleeping are allowed. + * + * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove + * the given range from the tree regardless of state (ie for truncate). + * + * the range [start, end] is inclusive. + * + * This takes the tree lock, and returns < 0 on error, > 0 if any of the + * bits were already set, or zero if none of the bits were already set. + */ +int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int wake, int delete, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + unsigned long flags; + int err; + int set = 0; + +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irqsave(&tree->lock, flags); + /* + * this search will find the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + goto out; + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > end) + goto out; + WARN_ON(state->end < start); + + /* + * | ---- desired range ---- | + * | state | or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip + * bits on second half. + * + * If the extent we found extends past our range, we + * just split and search again. It'll get split again + * the next time though. + * + * If the extent we found is inside our range, we clear + * the desired bit on it. + */ + + if (state->start < start) { + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, + wake, delete); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and clear the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + if (wake) + wake_up(&state->wq); + set |= clear_state_bit(tree, prealloc, bits, + wake, delete); + prealloc = NULL; + goto out; + } + + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, wake, delete); + goto search_again; + +out: + write_unlock_irqrestore(&tree->lock, flags); + if (prealloc) + free_extent_state(prealloc); + + return set; + +search_again: + if (start > end) + goto out; + write_unlock_irqrestore(&tree->lock, flags); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(clear_extent_bit); + +static int wait_on_state(struct extent_io_tree *tree, + struct extent_state *state) +{ + DEFINE_WAIT(wait); + prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); + read_unlock_irq(&tree->lock); + schedule(); + read_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + return 0; +} + +/* + * waits for one or more bits to clear on a range in the state tree. + * The range [start, end] is inclusive. + * The tree lock is taken by this function + */ +int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) +{ + struct extent_state *state; + struct rb_node *node; + + read_lock_irq(&tree->lock); +again: + while (1) { + /* + * this search will find all the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + break; + + state = rb_entry(node, struct extent_state, rb_node); + + if (state->start > end) + goto out; + + if (state->state & bits) { + start = state->start; + atomic_inc(&state->refs); + wait_on_state(tree, state); + free_extent_state(state); + goto again; + } + start = state->end + 1; + + if (start > end) + break; + + if (need_resched()) { + read_unlock_irq(&tree->lock); + cond_resched(); + read_lock_irq(&tree->lock); + } + } +out: + read_unlock_irq(&tree->lock); + return 0; +} +EXPORT_SYMBOL(wait_extent_bit); + +static void set_state_bits(struct extent_io_tree *tree, + struct extent_state *state, + int bits) +{ + if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + tree->dirty_bytes += range; + } + state->state |= bits; +} + +/* + * set some bits on a range in the tree. This may require allocations + * or sleeping, so the gfp mask is used to indicate what is allowed. + * + * If 'exclusive' == 1, this will fail with -EEXIST if some part of the + * range already has the desired bits set. The start of the existing + * range is returned in failed_start in this case. + * + * [start, end] is inclusive + * This takes the tree lock. + */ +int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, + int exclusive, u64 *failed_start, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + unsigned long flags; + int err = 0; + int set; + u64 last_start; + u64 last_end; +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irqsave(&tree->lock, flags); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node) { + err = insert_state(tree, prealloc, start, end, bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + goto out; + } + + state = rb_entry(node, struct extent_state, rb_node); + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going + */ + if (state->start == start && state->end <= end) { + set = state->state & bits; + if (set && exclusive) { + *failed_start = state->start; + err = -EEXIST; + goto out; + } + set_state_bits(tree, state, bits); + start = state->end + 1; + merge_state(tree, state); + goto search_again; + } + + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on + * second half. + * + * If the extent we found extends past our + * range, we just split and search again. It'll get split + * again the next time though. + * + * If the extent we found is inside our range, we set the + * desired bit on it. + */ + if (state->start < start) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + set_state_bits(tree, state, bits); + start = state->end + 1; + merge_state(tree, state); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and + * ignore the extent we found. + */ + if (state->start > start) { + u64 this_end; + if (end < last_start) + this_end = end; + else + this_end = last_start -1; + err = insert_state(tree, prealloc, start, this_end, + bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + if (err) + goto out; + start = this_end + 1; + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and set the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + set_state_bits(tree, prealloc, bits); + merge_state(tree, prealloc); + prealloc = NULL; + goto out; + } + + goto search_again; + +out: + write_unlock_irqrestore(&tree->lock, flags); + if (prealloc) + free_extent_state(prealloc); + + return err; + +search_again: + if (start > end) + goto out; + write_unlock_irqrestore(&tree->lock, flags); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(set_extent_bit); + +/* wrappers around set/clear extent bit */ +int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_dirty); + +int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return set_extent_bit(tree, start, end, bits, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_bits); + +int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return clear_extent_bit(tree, start, end, bits, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_bits); + +int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, + EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_delalloc); + +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_dirty); + +int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_new); + +int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_new); + +int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_uptodate); + +int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_uptodate); + +int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, + 0, NULL, mask); +} +EXPORT_SYMBOL(set_extent_writeback); + +int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); +} +EXPORT_SYMBOL(clear_extent_writeback); + +int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) +{ + return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); +} +EXPORT_SYMBOL(wait_on_extent_writeback); + +/* + * locks a range in ascending order, waiting for any locked regions + * it hits on the way. [start,end] are inclusive, and this will sleep. + */ +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) +{ + int err; + u64 failed_start; + while (1) { + err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, + &failed_start, mask); + if (err == -EEXIST && (mask & __GFP_WAIT)) { + wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); + start = failed_start; + } else { + break; + } + WARN_ON(start > end); + } + return err; +} +EXPORT_SYMBOL(lock_extent); + +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); +} +EXPORT_SYMBOL(unlock_extent); + +/* + * helper function to set pages and extents in the tree dirty + */ +int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + __set_page_dirty_nobuffers(page); + page_cache_release(page); + index++; + } + set_extent_dirty(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_dirty); + +/* + * helper function to set both pages and extents in the tree writeback + */ +int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + set_page_writeback(page); + page_cache_release(page); + index++; + } + set_extent_writeback(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_writeback); + +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 1; + + read_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->end >= start && (state->state & bits)) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + break; + } + node = rb_next(node); + if (!node) + break; + } +out: + read_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(find_first_extent_bit); + +u64 find_lock_delalloc_range(struct extent_io_tree *tree, + u64 *start, u64 *end, u64 max_bytes) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = *start; + u64 found = 0; + u64 total_bytes = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ +search_again: + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + *end = (u64)-1; + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (found && state->start != cur_start) { + goto out; + } + if (!(state->state & EXTENT_DELALLOC)) { + if (!found) + *end = state->end; + goto out; + } + if (!found) { + struct extent_state *prev_state; + struct rb_node *prev_node = node; + while(1) { + prev_node = rb_prev(prev_node); + if (!prev_node) + break; + prev_state = rb_entry(prev_node, + struct extent_state, + rb_node); + if (!(prev_state->state & EXTENT_DELALLOC)) + break; + state = prev_state; + node = prev_node; + } + } + if (state->state & EXTENT_LOCKED) { + DEFINE_WAIT(wait); + atomic_inc(&state->refs); + prepare_to_wait(&state->wq, &wait, + TASK_UNINTERRUPTIBLE); + write_unlock_irq(&tree->lock); + schedule(); + write_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + free_extent_state(state); + goto search_again; + } + state->state |= EXTENT_LOCKED; + if (!found) + *start = state->start; + found++; + *end = state->end; + cur_start = state->end + 1; + node = rb_next(node); + if (!node) + break; + total_bytes += state->end - state->start + 1; + if (total_bytes >= max_bytes) + break; + } +out: + write_unlock_irq(&tree->lock); + return found; +} + +u64 count_range_bits(struct extent_io_tree *tree, + u64 *start, u64 search_end, u64 max_bytes, + unsigned long bits) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = *start; + u64 total_bytes = 0; + int found = 0; + + if (search_end <= cur_start) { + printk("search_end %Lu start %Lu\n", search_end, cur_start); + WARN_ON(1); + return 0; + } + + write_lock_irq(&tree->lock); + if (cur_start == 0 && bits == EXTENT_DIRTY) { + total_bytes = tree->dirty_bytes; + goto out; + } + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > search_end) + break; + if (state->end >= cur_start && (state->state & bits)) { + total_bytes += min(search_end, state->end) + 1 - + max(cur_start, state->start); + if (total_bytes >= max_bytes) + break; + if (!found) { + *start = state->start; + found = 1; + } + } + node = rb_next(node); + if (!node) + break; + } +out: + write_unlock_irq(&tree->lock); + return total_bytes; +} +/* + * helper function to lock both pages and extents in the tree. + * pages must be locked first. + */ +int lock_range(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + int err; + + while (index <= end_index) { + page = grab_cache_page(tree->mapping, index); + if (!page) { + err = -ENOMEM; + goto failed; + } + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto failed; + } + index++; + } + lock_extent(tree, start, end, GFP_NOFS); + return 0; + +failed: + /* + * we failed above in getting the page at 'index', so we undo here + * up to but not including the page at 'index' + */ + end_index = index; + index = start >> PAGE_CACHE_SHIFT; + while (index < end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + return err; +} +EXPORT_SYMBOL(lock_range); + +/* + * helper function to unlock both pages and extents in the tree. + */ +int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + unlock_extent(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(unlock_range); + +int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + state->private = private; +out: + write_unlock_irq(&tree->lock); + return ret; +} + +int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + read_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + *private = state->private; +out: + read_unlock_irq(&tree->lock); + return ret; +} + +/* + * searches a range in the state tree for a given mask. + * If 'filled' == 1, this returns 1 only if ever extent in the tree + * has the bits set. Otherwise, 1 is returned if any bit in the + * range is found set. + */ +int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int filled) +{ + struct extent_state *state = NULL; + struct rb_node *node; + int bitset = 0; + unsigned long flags; + + read_lock_irqsave(&tree->lock, flags); + node = tree_search(&tree->state, start); + while (node && start <= end) { + state = rb_entry(node, struct extent_state, rb_node); + + if (filled && state->start > start) { + bitset = 0; + break; + } + + if (state->start > end) + break; + + if (state->state & bits) { + bitset = 1; + if (!filled) + break; + } else if (filled) { + bitset = 0; + break; + } + start = state->end + 1; + if (start > end) + break; + node = rb_next(node); + if (!node) { + if (filled) + bitset = 0; + break; + } + } + read_unlock_irqrestore(&tree->lock, flags); + return bitset; +} +EXPORT_SYMBOL(test_range_bit); + +/* + * helper function to set a given page up to date if all the + * extents in the tree for that page are up to date + */ +static int check_page_uptodate(struct extent_io_tree *tree, + struct page *page) +{ + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) + SetPageUptodate(page); + return 0; +} + +/* + * helper function to unlock a page if all the extents in the tree + * for that page are unlocked + */ +static int check_page_locked(struct extent_io_tree *tree, + struct page *page) +{ + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) + unlock_page(page); + return 0; +} + +/* + * helper function to end page writeback if all the extents + * in the tree for that page are done with writeback + */ +static int check_page_writeback(struct extent_io_tree *tree, + struct page *page) +{ + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) + end_page_writeback(page); + return 0; +} + +/* lots and lots of room for performance fixes in the end_bio funcs */ + +/* + * after a writepage IO is done, we need to: + * clear the uptodate bits on error + * clear the writeback bits in the extent tree for this IO + * end_page_writeback if the page has no more pending IO + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_writepage(struct bio *bio, int err) +#else +static int end_bio_extent_writepage(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (!uptodate) { + clear_extent_uptodate(tree, start, end, GFP_ATOMIC); + ClearPageUptodate(page); + SetPageError(page); + } + clear_extent_writeback(tree, start, end, GFP_ATOMIC); + + if (whole_page) + end_page_writeback(page); + else + check_page_writeback(tree, page); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, end); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +/* + * after a readpage IO is done, we need to: + * clear the uptodate bits on error + * set the uptodate bits if things worked + * set the page up to date if all extents in the tree are uptodate + * clear the lock bit in the extent tree + * unlock the page if there are no other extents locked for it + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_readpage(struct bio *bio, int err) +#else +static int end_bio_extent_readpage(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + int ret; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { + ret = tree->ops->readpage_end_io_hook(page, start, end); + if (ret) + uptodate = 0; + } + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + if (whole_page) + SetPageUptodate(page); + else + check_page_uptodate(tree, page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + if (whole_page) + unlock_page(page); + else + check_page_locked(tree, page); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +/* + * IO done from prepare_write is pretty simple, we just unlock + * the structs in the extent tree when done, and set the uptodate bits + * as appropriate. + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_preparewrite(struct bio *bio, int err) +#else +static int end_bio_extent_preparewrite(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +static struct bio * +extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, + gfp_t gfp_flags) +{ + struct bio *bio; + + bio = bio_alloc(gfp_flags, nr_vecs); + + if (bio == NULL && (current->flags & PF_MEMALLOC)) { + while (!bio && (nr_vecs /= 2)) + bio = bio_alloc(gfp_flags, nr_vecs); + } + + if (bio) { + bio->bi_bdev = bdev; + bio->bi_sector = first_sector; + } + return bio; +} + +static int submit_one_bio(int rw, struct bio *bio) +{ + u64 maxsector; + int ret = 0; + + bio_get(bio); + + maxsector = bio->bi_bdev->bd_inode->i_size >> 9; + if (maxsector < bio->bi_sector) { + printk("sector too large max %Lu got %llu\n", maxsector, + (unsigned long long)bio->bi_sector); + WARN_ON(1); + } + + submit_bio(rw, bio); + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + bio_put(bio); + return ret; +} + +static int submit_extent_page(int rw, struct extent_io_tree *tree, + struct page *page, sector_t sector, + size_t size, unsigned long offset, + struct block_device *bdev, + struct bio **bio_ret, + unsigned long max_pages, + bio_end_io_t end_io_func) +{ + int ret = 0; + struct bio *bio; + int nr; + + if (bio_ret && *bio_ret) { + bio = *bio_ret; + if (bio->bi_sector + (bio->bi_size >> 9) != sector || + bio_add_page(bio, page, size, offset) < size) { + ret = submit_one_bio(rw, bio); + bio = NULL; + } else { + return 0; + } + } + nr = min_t(int, max_pages, bio_get_nr_vecs(bdev)); + bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); + if (!bio) { + printk("failed to allocate bio nr %d\n", nr); + } + bio_add_page(bio, page, size, offset); + bio->bi_end_io = end_io_func; + bio->bi_private = tree; + if (bio_ret) { + *bio_ret = bio; + } else { + ret = submit_one_bio(rw, bio); + } + + return ret; +} + +void set_page_extent_mapped(struct page *page) +{ + if (!PagePrivate(page)) { + SetPagePrivate(page); + WARN_ON(!page->mapping->a_ops->invalidatepage); + set_page_private(page, EXTENT_PAGE_PRIVATE); + page_cache_get(page); + } +} + +void set_page_extent_head(struct page *page, unsigned long len) +{ + set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); +} + +/* + * basic readpage implementation. Locked extent state structs are inserted + * into the tree that are removed when the IO is done (by the end_io + * handlers) + */ +static int __extent_read_full_page(struct extent_io_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct bio **bio) +{ + struct inode *inode = page->mapping->host; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + u64 cur_end; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t iosize; + size_t blocksize = inode->i_sb->s_blocksize; + + set_page_extent_mapped(page); + + end = page_end; + lock_extent(tree, start, end, GFP_NOFS); + + while (cur <= end) { + if (cur >= last_byte) { + char *userpage; + iosize = PAGE_CACHE_SIZE - page_offset; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + break; + } + em = get_extent(inode, page, page_offset, cur, + end - cur + 1, 0); + if (IS_ERR(em) || !em) { + SetPageError(page); + unlock_extent(tree, cur, end, GFP_NOFS); + break; + } + + extent_offset = cur - em->start; + BUG_ON(extent_map_end(em) <= cur); + BUG_ON(end < cur); + + iosize = min(extent_map_end(em) - cur, end - cur + 1); + cur_end = min(extent_map_end(em) - 1, end); + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + /* we've found a hole, just zero and go on */ + if (block_start == EXTENT_MAP_HOLE) { + char *userpage; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + /* the get_extent function already copied into the page */ + if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + ret = 0; + if (tree->ops && tree->ops->readpage_io_hook) { + ret = tree->ops->readpage_io_hook(page, cur, + cur + iosize - 1); + } + if (!ret) { + unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + nr -= page->index; + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, + bdev, bio, nr, + end_bio_extent_readpage); + } + if (ret) + SetPageError(page); + cur = cur + iosize; + page_offset += iosize; + nr++; + } + if (!nr) { + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); + } + return 0; +} + +int extent_read_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent) +{ + struct bio *bio = NULL; + int ret; + + ret = __extent_read_full_page(tree, page, get_extent, &bio); + if (bio) + submit_one_bio(READ, bio); + return ret; +} +EXPORT_SYMBOL(extent_read_full_page); + +/* + * the writepage semantics are similar to regular writepage. extent + * records are inserted to lock ranges in the tree, and as dirty areas + * are found, they are marked writeback. Then the lock bits are removed + * and the end_io handler clears the writeback ranges + */ +static int __extent_writepage(struct page *page, struct writeback_control *wbc, + void *data) +{ + struct inode *inode = page->mapping->host; + struct extent_page_data *epd = data; + struct extent_io_tree *tree = epd->tree; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 delalloc_start; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + u64 iosize; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t blocksize; + loff_t i_size = i_size_read(inode); + unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; + u64 nr_delalloc; + u64 delalloc_end; + + WARN_ON(!PageLocked(page)); + if (page->index > end_index) { + clear_extent_dirty(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; + } + + if (page->index == end_index) { + char *userpage; + + size_t offset = i_size & (PAGE_CACHE_SIZE - 1); + + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + } + + set_page_extent_mapped(page); + + delalloc_start = start; + delalloc_end = 0; + while(delalloc_end < page_end) { + nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, + &delalloc_end, + 128 * 1024 * 1024); + if (nr_delalloc == 0) { + delalloc_start = delalloc_end + 1; + continue; + } + tree->ops->fill_delalloc(inode, delalloc_start, + delalloc_end); + clear_extent_bit(tree, delalloc_start, + delalloc_end, + EXTENT_LOCKED | EXTENT_DELALLOC, + 1, 0, GFP_NOFS); + delalloc_start = delalloc_end + 1; + } + lock_extent(tree, start, page_end, GFP_NOFS); + + end = page_end; + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after lock_extent\n"); + } + + if (last_byte <= start) { + clear_extent_dirty(tree, start, page_end, GFP_NOFS); + goto done; + } + + set_extent_uptodate(tree, start, page_end, GFP_NOFS); + blocksize = inode->i_sb->s_blocksize; + + while (cur <= end) { + if (cur >= last_byte) { + clear_extent_dirty(tree, cur, page_end, GFP_NOFS); + break; + } + em = epd->get_extent(inode, page, page_offset, cur, + end - cur + 1, 1); + if (IS_ERR(em) || !em) { + SetPageError(page); + break; + } + + extent_offset = cur - em->start; + BUG_ON(extent_map_end(em) <= cur); + BUG_ON(end < cur); + iosize = min(extent_map_end(em) - cur, end - cur + 1); + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + if (block_start == EXTENT_MAP_HOLE || + block_start == EXTENT_MAP_INLINE) { + clear_extent_dirty(tree, cur, + cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + /* leave this out until we have a page_mkwrite call */ + if (0 && !test_range_bit(tree, cur, cur + iosize - 1, + EXTENT_DIRTY, 0)) { + cur = cur + iosize; + page_offset += iosize; + continue; + } + clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); + if (tree->ops && tree->ops->writepage_io_hook) { + ret = tree->ops->writepage_io_hook(page, cur, + cur + iosize - 1); + } else { + ret = 0; + } + if (ret) + SetPageError(page); + else { + unsigned long max_nr = end_index + 1; + set_range_writeback(tree, cur, cur + iosize - 1); + if (!PageWriteback(page)) { + printk("warning page %lu not writeback, " + "cur %llu end %llu\n", page->index, + (unsigned long long)cur, + (unsigned long long)end); + } + + ret = submit_extent_page(WRITE, tree, page, sector, + iosize, page_offset, bdev, + &epd->bio, max_nr, + end_bio_extent_writepage); + if (ret) + SetPageError(page); + } + cur = cur + iosize; + page_offset += iosize; + nr++; + } +done: + if (nr == 0) { + /* make sure the mapping tag for page dirty gets cleared */ + set_page_writeback(page); + end_page_writeback(page); + } + unlock_extent(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; +} + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + +/* Taken directly from 2.6.23 for 2.6.18 back port */ +typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, + void *data); + +/** + * write_cache_pages - walk the list of dirty pages of the given address space + * and write all of them. + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * @writepage: function called for each page + * @data: data passed to writepage function + * + * If a page is already under I/O, write_cache_pages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. + */ +static int write_cache_pages(struct address_space *mapping, + struct writeback_control *wbc, writepage_t writepage, + void *data) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc, data); + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { + unlock_page(page); + ret = 0; + } + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} +#endif + +int extent_write_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + int ret; + struct address_space *mapping = page->mapping; + struct extent_page_data epd = { + .bio = NULL, + .tree = tree, + .get_extent = get_extent, + }; + struct writeback_control wbc_writepages = { + .bdi = wbc->bdi, + .sync_mode = WB_SYNC_NONE, + .older_than_this = NULL, + .nr_to_write = 64, + .range_start = page_offset(page) + PAGE_CACHE_SIZE, + .range_end = (loff_t)-1, + }; + + + ret = __extent_writepage(page, wbc, &epd); + + write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); + if (epd.bio) { + submit_one_bio(WRITE, epd.bio); + } + return ret; +} +EXPORT_SYMBOL(extent_write_full_page); + + +int extent_writepages(struct extent_io_tree *tree, + struct address_space *mapping, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + int ret = 0; + struct extent_page_data epd = { + .bio = NULL, + .tree = tree, + .get_extent = get_extent, + }; + + ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); + if (epd.bio) { + submit_one_bio(WRITE, epd.bio); + } + return ret; +} +EXPORT_SYMBOL(extent_writepages); + +int extent_readpages(struct extent_io_tree *tree, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, + get_extent_t get_extent) +{ + struct bio *bio = NULL; + unsigned page_idx; + struct pagevec pvec; + + pagevec_init(&pvec, 0); + for (page_idx = 0; page_idx < nr_pages; page_idx++) { + struct page *page = list_entry(pages->prev, struct page, lru); + + prefetchw(&page->flags); + list_del(&page->lru); + /* + * what we want to do here is call add_to_page_cache_lru, + * but that isn't exported, so we reproduce it here + */ + if (!add_to_page_cache(page, mapping, + page->index, GFP_KERNEL)) { + + /* open coding of lru_cache_add, also not exported */ + page_cache_get(page); + if (!pagevec_add(&pvec, page)) + __pagevec_lru_add(&pvec); + __extent_read_full_page(tree, page, get_extent, &bio); + } + page_cache_release(page); + } + if (pagevec_count(&pvec)) + __pagevec_lru_add(&pvec); + BUG_ON(!list_empty(pages)); + if (bio) + submit_one_bio(READ, bio); + return 0; +} +EXPORT_SYMBOL(extent_readpages); + +/* + * basic invalidatepage code, this waits on any locked or writeback + * ranges corresponding to the page, and then deletes any extent state + * records from the tree + */ +int extent_invalidatepage(struct extent_io_tree *tree, + struct page *page, unsigned long offset) +{ + u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); + u64 end = start + PAGE_CACHE_SIZE - 1; + size_t blocksize = page->mapping->host->i_sb->s_blocksize; + + start += (offset + blocksize -1) & ~(blocksize - 1); + if (start > end) + return 0; + + lock_extent(tree, start, end, GFP_NOFS); + wait_on_extent_writeback(tree, start, end); + clear_extent_bit(tree, start, end, + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, + 1, 1, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(extent_invalidatepage); + +/* + * simple commit_write call, set_range_dirty is used to mark both + * the pages and the extent records as dirty + */ +int extent_commit_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to) +{ + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + set_page_extent_mapped(page); + set_page_dirty(page); + + if (pos > inode->i_size) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } + return 0; +} +EXPORT_SYMBOL(extent_commit_write); + +int extent_prepare_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to, get_extent_t *get_extent) +{ + u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + u64 block_start; + u64 orig_block_start; + u64 block_end; + u64 cur_end; + struct extent_map *em; + unsigned blocksize = 1 << inode->i_blkbits; + size_t page_offset = 0; + size_t block_off_start; + size_t block_off_end; + int err = 0; + int iocount = 0; + int ret = 0; + int isnew; + + set_page_extent_mapped(page); + + block_start = (page_start + from) & ~((u64)blocksize - 1); + block_end = (page_start + to - 1) | (blocksize - 1); + orig_block_start = block_start; + + lock_extent(tree, page_start, page_end, GFP_NOFS); + while(block_start <= block_end) { + em = get_extent(inode, page, page_offset, block_start, + block_end - block_start + 1, 1); + if (IS_ERR(em) || !em) { + goto err; + } + cur_end = min(block_end, extent_map_end(em) - 1); + block_off_start = block_start & (PAGE_CACHE_SIZE - 1); + block_off_end = block_off_start + blocksize; + isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); + + if (!PageUptodate(page) && isnew && + (block_off_end > to || block_off_start < from)) { + void *kaddr; + + kaddr = kmap_atomic(page, KM_USER0); + if (block_off_end > to) + memset(kaddr + to, 0, block_off_end - to); + if (block_off_start < from) + memset(kaddr + block_off_start, 0, + from - block_off_start); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + } + if ((em->block_start != EXTENT_MAP_HOLE && + em->block_start != EXTENT_MAP_INLINE) && + !isnew && !PageUptodate(page) && + (block_off_end > to || block_off_start < from) && + !test_range_bit(tree, block_start, cur_end, + EXTENT_UPTODATE, 1)) { + u64 sector; + u64 extent_offset = block_start - em->start; + size_t iosize; + sector = (em->block_start + extent_offset) >> 9; + iosize = (cur_end - block_start + blocksize) & + ~((u64)blocksize - 1); + /* + * we've already got the extent locked, but we + * need to split the state such that our end_bio + * handler can clear the lock. + */ + set_extent_bit(tree, block_start, + block_start + iosize - 1, + EXTENT_LOCKED, 0, NULL, GFP_NOFS); + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, em->bdev, + NULL, 1, + end_bio_extent_preparewrite); + iocount++; + block_start = block_start + iosize; + } else { + set_extent_uptodate(tree, block_start, cur_end, + GFP_NOFS); + unlock_extent(tree, block_start, cur_end, GFP_NOFS); + block_start = cur_end + 1; + } + page_offset = block_start & (PAGE_CACHE_SIZE - 1); + free_extent_map(em); + } + if (iocount) { + wait_extent_bit(tree, orig_block_start, + block_end, EXTENT_LOCKED); + } + check_page_uptodate(tree, page); +err: + /* FIXME, zero out newly allocated blocks on error */ + return err; +} +EXPORT_SYMBOL(extent_prepare_write); + +/* + * a helper for releasepage. As long as there are no locked extents + * in the range corresponding to the page, both state records and extent + * map records are removed + */ +int try_release_extent_mapping(struct extent_map_tree *map, + struct extent_io_tree *tree, struct page *page) +{ + struct extent_map *em; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + u64 orig_start = start; + int ret = 1; + + while (start <= end) { + spin_lock(&map->lock); + em = lookup_extent_mapping(map, start, end); + if (!em || IS_ERR(em)) { + spin_unlock(&map->lock); + break; + } + if (!test_range_bit(tree, em->start, extent_map_end(em) - 1, + EXTENT_LOCKED, 0)) { + remove_extent_mapping(map, em); + /* once for the rb tree */ + free_extent_map(em); + } + start = extent_map_end(em); + spin_unlock(&map->lock); + + /* once for us */ + free_extent_map(em); + } + if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) + ret = 0; + else + clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, + 1, 1, GFP_NOFS); + return ret; +} +EXPORT_SYMBOL(try_release_extent_mapping); + +sector_t extent_bmap(struct address_space *mapping, sector_t iblock, + get_extent_t *get_extent) +{ + struct inode *inode = mapping->host; + u64 start = iblock << inode->i_blkbits; + sector_t sector = 0; + struct extent_map *em; + + em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0); + if (!em || IS_ERR(em)) + return 0; + + if (em->block_start == EXTENT_MAP_INLINE || + em->block_start == EXTENT_MAP_HOLE) + goto out; + + sector = (em->block_start + start - em->start) >> inode->i_blkbits; +printk("bmap finds %Lu %Lu block %Lu\n", em->start, em->len, em->block_start); +out: + free_extent_map(em); + return sector; +} + +static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb) +{ + if (list_empty(&eb->lru)) { + extent_buffer_get(eb); + list_add(&eb->lru, &tree->buffer_lru); + tree->lru_size++; + if (tree->lru_size >= BUFFER_LRU_MAX) { + struct extent_buffer *rm; + rm = list_entry(tree->buffer_lru.prev, + struct extent_buffer, lru); + tree->lru_size--; + list_del_init(&rm->lru); + free_extent_buffer(rm); + } + } else + list_move(&eb->lru, &tree->buffer_lru); + return 0; +} +static struct extent_buffer *find_lru(struct extent_io_tree *tree, + u64 start, unsigned long len) +{ + struct list_head *lru = &tree->buffer_lru; + struct list_head *cur = lru->next; + struct extent_buffer *eb; + + if (list_empty(lru)) + return NULL; + + do { + eb = list_entry(cur, struct extent_buffer, lru); + if (eb->start == start && eb->len == len) { + extent_buffer_get(eb); + return eb; + } + cur = cur->next; + } while (cur != lru); + return NULL; +} + +static inline unsigned long num_extent_pages(u64 start, u64 len) +{ + return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT); +} + +static inline struct page *extent_buffer_page(struct extent_buffer *eb, + unsigned long i) +{ + struct page *p; + struct address_space *mapping; + + if (i == 0) + return eb->first_page; + i += eb->start >> PAGE_CACHE_SHIFT; + mapping = eb->first_page->mapping; + read_lock_irq(&mapping->tree_lock); + p = radix_tree_lookup(&mapping->page_tree, i); + read_unlock_irq(&mapping->tree_lock); + return p; +} + +static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, + u64 start, + unsigned long len, + gfp_t mask) +{ + struct extent_buffer *eb = NULL; + + spin_lock(&tree->lru_lock); + eb = find_lru(tree, start, len); + spin_unlock(&tree->lru_lock); + if (eb) { + return eb; + } + + eb = kmem_cache_zalloc(extent_buffer_cache, mask); + INIT_LIST_HEAD(&eb->lru); + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + return eb; +} + +static void __free_extent_buffer(struct extent_buffer *eb) +{ + kmem_cache_free(extent_buffer_cache, eb); +} + +struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + struct page *page0, + gfp_t mask) +{ + unsigned long num_pages = num_extent_pages(start, len); + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + int uptodate = 1; + + eb = __alloc_extent_buffer(tree, start, len, mask); + if (!eb || IS_ERR(eb)) + return NULL; + + if (eb->flags & EXTENT_BUFFER_FILLED) + goto lru_add; + + if (page0) { + eb->first_page = page0; + i = 1; + index++; + page_cache_get(page0); + mark_page_accessed(page0); + set_page_extent_mapped(page0); + WARN_ON(!PageUptodate(page0)); + set_page_extent_head(page0, len); + } else { + i = 0; + } + for (; i < num_pages; i++, index++) { + p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); + if (!p) { + WARN_ON(1); + goto fail; + } + set_page_extent_mapped(p); + mark_page_accessed(p); + if (i == 0) { + eb->first_page = p; + set_page_extent_head(p, len); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); + } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; + +lru_add: + spin_lock(&tree->lru_lock); + add_lru(tree, eb); + spin_unlock(&tree->lru_lock); + return eb; + +fail: + spin_lock(&tree->lru_lock); + list_del_init(&eb->lru); + spin_unlock(&tree->lru_lock); + if (!atomic_dec_and_test(&eb->refs)) + return NULL; + for (index = 1; index < i; index++) { + page_cache_release(extent_buffer_page(eb, index)); + } + if (i > 0) + page_cache_release(extent_buffer_page(eb, 0)); + __free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(alloc_extent_buffer); + +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + gfp_t mask) +{ + unsigned long num_pages = num_extent_pages(start, len); + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + int uptodate = 1; + + eb = __alloc_extent_buffer(tree, start, len, mask); + if (!eb || IS_ERR(eb)) + return NULL; + + if (eb->flags & EXTENT_BUFFER_FILLED) + goto lru_add; + + for (i = 0; i < num_pages; i++, index++) { + p = find_lock_page(mapping, index); + if (!p) { + goto fail; + } + set_page_extent_mapped(p); + mark_page_accessed(p); + + if (i == 0) { + eb->first_page = p; + set_page_extent_head(p, len); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } + + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); + } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; + +lru_add: + spin_lock(&tree->lru_lock); + add_lru(tree, eb); + spin_unlock(&tree->lru_lock); + return eb; +fail: + spin_lock(&tree->lru_lock); + list_del_init(&eb->lru); + spin_unlock(&tree->lru_lock); + if (!atomic_dec_and_test(&eb->refs)) + return NULL; + for (index = 1; index < i; index++) { + page_cache_release(extent_buffer_page(eb, index)); + } + if (i > 0) + page_cache_release(extent_buffer_page(eb, 0)); + __free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(find_extent_buffer); + +void free_extent_buffer(struct extent_buffer *eb) +{ + unsigned long i; + unsigned long num_pages; + + if (!eb) + return; + + if (!atomic_dec_and_test(&eb->refs)) + return; + + WARN_ON(!list_empty(&eb->lru)); + num_pages = num_extent_pages(eb->start, eb->len); + + for (i = 1; i < num_pages; i++) { + page_cache_release(extent_buffer_page(eb, i)); + } + page_cache_release(extent_buffer_page(eb, 0)); + __free_extent_buffer(eb); +} +EXPORT_SYMBOL(free_extent_buffer); + +int clear_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + int set; + unsigned long i; + unsigned long num_pages; + struct page *page; + + u64 start = eb->start; + u64 end = start + eb->len - 1; + + set = clear_extent_dirty(tree, start, end, GFP_NOFS); + num_pages = num_extent_pages(eb->start, eb->len); + + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + lock_page(page); + if (i == 0) + set_page_extent_head(page, eb->len); + else + set_page_private(page, EXTENT_PAGE_PRIVATE); + + /* + * if we're on the last page or the first page and the + * block isn't aligned on a page boundary, do extra checks + * to make sure we don't clean page that is partially dirty + */ + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { + start = (u64)page->index << PAGE_CACHE_SHIFT; + end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, + EXTENT_DIRTY, 0)) { + unlock_page(page); + continue; + } + } + clear_page_dirty_for_io(page); + write_lock_irq(&page->mapping->tree_lock); + if (!PageDirty(page)) { + radix_tree_tag_clear(&page->mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + } + write_unlock_irq(&page->mapping->tree_lock); + unlock_page(page); + } + return 0; +} +EXPORT_SYMBOL(clear_extent_buffer_dirty); + +int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + return wait_on_extent_writeback(tree, eb->start, + eb->start + eb->len - 1); +} +EXPORT_SYMBOL(wait_on_extent_buffer_writeback); + +int set_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + for (i = 0; i < num_pages; i++) { + struct page *page = extent_buffer_page(eb, i); + /* writepage may need to do something special for the + * first page, we have to make sure page->private is + * properly set. releasepage may drop page->private + * on us if the page isn't already dirty. + */ + if (i == 0) { + lock_page(page); + set_page_extent_head(page, eb->len); + } else if (PagePrivate(page) && + page->private != EXTENT_PAGE_PRIVATE) { + lock_page(page); + set_page_extent_mapped(page); + unlock_page(page); + } + __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); + if (i == 0) + unlock_page(page); + } + return set_extent_dirty(tree, eb->start, + eb->start + eb->len - 1, GFP_NOFS); +} +EXPORT_SYMBOL(set_extent_buffer_dirty); + +int set_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + struct page *page; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + + set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { + check_page_uptodate(tree, page); + continue; + } + SetPageUptodate(page); + } + return 0; +} +EXPORT_SYMBOL(set_extent_buffer_uptodate); + +int extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + if (eb->flags & EXTENT_UPTODATE) + return 1; + return test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1); +} +EXPORT_SYMBOL(extent_buffer_uptodate); + +int read_extent_buffer_pages(struct extent_io_tree *tree, + struct extent_buffer *eb, + u64 start, + int wait) +{ + unsigned long i; + unsigned long start_i; + struct page *page; + int err; + int ret = 0; + unsigned long num_pages; + + if (eb->flags & EXTENT_UPTODATE) + return 0; + + if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1)) { + return 0; + } + + if (start) { + WARN_ON(start < eb->start); + start_i = (start >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT); + } else { + start_i = 0; + } + + num_pages = num_extent_pages(eb->start, eb->len); + for (i = start_i; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if (PageUptodate(page)) { + continue; + } + if (!wait) { + if (TestSetPageLocked(page)) { + continue; + } + } else { + lock_page(page); + } + if (!PageUptodate(page)) { + err = page->mapping->a_ops->readpage(NULL, page); + if (err) { + ret = err; + } + } else { + unlock_page(page); + } + } + + if (ret || !wait) { + return ret; + } + + for (i = start_i; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + wait_on_page_locked(page); + if (!PageUptodate(page)) { + ret = -EIO; + } + } + if (!ret) + eb->flags |= EXTENT_UPTODATE; + return ret; +} +EXPORT_SYMBOL(read_extent_buffer_pages); + +void read_extent_buffer(struct extent_buffer *eb, void *dstv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *dst = (char *)dstv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long num_pages = num_extent_pages(eb->start, eb->len); + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + if (!PageUptodate(page)) { + printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); + WARN_ON(1); + } + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + kaddr = kmap_atomic(page, KM_USER1); + memcpy(dst, kaddr + offset, cur); + kunmap_atomic(kaddr, KM_USER1); + + dst += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(read_extent_buffer); + +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + size_t offset = start & (PAGE_CACHE_SIZE - 1); + char *kaddr; + struct page *p; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long end_i = (start_offset + start + min_len - 1) >> + PAGE_CACHE_SHIFT; + + if (i != end_i) + return -EINVAL; + + if (i == 0) { + offset = start_offset; + *map_start = 0; + } else { + offset = 0; + *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; + } + if (start + min_len > eb->len) { +printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); + WARN_ON(1); + } + + p = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(p)); + kaddr = kmap_atomic(p, km); + *token = kaddr; + *map = kaddr + offset; + *map_len = PAGE_CACHE_SIZE - offset; + return 0; +} +EXPORT_SYMBOL(map_private_extent_buffer); + +int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + int err; + int save = 0; + if (eb->map_token) { + unmap_extent_buffer(eb, eb->map_token, km); + eb->map_token = NULL; + save = 1; + } + err = map_private_extent_buffer(eb, start, min_len, token, map, + map_start, map_len, km); + if (!err && save) { + eb->map_token = *token; + eb->kaddr = *map; + eb->map_start = *map_start; + eb->map_len = *map_len; + } + return err; +} +EXPORT_SYMBOL(map_extent_buffer); + +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) +{ + kunmap_atomic(token, km); +} +EXPORT_SYMBOL(unmap_extent_buffer); + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *ptr = (char *)ptrv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + int ret = 0; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + + kaddr = kmap_atomic(page, KM_USER0); + ret = memcmp(ptr, kaddr + offset, cur); + kunmap_atomic(kaddr, KM_USER0); + if (ret) + break; + + ptr += cur; + len -= cur; + offset = 0; + i++; + } + return ret; +} +EXPORT_SYMBOL(memcmp_extent_buffer); + +void write_extent_buffer(struct extent_buffer *eb, const void *srcv, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *src = (char *)srcv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + kaddr = kmap_atomic(page, KM_USER1); + memcpy(kaddr + offset, src, cur); + kunmap_atomic(kaddr, KM_USER1); + + src += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(write_extent_buffer); + +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, c, cur); + kunmap_atomic(kaddr, KM_USER0); + + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(memset_extent_buffer); + +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len) +{ + u64 dst_len = dst->len; + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + + WARN_ON(src->len != dst_len); + + offset = (start_offset + dst_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(dst, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); + + kaddr = kmap_atomic(page, KM_USER0); + read_extent_buffer(src, kaddr + offset, src_offset, cur); + kunmap_atomic(kaddr, KM_USER0); + + src_offset += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(copy_extent_buffer); + +static void move_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + if (dst_page == src_page) { + memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); + } else { + char *src_kaddr = kmap_atomic(src_page, KM_USER1); + char *p = dst_kaddr + dst_off + len; + char *s = src_kaddr + src_off + len; + + while (len--) + *--p = *--s; + + kunmap_atomic(src_kaddr, KM_USER1); + } + kunmap_atomic(dst_kaddr, KM_USER0); +} + +static void copy_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + char *src_kaddr; + + if (dst_page != src_page) + src_kaddr = kmap_atomic(src_page, KM_USER1); + else + src_kaddr = dst_kaddr; + + memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); + kunmap_atomic(dst_kaddr, KM_USER0); + if (dst_page != src_page) + kunmap_atomic(src_kaddr, KM_USER1); +} + +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + + while(len > 0) { + dst_off_in_page = (start_offset + dst_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = (start_offset + src_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - + src_off_in_page)); + cur = min_t(unsigned long, cur, + (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); + + copy_pages(extent_buffer_page(dst, dst_i), + extent_buffer_page(dst, src_i), + dst_off_in_page, src_off_in_page, cur); + + src_offset += cur; + dst_offset += cur; + len -= cur; + } +} +EXPORT_SYMBOL(memcpy_extent_buffer); + +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + unsigned long dst_end = dst_offset + len - 1; + unsigned long src_end = src_offset + len - 1; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset < src_offset) { + memcpy_extent_buffer(dst, dst_offset, src_offset, len); + return; + } + while(len > 0) { + dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; + + dst_off_in_page = (start_offset + dst_end) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = (start_offset + src_end) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + cur = min_t(unsigned long, len, src_off_in_page + 1); + cur = min(cur, dst_off_in_page + 1); + move_pages(extent_buffer_page(dst, dst_i), + extent_buffer_page(dst, src_i), + dst_off_in_page - cur + 1, + src_off_in_page - cur + 1, cur); + + dst_end -= cur; + src_end -= cur; + len -= cur; + } +} +EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h new file mode 100644 index 000000000000..06be1fe84b29 --- /dev/null +++ b/fs/btrfs/extent_io.h @@ -0,0 +1,193 @@ +#ifndef __EXTENTIO__ +#define __EXTENTIO__ + +#include + +/* bits for the extent state */ +#define EXTENT_DIRTY 1 +#define EXTENT_WRITEBACK (1 << 1) +#define EXTENT_UPTODATE (1 << 2) +#define EXTENT_LOCKED (1 << 3) +#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) +#define EXTENT_DEFRAG (1 << 6) +#define EXTENT_DEFRAG_DONE (1 << 7) +#define EXTENT_BUFFER_FILLED (1 << 8) +#define EXTENT_CSUM (1 << 9) +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + +/* + * page->private values. Every page that is controlled by the extent + * map has page->private set to one. + */ +#define EXTENT_PAGE_PRIVATE 1 +#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 + +struct extent_io_ops { + int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); + int (*writepage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); + void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); +}; + +struct extent_io_tree { + struct rb_root state; + struct address_space *mapping; + u64 dirty_bytes; + rwlock_t lock; + struct extent_io_ops *ops; + spinlock_t lru_lock; + struct list_head buffer_lru; + int lru_size; +}; + +struct extent_state { + u64 start; + u64 end; /* inclusive */ + int in_tree; + struct rb_node rb_node; + wait_queue_head_t wq; + atomic_t refs; + unsigned long state; + + /* for use by the FS */ + u64 private; + + struct list_head list; +}; + +struct extent_buffer { + u64 start; + unsigned long len; + char *map_token; + char *kaddr; + unsigned long map_start; + unsigned long map_len; + struct page *first_page; + struct list_head lru; + atomic_t refs; + int flags; +}; + +struct extent_map_tree; + +typedef struct extent_map *(get_extent_t)(struct inode *inode, + struct page *page, + size_t page_offset, + u64 start, u64 len, + int create); + +void extent_io_tree_init(struct extent_io_tree *tree, + struct address_space *mapping, gfp_t mask); +void extent_io_tree_empty_lru(struct extent_io_tree *tree); +int try_release_extent_mapping(struct extent_map_tree *map, + struct extent_io_tree *tree, struct page *page); +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int extent_read_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent); +int __init extent_io_init(void); +void extent_io_exit(void); + +u64 count_range_bits(struct extent_io_tree *tree, + u64 *start, u64 search_end, + u64 max_bytes, unsigned long bits); + +int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int filled); +int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); +int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); +int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits); +int extent_invalidatepage(struct extent_io_tree *tree, + struct page *page, unsigned long offset); +int extent_write_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc); +int extent_writepages(struct extent_io_tree *tree, + struct address_space *mapping, + get_extent_t *get_extent, + struct writeback_control *wbc); +int extent_readpages(struct extent_io_tree *tree, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, + get_extent_t get_extent); +int extent_prepare_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to, get_extent_t *get_extent); +int extent_commit_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to); +sector_t extent_bmap(struct address_space *mapping, sector_t iblock, + get_extent_t *get_extent); +int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end); +int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); +int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); +void set_page_extent_mapped(struct page *page); + +struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + struct page *page0, + gfp_t mask); +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +void free_extent_buffer(struct extent_buffer *eb); +int read_extent_buffer_pages(struct extent_io_tree *tree, + struct extent_buffer *eb, u64 start, int wait); + +static inline void extent_buffer_get(struct extent_buffer *eb) +{ + atomic_inc(&eb->refs); +} + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len); +void read_extent_buffer(struct extent_buffer *eb, void *dst, + unsigned long start, + unsigned long len); +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len); +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len); +int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, + struct extent_buffer *eb); +int clear_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); +int extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); +int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); +#endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 010a287fbd71..268ad8facf6e 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1,17 +1,10 @@ -#include -#include -#include -#include +#include #include -#include -#include +#include #include #include -#include -#include #include -#include -#include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -21,27 +14,6 @@ struct kmem_cache *btrfs_cache_create(const char *name, size_t size, unsigned long)); static struct kmem_cache *extent_map_cache; -static struct kmem_cache *extent_state_cache; -static struct kmem_cache *extent_buffer_cache; - -static LIST_HEAD(buffers); -static LIST_HEAD(states); - -static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; -#define BUFFER_LRU_MAX 64 - -struct tree_entry { - u64 start; - u64 end; - int in_tree; - struct rb_node rb_node; -}; - -struct extent_page_data { - struct bio *bio; - struct extent_map_tree *tree; - get_extent_t *get_extent; -}; int __init extent_map_init(void) { @@ -50,72 +22,23 @@ int __init extent_map_init(void) NULL); if (!extent_map_cache) return -ENOMEM; - extent_state_cache = btrfs_cache_create("extent_state", - sizeof(struct extent_state), 0, - NULL); - if (!extent_state_cache) - goto free_map_cache; - extent_buffer_cache = btrfs_cache_create("extent_buffers", - sizeof(struct extent_buffer), 0, - NULL); - if (!extent_buffer_cache) - goto free_state_cache; return 0; - -free_state_cache: - kmem_cache_destroy(extent_state_cache); -free_map_cache: - kmem_cache_destroy(extent_map_cache); - return -ENOMEM; } void extent_map_exit(void) { - struct extent_state *state; - - while (!list_empty(&states)) { - state = list_entry(states.next, struct extent_state, list); - printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); - list_del(&state->list); - kmem_cache_free(extent_state_cache, state); - - } - if (extent_map_cache) kmem_cache_destroy(extent_map_cache); - if (extent_state_cache) - kmem_cache_destroy(extent_state_cache); - if (extent_buffer_cache) - kmem_cache_destroy(extent_buffer_cache); } -void extent_map_tree_init(struct extent_map_tree *tree, - struct address_space *mapping, gfp_t mask) +void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) { tree->map.rb_node = NULL; - tree->state.rb_node = NULL; - tree->ops = NULL; - tree->dirty_bytes = 0; - rwlock_init(&tree->lock); - spin_lock_init(&tree->lru_lock); - tree->mapping = mapping; - INIT_LIST_HEAD(&tree->buffer_lru); - tree->lru_size = 0; + tree->last = NULL; + spin_lock_init(&tree->lock); } EXPORT_SYMBOL(extent_map_tree_init); -void extent_map_tree_empty_lru(struct extent_map_tree *tree) -{ - struct extent_buffer *eb; - while(!list_empty(&tree->buffer_lru)) { - eb = list_entry(tree->buffer_lru.next, struct extent_buffer, - lru); - list_del_init(&eb->lru); - free_extent_buffer(eb); - } -} -EXPORT_SYMBOL(extent_map_tree_empty_lru); - struct extent_map *alloc_extent_map(gfp_t mask) { struct extent_map *em; @@ -123,6 +46,7 @@ struct extent_map *alloc_extent_map(gfp_t mask) if (!em || IS_ERR(em)) return em; em->in_tree = 0; + em->flags = 0; atomic_set(&em->refs, 1); return em; } @@ -132,6 +56,7 @@ void free_extent_map(struct extent_map *em) { if (!em) return; + WARN_ON(atomic_read(&em->refs) == 0); if (atomic_dec_and_test(&em->refs)) { WARN_ON(em->in_tree); kmem_cache_free(extent_map_cache, em); @@ -139,64 +64,28 @@ void free_extent_map(struct extent_map *em) } EXPORT_SYMBOL(free_extent_map); - -struct extent_state *alloc_extent_state(gfp_t mask) -{ - struct extent_state *state; - unsigned long flags; - - state = kmem_cache_alloc(extent_state_cache, mask); - if (!state || IS_ERR(state)) - return state; - state->state = 0; - state->in_tree = 0; - state->private = 0; - - spin_lock_irqsave(&state_lock, flags); - list_add(&state->list, &states); - spin_unlock_irqrestore(&state_lock, flags); - - atomic_set(&state->refs, 1); - init_waitqueue_head(&state->wq); - return state; -} -EXPORT_SYMBOL(alloc_extent_state); - -void free_extent_state(struct extent_state *state) -{ - unsigned long flags; - if (!state) - return; - if (atomic_dec_and_test(&state->refs)) { - WARN_ON(state->in_tree); - spin_lock_irqsave(&state_lock, flags); - list_del(&state->list); - spin_unlock_irqrestore(&state_lock, flags); - kmem_cache_free(extent_state_cache, state); - } -} -EXPORT_SYMBOL(free_extent_state); - static struct rb_node *tree_insert(struct rb_root *root, u64 offset, struct rb_node *node) { struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; - struct tree_entry *entry; + struct extent_map *entry; while(*p) { parent = *p; - entry = rb_entry(parent, struct tree_entry, rb_node); + entry = rb_entry(parent, struct extent_map, rb_node); + + WARN_ON(!entry->in_tree); if (offset < entry->start) p = &(*p)->rb_left; - else if (offset > entry->end) + else if (offset >= extent_map_end(entry)) p = &(*p)->rb_right; else return parent; } - entry = rb_entry(node, struct tree_entry, rb_node); + entry = rb_entry(node, struct extent_map, rb_node); entry->in_tree = 1; rb_link_node(node, parent, p); rb_insert_color(node, root); @@ -210,17 +99,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, struct rb_node * n = root->rb_node; struct rb_node *prev = NULL; struct rb_node *orig_prev = NULL; - struct tree_entry *entry; - struct tree_entry *prev_entry = NULL; + struct extent_map *entry; + struct extent_map *prev_entry = NULL; while(n) { - entry = rb_entry(n, struct tree_entry, rb_node); + entry = rb_entry(n, struct extent_map, rb_node); prev = n; prev_entry = entry; + WARN_ON(!entry->in_tree); + if (offset < entry->start) n = n->rb_left; - else if (offset > entry->end) + else if (offset >= extent_map_end(entry)) n = n->rb_right; else return n; @@ -228,19 +119,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, if (prev_ret) { orig_prev = prev; - while(prev && offset > prev_entry->end) { + while(prev && offset >= extent_map_end(prev_entry)) { prev = rb_next(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); } *prev_ret = prev; prev = orig_prev; } if (next_ret) { - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); while(prev && offset < prev_entry->start) { prev = rb_prev(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); } *next_ret = prev; } @@ -257,22 +148,26 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) return ret; } -static int tree_delete(struct rb_root *root, u64 offset) +static int mergable_maps(struct extent_map *prev, struct extent_map *next) { - struct rb_node *node; - struct tree_entry *entry; - - node = __tree_search(root, offset, NULL, NULL); - if (!node) - return -ENOENT; - entry = rb_entry(node, struct tree_entry, rb_node); - entry->in_tree = 0; - rb_erase(node, root); + if (extent_map_end(prev) == next->start && + prev->flags == next->flags && + prev->bdev == next->bdev && + ((next->block_start == EXTENT_MAP_HOLE && + prev->block_start == EXTENT_MAP_HOLE) || + (next->block_start == EXTENT_MAP_INLINE && + prev->block_start == EXTENT_MAP_INLINE) || + (next->block_start == EXTENT_MAP_DELALLOC && + prev->block_start == EXTENT_MAP_DELALLOC) || + (next->block_start < EXTENT_MAP_LAST_BYTE - 1 && + next->block_start == extent_map_block_end(prev)))) { + return 1; + } return 0; } /* - * add_extent_mapping tries a simple backward merge with existing + * add_extent_mapping tries a simple forward/backward merge with existing * mappings. The extent_map struct passed in will be inserted into * the tree directly (no copies made, just a reference taken). */ @@ -280,13 +175,12 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; - struct extent_map *prev = NULL; + struct extent_map *merge = NULL; struct rb_node *rb; - write_lock_irq(&tree->lock); - rb = tree_insert(&tree->map, em->end, &em->rb_node); + rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { - prev = rb_entry(rb, struct extent_map, rb_node); + merge = rb_entry(rb, struct extent_map, rb_node); ret = -EEXIST; goto out; } @@ -294,53 +188,60 @@ int add_extent_mapping(struct extent_map_tree *tree, if (em->start != 0) { rb = rb_prev(&em->rb_node); if (rb) - prev = rb_entry(rb, struct extent_map, rb_node); - if (prev && prev->end + 1 == em->start && - ((em->block_start == EXTENT_MAP_HOLE && - prev->block_start == EXTENT_MAP_HOLE) || - (em->block_start == EXTENT_MAP_INLINE && - prev->block_start == EXTENT_MAP_INLINE) || - (em->block_start == EXTENT_MAP_DELALLOC && - prev->block_start == EXTENT_MAP_DELALLOC) || - (em->block_start < EXTENT_MAP_DELALLOC - 1 && - em->block_start == prev->block_end + 1))) { - em->start = prev->start; - em->block_start = prev->block_start; - rb_erase(&prev->rb_node, &tree->map); - prev->in_tree = 0; - free_extent_map(prev); + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(merge, em)) { + em->start = merge->start; + em->len += merge->len; + em->block_start = merge->block_start; + merge->in_tree = 0; + rb_erase(&merge->rb_node, &tree->map); + free_extent_map(merge); } } + rb = rb_next(&em->rb_node); + if (rb) + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(em, merge)) { + em->len += merge->len; + rb_erase(&merge->rb_node, &tree->map); + merge->in_tree = 0; + free_extent_map(merge); + } + tree->last = em; out: - write_unlock_irq(&tree->lock); return ret; } EXPORT_SYMBOL(add_extent_mapping); +static u64 range_end(u64 start, u64 len) +{ + if (start + len < start) + return (u64)-1; + return start + len; +} + /* * lookup_extent_mapping returns the first extent_map struct in the - * tree that intersects the [start, end] (inclusive) range. There may + * tree that intersects the [start, len] range. There may * be additional objects in the tree that intersect, so check the object * returned carefully to make sure you don't need additional lookups. */ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 end) + u64 start, u64 len) { struct extent_map *em; struct rb_node *rb_node; - struct rb_node *prev = NULL; - struct rb_node *next = NULL; + struct rb_node *prev = NULL; struct rb_node *next = NULL; u64 end = range_end(start, len); em = tree->last; if (em && end > em->start && start < extent_map_end(em)) goto found; - read_lock_irq(&tree->lock); rb_node = __tree_search(&tree->map, start, &prev, &next); if (!rb_node && prev) { em = rb_entry(prev, struct extent_map, rb_node); - if (em->start <= end && em->end >= start) + if (end > em->start && start < extent_map_end(em)) goto found; } if (!rb_node && next) { em = rb_entry(next, struct extent_map, rb_node); - if (em->start <= end && em->end >= start) + if (end > em->start && start < extent_map_end(em)) goto found; } if (!rb_node) { @@ -352,14 +253,16 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, goto out; } em = rb_entry(rb_node, struct extent_map, rb_node); - if (em->end < start || em->start > end) { - em = NULL; - goto out; - } + if (end > em->start && start < extent_map_end(em)) + goto found; + + em = NULL; + goto out; + found: atomic_inc(&em->refs); + tree->last = em; out: - read_unlock_irq(&tree->lock); return em; } EXPORT_SYMBOL(lookup_extent_mapping); @@ -370,2866 +273,12 @@ EXPORT_SYMBOL(lookup_extent_mapping); */ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { - int ret; + int ret = 0; - write_lock_irq(&tree->lock); - ret = tree_delete(&tree->map, em->end); - write_unlock_irq(&tree->lock); + rb_erase(&em->rb_node, &tree->map); + em->in_tree = 0; + if (tree->last == em) + tree->last = NULL; return ret; } EXPORT_SYMBOL(remove_extent_mapping); - -/* - * utility function to look for merge candidates inside a given range. - * Any extents with matching state are merged together into a single - * extent in the tree. Extents with EXTENT_IO in their state field - * are not merged because the end_io handlers need to be able to do - * operations on them without sleeping (or doing allocations/splits). - * - * This should be called with the tree lock held. - */ -static int merge_state(struct extent_map_tree *tree, - struct extent_state *state) -{ - struct extent_state *other; - struct rb_node *other_node; - - if (state->state & EXTENT_IOBITS) - return 0; - - other_node = rb_prev(&state->rb_node); - if (other_node) { - other = rb_entry(other_node, struct extent_state, rb_node); - if (other->end == state->start - 1 && - other->state == state->state) { - state->start = other->start; - other->in_tree = 0; - rb_erase(&other->rb_node, &tree->state); - free_extent_state(other); - } - } - other_node = rb_next(&state->rb_node); - if (other_node) { - other = rb_entry(other_node, struct extent_state, rb_node); - if (other->start == state->end + 1 && - other->state == state->state) { - other->start = state->start; - state->in_tree = 0; - rb_erase(&state->rb_node, &tree->state); - free_extent_state(state); - } - } - return 0; -} - -/* - * insert an extent_state struct into the tree. 'bits' are set on the - * struct before it is inserted. - * - * This may return -EEXIST if the extent is already there, in which case the - * state struct is freed. - * - * The tree lock is not taken internally. This is a utility function and - * probably isn't what you want to call (see set/clear_extent_bit). - */ -static int insert_state(struct extent_map_tree *tree, - struct extent_state *state, u64 start, u64 end, - int bits) -{ - struct rb_node *node; - - if (end < start) { - printk("end < start %Lu %Lu\n", end, start); - WARN_ON(1); - } - if (bits & EXTENT_DIRTY) - tree->dirty_bytes += end - start + 1; - state->state |= bits; - state->start = start; - state->end = end; - node = tree_insert(&tree->state, end, &state->rb_node); - if (node) { - struct extent_state *found; - found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); - free_extent_state(state); - return -EEXIST; - } - merge_state(tree, state); - return 0; -} - -/* - * split a given extent state struct in two, inserting the preallocated - * struct 'prealloc' as the newly created second half. 'split' indicates an - * offset inside 'orig' where it should be split. - * - * Before calling, - * the tree has 'orig' at [orig->start, orig->end]. After calling, there - * are two extent state structs in the tree: - * prealloc: [orig->start, split - 1] - * orig: [ split, orig->end ] - * - * The tree locks are not taken by this function. They need to be held - * by the caller. - */ -static int split_state(struct extent_map_tree *tree, struct extent_state *orig, - struct extent_state *prealloc, u64 split) -{ - struct rb_node *node; - prealloc->start = orig->start; - prealloc->end = split - 1; - prealloc->state = orig->state; - orig->start = split; - - node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); - if (node) { - struct extent_state *found; - found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); - free_extent_state(prealloc); - return -EEXIST; - } - return 0; -} - -/* - * utility function to clear some bits in an extent state struct. - * it will optionally wake up any one waiting on this state (wake == 1), or - * forcibly remove the state from the tree (delete == 1). - * - * If no bits are set on the state struct after clearing things, the - * struct is freed and removed from the tree - */ -static int clear_state_bit(struct extent_map_tree *tree, - struct extent_state *state, int bits, int wake, - int delete) -{ - int ret = state->state & bits; - - if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { - u64 range = state->end - state->start + 1; - WARN_ON(range > tree->dirty_bytes); - tree->dirty_bytes -= range; - } - state->state &= ~bits; - if (wake) - wake_up(&state->wq); - if (delete || state->state == 0) { - if (state->in_tree) { - rb_erase(&state->rb_node, &tree->state); - state->in_tree = 0; - free_extent_state(state); - } else { - WARN_ON(1); - } - } else { - merge_state(tree, state); - } - return ret; -} - -/* - * clear some bits on a range in the tree. This may require splitting - * or inserting elements in the tree, so the gfp mask is used to - * indicate which allocations or sleeping are allowed. - * - * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove - * the given range from the tree regardless of state (ie for truncate). - * - * the range [start, end] is inclusive. - * - * This takes the tree lock, and returns < 0 on error, > 0 if any of the - * bits were already set, or zero if none of the bits were already set. - */ -int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int wake, int delete, gfp_t mask) -{ - struct extent_state *state; - struct extent_state *prealloc = NULL; - struct rb_node *node; - unsigned long flags; - int err; - int set = 0; - -again: - if (!prealloc && (mask & __GFP_WAIT)) { - prealloc = alloc_extent_state(mask); - if (!prealloc) - return -ENOMEM; - } - - write_lock_irqsave(&tree->lock, flags); - /* - * this search will find the extents that end after - * our range starts - */ - node = tree_search(&tree->state, start); - if (!node) - goto out; - state = rb_entry(node, struct extent_state, rb_node); - if (state->start > end) - goto out; - WARN_ON(state->end < start); - - /* - * | ---- desired range ---- | - * | state | or - * | ------------- state -------------- | - * - * We need to split the extent we found, and may flip - * bits on second half. - * - * If the extent we found extends past our range, we - * just split and search again. It'll get split again - * the next time though. - * - * If the extent we found is inside our range, we clear - * the desired bit on it. - */ - - if (state->start < start) { - err = split_state(tree, state, prealloc, start); - BUG_ON(err == -EEXIST); - prealloc = NULL; - if (err) - goto out; - if (state->end <= end) { - start = state->end + 1; - set |= clear_state_bit(tree, state, bits, - wake, delete); - } else { - start = state->start; - } - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | - * We need to split the extent, and clear the bit - * on the first half - */ - if (state->start <= end && state->end > end) { - err = split_state(tree, state, prealloc, end + 1); - BUG_ON(err == -EEXIST); - - if (wake) - wake_up(&state->wq); - set |= clear_state_bit(tree, prealloc, bits, - wake, delete); - prealloc = NULL; - goto out; - } - - start = state->end + 1; - set |= clear_state_bit(tree, state, bits, wake, delete); - goto search_again; - -out: - write_unlock_irqrestore(&tree->lock, flags); - if (prealloc) - free_extent_state(prealloc); - - return set; - -search_again: - if (start > end) - goto out; - write_unlock_irqrestore(&tree->lock, flags); - if (mask & __GFP_WAIT) - cond_resched(); - goto again; -} -EXPORT_SYMBOL(clear_extent_bit); - -static int wait_on_state(struct extent_map_tree *tree, - struct extent_state *state) -{ - DEFINE_WAIT(wait); - prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); - read_unlock_irq(&tree->lock); - schedule(); - read_lock_irq(&tree->lock); - finish_wait(&state->wq, &wait); - return 0; -} - -/* - * waits for one or more bits to clear on a range in the state tree. - * The range [start, end] is inclusive. - * The tree lock is taken by this function - */ -int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits) -{ - struct extent_state *state; - struct rb_node *node; - - read_lock_irq(&tree->lock); -again: - while (1) { - /* - * this search will find all the extents that end after - * our range starts - */ - node = tree_search(&tree->state, start); - if (!node) - break; - - state = rb_entry(node, struct extent_state, rb_node); - - if (state->start > end) - goto out; - - if (state->state & bits) { - start = state->start; - atomic_inc(&state->refs); - wait_on_state(tree, state); - free_extent_state(state); - goto again; - } - start = state->end + 1; - - if (start > end) - break; - - if (need_resched()) { - read_unlock_irq(&tree->lock); - cond_resched(); - read_lock_irq(&tree->lock); - } - } -out: - read_unlock_irq(&tree->lock); - return 0; -} -EXPORT_SYMBOL(wait_extent_bit); - -static void set_state_bits(struct extent_map_tree *tree, - struct extent_state *state, - int bits) -{ - if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { - u64 range = state->end - state->start + 1; - tree->dirty_bytes += range; - } - state->state |= bits; -} - -/* - * set some bits on a range in the tree. This may require allocations - * or sleeping, so the gfp mask is used to indicate what is allowed. - * - * If 'exclusive' == 1, this will fail with -EEXIST if some part of the - * range already has the desired bits set. The start of the existing - * range is returned in failed_start in this case. - * - * [start, end] is inclusive - * This takes the tree lock. - */ -int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, - int exclusive, u64 *failed_start, gfp_t mask) -{ - struct extent_state *state; - struct extent_state *prealloc = NULL; - struct rb_node *node; - unsigned long flags; - int err = 0; - int set; - u64 last_start; - u64 last_end; -again: - if (!prealloc && (mask & __GFP_WAIT)) { - prealloc = alloc_extent_state(mask); - if (!prealloc) - return -ENOMEM; - } - - write_lock_irqsave(&tree->lock, flags); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node) { - err = insert_state(tree, prealloc, start, end, bits); - prealloc = NULL; - BUG_ON(err == -EEXIST); - goto out; - } - - state = rb_entry(node, struct extent_state, rb_node); - last_start = state->start; - last_end = state->end; - - /* - * | ---- desired range ---- | - * | state | - * - * Just lock what we found and keep going - */ - if (state->start == start && state->end <= end) { - set = state->state & bits; - if (set && exclusive) { - *failed_start = state->start; - err = -EEXIST; - goto out; - } - set_state_bits(tree, state, bits); - start = state->end + 1; - merge_state(tree, state); - goto search_again; - } - - /* - * | ---- desired range ---- | - * | state | - * or - * | ------------- state -------------- | - * - * We need to split the extent we found, and may flip bits on - * second half. - * - * If the extent we found extends past our - * range, we just split and search again. It'll get split - * again the next time though. - * - * If the extent we found is inside our range, we set the - * desired bit on it. - */ - if (state->start < start) { - set = state->state & bits; - if (exclusive && set) { - *failed_start = start; - err = -EEXIST; - goto out; - } - err = split_state(tree, state, prealloc, start); - BUG_ON(err == -EEXIST); - prealloc = NULL; - if (err) - goto out; - if (state->end <= end) { - set_state_bits(tree, state, bits); - start = state->end + 1; - merge_state(tree, state); - } else { - start = state->start; - } - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | or | state | - * - * There's a hole, we need to insert something in it and - * ignore the extent we found. - */ - if (state->start > start) { - u64 this_end; - if (end < last_start) - this_end = end; - else - this_end = last_start -1; - err = insert_state(tree, prealloc, start, this_end, - bits); - prealloc = NULL; - BUG_ON(err == -EEXIST); - if (err) - goto out; - start = this_end + 1; - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | - * We need to split the extent, and set the bit - * on the first half - */ - if (state->start <= end && state->end > end) { - set = state->state & bits; - if (exclusive && set) { - *failed_start = start; - err = -EEXIST; - goto out; - } - err = split_state(tree, state, prealloc, end + 1); - BUG_ON(err == -EEXIST); - - set_state_bits(tree, prealloc, bits); - merge_state(tree, prealloc); - prealloc = NULL; - goto out; - } - - goto search_again; - -out: - write_unlock_irqrestore(&tree->lock, flags); - if (prealloc) - free_extent_state(prealloc); - - return err; - -search_again: - if (start > end) - goto out; - write_unlock_irqrestore(&tree->lock, flags); - if (mask & __GFP_WAIT) - cond_resched(); - goto again; -} -EXPORT_SYMBOL(set_extent_bit); - -/* wrappers around set/clear extent bit */ -int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_dirty); - -int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask) -{ - return set_extent_bit(tree, start, end, bits, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_bits); - -int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask) -{ - return clear_extent_bit(tree, start, end, bits, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_bits); - -int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_delalloc); - -int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_dirty); - -int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_new); - -int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_new); - -int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_uptodate); - -int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_uptodate); - -int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, - 0, NULL, mask); -} -EXPORT_SYMBOL(set_extent_writeback); - -int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); -} -EXPORT_SYMBOL(clear_extent_writeback); - -int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end) -{ - return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); -} -EXPORT_SYMBOL(wait_on_extent_writeback); - -/* - * locks a range in ascending order, waiting for any locked regions - * it hits on the way. [start,end] are inclusive, and this will sleep. - */ -int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) -{ - int err; - u64 failed_start; - while (1) { - err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, - &failed_start, mask); - if (err == -EEXIST && (mask & __GFP_WAIT)) { - wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); - start = failed_start; - } else { - break; - } - WARN_ON(start > end); - } - return err; -} -EXPORT_SYMBOL(lock_extent); - -int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); -} -EXPORT_SYMBOL(unlock_extent); - -/* - * helper function to set pages and extents in the tree dirty - */ -int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - BUG_ON(!page); - __set_page_dirty_nobuffers(page); - page_cache_release(page); - index++; - } - set_extent_dirty(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(set_range_dirty); - -/* - * helper function to set both pages and extents in the tree writeback - */ -int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - BUG_ON(!page); - set_page_writeback(page); - page_cache_release(page); - index++; - } - set_extent_writeback(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(set_range_writeback); - -int find_first_extent_bit(struct extent_map_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 1; - - read_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (state->end >= start && (state->state & bits)) { - *start_ret = state->start; - *end_ret = state->end; - ret = 0; - break; - } - node = rb_next(node); - if (!node) - break; - } -out: - read_unlock_irq(&tree->lock); - return ret; -} -EXPORT_SYMBOL(find_first_extent_bit); - -u64 find_lock_delalloc_range(struct extent_map_tree *tree, - u64 *start, u64 *end, u64 max_bytes) -{ - struct rb_node *node; - struct extent_state *state; - u64 cur_start = *start; - u64 found = 0; - u64 total_bytes = 0; - - write_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ -search_again: - node = tree_search(&tree->state, cur_start); - if (!node || IS_ERR(node)) { - *end = (u64)-1; - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (found && state->start != cur_start) { - goto out; - } - if (!(state->state & EXTENT_DELALLOC)) { - if (!found) - *end = state->end; - goto out; - } - if (!found) { - struct extent_state *prev_state; - struct rb_node *prev_node = node; - while(1) { - prev_node = rb_prev(prev_node); - if (!prev_node) - break; - prev_state = rb_entry(prev_node, - struct extent_state, - rb_node); - if (!(prev_state->state & EXTENT_DELALLOC)) - break; - state = prev_state; - node = prev_node; - } - } - if (state->state & EXTENT_LOCKED) { - DEFINE_WAIT(wait); - atomic_inc(&state->refs); - prepare_to_wait(&state->wq, &wait, - TASK_UNINTERRUPTIBLE); - write_unlock_irq(&tree->lock); - schedule(); - write_lock_irq(&tree->lock); - finish_wait(&state->wq, &wait); - free_extent_state(state); - goto search_again; - } - state->state |= EXTENT_LOCKED; - if (!found) - *start = state->start; - found++; - *end = state->end; - cur_start = state->end + 1; - node = rb_next(node); - if (!node) - break; - total_bytes += state->end - state->start + 1; - if (total_bytes >= max_bytes) - break; - } -out: - write_unlock_irq(&tree->lock); - return found; -} - -u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 search_end, u64 max_bytes, - unsigned long bits) -{ - struct rb_node *node; - struct extent_state *state; - u64 cur_start = *start; - u64 total_bytes = 0; - int found = 0; - - if (search_end <= cur_start) { - printk("search_end %Lu start %Lu\n", search_end, cur_start); - WARN_ON(1); - return 0; - } - - write_lock_irq(&tree->lock); - if (cur_start == 0 && bits == EXTENT_DIRTY) { - total_bytes = tree->dirty_bytes; - goto out; - } - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, cur_start); - if (!node || IS_ERR(node)) { - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (state->start > search_end) - break; - if (state->end >= cur_start && (state->state & bits)) { - total_bytes += min(search_end, state->end) + 1 - - max(cur_start, state->start); - if (total_bytes >= max_bytes) - break; - if (!found) { - *start = state->start; - found = 1; - } - } - node = rb_next(node); - if (!node) - break; - } -out: - write_unlock_irq(&tree->lock); - return total_bytes; -} -/* - * helper function to lock both pages and extents in the tree. - * pages must be locked first. - */ -int lock_range(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - int err; - - while (index <= end_index) { - page = grab_cache_page(tree->mapping, index); - if (!page) { - err = -ENOMEM; - goto failed; - } - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto failed; - } - index++; - } - lock_extent(tree, start, end, GFP_NOFS); - return 0; - -failed: - /* - * we failed above in getting the page at 'index', so we undo here - * up to but not including the page at 'index' - */ - end_index = index; - index = start >> PAGE_CACHE_SHIFT; - while (index < end_index) { - page = find_get_page(tree->mapping, index); - unlock_page(page); - page_cache_release(page); - index++; - } - return err; -} -EXPORT_SYMBOL(lock_range); - -/* - * helper function to unlock both pages and extents in the tree. - */ -int unlock_range(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - unlock_page(page); - page_cache_release(page); - index++; - } - unlock_extent(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(unlock_range); - -int set_state_private(struct extent_map_tree *tree, u64 start, u64 private) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 0; - - write_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; - } - state->private = private; -out: - write_unlock_irq(&tree->lock); - return ret; -} - -int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 0; - - read_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; - } - *private = state->private; -out: - read_unlock_irq(&tree->lock); - return ret; -} - -/* - * searches a range in the state tree for a given mask. - * If 'filled' == 1, this returns 1 only if ever extent in the tree - * has the bits set. Otherwise, 1 is returned if any bit in the - * range is found set. - */ -int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int filled) -{ - struct extent_state *state = NULL; - struct rb_node *node; - int bitset = 0; - - read_lock_irq(&tree->lock); - node = tree_search(&tree->state, start); - while (node && start <= end) { - state = rb_entry(node, struct extent_state, rb_node); - - if (filled && state->start > start) { - bitset = 0; - break; - } - - if (state->start > end) - break; - - if (state->state & bits) { - bitset = 1; - if (!filled) - break; - } else if (filled) { - bitset = 0; - break; - } - start = state->end + 1; - if (start > end) - break; - node = rb_next(node); - if (!node) { - if (filled) - bitset = 0; - break; - } - } - read_unlock_irq(&tree->lock); - return bitset; -} -EXPORT_SYMBOL(test_range_bit); - -/* - * helper function to set a given page up to date if all the - * extents in the tree for that page are up to date - */ -static int check_page_uptodate(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) - SetPageUptodate(page); - return 0; -} - -/* - * helper function to unlock a page if all the extents in the tree - * for that page are unlocked - */ -static int check_page_locked(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) - unlock_page(page); - return 0; -} - -/* - * helper function to end page writeback if all the extents - * in the tree for that page are done with writeback - */ -static int check_page_writeback(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) - end_page_writeback(page); - return 0; -} - -/* lots and lots of room for performance fixes in the end_bio funcs */ - -/* - * after a writepage IO is done, we need to: - * clear the uptodate bits on error - * clear the writeback bits in the extent tree for this IO - * end_page_writeback if the page has no more pending IO - * - * Scheduling is not allowed, so the extent state tree is expected - * to have one and only one object corresponding to this IO. - */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_writepage(struct bio *bio, int err) -#else -static int end_bio_extent_writepage(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - int whole_page; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) - whole_page = 1; - else - whole_page = 0; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (!uptodate) { - clear_extent_uptodate(tree, start, end, GFP_ATOMIC); - ClearPageUptodate(page); - SetPageError(page); - } - clear_extent_writeback(tree, start, end, GFP_ATOMIC); - - if (whole_page) - end_page_writeback(page); - else - check_page_writeback(tree, page); - if (tree->ops && tree->ops->writepage_end_io_hook) - tree->ops->writepage_end_io_hook(page, start, end); - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -/* - * after a readpage IO is done, we need to: - * clear the uptodate bits on error - * set the uptodate bits if things worked - * set the page up to date if all extents in the tree are uptodate - * clear the lock bit in the extent tree - * unlock the page if there are no other extents locked for it - * - * Scheduling is not allowed, so the extent state tree is expected - * to have one and only one object corresponding to this IO. - */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_readpage(struct bio *bio, int err) -#else -static int end_bio_extent_readpage(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - int whole_page; - int ret; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) - whole_page = 1; - else - whole_page = 0; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { - ret = tree->ops->readpage_end_io_hook(page, start, end); - if (ret) - uptodate = 0; - } - if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); - if (whole_page) - SetPageUptodate(page); - else - check_page_uptodate(tree, page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - - unlock_extent(tree, start, end, GFP_ATOMIC); - - if (whole_page) - unlock_page(page); - else - check_page_locked(tree, page); - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -/* - * IO done from prepare_write is pretty simple, we just unlock - * the structs in the extent tree when done, and set the uptodate bits - * as appropriate. - */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_preparewrite(struct bio *bio, int err) -#else -static int end_bio_extent_preparewrite(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - - unlock_extent(tree, start, end, GFP_ATOMIC); - - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -static struct bio * -extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, - gfp_t gfp_flags) -{ - struct bio *bio; - - bio = bio_alloc(gfp_flags, nr_vecs); - - if (bio == NULL && (current->flags & PF_MEMALLOC)) { - while (!bio && (nr_vecs /= 2)) - bio = bio_alloc(gfp_flags, nr_vecs); - } - - if (bio) { - bio->bi_bdev = bdev; - bio->bi_sector = first_sector; - } - return bio; -} - -static int submit_one_bio(int rw, struct bio *bio) -{ - u64 maxsector; - int ret = 0; - - bio_get(bio); - - maxsector = bio->bi_bdev->bd_inode->i_size >> 9; - if (maxsector < bio->bi_sector) { - printk("sector too large max %Lu got %llu\n", maxsector, - (unsigned long long)bio->bi_sector); - WARN_ON(1); - } - - submit_bio(rw, bio); - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; - bio_put(bio); - return ret; -} - -static int submit_extent_page(int rw, struct extent_map_tree *tree, - struct page *page, sector_t sector, - size_t size, unsigned long offset, - struct block_device *bdev, - struct bio **bio_ret, - unsigned long max_pages, - bio_end_io_t end_io_func) -{ - int ret = 0; - struct bio *bio; - int nr; - - if (bio_ret && *bio_ret) { - bio = *bio_ret; - if (bio->bi_sector + (bio->bi_size >> 9) != sector || - bio_add_page(bio, page, size, offset) < size) { - ret = submit_one_bio(rw, bio); - bio = NULL; - } else { - return 0; - } - } - nr = min_t(int, max_pages, bio_get_nr_vecs(bdev)); - bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); - if (!bio) { - printk("failed to allocate bio nr %d\n", nr); - } - bio_add_page(bio, page, size, offset); - bio->bi_end_io = end_io_func; - bio->bi_private = tree; - if (bio_ret) { - *bio_ret = bio; - } else { - ret = submit_one_bio(rw, bio); - } - - return ret; -} - -void set_page_extent_mapped(struct page *page) -{ - if (!PagePrivate(page)) { - SetPagePrivate(page); - WARN_ON(!page->mapping->a_ops->invalidatepage); - set_page_private(page, EXTENT_PAGE_PRIVATE); - page_cache_get(page); - } -} - -void set_page_extent_head(struct page *page, unsigned long len) -{ - set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); -} - -/* - * basic readpage implementation. Locked extent state structs are inserted - * into the tree that are removed when the IO is done (by the end_io - * handlers) - */ -static int __extent_read_full_page(struct extent_map_tree *tree, - struct page *page, - get_extent_t *get_extent, - struct bio **bio) -{ - struct inode *inode = page->mapping->host; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = start + PAGE_CACHE_SIZE - 1; - u64 end; - u64 cur = start; - u64 extent_offset; - u64 last_byte = i_size_read(inode); - u64 block_start; - u64 cur_end; - sector_t sector; - struct extent_map *em; - struct block_device *bdev; - int ret; - int nr = 0; - size_t page_offset = 0; - size_t iosize; - size_t blocksize = inode->i_sb->s_blocksize; - - set_page_extent_mapped(page); - - end = page_end; - lock_extent(tree, start, end, GFP_NOFS); - - while (cur <= end) { - if (cur >= last_byte) { - char *userpage; - iosize = PAGE_CACHE_SIZE - page_offset; - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, iosize); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - break; - } - em = get_extent(inode, page, page_offset, cur, end, 0); - if (IS_ERR(em) || !em) { - SetPageError(page); - unlock_extent(tree, cur, end, GFP_NOFS); - break; - } - - extent_offset = cur - em->start; - BUG_ON(em->end < cur); - BUG_ON(end < cur); - - iosize = min(em->end - cur, end - cur) + 1; - cur_end = min(em->end, end); - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); - sector = (em->block_start + extent_offset) >> 9; - bdev = em->bdev; - block_start = em->block_start; - free_extent_map(em); - em = NULL; - - /* we've found a hole, just zero and go on */ - if (block_start == EXTENT_MAP_HOLE) { - char *userpage; - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, iosize); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - - set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - /* the get_extent function already copied into the page */ - if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - - ret = 0; - if (tree->ops && tree->ops->readpage_io_hook) { - ret = tree->ops->readpage_io_hook(page, cur, - cur + iosize - 1); - } - if (!ret) { - unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1; - nr -= page->index; - ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, - bdev, bio, nr, - end_bio_extent_readpage); - } - if (ret) - SetPageError(page); - cur = cur + iosize; - page_offset += iosize; - nr++; - } - if (!nr) { - if (!PageError(page)) - SetPageUptodate(page); - unlock_page(page); - } - return 0; -} - -int extent_read_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent) -{ - struct bio *bio = NULL; - int ret; - - ret = __extent_read_full_page(tree, page, get_extent, &bio); - if (bio) - submit_one_bio(READ, bio); - return ret; -} -EXPORT_SYMBOL(extent_read_full_page); - -/* - * the writepage semantics are similar to regular writepage. extent - * records are inserted to lock ranges in the tree, and as dirty areas - * are found, they are marked writeback. Then the lock bits are removed - * and the end_io handler clears the writeback ranges - */ -static int __extent_writepage(struct page *page, struct writeback_control *wbc, - void *data) -{ - struct inode *inode = page->mapping->host; - struct extent_page_data *epd = data; - struct extent_map_tree *tree = epd->tree; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 delalloc_start; - u64 page_end = start + PAGE_CACHE_SIZE - 1; - u64 end; - u64 cur = start; - u64 extent_offset; - u64 last_byte = i_size_read(inode); - u64 block_start; - u64 iosize; - sector_t sector; - struct extent_map *em; - struct block_device *bdev; - int ret; - int nr = 0; - size_t page_offset = 0; - size_t blocksize; - loff_t i_size = i_size_read(inode); - unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; - u64 nr_delalloc; - u64 delalloc_end; - - WARN_ON(!PageLocked(page)); - if (page->index > end_index) { - clear_extent_dirty(tree, start, page_end, GFP_NOFS); - unlock_page(page); - return 0; - } - - if (page->index == end_index) { - char *userpage; - - size_t offset = i_size & (PAGE_CACHE_SIZE - 1); - - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - } - - set_page_extent_mapped(page); - - delalloc_start = start; - delalloc_end = 0; - while(delalloc_end < page_end) { - nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, - &delalloc_end, - 128 * 1024 * 1024); - if (nr_delalloc == 0) { - delalloc_start = delalloc_end + 1; - continue; - } - tree->ops->fill_delalloc(inode, delalloc_start, - delalloc_end); - clear_extent_bit(tree, delalloc_start, - delalloc_end, - EXTENT_LOCKED | EXTENT_DELALLOC, - 1, 0, GFP_NOFS); - delalloc_start = delalloc_end + 1; - } - lock_extent(tree, start, page_end, GFP_NOFS); - - end = page_end; - if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { - printk("found delalloc bits after lock_extent\n"); - } - - if (last_byte <= start) { - clear_extent_dirty(tree, start, page_end, GFP_NOFS); - goto done; - } - - set_extent_uptodate(tree, start, page_end, GFP_NOFS); - blocksize = inode->i_sb->s_blocksize; - - while (cur <= end) { - if (cur >= last_byte) { - clear_extent_dirty(tree, cur, page_end, GFP_NOFS); - break; - } - em = epd->get_extent(inode, page, page_offset, cur, end, 1); - if (IS_ERR(em) || !em) { - SetPageError(page); - break; - } - - extent_offset = cur - em->start; - BUG_ON(em->end < cur); - BUG_ON(end < cur); - iosize = min(em->end - cur, end - cur) + 1; - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); - sector = (em->block_start + extent_offset) >> 9; - bdev = em->bdev; - block_start = em->block_start; - free_extent_map(em); - em = NULL; - - if (block_start == EXTENT_MAP_HOLE || - block_start == EXTENT_MAP_INLINE) { - clear_extent_dirty(tree, cur, - cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - - /* leave this out until we have a page_mkwrite call */ - if (0 && !test_range_bit(tree, cur, cur + iosize - 1, - EXTENT_DIRTY, 0)) { - cur = cur + iosize; - page_offset += iosize; - continue; - } - clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); - if (tree->ops && tree->ops->writepage_io_hook) { - ret = tree->ops->writepage_io_hook(page, cur, - cur + iosize - 1); - } else { - ret = 0; - } - if (ret) - SetPageError(page); - else { - unsigned long max_nr = end_index + 1; - set_range_writeback(tree, cur, cur + iosize - 1); - if (!PageWriteback(page)) { - printk("warning page %lu not writeback, " - "cur %llu end %llu\n", page->index, - (unsigned long long)cur, - (unsigned long long)end); - } - - ret = submit_extent_page(WRITE, tree, page, sector, - iosize, page_offset, bdev, - &epd->bio, max_nr, - end_bio_extent_writepage); - if (ret) - SetPageError(page); - } - cur = cur + iosize; - page_offset += iosize; - nr++; - } -done: - if (nr == 0) { - /* make sure the mapping tag for page dirty gets cleared */ - set_page_writeback(page); - end_page_writeback(page); - } - unlock_extent(tree, start, page_end, GFP_NOFS); - unlock_page(page); - return 0; -} - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - -/* Taken directly from 2.6.23 for 2.6.18 back port */ -typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, - void *data); - -/** - * write_cache_pages - walk the list of dirty pages of the given address space - * and write all of them. - * @mapping: address space structure to write - * @wbc: subtract the number of written pages from *@wbc->nr_to_write - * @writepage: function called for each page - * @data: data passed to writepage function - * - * If a page is already under I/O, write_cache_pages() skips it, even - * if it's dirty. This is desirable behaviour for memory-cleaning writeback, - * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() - * and msync() need to guarantee that all the data which was dirty at the time - * the call was made get new I/O started against them. If wbc->sync_mode is - * WB_SYNC_ALL then we were called for data integrity and we must wait for - * existing IO to complete. - */ -static int write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, writepage_t writepage, - void *data) -{ - struct backing_dev_info *bdi = mapping->backing_dev_info; - int ret = 0; - int done = 0; - struct pagevec pvec; - int nr_pages; - pgoff_t index; - pgoff_t end; /* Inclusive */ - int scanned = 0; - int range_whole = 0; - - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - return 0; - } - - pagevec_init(&pvec, 0); - if (wbc->range_cyclic) { - index = mapping->writeback_index; /* Start from prev offset */ - end = -1; - } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) - range_whole = 1; - scanned = 1; - } -retry: - while (!done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { - unsigned i; - - scanned = 1; - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - - /* - * At this point we hold neither mapping->tree_lock nor - * lock on the page itself: the page may be truncated or - * invalidated (changing page->mapping to NULL), or even - * swizzled back from swapper_space to tmpfs file - * mapping - */ - lock_page(page); - - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - continue; - } - - if (!wbc->range_cyclic && page->index > end) { - done = 1; - unlock_page(page); - continue; - } - - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - continue; - } - - ret = (*writepage)(page, wbc, data); - - if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { - unlock_page(page); - ret = 0; - } - if (ret || (--(wbc->nr_to_write) <= 0)) - done = 1; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - done = 1; - } - } - pagevec_release(&pvec); - cond_resched(); - } - if (!scanned && !done) { - /* - * We hit the last page and there is more work to be done: wrap - * back to the start of the file - */ - scanned = 1; - index = 0; - goto retry; - } - if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) - mapping->writeback_index = index; - return ret; -} -#endif - -int extent_write_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent, - struct writeback_control *wbc) -{ - int ret; - struct address_space *mapping = page->mapping; - struct extent_page_data epd = { - .bio = NULL, - .tree = tree, - .get_extent = get_extent, - }; - struct writeback_control wbc_writepages = { - .bdi = wbc->bdi, - .sync_mode = WB_SYNC_NONE, - .older_than_this = NULL, - .nr_to_write = 64, - .range_start = page_offset(page) + PAGE_CACHE_SIZE, - .range_end = (loff_t)-1, - }; - - - ret = __extent_writepage(page, wbc, &epd); - - write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); - if (epd.bio) { - submit_one_bio(WRITE, epd.bio); - } - return ret; -} -EXPORT_SYMBOL(extent_write_full_page); - - -int extent_writepages(struct extent_map_tree *tree, - struct address_space *mapping, - get_extent_t *get_extent, - struct writeback_control *wbc) -{ - int ret = 0; - struct extent_page_data epd = { - .bio = NULL, - .tree = tree, - .get_extent = get_extent, - }; - - ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); - if (epd.bio) { - submit_one_bio(WRITE, epd.bio); - } - return ret; -} -EXPORT_SYMBOL(extent_writepages); - -int extent_readpages(struct extent_map_tree *tree, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages, - get_extent_t get_extent) -{ - struct bio *bio = NULL; - unsigned page_idx; - struct pagevec pvec; - - pagevec_init(&pvec, 0); - for (page_idx = 0; page_idx < nr_pages; page_idx++) { - struct page *page = list_entry(pages->prev, struct page, lru); - - prefetchw(&page->flags); - list_del(&page->lru); - /* - * what we want to do here is call add_to_page_cache_lru, - * but that isn't exported, so we reproduce it here - */ - if (!add_to_page_cache(page, mapping, - page->index, GFP_KERNEL)) { - - /* open coding of lru_cache_add, also not exported */ - page_cache_get(page); - if (!pagevec_add(&pvec, page)) - __pagevec_lru_add(&pvec); - __extent_read_full_page(tree, page, get_extent, &bio); - } - page_cache_release(page); - } - if (pagevec_count(&pvec)) - __pagevec_lru_add(&pvec); - BUG_ON(!list_empty(pages)); - if (bio) - submit_one_bio(READ, bio); - return 0; -} -EXPORT_SYMBOL(extent_readpages); - -/* - * basic invalidatepage code, this waits on any locked or writeback - * ranges corresponding to the page, and then deletes any extent state - * records from the tree - */ -int extent_invalidatepage(struct extent_map_tree *tree, - struct page *page, unsigned long offset) -{ - u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); - u64 end = start + PAGE_CACHE_SIZE - 1; - size_t blocksize = page->mapping->host->i_sb->s_blocksize; - - start += (offset + blocksize -1) & ~(blocksize - 1); - if (start > end) - return 0; - - lock_extent(tree, start, end, GFP_NOFS); - wait_on_extent_writeback(tree, start, end); - clear_extent_bit(tree, start, end, - EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, - 1, 1, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(extent_invalidatepage); - -/* - * simple commit_write call, set_range_dirty is used to mark both - * the pages and the extent records as dirty - */ -int extent_commit_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to) -{ - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - - set_page_extent_mapped(page); - set_page_dirty(page); - - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} -EXPORT_SYMBOL(extent_commit_write); - -int extent_prepare_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to, get_extent_t *get_extent) -{ - u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - u64 block_start; - u64 orig_block_start; - u64 block_end; - u64 cur_end; - struct extent_map *em; - unsigned blocksize = 1 << inode->i_blkbits; - size_t page_offset = 0; - size_t block_off_start; - size_t block_off_end; - int err = 0; - int iocount = 0; - int ret = 0; - int isnew; - - set_page_extent_mapped(page); - - block_start = (page_start + from) & ~((u64)blocksize - 1); - block_end = (page_start + to - 1) | (blocksize - 1); - orig_block_start = block_start; - - lock_extent(tree, page_start, page_end, GFP_NOFS); - while(block_start <= block_end) { - em = get_extent(inode, page, page_offset, block_start, - block_end, 1); - if (IS_ERR(em) || !em) { - goto err; - } - cur_end = min(block_end, em->end); - block_off_start = block_start & (PAGE_CACHE_SIZE - 1); - block_off_end = block_off_start + blocksize; - isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); - - if (!PageUptodate(page) && isnew && - (block_off_end > to || block_off_start < from)) { - void *kaddr; - - kaddr = kmap_atomic(page, KM_USER0); - if (block_off_end > to) - memset(kaddr + to, 0, block_off_end - to); - if (block_off_start < from) - memset(kaddr + block_off_start, 0, - from - block_off_start); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - } - if ((em->block_start != EXTENT_MAP_HOLE && - em->block_start != EXTENT_MAP_INLINE) && - !isnew && !PageUptodate(page) && - (block_off_end > to || block_off_start < from) && - !test_range_bit(tree, block_start, cur_end, - EXTENT_UPTODATE, 1)) { - u64 sector; - u64 extent_offset = block_start - em->start; - size_t iosize; - sector = (em->block_start + extent_offset) >> 9; - iosize = (cur_end - block_start + blocksize) & - ~((u64)blocksize - 1); - /* - * we've already got the extent locked, but we - * need to split the state such that our end_bio - * handler can clear the lock. - */ - set_extent_bit(tree, block_start, - block_start + iosize - 1, - EXTENT_LOCKED, 0, NULL, GFP_NOFS); - ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, em->bdev, - NULL, 1, - end_bio_extent_preparewrite); - iocount++; - block_start = block_start + iosize; - } else { - set_extent_uptodate(tree, block_start, cur_end, - GFP_NOFS); - unlock_extent(tree, block_start, cur_end, GFP_NOFS); - block_start = cur_end + 1; - } - page_offset = block_start & (PAGE_CACHE_SIZE - 1); - free_extent_map(em); - } - if (iocount) { - wait_extent_bit(tree, orig_block_start, - block_end, EXTENT_LOCKED); - } - check_page_uptodate(tree, page); -err: - /* FIXME, zero out newly allocated blocks on error */ - return err; -} -EXPORT_SYMBOL(extent_prepare_write); - -/* - * a helper for releasepage. As long as there are no locked extents - * in the range corresponding to the page, both state records and extent - * map records are removed - */ -int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) -{ - struct extent_map *em; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - u64 orig_start = start; - int ret = 1; - - while (start <= end) { - em = lookup_extent_mapping(tree, start, end); - if (!em || IS_ERR(em)) - break; - if (!test_range_bit(tree, em->start, em->end, - EXTENT_LOCKED, 0)) { - remove_extent_mapping(tree, em); - /* once for the rb tree */ - free_extent_map(em); - } - start = em->end + 1; - /* once for us */ - free_extent_map(em); - } - if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) - ret = 0; - else - clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, - 1, 1, GFP_NOFS); - return ret; -} -EXPORT_SYMBOL(try_release_extent_mapping); - -sector_t extent_bmap(struct address_space *mapping, sector_t iblock, - get_extent_t *get_extent) -{ - struct inode *inode = mapping->host; - u64 start = iblock << inode->i_blkbits; - u64 end = start + (1 << inode->i_blkbits) - 1; - sector_t sector = 0; - struct extent_map *em; - - em = get_extent(inode, NULL, 0, start, end, 0); - if (!em || IS_ERR(em)) - return 0; - - if (em->block_start == EXTENT_MAP_INLINE || - em->block_start == EXTENT_MAP_HOLE) - goto out; - - sector = (em->block_start + start - em->start) >> inode->i_blkbits; -out: - free_extent_map(em); - return sector; -} - -static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb) -{ - if (list_empty(&eb->lru)) { - extent_buffer_get(eb); - list_add(&eb->lru, &tree->buffer_lru); - tree->lru_size++; - if (tree->lru_size >= BUFFER_LRU_MAX) { - struct extent_buffer *rm; - rm = list_entry(tree->buffer_lru.prev, - struct extent_buffer, lru); - tree->lru_size--; - list_del_init(&rm->lru); - free_extent_buffer(rm); - } - } else - list_move(&eb->lru, &tree->buffer_lru); - return 0; -} -static struct extent_buffer *find_lru(struct extent_map_tree *tree, - u64 start, unsigned long len) -{ - struct list_head *lru = &tree->buffer_lru; - struct list_head *cur = lru->next; - struct extent_buffer *eb; - - if (list_empty(lru)) - return NULL; - - do { - eb = list_entry(cur, struct extent_buffer, lru); - if (eb->start == start && eb->len == len) { - extent_buffer_get(eb); - return eb; - } - cur = cur->next; - } while (cur != lru); - return NULL; -} - -static inline unsigned long num_extent_pages(u64 start, u64 len) -{ - return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT); -} - -static inline struct page *extent_buffer_page(struct extent_buffer *eb, - unsigned long i) -{ - struct page *p; - struct address_space *mapping; - - if (i == 0) - return eb->first_page; - i += eb->start >> PAGE_CACHE_SHIFT; - mapping = eb->first_page->mapping; - read_lock_irq(&mapping->tree_lock); - p = radix_tree_lookup(&mapping->page_tree, i); - read_unlock_irq(&mapping->tree_lock); - return p; -} - -static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, - unsigned long len, - gfp_t mask) -{ - struct extent_buffer *eb = NULL; - - spin_lock(&tree->lru_lock); - eb = find_lru(tree, start, len); - spin_unlock(&tree->lru_lock); - if (eb) { - return eb; - } - - eb = kmem_cache_zalloc(extent_buffer_cache, mask); - INIT_LIST_HEAD(&eb->lru); - eb->start = start; - eb->len = len; - atomic_set(&eb->refs, 1); - - return eb; -} - -static void __free_extent_buffer(struct extent_buffer *eb) -{ - kmem_cache_free(extent_buffer_cache, eb); -} - -struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - struct page *page0, - gfp_t mask) -{ - unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; - struct extent_buffer *eb; - struct page *p; - struct address_space *mapping = tree->mapping; - int uptodate = 1; - - eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb || IS_ERR(eb)) - return NULL; - - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - - if (page0) { - eb->first_page = page0; - i = 1; - index++; - page_cache_get(page0); - mark_page_accessed(page0); - set_page_extent_mapped(page0); - WARN_ON(!PageUptodate(page0)); - set_page_extent_head(page0, len); - } else { - i = 0; - } - for (; i < num_pages; i++, index++) { - p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); - if (!p) { - WARN_ON(1); - goto fail; - } - set_page_extent_mapped(p); - mark_page_accessed(p); - if (i == 0) { - eb->first_page = p; - set_page_extent_head(p, len); - } else { - set_page_private(p, EXTENT_PAGE_PRIVATE); - } - if (!PageUptodate(p)) - uptodate = 0; - unlock_page(p); - } - if (uptodate) - eb->flags |= EXTENT_UPTODATE; - eb->flags |= EXTENT_BUFFER_FILLED; - -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); - return eb; - -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); - if (!atomic_dec_and_test(&eb->refs)) - return NULL; - for (index = 1; index < i; index++) { - page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); - return NULL; -} -EXPORT_SYMBOL(alloc_extent_buffer); - -struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - gfp_t mask) -{ - unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; - struct extent_buffer *eb; - struct page *p; - struct address_space *mapping = tree->mapping; - int uptodate = 1; - - eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb || IS_ERR(eb)) - return NULL; - - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - - for (i = 0; i < num_pages; i++, index++) { - p = find_lock_page(mapping, index); - if (!p) { - goto fail; - } - set_page_extent_mapped(p); - mark_page_accessed(p); - - if (i == 0) { - eb->first_page = p; - set_page_extent_head(p, len); - } else { - set_page_private(p, EXTENT_PAGE_PRIVATE); - } - - if (!PageUptodate(p)) - uptodate = 0; - unlock_page(p); - } - if (uptodate) - eb->flags |= EXTENT_UPTODATE; - eb->flags |= EXTENT_BUFFER_FILLED; - -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); - return eb; -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); - if (!atomic_dec_and_test(&eb->refs)) - return NULL; - for (index = 1; index < i; index++) { - page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); - return NULL; -} -EXPORT_SYMBOL(find_extent_buffer); - -void free_extent_buffer(struct extent_buffer *eb) -{ - unsigned long i; - unsigned long num_pages; - - if (!eb) - return; - - if (!atomic_dec_and_test(&eb->refs)) - return; - - WARN_ON(!list_empty(&eb->lru)); - num_pages = num_extent_pages(eb->start, eb->len); - - for (i = 1; i < num_pages; i++) { - page_cache_release(extent_buffer_page(eb, i)); - } - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); -} -EXPORT_SYMBOL(free_extent_buffer); - -int clear_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - int set; - unsigned long i; - unsigned long num_pages; - struct page *page; - - u64 start = eb->start; - u64 end = start + eb->len - 1; - - set = clear_extent_dirty(tree, start, end, GFP_NOFS); - num_pages = num_extent_pages(eb->start, eb->len); - - for (i = 0; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - lock_page(page); - if (i == 0) - set_page_extent_head(page, eb->len); - else - set_page_private(page, EXTENT_PAGE_PRIVATE); - - /* - * if we're on the last page or the first page and the - * block isn't aligned on a page boundary, do extra checks - * to make sure we don't clean page that is partially dirty - */ - if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || - ((i == num_pages - 1) && - ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { - start = (u64)page->index << PAGE_CACHE_SHIFT; - end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, - EXTENT_DIRTY, 0)) { - unlock_page(page); - continue; - } - } - clear_page_dirty_for_io(page); - write_lock_irq(&page->mapping->tree_lock); - if (!PageDirty(page)) { - radix_tree_tag_clear(&page->mapping->page_tree, - page_index(page), - PAGECACHE_TAG_DIRTY); - } - write_unlock_irq(&page->mapping->tree_lock); - unlock_page(page); - } - return 0; -} -EXPORT_SYMBOL(clear_extent_buffer_dirty); - -int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - return wait_on_extent_writeback(tree, eb->start, - eb->start + eb->len - 1); -} -EXPORT_SYMBOL(wait_on_extent_buffer_writeback); - -int set_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - unsigned long i; - unsigned long num_pages; - - num_pages = num_extent_pages(eb->start, eb->len); - for (i = 0; i < num_pages; i++) { - struct page *page = extent_buffer_page(eb, i); - /* writepage may need to do something special for the - * first page, we have to make sure page->private is - * properly set. releasepage may drop page->private - * on us if the page isn't already dirty. - */ - if (i == 0) { - lock_page(page); - set_page_extent_head(page, eb->len); - } else if (PagePrivate(page) && - page->private != EXTENT_PAGE_PRIVATE) { - lock_page(page); - set_page_extent_mapped(page); - unlock_page(page); - } - __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); - if (i == 0) - unlock_page(page); - } - return set_extent_dirty(tree, eb->start, - eb->start + eb->len - 1, GFP_NOFS); -} -EXPORT_SYMBOL(set_extent_buffer_dirty); - -int set_extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - unsigned long i; - struct page *page; - unsigned long num_pages; - - num_pages = num_extent_pages(eb->start, eb->len); - - set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - GFP_NOFS); - for (i = 0; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || - ((i == num_pages - 1) && - ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { - check_page_uptodate(tree, page); - continue; - } - SetPageUptodate(page); - } - return 0; -} -EXPORT_SYMBOL(set_extent_buffer_uptodate); - -int extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - if (eb->flags & EXTENT_UPTODATE) - return 1; - return test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1); -} -EXPORT_SYMBOL(extent_buffer_uptodate); - -int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, - u64 start, - int wait) -{ - unsigned long i; - unsigned long start_i; - struct page *page; - int err; - int ret = 0; - unsigned long num_pages; - - if (eb->flags & EXTENT_UPTODATE) - return 0; - - if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1)) { - return 0; - } - - if (start) { - WARN_ON(start < eb->start); - start_i = (start >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT); - } else { - start_i = 0; - } - - num_pages = num_extent_pages(eb->start, eb->len); - for (i = start_i; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - if (PageUptodate(page)) { - continue; - } - if (!wait) { - if (TestSetPageLocked(page)) { - continue; - } - } else { - lock_page(page); - } - if (!PageUptodate(page)) { - err = page->mapping->a_ops->readpage(NULL, page); - if (err) { - ret = err; - } - } else { - unlock_page(page); - } - } - - if (ret || !wait) { - return ret; - } - - for (i = start_i; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - wait_on_page_locked(page); - if (!PageUptodate(page)) { - ret = -EIO; - } - } - if (!ret) - eb->flags |= EXTENT_UPTODATE; - return ret; -} -EXPORT_SYMBOL(read_extent_buffer_pages); - -void read_extent_buffer(struct extent_buffer *eb, void *dstv, - unsigned long start, - unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *dst = (char *)dstv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long num_pages = num_extent_pages(eb->start, eb->len); - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - if (!PageUptodate(page)) { - printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); - WARN_ON(1); - } - WARN_ON(!PageUptodate(page)); - - cur = min(len, (PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER1); - memcpy(dst, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER1); - - dst += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(read_extent_buffer); - -int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) -{ - size_t offset = start & (PAGE_CACHE_SIZE - 1); - char *kaddr; - struct page *p; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long end_i = (start_offset + start + min_len - 1) >> - PAGE_CACHE_SHIFT; - - if (i != end_i) - return -EINVAL; - - if (i == 0) { - offset = start_offset; - *map_start = 0; - } else { - offset = 0; - *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; - } - if (start + min_len > eb->len) { -printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); - WARN_ON(1); - } - - p = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(p)); - kaddr = kmap_atomic(p, km); - *token = kaddr; - *map = kaddr + offset; - *map_len = PAGE_CACHE_SIZE - offset; - return 0; -} -EXPORT_SYMBOL(map_private_extent_buffer); - -int map_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, - char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) -{ - int err; - int save = 0; - if (eb->map_token) { - unmap_extent_buffer(eb, eb->map_token, km); - eb->map_token = NULL; - save = 1; - } - err = map_private_extent_buffer(eb, start, min_len, token, map, - map_start, map_len, km); - if (!err && save) { - eb->map_token = *token; - eb->kaddr = *map; - eb->map_start = *map_start; - eb->map_len = *map_len; - } - return err; -} -EXPORT_SYMBOL(map_extent_buffer); - -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) -{ - kunmap_atomic(token, km); -} -EXPORT_SYMBOL(unmap_extent_buffer); - -int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, - unsigned long start, - unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *ptr = (char *)ptrv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - int ret = 0; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, (PAGE_CACHE_SIZE - offset)); - - kaddr = kmap_atomic(page, KM_USER0); - ret = memcmp(ptr, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER0); - if (ret) - break; - - ptr += cur; - len -= cur; - offset = 0; - i++; - } - return ret; -} -EXPORT_SYMBOL(memcmp_extent_buffer); - -void write_extent_buffer(struct extent_buffer *eb, const void *srcv, - unsigned long start, unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *src = (char *)srcv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER1); - memcpy(kaddr + offset, src, cur); - kunmap_atomic(kaddr, KM_USER1); - - src += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(write_extent_buffer); - -void memset_extent_buffer(struct extent_buffer *eb, char c, - unsigned long start, unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, c, cur); - kunmap_atomic(kaddr, KM_USER0); - - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(memset_extent_buffer); - -void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, - unsigned long dst_offset, unsigned long src_offset, - unsigned long len) -{ - u64 dst_len = dst->len; - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; - - WARN_ON(src->len != dst_len); - - offset = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(dst, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); - - kaddr = kmap_atomic(page, KM_USER0); - read_extent_buffer(src, kaddr + offset, src_offset, cur); - kunmap_atomic(kaddr, KM_USER0); - - src_offset += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(copy_extent_buffer); - -static void move_pages(struct page *dst_page, struct page *src_page, - unsigned long dst_off, unsigned long src_off, - unsigned long len) -{ - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); - if (dst_page == src_page) { - memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); - } else { - char *src_kaddr = kmap_atomic(src_page, KM_USER1); - char *p = dst_kaddr + dst_off + len; - char *s = src_kaddr + src_off + len; - - while (len--) - *--p = *--s; - - kunmap_atomic(src_kaddr, KM_USER1); - } - kunmap_atomic(dst_kaddr, KM_USER0); -} - -static void copy_pages(struct page *dst_page, struct page *src_page, - unsigned long dst_off, unsigned long src_off, - unsigned long len) -{ - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); - char *src_kaddr; - - if (dst_page != src_page) - src_kaddr = kmap_atomic(src_page, KM_USER1); - else - src_kaddr = dst_kaddr; - - memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); - kunmap_atomic(dst_kaddr, KM_USER0); - if (dst_page != src_page) - kunmap_atomic(src_kaddr, KM_USER1); -} - -void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len) -{ - size_t cur; - size_t dst_off_in_page; - size_t src_off_in_page; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long dst_i; - unsigned long src_i; - - if (src_offset + len > dst->len) { - printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); - BUG_ON(1); - } - - while(len > 0) { - dst_off_in_page = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = (start_offset + src_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; - src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; - - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - - src_off_in_page)); - cur = min_t(unsigned long, cur, - (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); - - copy_pages(extent_buffer_page(dst, dst_i), - extent_buffer_page(dst, src_i), - dst_off_in_page, src_off_in_page, cur); - - src_offset += cur; - dst_offset += cur; - len -= cur; - } -} -EXPORT_SYMBOL(memcpy_extent_buffer); - -void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len) -{ - size_t cur; - size_t dst_off_in_page; - size_t src_off_in_page; - unsigned long dst_end = dst_offset + len - 1; - unsigned long src_end = src_offset + len - 1; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long dst_i; - unsigned long src_i; - - if (src_offset + len > dst->len) { - printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset < src_offset) { - memcpy_extent_buffer(dst, dst_offset, src_offset, len); - return; - } - while(len > 0) { - dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; - src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; - - dst_off_in_page = (start_offset + dst_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = (start_offset + src_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - cur = min_t(unsigned long, len, src_off_in_page + 1); - cur = min(cur, dst_off_in_page + 1); - move_pages(extent_buffer_page(dst, dst_i), - extent_buffer_page(dst, src_i), - dst_off_in_page - cur + 1, - src_off_in_page - cur + 1, cur); - - dst_end -= cur; - src_end -= cur; - len -= cur; - } -} -EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ea60f5447b5b..56314217cfc0 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -3,215 +3,53 @@ #include +#define EXTENT_MAP_LAST_BYTE (u64)-4 #define EXTENT_MAP_HOLE (u64)-3 #define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_DELALLOC (u64)-1 -/* bits for the extent state */ -#define EXTENT_DIRTY 1 -#define EXTENT_WRITEBACK (1 << 1) -#define EXTENT_UPTODATE (1 << 2) -#define EXTENT_LOCKED (1 << 3) -#define EXTENT_NEW (1 << 4) -#define EXTENT_DELALLOC (1 << 5) -#define EXTENT_DEFRAG (1 << 6) -#define EXTENT_DEFRAG_DONE (1 << 7) -#define EXTENT_BUFFER_FILLED (1 << 8) -#define EXTENT_CSUM (1 << 9) -#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) +struct extent_map { + struct rb_node rb_node; -/* - * page->private values. Every page that is controlled by the extent - * map has page->private set to one. - */ -#define EXTENT_PAGE_PRIVATE 1 -#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 - - -struct extent_map_ops { - int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); - int (*writepage_io_hook)(struct page *page, u64 start, u64 end); - int (*readpage_io_hook)(struct page *page, u64 start, u64 end); - int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); - void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); + /* all of these are in bytes */ + u64 start; + u64 len; + u64 block_start; + unsigned long flags; + struct block_device *bdev; + atomic_t refs; + int in_tree; }; struct extent_map_tree { struct rb_root map; - struct rb_root state; - struct address_space *mapping; - u64 dirty_bytes; - rwlock_t lock; - struct extent_map_ops *ops; - spinlock_t lru_lock; - struct list_head buffer_lru; - int lru_size; + struct extent_map *last; + spinlock_t lock; }; -/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ -struct extent_map { - u64 start; - u64 end; /* inclusive */ - int in_tree; - struct rb_node rb_node; - /* block_start and block_end are in bytes */ - u64 block_start; - u64 block_end; /* inclusive */ - struct block_device *bdev; - atomic_t refs; -}; +static inline u64 extent_map_end(struct extent_map *em) +{ + if (em->start + em->len < em->start) + return (u64)-1; + return em->start + em->len; +} -/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ -struct extent_state { - u64 start; - u64 end; /* inclusive */ - int in_tree; - struct rb_node rb_node; - wait_queue_head_t wq; - atomic_t refs; - unsigned long state; +static inline u64 extent_map_block_end(struct extent_map *em) +{ + if (em->block_start + em->len < em->block_start) + return (u64)-1; + return em->block_start + em->len; +} - /* for use by the FS */ - u64 private; - - struct list_head list; -}; - -struct extent_buffer { - u64 start; - unsigned long len; - char *map_token; - char *kaddr; - unsigned long map_start; - unsigned long map_len; - struct page *first_page; - struct list_head lru; - atomic_t refs; - int flags; -}; - -typedef struct extent_map *(get_extent_t)(struct inode *inode, - struct page *page, - size_t page_offset, - u64 start, u64 end, - int create); - -void extent_map_tree_init(struct extent_map_tree *tree, - struct address_space *mapping, gfp_t mask); -void extent_map_tree_empty_lru(struct extent_map_tree *tree); +void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 end); + u64 start, u64 len); int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); -int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page); -int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); -int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); + struct extent_map *alloc_extent_map(gfp_t mask); void free_extent_map(struct extent_map *em); -int extent_read_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent); int __init extent_map_init(void); void extent_map_exit(void); - -u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 search_end, - u64 max_bytes, unsigned long bits); - -int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int filled); -int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask); -int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask); -int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int find_first_extent_bit(struct extent_map_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits); -int extent_invalidatepage(struct extent_map_tree *tree, - struct page *page, unsigned long offset); -int extent_write_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent, - struct writeback_control *wbc); -int extent_writepages(struct extent_map_tree *tree, - struct address_space *mapping, - get_extent_t *get_extent, - struct writeback_control *wbc); -int extent_readpages(struct extent_map_tree *tree, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages, - get_extent_t get_extent); -int extent_prepare_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to, get_extent_t *get_extent); -int extent_commit_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to); -sector_t extent_bmap(struct address_space *mapping, sector_t iblock, - get_extent_t *get_extent); -int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); -int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); -int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); -void set_page_extent_mapped(struct page *page); - -struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - struct page *page0, - gfp_t mask); -struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - gfp_t mask); -void free_extent_buffer(struct extent_buffer *eb); -int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, u64 start, int wait); - -static inline void extent_buffer_get(struct extent_buffer *eb) -{ - atomic_inc(&eb->refs); -} - -int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, - unsigned long start, - unsigned long len); -void read_extent_buffer(struct extent_buffer *eb, void *dst, - unsigned long start, - unsigned long len); -void write_extent_buffer(struct extent_buffer *eb, const void *src, - unsigned long start, unsigned long len); -void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, - unsigned long dst_offset, unsigned long src_offset, - unsigned long len); -void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len); -void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len); -void memset_extent_buffer(struct extent_buffer *eb, char c, - unsigned long start, unsigned long len); -int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, - struct extent_buffer *eb); -int clear_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb); -int set_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb); -int set_extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb); -int extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb); -int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km); -int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km); -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1cd8c908811e..c5bb00f92396 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -233,8 +233,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, int err = 0; int i; struct inode *inode = fdentry(file)->d_inode; - struct extent_map *em; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 hint_byte; u64 num_bytes; u64 start_pos; @@ -242,11 +241,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 end_pos = pos + write_bytes; u64 inline_size; loff_t isize = i_size_read(inode); - em = alloc_extent_map(GFP_NOFS); - if (!em) - return -ENOMEM; - - em->bdev = inode->i_sb->s_bdev; start_pos = pos & ~((u64)root->sectorsize - 1); num_bytes = (write_bytes + pos - start_pos + @@ -254,7 +248,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; - lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); + lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -268,7 +262,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, if ((end_of_last_block & 4095) == 0) { printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); } - set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS); + set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); /* FIXME...EIEIO, ENOSPC and more */ @@ -293,6 +287,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_ino, last_pos_in_file, 0, 0, hole_size); + btrfs_drop_extent_cache(inode, last_pos_in_file, + last_pos_in_file + hole_size -1); btrfs_check_file(root, inode); } if (err) @@ -320,12 +316,12 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, last_end += PAGE_CACHE_SIZE - 1; if (start_pos < isize) { u64 delalloc_start = start_pos; - existing_delalloc = count_range_bits(em_tree, + existing_delalloc = count_range_bits(io_tree, &delalloc_start, end_of_last_block, (u64)-1, EXTENT_DELALLOC); } - set_extent_delalloc(em_tree, start_pos, end_of_last_block, + set_extent_delalloc(io_tree, start_pos, end_of_last_block, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); root->fs_info->delalloc_bytes += (end_of_last_block + 1 - @@ -346,6 +342,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size -= start_pos; err = insert_inline_extent(trans, root, inode, start_pos, inline_size, pages, 0, num_pages); + btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); BUG_ON(err); } if (end_pos > isize) { @@ -356,8 +353,7 @@ failed: err = btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); - unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); - free_extent_map(em); + unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; } @@ -367,10 +363,15 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; while(1) { + spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, end); - if (!em) + if (!em) { + spin_unlock(&em_tree->lock); break; + } remove_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + /* once for us */ free_extent_map(em); /* once for the tree*/ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 67005480e139..16d3aef45d18 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -53,7 +53,7 @@ static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct address_space_operations btrfs_symlink_aops; static struct file_operations btrfs_dir_file_operations; -static struct extent_map_ops btrfs_extent_map_ops; +static struct extent_io_ops btrfs_extent_io_ops; static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; @@ -104,6 +104,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) u64 num_bytes; u64 cur_alloc_size; u64 blocksize = root->sectorsize; + u64 orig_start = start; + u64 orig_num_bytes; struct btrfs_key ins; int ret; @@ -115,6 +117,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) num_bytes = max(blocksize, num_bytes); ret = btrfs_drop_extents(trans, root, inode, start, start + num_bytes, start, &alloc_hint); + orig_num_bytes = num_bytes; if (alloc_hint == EXTENT_MAP_INLINE) goto out; @@ -138,6 +141,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } + btrfs_drop_extent_cache(inode, orig_start, + orig_start + orig_num_bytes - 1); btrfs_add_ordered_inode(inode); out: btrfs_end_transaction(trans, root); @@ -297,7 +302,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) int ret = 0; struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_csum_item *item; struct btrfs_path *path = NULL; u32 csum; @@ -317,7 +322,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) } read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, BTRFS_CRC32_SIZE); - set_state_private(em_tree, start, csum); + set_state_private(io_tree, start, csum); out: if (path) btrfs_free_path(path); @@ -329,17 +334,19 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) { size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); struct inode *inode = page->mapping->host; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; char *kaddr; u64 private; int ret; struct btrfs_root *root = BTRFS_I(inode)->root; u32 csum = ~(u32)0; unsigned long flags; + if (btrfs_test_opt(root, NODATASUM) || btrfs_test_flag(inode, NODATASUM)) return 0; - ret = get_state_private(em_tree, start, &private); + + ret = get_state_private(io_tree, start, &private); local_irq_save(flags); kaddr = kmap_atomic(page, KM_IRQ0); if (ret) { @@ -428,7 +435,7 @@ void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; break; @@ -873,7 +880,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, size_t zero_start) { char *kaddr; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_root *root = BTRFS_I(inode)->root; u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; @@ -884,12 +891,12 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, WARN_ON(!PageLocked(page)); set_page_extent_mapped(page); - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; - existing_delalloc = count_range_bits(&BTRFS_I(inode)->extent_tree, + existing_delalloc = count_range_bits(&BTRFS_I(inode)->io_tree, &delalloc_start, page_end, PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start, + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -903,7 +910,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, kunmap(page); } set_page_dirty(page); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); return ret; } @@ -961,7 +968,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 mask = root->sectorsize - 1; u64 pos = (inode->i_size + mask) & ~mask; @@ -986,7 +993,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); - lock_extent(em_tree, pos, block_end, GFP_NOFS); + lock_extent(io_tree, pos, block_end, GFP_NOFS); hole_size = block_end - hole_start; mutex_lock(&root->fs_info->fs_mutex); @@ -1001,11 +1008,13 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) inode->i_ino, hole_start, 0, 0, hole_size); + btrfs_drop_extent_cache(inode, hole_start, + hole_size - 1); btrfs_check_file(root, inode); } btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - unlock_extent(em_tree, pos, block_end, GFP_NOFS); + unlock_extent(io_tree, pos, block_end, GFP_NOFS); if (err) return err; } @@ -1189,7 +1198,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) struct btrfs_iget_args *args = p; inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); return 0; } @@ -1485,7 +1495,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->root = root; @@ -1672,9 +1683,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -1816,7 +1828,7 @@ out_unlock: } struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 end, + size_t page_offset, u64 start, u64 len, int create) { int ret; @@ -1826,7 +1838,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, u64 extent_end = 0; u64 objectid = inode->i_ino; u32 found_type; - int failed_insert = 0; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; @@ -1834,6 +1845,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct btrfs_key found_key; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_trans_handle *trans = NULL; path = btrfs_alloc_path(); @@ -1841,24 +1853,26 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, mutex_lock(&root->fs_info->fs_mutex); again: - em = lookup_extent_mapping(em_tree, start, end); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + spin_unlock(&em_tree->lock); + if (em) { if (em->start > start) { - printk("get_extent start %Lu em start %Lu\n", - start, em->start); + printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n", + start, len, em->start, em->len); WARN_ON(1); } goto out; } + em = alloc_extent_map(GFP_NOFS); if (!em) { - em = alloc_extent_map(GFP_NOFS); - if (!em) { - err = -ENOMEM; - goto out; - } - em->start = EXTENT_MAP_HOLE; - em->end = EXTENT_MAP_HOLE; + err = -ENOMEM; + goto out; } + + em->start = EXTENT_MAP_HOLE; + em->len = (u64)-1; em->bdev = inode->i_sb->s_bdev; ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); @@ -1893,28 +1907,25 @@ again: if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { - if (end < extent_start) + if (start + len <= extent_start) goto not_found; - em->end = extent_end - 1; + em->len = extent_end - extent_start; } else { - em->end = end; + em->len = len; } goto not_found_em; } bytenr = btrfs_file_extent_disk_bytenr(leaf, item); if (bytenr == 0) { em->start = extent_start; - em->end = extent_end - 1; + em->len = extent_end - extent_start; em->block_start = EXTENT_MAP_HOLE; - em->block_end = EXTENT_MAP_HOLE; goto insert; } bytenr += btrfs_file_extent_offset(leaf, item); em->block_start = bytenr; - em->block_end = em->block_start + - btrfs_file_extent_num_bytes(leaf, item) - 1; em->start = extent_start; - em->end = extent_end - 1; + em->len = extent_end - extent_start; goto insert; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { unsigned long ptr; @@ -1925,25 +1936,24 @@ again: size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); - extent_end = (extent_start + size - 1) | - ((u64)root->sectorsize - 1); + extent_end = (extent_start + size + root->sectorsize - 1) & + ~((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { - if (end < extent_start) + if (start + len <= extent_start) goto not_found; - em->end = extent_end; + em->len = extent_end - extent_start; } else { - em->end = end; + em->len = len; } goto not_found_em; } em->block_start = EXTENT_MAP_INLINE; - em->block_end = EXTENT_MAP_INLINE; if (!page) { em->start = extent_start; - em->end = extent_start + size - 1; + em->len = size; goto out; } @@ -1952,8 +1962,7 @@ again: copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, size - extent_offset); em->start = extent_start + extent_offset; - em->end = (em->start + copy_size -1) | - ((u64)root->sectorsize -1); + em->len = copy_size; map = kmap(page); ptr = btrfs_file_extent_inline_start(item) + extent_offset; if (create == 0 && !PageUptodate(page)) { @@ -1974,7 +1983,8 @@ again: btrfs_mark_buffer_dirty(leaf); } kunmap(page); - set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS); + set_extent_uptodate(io_tree, em->start, + extent_map_end(em) - 1, GFP_NOFS); goto insert; } else { printk("unkknown found_type %d\n", found_type); @@ -1982,33 +1992,29 @@ again: } not_found: em->start = start; - em->end = end; + em->len = len; not_found_em: em->block_start = EXTENT_MAP_HOLE; - em->block_end = EXTENT_MAP_HOLE; insert: btrfs_release_path(root, path); - if (em->start > start || em->end < start) { - printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end); + if (em->start > start || extent_map_end(em) <= start) { + printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len); err = -EIO; goto out; } + + err = 0; + spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); if (ret == -EEXIST) { free_extent_map(em); - em = NULL; - if (0 && failed_insert == 1) { - btrfs_drop_extent_cache(inode, start, end); - } - failed_insert++; - if (failed_insert > 5) { - printk("failing to insert %Lu %Lu\n", start, end); + em = lookup_extent_mapping(em_tree, start, len); + if (!em) { err = -EIO; - goto out; + printk("failing to insert %Lu %Lu\n", start, len); } - goto again; } - err = 0; + spin_unlock(&em_tree->lock); out: btrfs_free_path(path); if (trans) { @@ -2032,14 +2038,14 @@ static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) int btrfs_readpage(struct file *file, struct page *page) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_read_full_page(tree, page, btrfs_get_extent); } static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; if (current->flags & PF_MEMALLOC) { @@ -2047,15 +2053,15 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); return 0; } - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } static int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; return extent_writepages(tree, mapping, btrfs_get_extent, wbc); } @@ -2063,19 +2069,21 @@ static int btrfs_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; return extent_readpages(tree, mapping, pages, nr_pages, btrfs_get_extent); } static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; + struct extent_map_tree *map; int ret; - tree = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(tree, page); + tree = &BTRFS_I(page->mapping->host)->io_tree; + map = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(map, tree, page); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); @@ -2086,9 +2094,9 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) static void btrfs_invalidatepage(struct page *page, unsigned long offset) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; extent_invalidatepage(tree, page, offset); btrfs_releasepage(page, GFP_NOFS); } @@ -2374,7 +2382,7 @@ unsigned long btrfs_force_ra(struct address_space *mapping, int btrfs_defrag_file(struct file *file) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct page *page; unsigned long last_index; unsigned long ra_index = 0; @@ -2414,13 +2422,13 @@ int btrfs_defrag_file(struct file *file) { page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; existing_delalloc = - count_range_bits(&BTRFS_I(inode)->extent_tree, + count_range_bits(&BTRFS_I(inode)->io_tree, &delalloc_start, page_end, PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(em_tree, page_start, + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -2428,7 +2436,7 @@ int btrfs_defrag_file(struct file *file) { existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); page_cache_release(page); @@ -2842,9 +2850,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -2934,7 +2943,7 @@ static struct file_operations btrfs_dir_file_operations = { #endif }; -static struct extent_map_ops btrfs_extent_map_ops = { +static struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, .writepage_io_hook = btrfs_writepage_io_hook, .readpage_io_hook = btrfs_readpage_io_hook, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8b52c69fda2e..f8a1016600b1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -468,10 +468,15 @@ static int __init init_btrfs_fs(void) err = btrfs_init_cachep(); if (err) goto free_transaction_sys; - err = extent_map_init(); + + err = extent_io_init(); if (err) goto free_cachep; + err = extent_map_init(); + if (err) + goto free_extent_io; + err = register_filesystem(&btrfs_fs_type); if (err) goto free_extent_map; @@ -479,6 +484,8 @@ static int __init init_btrfs_fs(void) free_extent_map: extent_map_exit(); +free_extent_io: + extent_io_exit(); free_cachep: btrfs_destroy_cachep(); free_transaction_sys: @@ -492,6 +499,7 @@ static void __exit exit_btrfs_fs(void) btrfs_exit_transaction_sys(); btrfs_destroy_cachep(); extent_map_exit(); + extent_io_exit(); unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 163c01a24498..b4a1bc62a784 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -70,7 +70,7 @@ static int join_transaction(struct btrfs_root *root) INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); - extent_map_tree_init(&cur_trans->dirty_pages, + extent_io_tree_init(&cur_trans->dirty_pages, root->fs_info->btree_inode->i_mapping, GFP_NOFS); } else { @@ -153,7 +153,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int ret; int err; int werr = 0; - struct extent_map_tree *dirty_pages; + struct extent_io_tree *dirty_pages; struct page *page; struct inode *btree_inode = root->fs_info->btree_inode; u64 start; @@ -610,7 +610,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; - struct extent_map_tree *pinned_copy; + struct extent_io_tree *pinned_copy; DEFINE_WAIT(wait); int ret; @@ -639,7 +639,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (!pinned_copy) return -ENOMEM; - extent_map_tree_init(pinned_copy, + extent_io_tree_init(pinned_copy, root->fs_info->btree_inode->i_mapping, GFP_NOFS); trans->transaction->in_commit = 1; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index fd52e9b23922..c3172ddb3321 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -29,7 +29,7 @@ struct btrfs_transaction { int use_count; int commit_done; struct list_head list; - struct extent_map_tree dirty_pages; + struct extent_io_tree dirty_pages; unsigned long start_time; struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; From bea495e5b4d9746f26cf5b4100d24c77c3dcbd84 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jan 2008 16:13:14 -0500 Subject: [PATCH 0394/2985] Btrfs: Tune readahead during defrag to avoid reading too much at once Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1cf125ab7822..c4fe37791d3f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1856,27 +1856,44 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, } static void noinline reada_walk_down(struct btrfs_root *root, - struct extent_buffer *node) + struct extent_buffer *node, + int slot) { - int i; - u32 nritems; u64 bytenr; - int ret; + u64 last = 0; + u32 nritems; u32 refs; - int level; u32 blocksize; + int ret; + int i; + int level; + int skipped = 0; nritems = btrfs_header_nritems(node); level = btrfs_header_level(node); - for (i = 0; i < nritems; i++) { + if (level) + return; + + for (i = slot; i < nritems && skipped < 32; i++) { bytenr = btrfs_node_blockptr(node, i); - blocksize = btrfs_level_size(root, level - 1); - ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs); - BUG_ON(ret); - if (refs != 1) + if (last && ((bytenr > last && bytenr - last > 32 * 1024) || + (last > bytenr && last - bytenr > 32 * 1024))) { + skipped++; continue; + } + blocksize = btrfs_level_size(root, level - 1); + if (i != slot) { + ret = lookup_extent_ref(NULL, root, bytenr, + blocksize, &refs); + BUG_ON(ret); + if (refs != 1) { + skipped++; + continue; + } + } mutex_unlock(&root->fs_info->fs_mutex); ret = readahead_tree_block(root, bytenr, blocksize); + last = bytenr + blocksize; cond_resched(); mutex_lock(&root->fs_info->fs_mutex); if (ret) @@ -1919,9 +1936,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; - if (*level > 0 && path->slots[*level] == 0) - reada_walk_down(root, cur); - if (btrfs_header_level(cur) != *level) WARN_ON(1); @@ -1951,6 +1965,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); + reada_walk_down(root, cur, path->slots[*level]); mutex_unlock(&root->fs_info->fs_mutex); next = read_tree_block(root, bytenr, blocksize); mutex_lock(&root->fs_info->fs_mutex); From c2a8b6e11009398ca9363d8ba8d4e7e40fb897fd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 09:10:26 -0500 Subject: [PATCH 0395/2985] Btrfs: Force f_pos to the max when a readdir hits the end of the directory. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 16d3aef45d18..026bc9c4c90d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1430,7 +1430,7 @@ read_dir_items: di = (struct btrfs_dir_item *)((char *)di + di_len); } } - filp->f_pos++; + filp->f_pos = INT_LIMIT(typeof(filp->f_pos)); nopos: ret = 0; err: From aadfeb6e39ad6bde080cb3ab23f4da57ccb25f4a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 09:10:27 -0500 Subject: [PATCH 0396/2985] Btrfs: Add some extra debugging around file data checksum failures Signed-off-by: Chris Mason --- fs/btrfs/file-item.c | 4 ++++ fs/btrfs/inode.c | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 614176e52851..3ebbc058d082 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -261,6 +261,10 @@ csum: found: csum_result = btrfs_csum_data(root, data, csum_result, len); btrfs_csum_final(csum_result, (char *)&csum_result); + if (csum_result == 0) { + printk("csum result is 0 for inode %lu offset %Lu\n", inode->i_ino, offset); + } + write_extent_buffer(leaf, &csum_result, (unsigned long)item, BTRFS_CRC32_SIZE); btrfs_mark_buffer_dirty(path->nodes[0]); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 026bc9c4c90d..484551274100 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -318,6 +318,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) if (ret == -ENOENT || ret == -EFBIG) ret = 0; csum = 0; + printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start); goto out; } read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, @@ -336,7 +337,7 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) struct inode *inode = page->mapping->host; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; char *kaddr; - u64 private; + u64 private = ~(u32)0; int ret; struct btrfs_root *root = BTRFS_I(inode)->root; u32 csum = ~(u32)0; @@ -362,8 +363,9 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) return 0; zeroit: - printk("btrfs csum failed ino %lu off %llu\n", - page->mapping->host->i_ino, (unsigned long long)start); + printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n", + page->mapping->host->i_ino, (unsigned long long)start, csum, + private); memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); kunmap_atomic(kaddr, KM_IRQ0); From 70dec8079d78691e476cc6c7cede40656078ad30 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 09:59:12 -0500 Subject: [PATCH 0397/2985] Btrfs: extent_io and extent_state optimizations The end_bio routines are changed to take a pointer to the extent state struct, and the state tree is walked in order to set/clear appropriate bits as IO completes. This greatly reduces the number of rbtree searches done by the end_bio handlers, and reduces lock contention. The extent_io releasepage function is changed to avoid expensive searches for locked state. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 +- fs/btrfs/extent_io.c | 358 +++++++++++++++++++++++++++++++------------ fs/btrfs/extent_io.h | 15 +- fs/btrfs/inode.c | 35 +++-- 4 files changed, 298 insertions(+), 114 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4c4ebea0b2a9..e29a0716f6a9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -274,7 +274,7 @@ int btree_readpage(struct file *file, struct page *page) return extent_read_full_page(tree, page, btree_get_extent); } -static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) +static int btree_releasepage(struct page *page, gfp_t gfp_flags) { struct extent_io_tree *tree; struct extent_map_tree *map; @@ -282,7 +282,7 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(map, tree, page); + ret = try_release_extent_mapping(map, tree, page, gfp_flags); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 15cc158a0498..1f734c34dc24 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -27,13 +27,11 @@ static struct kmem_cache *extent_buffer_cache; static LIST_HEAD(buffers); static LIST_HEAD(states); -static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; #define BUFFER_LRU_MAX 64 struct tree_entry { u64 start; u64 end; - int in_tree; struct rb_node rb_node; }; @@ -69,7 +67,7 @@ void extent_io_exit(void) while (!list_empty(&states)) { state = list_entry(states.next, struct extent_state, list); - printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); + printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs)); list_del(&state->list); kmem_cache_free(extent_state_cache, state); @@ -87,7 +85,7 @@ void extent_io_tree_init(struct extent_io_tree *tree, tree->state.rb_node = NULL; tree->ops = NULL; tree->dirty_bytes = 0; - rwlock_init(&tree->lock); + spin_lock_init(&tree->lock); spin_lock_init(&tree->lru_lock); tree->mapping = mapping; INIT_LIST_HEAD(&tree->buffer_lru); @@ -110,18 +108,13 @@ EXPORT_SYMBOL(extent_io_tree_empty_lru); struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; - unsigned long flags; state = kmem_cache_alloc(extent_state_cache, mask); if (!state || IS_ERR(state)) return state; state->state = 0; - state->in_tree = 0; state->private = 0; - - spin_lock_irqsave(&state_lock, flags); - list_add(&state->list, &states); - spin_unlock_irqrestore(&state_lock, flags); + state->tree = NULL; atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); @@ -131,14 +124,10 @@ EXPORT_SYMBOL(alloc_extent_state); void free_extent_state(struct extent_state *state) { - unsigned long flags; if (!state) return; if (atomic_dec_and_test(&state->refs)) { - WARN_ON(state->in_tree); - spin_lock_irqsave(&state_lock, flags); - list_del(&state->list); - spin_unlock_irqrestore(&state_lock, flags); + WARN_ON(state->tree); kmem_cache_free(extent_state_cache, state); } } @@ -164,7 +153,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, } entry = rb_entry(node, struct tree_entry, rb_node); - entry->in_tree = 1; rb_link_node(node, parent, p); rb_insert_color(node, root); return NULL; @@ -216,8 +204,9 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) { - struct rb_node *prev; + struct rb_node *prev = NULL; struct rb_node *ret; + ret = __tree_search(root, offset, &prev, NULL); if (!ret) return prev; @@ -248,7 +237,7 @@ static int merge_state(struct extent_io_tree *tree, if (other->end == state->start - 1 && other->state == state->state) { state->start = other->start; - other->in_tree = 0; + other->tree = NULL; rb_erase(&other->rb_node, &tree->state); free_extent_state(other); } @@ -259,7 +248,7 @@ static int merge_state(struct extent_io_tree *tree, if (other->start == state->end + 1 && other->state == state->state) { other->start = state->start; - state->in_tree = 0; + state->tree = NULL; rb_erase(&state->rb_node, &tree->state); free_extent_state(state); } @@ -300,6 +289,7 @@ static int insert_state(struct extent_io_tree *tree, free_extent_state(state); return -EEXIST; } + state->tree = tree; merge_state(tree, state); return 0; } @@ -335,6 +325,7 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, free_extent_state(prealloc); return -EEXIST; } + prealloc->tree = tree; return 0; } @@ -361,9 +352,9 @@ static int clear_state_bit(struct extent_io_tree *tree, if (wake) wake_up(&state->wq); if (delete || state->state == 0) { - if (state->in_tree) { + if (state->tree) { rb_erase(&state->rb_node, &tree->state); - state->in_tree = 0; + state->tree = NULL; free_extent_state(state); } else { WARN_ON(1); @@ -404,7 +395,7 @@ again: return -ENOMEM; } - write_lock_irqsave(&tree->lock, flags); + spin_lock_irqsave(&tree->lock, flags); /* * this search will find the extents that end after * our range starts @@ -434,6 +425,8 @@ again: */ if (state->start < start) { + if (!prealloc) + prealloc = alloc_extent_state(GFP_ATOMIC); err = split_state(tree, state, prealloc, start); BUG_ON(err == -EEXIST); prealloc = NULL; @@ -455,6 +448,8 @@ again: * on the first half */ if (state->start <= end && state->end > end) { + if (!prealloc) + prealloc = alloc_extent_state(GFP_ATOMIC); err = split_state(tree, state, prealloc, end + 1); BUG_ON(err == -EEXIST); @@ -471,7 +466,7 @@ again: goto search_again; out: - write_unlock_irqrestore(&tree->lock, flags); + spin_unlock_irqrestore(&tree->lock, flags); if (prealloc) free_extent_state(prealloc); @@ -480,7 +475,7 @@ out: search_again: if (start > end) goto out; - write_unlock_irqrestore(&tree->lock, flags); + spin_unlock_irqrestore(&tree->lock, flags); if (mask & __GFP_WAIT) cond_resched(); goto again; @@ -492,9 +487,9 @@ static int wait_on_state(struct extent_io_tree *tree, { DEFINE_WAIT(wait); prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); - read_unlock_irq(&tree->lock); + spin_unlock_irq(&tree->lock); schedule(); - read_lock_irq(&tree->lock); + spin_lock_irq(&tree->lock); finish_wait(&state->wq, &wait); return 0; } @@ -509,7 +504,7 @@ int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) struct extent_state *state; struct rb_node *node; - read_lock_irq(&tree->lock); + spin_lock_irq(&tree->lock); again: while (1) { /* @@ -538,13 +533,13 @@ again: break; if (need_resched()) { - read_unlock_irq(&tree->lock); + spin_unlock_irq(&tree->lock); cond_resched(); - read_lock_irq(&tree->lock); + spin_lock_irq(&tree->lock); } } out: - read_unlock_irq(&tree->lock); + spin_unlock_irq(&tree->lock); return 0; } EXPORT_SYMBOL(wait_extent_bit); @@ -589,7 +584,7 @@ again: return -ENOMEM; } - write_lock_irqsave(&tree->lock, flags); + spin_lock_irqsave(&tree->lock, flags); /* * this search will find all the extents that end after * our range starts. @@ -709,7 +704,7 @@ again: goto search_again; out: - write_unlock_irqrestore(&tree->lock, flags); + spin_unlock_irqrestore(&tree->lock, flags); if (prealloc) free_extent_state(prealloc); @@ -718,7 +713,7 @@ out: search_again: if (start > end) goto out; - write_unlock_irqrestore(&tree->lock, flags); + spin_unlock_irqrestore(&tree->lock, flags); if (mask & __GFP_WAIT) cond_resched(); goto again; @@ -817,10 +812,6 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(wait_on_extent_writeback); -/* - * locks a range in ascending order, waiting for any locked regions - * it hits on the way. [start,end] are inclusive, and this will sleep. - */ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { int err; @@ -896,7 +887,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, struct extent_state *state; int ret = 1; - read_lock_irq(&tree->lock); + spin_lock_irq(&tree->lock); /* * this search will find all the extents that end after * our range starts. @@ -919,7 +910,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, break; } out: - read_unlock_irq(&tree->lock); + spin_unlock_irq(&tree->lock); return ret; } EXPORT_SYMBOL(find_first_extent_bit); @@ -933,7 +924,7 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree, u64 found = 0; u64 total_bytes = 0; - write_lock_irq(&tree->lock); + spin_lock_irq(&tree->lock); /* * this search will find all the extents that end after * our range starts. @@ -976,9 +967,9 @@ search_again: atomic_inc(&state->refs); prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); - write_unlock_irq(&tree->lock); + spin_unlock_irq(&tree->lock); schedule(); - write_lock_irq(&tree->lock); + spin_lock_irq(&tree->lock); finish_wait(&state->wq, &wait); free_extent_state(state); goto search_again; @@ -997,7 +988,7 @@ search_again: break; } out: - write_unlock_irq(&tree->lock); + spin_unlock_irq(&tree->lock); return found; } @@ -1017,7 +1008,7 @@ u64 count_range_bits(struct extent_io_tree *tree, return 0; } - write_lock_irq(&tree->lock); + spin_lock_irq(&tree->lock); if (cur_start == 0 && bits == EXTENT_DIRTY) { total_bytes = tree->dirty_bytes; goto out; @@ -1050,7 +1041,7 @@ u64 count_range_bits(struct extent_io_tree *tree, break; } out: - write_unlock_irq(&tree->lock); + spin_unlock_irq(&tree->lock); return total_bytes; } /* @@ -1122,7 +1113,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) struct extent_state *state; int ret = 0; - write_lock_irq(&tree->lock); + spin_lock_irq(&tree->lock); /* * this search will find all the extents that end after * our range starts. @@ -1139,7 +1130,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) } state->private = private; out: - write_unlock_irq(&tree->lock); + spin_unlock_irq(&tree->lock); return ret; } @@ -1149,7 +1140,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) struct extent_state *state; int ret = 0; - read_lock_irq(&tree->lock); + spin_lock_irq(&tree->lock); /* * this search will find all the extents that end after * our range starts. @@ -1166,13 +1157,13 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) } *private = state->private; out: - read_unlock_irq(&tree->lock); + spin_unlock_irq(&tree->lock); return ret; } /* * searches a range in the state tree for a given mask. - * If 'filled' == 1, this returns 1 only if ever extent in the tree + * If 'filled' == 1, this returns 1 only if every extent in the tree * has the bits set. Otherwise, 1 is returned if any bit in the * range is found set. */ @@ -1184,7 +1175,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, int bitset = 0; unsigned long flags; - read_lock_irqsave(&tree->lock, flags); + spin_lock_irqsave(&tree->lock, flags); node = tree_search(&tree->state, start); while (node && start <= end) { state = rb_entry(node, struct extent_state, rb_node); @@ -1215,7 +1206,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, break; } } - read_unlock_irqrestore(&tree->lock, flags); + spin_unlock_irqrestore(&tree->lock, flags); return bitset; } EXPORT_SYMBOL(test_range_bit); @@ -1282,16 +1273,19 @@ static int end_bio_extent_writepage(struct bio *bio, { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_io_tree *tree = bio->bi_private; + struct extent_state *state = bio->bi_private; + struct extent_io_tree *tree = state->tree; + struct rb_node *node; u64 start; u64 end; + u64 cur; int whole_page; + unsigned long flags; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) if (bio->bi_size) return 1; #endif - do { struct page *page = bvec->bv_page; start = ((u64)page->index << PAGE_CACHE_SHIFT) + @@ -1311,16 +1305,80 @@ static int end_bio_extent_writepage(struct bio *bio, ClearPageUptodate(page); SetPageError(page); } - clear_extent_writeback(tree, start, end, GFP_ATOMIC); + + if (tree->ops && tree->ops->writepage_end_io_hook) { + tree->ops->writepage_end_io_hook(page, start, end, + state); + } + + /* + * bios can get merged in funny ways, and so we need to + * be careful with the state variable. We know the + * state won't be merged with others because it has + * WRITEBACK set, but we can't be sure each biovec is + * sequential in the file. So, if our cached state + * doesn't match the expected end, search the tree + * for the correct one. + */ + + spin_lock_irqsave(&tree->lock, flags); + if (!state || state->end != end) { + state = NULL; + node = __tree_search(&tree->state, start, NULL, NULL); + if (node) { + state = rb_entry(node, struct extent_state, + rb_node); + if (state->end != end || + !(state->state & EXTENT_WRITEBACK)) + state = NULL; + } + if (!state) { + spin_unlock_irqrestore(&tree->lock, flags); + clear_extent_writeback(tree, start, + end, GFP_ATOMIC); + goto next_io; + } + } + cur = end; + while(1) { + struct extent_state *clear = state; + cur = state->start; + node = rb_prev(&state->rb_node); + if (node) { + state = rb_entry(node, + struct extent_state, + rb_node); + } else { + state = NULL; + } + + clear_state_bit(tree, clear, EXTENT_WRITEBACK, + 1, 0); + if (cur == start) + break; + if (cur < start) { + WARN_ON(1); + break; + } + if (!node) + break; + } + /* before releasing the lock, make sure the next state + * variable has the expected bits set and corresponds + * to the correct offsets in the file + */ + if (state && (state->end + 1 != start || + !state->state & EXTENT_WRITEBACK)) { + state = NULL; + } + spin_unlock_irqrestore(&tree->lock, flags); +next_io: if (whole_page) end_page_writeback(page); else check_page_writeback(tree, page); - if (tree->ops && tree->ops->writepage_end_io_hook) - tree->ops->writepage_end_io_hook(page, start, end); } while (bvec >= bio->bi_io_vec); - bio_put(bio); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; @@ -1347,9 +1405,13 @@ static int end_bio_extent_readpage(struct bio *bio, { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_io_tree *tree = bio->bi_private; + struct extent_state *state = bio->bi_private; + struct extent_io_tree *tree = state->tree; + struct rb_node *node; u64 start; u64 end; + u64 cur; + unsigned long flags; int whole_page; int ret; @@ -1373,27 +1435,83 @@ static int end_bio_extent_readpage(struct bio *bio, prefetchw(&bvec->bv_page->flags); if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { - ret = tree->ops->readpage_end_io_hook(page, start, end); + ret = tree->ops->readpage_end_io_hook(page, start, end, + state); if (ret) uptodate = 0; } - if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); - if (whole_page) - SetPageUptodate(page); - else - check_page_uptodate(tree, page); - } else { - ClearPageUptodate(page); - SetPageError(page); + + spin_lock_irqsave(&tree->lock, flags); + if (!state || state->end != end) { + state = NULL; + node = __tree_search(&tree->state, start, NULL, NULL); + if (node) { + state = rb_entry(node, struct extent_state, + rb_node); + if (state->end != end || + !(state->state & EXTENT_LOCKED)) + state = NULL; + } + if (!state) { + spin_unlock_irqrestore(&tree->lock, flags); + set_extent_uptodate(tree, start, end, + GFP_ATOMIC); + unlock_extent(tree, start, end, GFP_ATOMIC); + goto next_io; + } } - unlock_extent(tree, start, end, GFP_ATOMIC); - - if (whole_page) + cur = end; + while(1) { + struct extent_state *clear = state; + cur = state->start; + node = rb_prev(&state->rb_node); + if (node) { + state = rb_entry(node, + struct extent_state, + rb_node); + } else { + state = NULL; + } + clear->state |= EXTENT_UPTODATE; + clear_state_bit(tree, clear, EXTENT_LOCKED, + 1, 0); + if (cur == start) + break; + if (cur < start) { + WARN_ON(1); + break; + } + if (!node) + break; + } + /* before releasing the lock, make sure the next state + * variable has the expected bits set and corresponds + * to the correct offsets in the file + */ + if (state && (state->end + 1 != start || + !state->state & EXTENT_WRITEBACK)) { + state = NULL; + } + spin_unlock_irqrestore(&tree->lock, flags); +next_io: + if (whole_page) { + if (uptodate) { + SetPageUptodate(page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } unlock_page(page); - else + } else { + if (uptodate) { + check_page_uptodate(tree, page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } check_page_locked(tree, page); + } } while (bvec >= bio->bi_io_vec); bio_put(bio); @@ -1416,7 +1534,8 @@ static int end_bio_extent_preparewrite(struct bio *bio, { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_io_tree *tree = bio->bi_private; + struct extent_state *state = bio->bi_private; + struct extent_io_tree *tree = state->tree; u64 start; u64 end; @@ -1475,6 +1594,29 @@ static int submit_one_bio(int rw, struct bio *bio) { u64 maxsector; int ret = 0; + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct page *page = bvec->bv_page; + struct extent_io_tree *tree = bio->bi_private; + struct rb_node *node; + struct extent_state *state; + u64 start; + u64 end; + + start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + spin_lock_irq(&tree->lock); + node = __tree_search(&tree->state, start, NULL, NULL); + BUG_ON(!node); + state = rb_entry(node, struct extent_state, rb_node); + while(state->end < end) { + node = rb_next(node); + state = rb_entry(node, struct extent_state, rb_node); + } + BUG_ON(state->end != end); + spin_unlock_irq(&tree->lock); + + bio->bi_private = state; bio_get(bio); @@ -1519,9 +1661,12 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, if (!bio) { printk("failed to allocate bio nr %d\n", nr); } + + bio_add_page(bio, page, size, offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; + if (bio_ret) { *bio_ret = bio; } else { @@ -1635,6 +1780,16 @@ static int __extent_read_full_page(struct extent_io_tree *tree, page_offset += iosize; continue; } + /* we have an inline extent but it didn't get marked up + * to date. Error out + */ + if (block_start == EXTENT_MAP_INLINE) { + SetPageError(page); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } ret = 0; if (tree->ops && tree->ops->readpage_io_hook) { @@ -2205,7 +2360,8 @@ EXPORT_SYMBOL(extent_prepare_write); * map records are removed */ int try_release_extent_mapping(struct extent_map_tree *map, - struct extent_io_tree *tree, struct page *page) + struct extent_io_tree *tree, struct page *page, + gfp_t mask) { struct extent_map *em; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; @@ -2213,30 +2369,42 @@ int try_release_extent_mapping(struct extent_map_tree *map, u64 orig_start = start; int ret = 1; - while (start <= end) { - spin_lock(&map->lock); - em = lookup_extent_mapping(map, start, end); - if (!em || IS_ERR(em)) { + if ((mask & __GFP_WAIT) && + page->mapping->host->i_size > 16 * 1024 * 1024) { + while (start <= end) { + spin_lock(&map->lock); + em = lookup_extent_mapping(map, start, end); + if (!em || IS_ERR(em)) { + spin_unlock(&map->lock); + break; + } + if (em->start != start) { + spin_unlock(&map->lock); + free_extent_map(em); + break; + } + if (!test_range_bit(tree, em->start, + extent_map_end(em) - 1, + EXTENT_LOCKED, 0)) { + remove_extent_mapping(map, em); + /* once for the rb tree */ + free_extent_map(em); + } + start = extent_map_end(em); spin_unlock(&map->lock); - break; - } - if (!test_range_bit(tree, em->start, extent_map_end(em) - 1, - EXTENT_LOCKED, 0)) { - remove_extent_mapping(map, em); - /* once for the rb tree */ + + /* once for us */ free_extent_map(em); } - start = extent_map_end(em); - spin_unlock(&map->lock); - - /* once for us */ - free_extent_map(em); } - if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) + if (test_range_bit(tree, orig_start, end, EXTENT_IOBITS, 0)) ret = 0; - else + else { + if ((mask & GFP_NOFS) == GFP_NOFS) + mask = GFP_NOFS; clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, - 1, 1, GFP_NOFS); + 1, 1, mask); + } return ret; } EXPORT_SYMBOL(try_release_extent_mapping); @@ -2553,13 +2721,13 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, } } clear_page_dirty_for_io(page); - write_lock_irq(&page->mapping->tree_lock); + read_lock_irq(&page->mapping->tree_lock); if (!PageDirty(page)) { radix_tree_tag_clear(&page->mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } - write_unlock_irq(&page->mapping->tree_lock); + read_unlock_irq(&page->mapping->tree_lock); unlock_page(page); } return 0; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 06be1fe84b29..f1e8bf251f32 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -23,19 +23,23 @@ #define EXTENT_PAGE_PRIVATE 1 #define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 +struct extent_state; + struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); - int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); - void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, + struct extent_state *state); + void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, + struct extent_state *state); }; struct extent_io_tree { struct rb_root state; struct address_space *mapping; u64 dirty_bytes; - rwlock_t lock; + spinlock_t lock; struct extent_io_ops *ops; spinlock_t lru_lock; struct list_head buffer_lru; @@ -45,8 +49,8 @@ struct extent_io_tree { struct extent_state { u64 start; u64 end; /* inclusive */ - int in_tree; struct rb_node rb_node; + struct extent_io_tree *tree; wait_queue_head_t wq; atomic_t refs; unsigned long state; @@ -82,7 +86,8 @@ void extent_io_tree_init(struct extent_io_tree *tree, struct address_space *mapping, gfp_t mask); void extent_io_tree_empty_lru(struct extent_io_tree *tree); int try_release_extent_mapping(struct extent_map_tree *map, - struct extent_io_tree *tree, struct page *page); + struct extent_io_tree *tree, struct page *page, + gfp_t mask); int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int extent_read_full_page(struct extent_io_tree *tree, struct page *page, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 484551274100..bac8722e14e1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -331,7 +331,8 @@ out: return ret; } -int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) +int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, + struct extent_state *state) { size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); struct inode *inode = page->mapping->host; @@ -347,7 +348,12 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) btrfs_test_flag(inode, NODATASUM)) return 0; - ret = get_state_private(io_tree, start, &private); + if (state->start == start) { + private = state->private; + ret = 0; + } else { + ret = get_state_private(io_tree, start, &private); + } local_irq_save(flags); kaddr = kmap_atomic(page, KM_IRQ0); if (ret) { @@ -1830,7 +1836,7 @@ out_unlock: } struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 len, + size_t pg_offset, u64 start, u64 len, int create) { int ret; @@ -1865,7 +1871,10 @@ again: start, len, em->start, em->len); WARN_ON(1); } - goto out; + if (em->block_start == EXTENT_MAP_INLINE && page) + free_extent_map(em); + else + goto out; } em = alloc_extent_map(GFP_NOFS); if (!em) { @@ -1930,6 +1939,7 @@ again: em->len = extent_end - extent_start; goto insert; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + u64 page_start; unsigned long ptr; char *map; size_t size; @@ -1959,16 +1969,17 @@ again: goto out; } - extent_offset = ((u64)page->index << PAGE_CACHE_SHIFT) - - extent_start + page_offset; - copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, + page_start = page_offset(page) + pg_offset; + extent_offset = page_start - extent_start; + copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset, size - extent_offset); em->start = extent_start + extent_offset; - em->len = copy_size; + em->len = (copy_size + root->sectorsize - 1) & + ~((u64)root->sectorsize - 1); map = kmap(page); ptr = btrfs_file_extent_inline_start(item) + extent_offset; if (create == 0 && !PageUptodate(page)) { - read_extent_buffer(leaf, map + page_offset, ptr, + read_extent_buffer(leaf, map + pg_offset, ptr, copy_size); flush_dcache_page(page); } else if (create && PageUptodate(page)) { @@ -1980,7 +1991,7 @@ again: trans = btrfs_start_transaction(root, 1); goto again; } - write_extent_buffer(leaf, map + page_offset, ptr, + write_extent_buffer(leaf, map + pg_offset, ptr, copy_size); btrfs_mark_buffer_dirty(leaf); } @@ -2077,7 +2088,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping, btrfs_get_extent); } -static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) +static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) { struct extent_io_tree *tree; struct extent_map_tree *map; @@ -2085,7 +2096,7 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(map, tree, page); + ret = try_release_extent_mapping(map, tree, page, gfp_flags); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); From 85e21bac165b4ba1f6f90431ad6fc658ffcbaf3a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 15:11:36 -0500 Subject: [PATCH 0398/2985] Btrfs: During deletes and truncate, remove many items at once from the tree Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 38 ++++++++------- fs/btrfs/ctree.h | 12 ++++- fs/btrfs/extent_io.c | 1 - fs/btrfs/inode.c | 114 ++++++++++++++++++++++++------------------- 4 files changed, 95 insertions(+), 70 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 43d23148a4fe..84ad53e06b3a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2514,34 +2514,36 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, * delete the item at the leaf level in path. If that empties * the leaf, remove it from the tree */ -int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path) +int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path, int slot, int nr) { - int slot; struct extent_buffer *leaf; struct btrfs_item *item; - int doff; - int dsize; + int last_off; + int dsize = 0; int ret = 0; int wret; + int i; u32 nritems; leaf = path->nodes[0]; - slot = path->slots[0]; - doff = btrfs_item_offset_nr(leaf, slot); - dsize = btrfs_item_size_nr(leaf, slot); + last_off = btrfs_item_offset_nr(leaf, slot + nr - 1); + + for (i = 0; i < nr; i++) + dsize += btrfs_item_size_nr(leaf, slot + i); + nritems = btrfs_header_nritems(leaf); - if (slot != nritems - 1) { + if (slot + nr != nritems) { int i; int data_end = leaf_data_end(root, leaf); memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + dsize, btrfs_leaf_data(leaf) + data_end, - doff - data_end); + last_off - data_end); - for (i = slot + 1; i < nritems; i++) { + for (i = slot + nr; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(leaf, i); @@ -2562,12 +2564,12 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, } memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), - btrfs_item_nr_offset(slot + 1), + btrfs_item_nr_offset(slot + nr), sizeof(struct btrfs_item) * - (nritems - slot - 1)); + (nritems - slot - nr)); } - btrfs_set_header_nritems(leaf, nritems - 1); - nritems--; + btrfs_set_header_nritems(leaf, nritems - nr); + nritems -= nr; /* delete the leaf if we've emptied it */ if (nritems == 0) { @@ -2600,7 +2602,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, } /* delete the leaf if it is mostly empty */ - if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { + if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) { /* push_leaf_left fixes the path. * make sure the path still points to our leaf * for possible call to del_ptr below @@ -2608,13 +2610,13 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, slot = path->slots[1]; extent_buffer_get(leaf); - wret = push_leaf_right(trans, root, path, 1, 1); + wret = push_leaf_left(trans, root, path, 1, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; if (path->nodes[0] == leaf && btrfs_header_nritems(leaf)) { - wret = push_leaf_left(trans, root, path, 1, 1); + wret = push_leaf_right(trans, root, path, 1, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6c65473e0fe3..098cf0883150 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1038,8 +1038,16 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); -int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path); +int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_path *path, int slot, int nr); + +static inline int btrfs_del_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path) +{ + return btrfs_del_items(trans, root, path, path->slots[0], 1); +} + int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, void *data, u32 data_size); int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1f734c34dc24..8aec72253a17 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2863,7 +2863,6 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (ret || !wait) { return ret; } - for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); wait_on_page_locked(page); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bac8722e14e1..0a2fe51c4127 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -692,27 +692,6 @@ fail: return err; } -static int btrfs_free_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode) -{ - struct btrfs_path *path; - int ret; - - clear_inode(inode); - - path = btrfs_alloc_path(); - BUG_ON(!path); - ret = btrfs_lookup_inode(trans, root, path, - &BTRFS_I(inode)->location, -1); - if (ret > 0) - ret = -ENOENT; - if (!ret) - ret = btrfs_del_item(trans, root, path); - btrfs_free_path(path); - return ret; -} - /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find @@ -723,7 +702,8 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, */ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct inode *inode) + struct inode *inode, + u32 min_type) { int ret; struct btrfs_path *path; @@ -739,6 +719,8 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, u64 root_owner = 0; int found_extent; int del_item; + int pending_del_nr = 0; + int pending_del_slot = 0; int extent_type = -1; btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1); @@ -751,17 +733,19 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, key.offset = (u64)-1; key.type = (u8)-1; + btrfs_init_path(path); +search_again: + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) { + goto error; + } + if (ret > 0) { + BUG_ON(path->slots[0] == 0); + path->slots[0]--; + } + while(1) { - btrfs_init_path(path); fi = NULL; - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { - goto error; - } - if (ret > 0) { - BUG_ON(path->slots[0] == 0); - path->slots[0]--; - } leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); @@ -769,10 +753,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (found_key.objectid != inode->i_ino) break; - if (found_type != BTRFS_CSUM_ITEM_KEY && - found_type != BTRFS_DIR_ITEM_KEY && - found_type != BTRFS_DIR_INDEX_KEY && - found_type != BTRFS_EXTENT_DATA_KEY) + if (found_type < min_type) break; item_end = found_key.offset; @@ -801,14 +782,17 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, found_type = BTRFS_INODE_ITEM_KEY; } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { found_type = BTRFS_CSUM_ITEM_KEY; + } else if (found_type == BTRFS_EXTENT_DATA_KEY) { + found_type = BTRFS_XATTR_ITEM_KEY; + } else if (found_type == BTRFS_XATTR_ITEM_KEY) { + found_type = BTRFS_INODE_REF_KEY; } else if (found_type) { found_type--; } else { break; } btrfs_set_key_type(&key, found_type); - btrfs_release_path(root, path); - continue; + goto next; } if (found_key.offset >= inode->i_size) del_item = 1; @@ -860,13 +844,21 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, } delete: if (del_item) { - ret = btrfs_del_item(trans, root, path); - if (ret) - goto error; + if (!pending_del_nr) { + /* no pending yet, add ourselves */ + pending_del_slot = path->slots[0]; + pending_del_nr = 1; + } else if (pending_del_nr && + path->slots[0] + 1 == pending_del_slot) { + /* hop on the pending chunk */ + pending_del_nr++; + pending_del_slot = path->slots[0]; + } else { + printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot); + } } else { break; } - btrfs_release_path(root, path); if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, @@ -875,9 +867,36 @@ delete: found_key.offset, 0); BUG_ON(ret); } +next: + if (path->slots[0] == 0) { + if (pending_del_nr) + goto del_pending; + btrfs_release_path(root, path); + goto search_again; + } + + path->slots[0]--; + if (pending_del_nr && + path->slots[0] + 1 != pending_del_slot) { + struct btrfs_key debug; +del_pending: + btrfs_item_key_to_cpu(path->nodes[0], &debug, + pending_del_slot); + ret = btrfs_del_items(trans, root, path, + pending_del_slot, + pending_del_nr); + BUG_ON(ret); + pending_del_nr = 0; + btrfs_release_path(root, path); + goto search_again; + } } ret = 0; error: + if (pending_del_nr) { + ret = btrfs_del_items(trans, root, path, pending_del_slot, + pending_del_nr); + } btrfs_release_path(root, path); btrfs_free_path(path); inode->i_sb->s_dirt = 1; @@ -1067,16 +1086,12 @@ void btrfs_delete_inode(struct inode *inode) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); - ret = btrfs_truncate_in_trans(trans, root, inode); - if (ret) - goto no_delete_lock; - ret = btrfs_delete_xattrs(trans, root, inode); - if (ret) - goto no_delete_lock; - ret = btrfs_free_inode(trans, root, inode); + ret = btrfs_truncate_in_trans(trans, root, inode, 0); if (ret) goto no_delete_lock; + nr = trans->blocks_used; + clear_inode(inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -2190,7 +2205,8 @@ static void btrfs_truncate(struct inode *inode) btrfs_set_trans_block_group(trans, inode); /* FIXME, add redo link to tree so we don't leak on crash */ - ret = btrfs_truncate_in_trans(trans, root, inode); + ret = btrfs_truncate_in_trans(trans, root, inode, + BTRFS_EXTENT_DATA_KEY); btrfs_update_inode(trans, root, inode); nr = trans->blocks_used; From 9c58309d6cf22471dacbcb6de54d00cef9ca20d4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 15:15:18 -0500 Subject: [PATCH 0399/2985] Btrfs: Add inode item and backref in one insert, reducing cpu usage Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 39 ++++++++++++++-------- fs/btrfs/ctree.h | 17 ++++++++-- fs/btrfs/inode.c | 85 +++++++++++++++++++++++++++++++++++------------- 3 files changed, 102 insertions(+), 39 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 84ad53e06b3a..fb2e2bd506c8 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2332,27 +2332,34 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ -int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, +int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *cpu_key, u32 data_size) + struct btrfs_key *cpu_key, u32 *data_size, + int nr) { struct extent_buffer *leaf; struct btrfs_item *item; int ret = 0; int slot; int slot_orig; + int i; u32 nritems; + u32 total_size = 0; + u32 total_data = 0; unsigned int data_end; struct btrfs_disk_key disk_key; - btrfs_cpu_key_to_disk(&disk_key, cpu_key); + for (i = 0; i < nr; i++) { + total_data += data_size[i]; + } /* create a root if there isn't one */ if (!root->node) BUG(); - ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1); + total_size = total_data + (nr - 1) * sizeof(struct btrfs_item); + ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); if (ret == 0) { return -EEXIST; } @@ -2366,10 +2373,10 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, data_end = leaf_data_end(root, leaf); if (btrfs_leaf_free_space(root, leaf) < - sizeof(struct btrfs_item) + data_size) { + sizeof(struct btrfs_item) + total_size) { btrfs_print_leaf(root, leaf); printk("not enough freespace need %u have %d\n", - data_size, btrfs_leaf_free_space(root, leaf)); + total_size, btrfs_leaf_free_space(root, leaf)); BUG(); } @@ -2404,7 +2411,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, } ioff = btrfs_item_offset(leaf, item); - btrfs_set_item_offset(leaf, item, ioff - data_size); + btrfs_set_item_offset(leaf, item, ioff - total_data); } if (leaf->map_token) { unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); @@ -2412,23 +2419,27 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, } /* shift the items */ - memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), btrfs_item_nr_offset(slot), (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + - data_end - data_size, btrfs_leaf_data(leaf) + + data_end - total_data, btrfs_leaf_data(leaf) + data_end, old_data - data_end); data_end = old_data; } /* setup the item for the new data */ - btrfs_set_item_key(leaf, &disk_key, slot); - item = btrfs_item_nr(leaf, slot); - btrfs_set_item_offset(leaf, item, data_end - data_size); - btrfs_set_item_size(leaf, item, data_size); - btrfs_set_header_nritems(leaf, nritems + 1); + for (i = 0; i < nr; i++) { + btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); + btrfs_set_item_key(leaf, &disk_key, slot + i); + item = btrfs_item_nr(leaf, slot + i); + btrfs_set_item_offset(leaf, item, data_end - data_size[i]); + data_end -= data_size[i]; + btrfs_set_item_size(leaf, item, data_size[i]); + } + btrfs_set_header_nritems(leaf, nritems + nr); btrfs_mark_buffer_dirty(leaf); ret = 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 098cf0883150..127c86f795d0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1050,9 +1050,20 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans, int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, void *data, u32 data_size); -int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, struct btrfs_key - *cpu_key, u32 data_size); +int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 *data_size, int nr); + +static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key, + u32 data_size) +{ + return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1); +} + int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0a2fe51c4127..413b1012de53 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1500,6 +1500,8 @@ void btrfs_dirty_inode(struct inode *inode) static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, + const char *name, int name_len, + u64 ref_objectid, u64 objectid, struct btrfs_block_group_cache *group, int mode) @@ -1508,6 +1510,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_inode_item *inode_item; struct btrfs_key *location; struct btrfs_path *path; + struct btrfs_inode_ref *ref; + struct btrfs_key key[2]; + u32 sizes[2]; + unsigned long ptr; int ret; int owner; @@ -1530,10 +1536,25 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, group = btrfs_find_block_group(root, group, 0, 0, owner); BTRFS_I(inode)->block_group = group; BTRFS_I(inode)->flags = 0; - ret = btrfs_insert_empty_inode(trans, root, path, objectid); - if (ret) + + key[0].objectid = objectid; + btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); + key[0].offset = 0; + + key[1].objectid = objectid; + btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); + key[1].offset = ref_objectid; + + sizes[0] = sizeof(struct btrfs_inode_item); + sizes[1] = name_len + sizeof(*ref); + + ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2); + if (ret != 0) goto fail; + if (objectid > root->highest_inode) + root->highest_inode = objectid; + inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_mode = mode; @@ -1543,6 +1564,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); fill_inode_item(path->nodes[0], inode_item, inode); + + ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, + struct btrfs_inode_ref); + btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); + ptr = (unsigned long)(ref + 1); + write_extent_buffer(path->nodes[0], name, ptr, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); @@ -1564,7 +1592,8 @@ static inline u8 btrfs_inode_type(struct inode *inode) } static int btrfs_add_link(struct btrfs_trans_handle *trans, - struct dentry *dentry, struct inode *inode) + struct dentry *dentry, struct inode *inode, + int add_backref) { int ret; struct btrfs_key key; @@ -1580,11 +1609,13 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_parent->d_inode->i_ino, &key, btrfs_inode_type(inode)); if (ret == 0) { - ret = btrfs_insert_inode_ref(trans, root, - dentry->d_name.name, - dentry->d_name.len, - inode->i_ino, - dentry->d_parent->d_inode->i_ino); + if (add_backref) { + ret = btrfs_insert_inode_ref(trans, root, + dentry->d_name.name, + dentry->d_name.len, + inode->i_ino, + dentry->d_parent->d_inode->i_ino); + } parent_inode = dentry->d_parent->d_inode; parent_inode->i_size += dentry->d_name.len * 2; parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; @@ -1595,9 +1626,10 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, } static int btrfs_add_nondir(struct btrfs_trans_handle *trans, - struct dentry *dentry, struct inode *inode) + struct dentry *dentry, struct inode *inode, + int backref) { - int err = btrfs_add_link(trans, dentry, inode); + int err = btrfs_add_link(trans, dentry, inode, backref); if (!err) { d_instantiate(dentry, inode); return 0; @@ -1635,14 +1667,16 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock; } - inode = btrfs_new_inode(trans, root, objectid, + inode = btrfs_new_inode(trans, root, dentry->d_name.name, + dentry->d_name.len, + dentry->d_parent->d_inode->i_ino, objectid, BTRFS_I(dir)->block_group, mode); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode); + err = btrfs_add_nondir(trans, dentry, inode, 0); if (err) drop_inode = 1; else { @@ -1692,14 +1726,16 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } - inode = btrfs_new_inode(trans, root, objectid, - BTRFS_I(dir)->block_group, mode); + inode = btrfs_new_inode(trans, root, dentry->d_name.name, + dentry->d_name.len, + dentry->d_parent->d_inode->i_ino, + objectid, BTRFS_I(dir)->block_group, mode); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode); + err = btrfs_add_nondir(trans, dentry, inode, 0); if (err) drop_inode = 1; else { @@ -1755,7 +1791,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, btrfs_set_trans_block_group(trans, dir); atomic_inc(&inode->i_count); - err = btrfs_add_nondir(trans, dentry, inode); + err = btrfs_add_nondir(trans, dentry, inode, 1); if (err) drop_inode = 1; @@ -1810,7 +1846,9 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_unlock; } - inode = btrfs_new_inode(trans, root, objectid, + inode = btrfs_new_inode(trans, root, dentry->d_name.name, + dentry->d_name.len, + dentry->d_parent->d_inode->i_ino, objectid, BTRFS_I(dir)->block_group, S_IFDIR | mode); if (IS_ERR(inode)) { err = PTR_ERR(inode); @@ -1827,7 +1865,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; - err = btrfs_add_link(trans, dentry, inode); + err = btrfs_add_link(trans, dentry, inode, 0); if (err) goto out_fail; @@ -2320,7 +2358,8 @@ static int noinline create_subvol(struct btrfs_root *root, char *name, trans = btrfs_start_transaction(new_root, 1); BUG_ON(!trans); - inode = btrfs_new_inode(trans, new_root, new_dirid, + inode = btrfs_new_inode(trans, new_root, "..", 2, new_dirid, + new_dirid, BTRFS_I(dir)->block_group, S_IFDIR | 0700); if (IS_ERR(inode)) goto fail; @@ -2817,7 +2856,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, if (ret) goto out_fail; } - ret = btrfs_add_link(trans, new_dentry, old_inode); + ret = btrfs_add_link(trans, new_dentry, old_inode, 1); if (ret) goto out_fail; @@ -2865,14 +2904,16 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; } - inode = btrfs_new_inode(trans, root, objectid, + inode = btrfs_new_inode(trans, root, dentry->d_name.name, + dentry->d_name.len, + dentry->d_parent->d_inode->i_ino, objectid, BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode); + err = btrfs_add_nondir(trans, dentry, inode, 0); if (err) drop_inode = 1; else { From 291d673e6a22d9c6834e939f66c7cfef90669021 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 15:55:23 -0500 Subject: [PATCH 0400/2985] Btrfs: Do delalloc accounting via hooks in the extent_state code Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 ----- fs/btrfs/extent_io.c | 25 ++++++++++++++++++ fs/btrfs/extent_io.h | 4 +++ fs/btrfs/file.c | 4 --- fs/btrfs/inode.c | 60 +++++++++++++++++------------------------- 5 files changed, 53 insertions(+), 46 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c4fe37791d3f..90718f77bbc0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2180,7 +2180,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, unsigned long last_index; unsigned long i; struct page *page; - struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct file_ra_state *ra; @@ -2220,11 +2219,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); - spin_lock(&root->fs_info->delalloc_lock); - root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - - existing_delalloc; - spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8aec72253a17..e81b35b06187 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -256,6 +256,26 @@ static int merge_state(struct extent_io_tree *tree, return 0; } +static void set_state_cb(struct extent_io_tree *tree, + struct extent_state *state, + unsigned long bits) +{ + if (tree->ops && tree->ops->set_bit_hook) { + tree->ops->set_bit_hook(tree->mapping->host, state->start, + state->end, bits); + } +} + +static void clear_state_cb(struct extent_io_tree *tree, + struct extent_state *state, + unsigned long bits) +{ + if (tree->ops && tree->ops->set_bit_hook) { + tree->ops->clear_bit_hook(tree->mapping->host, state->start, + state->end, bits); + } +} + /* * insert an extent_state struct into the tree. 'bits' are set on the * struct before it is inserted. @@ -281,6 +301,7 @@ static int insert_state(struct extent_io_tree *tree, state->state |= bits; state->start = start; state->end = end; + set_state_cb(tree, state, bits); node = tree_insert(&tree->state, end, &state->rb_node); if (node) { struct extent_state *found; @@ -349,6 +370,7 @@ static int clear_state_bit(struct extent_io_tree *tree, tree->dirty_bytes -= range; } state->state &= ~bits; + clear_state_cb(tree, state, bits); if (wake) wake_up(&state->wq); if (delete || state->state == 0) { @@ -553,6 +575,7 @@ static void set_state_bits(struct extent_io_tree *tree, tree->dirty_bytes += range; } state->state |= bits; + set_state_cb(tree, state, bits); } /* @@ -975,6 +998,7 @@ search_again: goto search_again; } state->state |= EXTENT_LOCKED; + set_state_cb(tree, state, EXTENT_LOCKED); if (!found) *start = state->start; found++; @@ -1474,6 +1498,7 @@ static int end_bio_extent_readpage(struct bio *bio, state = NULL; } clear->state |= EXTENT_UPTODATE; + set_state_cb(tree, clear, EXTENT_UPTODATE); clear_state_bit(tree, clear, EXTENT_LOCKED, 1, 0); if (cur == start) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f1e8bf251f32..a96c5a14134e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -33,6 +33,10 @@ struct extent_io_ops { struct extent_state *state); void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); + int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, + unsigned long bits); + int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, + unsigned long bits); }; struct extent_io_tree { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c5bb00f92396..8e210616d702 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -323,10 +323,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, } set_extent_delalloc(io_tree, start_pos, end_of_last_block, GFP_NOFS); - spin_lock(&root->fs_info->delalloc_lock); - root->fs_info->delalloc_bytes += (end_of_last_block + 1 - - start_pos) - existing_delalloc; - spin_unlock(&root->fs_info->delalloc_lock); btrfs_add_ordered_inode(inode); } else { u64 aligned_end; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 413b1012de53..5a38443a24ee 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -80,8 +80,6 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, u64 thresh; int ret = 0; - return 0; - if (for_del) thresh = total * 90; else @@ -249,7 +247,6 @@ not_found: static int run_delalloc_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; - u64 num_bytes; int ret; mutex_lock(&root->fs_info->fs_mutex); if (btrfs_test_opt(root, NODATACOW) || @@ -258,20 +255,34 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) else ret = cow_file_range(inode, start, end); - spin_lock(&root->fs_info->delalloc_lock); - num_bytes = end + 1 - start; - if (root->fs_info->delalloc_bytes < num_bytes) { - printk("delalloc accounting error total %llu sub %llu\n", - root->fs_info->delalloc_bytes, num_bytes); - } else { - root->fs_info->delalloc_bytes -= num_bytes; - } - spin_unlock(&root->fs_info->delalloc_lock); - mutex_unlock(&root->fs_info->fs_mutex); return ret; } +int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, + unsigned long bits) +{ + if ((bits & EXTENT_DELALLOC)) { + struct btrfs_root *root = BTRFS_I(inode)->root; + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += end - start + 1; + spin_unlock(&root->fs_info->delalloc_lock); + } + return 0; +} + +int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, + unsigned long bits) +{ + if ((bits & EXTENT_DELALLOC)) { + struct btrfs_root *root = BTRFS_I(inode)->root; + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes -= end - start + 1; + spin_unlock(&root->fs_info->delalloc_lock); + } + return 0; +} + int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) { struct inode *inode = page->mapping->host; @@ -908,28 +919,17 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, { char *kaddr; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct btrfs_root *root = BTRFS_I(inode)->root; u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - u64 existing_delalloc; - u64 delalloc_start; int ret = 0; WARN_ON(!PageLocked(page)); set_page_extent_mapped(page); lock_extent(io_tree, page_start, page_end, GFP_NOFS); - delalloc_start = page_start; - existing_delalloc = count_range_bits(&BTRFS_I(inode)->io_tree, - &delalloc_start, page_end, - PAGE_CACHE_SIZE, EXTENT_DELALLOC); set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); - spin_lock(&root->fs_info->delalloc_lock); - root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - existing_delalloc; - spin_unlock(&root->fs_info->delalloc_lock); - if (zero_start != PAGE_CACHE_SIZE) { kaddr = kmap(page); memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); @@ -2456,8 +2456,6 @@ int btrfs_defrag_file(struct file *file) { unsigned long ra_index = 0; u64 page_start; u64 page_end; - u64 delalloc_start; - u64 existing_delalloc; unsigned long i; int ret; @@ -2491,19 +2489,9 @@ int btrfs_defrag_file(struct file *file) { page_end = page_start + PAGE_CACHE_SIZE - 1; lock_extent(io_tree, page_start, page_end, GFP_NOFS); - delalloc_start = page_start; - existing_delalloc = - count_range_bits(&BTRFS_I(inode)->io_tree, - &delalloc_start, page_end, - PAGE_CACHE_SIZE, EXTENT_DELALLOC); set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); - spin_lock(&root->fs_info->delalloc_lock); - root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - - existing_delalloc; - spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); From 6f568d35a045dbb8a13fe71bfc32e85e39a986cb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 16:03:38 -0500 Subject: [PATCH 0401/2985] Btrfs: mount -o max_inline=size to control the maximum inline extent size Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent_io.c | 1 - fs/btrfs/file.c | 3 ++- fs/btrfs/super.c | 19 ++++++++++++++++++- 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 127c86f795d0..8f93e066bf8b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -325,6 +325,7 @@ struct btrfs_fs_info { u64 last_trans_committed; unsigned long mount_opt; u64 max_extent; + u64 max_inline; u64 alloc_start; struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e29a0716f6a9..6647b90ef745 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -659,6 +659,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->throttles = 0; fs_info->mount_opt = 0; fs_info->max_extent = (u64)-1; + fs_info->max_inline = 8192 * 1024; fs_info->delalloc_bytes = 0; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e81b35b06187..6999b819db62 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2451,7 +2451,6 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, goto out; sector = (em->block_start + start - em->start) >> inode->i_blkbits; -printk("bmap finds %Lu %Lu block %Lu\n", em->start, em->len, em->block_start); out: free_extent_map(em); return sector; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8e210616d702..7c10a90362ff 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -301,7 +301,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > 8192 || + inline_size > root->fs_info->max_inline || + (inline_size & (root->sectorsize -1)) == 0 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; u64 existing_delalloc = 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f8a1016600b1..a46300c4753a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -64,7 +64,7 @@ static void btrfs_put_super (struct super_block * sb) enum { Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, - Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_err, + Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_err, }; static match_table_t tokens = { @@ -73,6 +73,7 @@ static match_table_t tokens = { {Opt_nodatacow, "nodatacow"}, {Opt_nobarrier, "nobarrier"}, {Opt_max_extent, "max_extent=%s"}, + {Opt_max_inline, "max_inline=%s"}, {Opt_alloc_start, "alloc_start=%s"}, {Opt_ssd, "ssd"}, {Opt_err, NULL} @@ -178,6 +179,22 @@ static int parse_options (char * options, } } break; + case Opt_max_inline: + if (info) { + char *num = match_strdup(&args[0]); + if (num) { + info->max_inline = + btrfs_parse_size(num); + kfree(num); + + info->max_inline = max_t(u64, + info->max_inline, + root->sectorsize); + printk("btrfs: max_inline at %Lu\n", + info->max_inline); + } + } + break; case Opt_alloc_start: if (info) { char *num = match_strdup(&args[0]); From b3236e68bf86b3ae87f58984a1822369225211cb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 16:06:37 -0500 Subject: [PATCH 0402/2985] Btrfs: Leave on the tree defragger in mount -o ssd, it still helps there Signed-off-by: Chris Mason --- fs/btrfs/tree-defrag.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 5935cbd8f2b8..5c58630dce03 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -179,9 +179,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (root->ref_cows == 0 && !is_extent) goto out; - if (btrfs_test_opt(root, SSD)) - goto out; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; From 5a01a2e3a9d4dc9cb4871dde4d832a3b8de9f748 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 30 Jan 2008 11:43:54 -0500 Subject: [PATCH 0403/2985] Btrfs: Copy correct tree when inserting into slot 0 Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index fb2e2bd506c8..cf32651876bf 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2443,13 +2443,16 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); ret = 0; - if (slot == 0) + if (slot == 0) { + btrfs_cpu_key_to_disk(&disk_key, cpu_key); ret = fixup_low_keys(trans, root, path, &disk_key, 1); + } if (btrfs_leaf_free_space(root, leaf) < 0) { btrfs_print_leaf(root, leaf); BUG(); } + out: return ret; } From b1632b10c0133992fd0290b4836c47661bed4396 Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 30 Jan 2008 11:54:04 -0500 Subject: [PATCH 0404/2985] Btrfs: Align extent length to sectorsize in --- Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5a38443a24ee..de17c61dfa0a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -823,6 +823,8 @@ search_again: btrfs_file_extent_num_bytes(leaf, fi); extent_num_bytes = inode->i_size - found_key.offset + root->sectorsize - 1; + extent_num_bytes = extent_num_bytes & + ~((u64)root->sectorsize - 1); btrfs_set_file_extent_num_bytes(leaf, fi, extent_num_bytes); num_dec = (orig_num_bytes - From f392a938f3cc36a2abe68db9286bd864c3b2f7dd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 30 Jan 2008 11:54:05 -0500 Subject: [PATCH 0405/2985] Properly align the hole size in btrfs_setattr Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index de17c61dfa0a..515f8b01ab04 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1001,7 +1001,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) u64 mask = root->sectorsize - 1; u64 pos = (inode->i_size + mask) & ~mask; - u64 block_end = attr->ia_size | mask; + u64 block_end = (attr->ia_size + mask) & ~mask; u64 hole_start; u64 hole_size; u64 alloc_hint = 0; @@ -1022,7 +1022,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); - lock_extent(io_tree, pos, block_end, GFP_NOFS); + lock_extent(io_tree, pos, block_end - 1, GFP_NOFS); hole_size = block_end - hole_start; mutex_lock(&root->fs_info->fs_mutex); @@ -1043,7 +1043,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) } btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - unlock_extent(io_tree, pos, block_end, GFP_NOFS); + unlock_extent(io_tree, pos, block_end - 1, GFP_NOFS); if (err) return err; } From 1b0f7c29e2f4f41e1367e7581d3a9c8c70f0394a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 30 Jan 2008 14:33:02 -0500 Subject: [PATCH 0406/2985] Fix hole start calculation in btrfs_settar Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 515f8b01ab04..bcf3b35fb65e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1000,20 +1000,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 mask = root->sectorsize - 1; - u64 pos = (inode->i_size + mask) & ~mask; + u64 hole_start = (inode->i_size + mask) & ~mask; u64 block_end = (attr->ia_size + mask) & ~mask; - u64 hole_start; u64 hole_size; u64 alloc_hint = 0; - if (attr->ia_size <= pos) + if (attr->ia_size <= hole_start) goto out; - if (pos != inode->i_size) - hole_start = pos + root->sectorsize; - else - hole_start = pos; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); mutex_unlock(&root->fs_info->fs_mutex); @@ -1022,14 +1016,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); - lock_extent(io_tree, pos, block_end - 1, GFP_NOFS); + lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); hole_size = block_end - hole_start; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); err = btrfs_drop_extents(trans, root, inode, - pos, block_end, pos, + hole_start, block_end, hole_start, &alloc_hint); if (alloc_hint != EXTENT_MAP_INLINE) { @@ -1043,7 +1037,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) } btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - unlock_extent(io_tree, pos, block_end - 1, GFP_NOFS); + unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); if (err) return err; } From 0181e58f91c1edbf835edb7a87d6dfe81374709d Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 30 Jan 2008 14:39:54 -0500 Subject: [PATCH 0407/2985] btrfs_drop_extent fixe for inline items > 8K When truncating a inline extent, btrfs_drop_extents doesn't properly handle the case "key.offset > inline_limit". This bug can only happen when max line size is larger than 8K. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7c10a90362ff..7cbf8d8d7e27 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -591,8 +591,7 @@ next_slot: } } bookend = 1; - if (found_inline && start <= key.offset && - inline_limit < extent_end) + if (found_inline && start <= key.offset) keep = 1; } /* truncate existing extent */ @@ -672,11 +671,10 @@ next_slot: if (!bookend) continue; } - if (bookend && found_inline && start <= key.offset && - inline_limit < extent_end && key.offset <= inline_limit) { + if (bookend && found_inline && start <= key.offset) { u32 new_size; new_size = btrfs_file_extent_calc_inline_size( - extent_end - inline_limit); + extent_end - end); btrfs_truncate_item(trans, root, path, new_size, 0); } /* create bookend, splitting the extent in two */ From b0c68f8bed058d9f2023b067b16ed06a8c439544 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jan 2008 11:05:37 -0500 Subject: [PATCH 0408/2985] Btrfs: Enable delalloc accounting Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++++ fs/btrfs/extent_io.c | 14 +++++++------- fs/btrfs/extent_io.h | 4 ++-- fs/btrfs/inode.c | 18 +++++++++++++----- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6647b90ef745..ad72e2192015 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -861,6 +861,10 @@ int close_ctree(struct btrfs_root *root) write_ctree_super(NULL, root); mutex_unlock(&fs_info->fs_mutex); + if (fs_info->delalloc_bytes) { + printk("btrfs: at unmount delalloc count %Lu\n", + fs_info->delalloc_bytes); + } if (fs_info->extent_root->node) free_extent_buffer(fs_info->extent_root->node); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6999b819db62..624aabc3e6f2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -262,7 +262,7 @@ static void set_state_cb(struct extent_io_tree *tree, { if (tree->ops && tree->ops->set_bit_hook) { tree->ops->set_bit_hook(tree->mapping->host, state->start, - state->end, bits); + state->end, state->state, bits); } } @@ -272,7 +272,7 @@ static void clear_state_cb(struct extent_io_tree *tree, { if (tree->ops && tree->ops->set_bit_hook) { tree->ops->clear_bit_hook(tree->mapping->host, state->start, - state->end, bits); + state->end, state->state, bits); } } @@ -298,10 +298,10 @@ static int insert_state(struct extent_io_tree *tree, } if (bits & EXTENT_DIRTY) tree->dirty_bytes += end - start + 1; + set_state_cb(tree, state, bits); state->state |= bits; state->start = start; state->end = end; - set_state_cb(tree, state, bits); node = tree_insert(&tree->state, end, &state->rb_node); if (node) { struct extent_state *found; @@ -369,8 +369,8 @@ static int clear_state_bit(struct extent_io_tree *tree, WARN_ON(range > tree->dirty_bytes); tree->dirty_bytes -= range; } - state->state &= ~bits; clear_state_cb(tree, state, bits); + state->state &= ~bits; if (wake) wake_up(&state->wq); if (delete || state->state == 0) { @@ -574,8 +574,8 @@ static void set_state_bits(struct extent_io_tree *tree, u64 range = state->end - state->start + 1; tree->dirty_bytes += range; } - state->state |= bits; set_state_cb(tree, state, bits); + state->state |= bits; } /* @@ -997,8 +997,8 @@ search_again: free_extent_state(state); goto search_again; } - state->state |= EXTENT_LOCKED; set_state_cb(tree, state, EXTENT_LOCKED); + state->state |= EXTENT_LOCKED; if (!found) *start = state->start; found++; @@ -1497,8 +1497,8 @@ static int end_bio_extent_readpage(struct bio *bio, } else { state = NULL; } - clear->state |= EXTENT_UPTODATE; set_state_cb(tree, clear, EXTENT_UPTODATE); + clear->state |= EXTENT_UPTODATE; clear_state_bit(tree, clear, EXTENT_LOCKED, 1, 0); if (cur == start) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a96c5a14134e..6fd5e2c90615 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -34,9 +34,9 @@ struct extent_io_ops { void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, - unsigned long bits); + unsigned long old, unsigned long bits); int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, - unsigned long bits); + unsigned long old, unsigned long bits); }; struct extent_io_tree { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bcf3b35fb65e..f13b1dbc0619 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -260,9 +260,9 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) } int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, - unsigned long bits) + unsigned long old, unsigned long bits) { - if ((bits & EXTENT_DELALLOC)) { + if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; spin_lock(&root->fs_info->delalloc_lock); root->fs_info->delalloc_bytes += end - start + 1; @@ -272,12 +272,18 @@ int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, } int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, - unsigned long bits) + unsigned long old, unsigned long bits) { - if ((bits & EXTENT_DELALLOC)) { + if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; spin_lock(&root->fs_info->delalloc_lock); - root->fs_info->delalloc_bytes -= end - start + 1; + if (end - start + 1 > root->fs_info->delalloc_bytes) { + printk("warning: delalloc account %Lu %Lu\n", + end - start + 1, root->fs_info->delalloc_bytes); + root->fs_info->delalloc_bytes = 0; + } else { + root->fs_info->delalloc_bytes -= end - start + 1; + } spin_unlock(&root->fs_info->delalloc_lock); } return 0; @@ -3002,6 +3008,8 @@ static struct extent_io_ops btrfs_extent_io_ops = { .writepage_io_hook = btrfs_writepage_io_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, + .set_bit_hook = btrfs_set_bit_hook, + .clear_bit_hook = btrfs_clear_bit_hook, }; static struct address_space_operations btrfs_aops = { From 4529ba495c6fd0d79247784d0df55ae6512fa883 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jan 2008 16:45:07 -0500 Subject: [PATCH 0409/2985] Btrfs: Add data block hints to SSD mode too Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 28 ++++++++++++++++------------ fs/btrfs/transaction.c | 1 + 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8f93e066bf8b..35e9a7af10a5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -354,6 +354,7 @@ struct btrfs_fs_info { spinlock_t new_trans_lock; u64 delalloc_bytes; u64 last_alloc; + u64 last_data_alloc; }; /* * in ram representation of the tree. extent_root is used for all allocations diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ad72e2192015..dda8ad6c0077 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -688,6 +688,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->closing = 0; fs_info->total_pinned = 0; fs_info->last_alloc = 0; + fs_info->last_data_alloc = 0; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 90718f77bbc0..fa54ea590078 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1410,6 +1410,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, int ret; int slot = 0; u64 last_byte = 0; + u64 *last_ptr = NULL; u64 orig_search_start = search_start; int start_found; struct extent_buffer *l; @@ -1433,14 +1434,17 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, /* for SSD, cluster allocations together as much as possible */ if (btrfs_test_opt(root, SSD)) { - if (!data) { - if (root->fs_info->last_alloc) - hint_byte = root->fs_info->last_alloc; - else { - hint_byte = hint_byte & - ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); - empty_size += 16 * 1024 * 1024; - } + if (data) + last_ptr = &root->fs_info->last_data_alloc; + else + last_ptr = &root->fs_info->last_alloc; + if (*last_ptr) { + hint_byte = *last_ptr; + } + else { + hint_byte = hint_byte & + ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); + empty_size += 16 * 1024 * 1024; } } @@ -1470,8 +1474,8 @@ check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); - if (!data && btrfs_test_opt(root, SSD) && info->last_alloc && - search_start != info->last_alloc) { + if (btrfs_test_opt(root, SSD) && *last_ptr && + search_start != *last_ptr) { info->last_alloc = 0; if (!empty_size) { empty_size += 16 * 1024 * 1024; @@ -1609,6 +1613,8 @@ check_pending: } ins->offset = num_bytes; btrfs_free_path(path); + if (btrfs_test_opt(root, SSD)) + *last_ptr = ins->objectid + ins->offset; return 0; new_group: @@ -1636,8 +1642,6 @@ enospc: error: btrfs_release_path(root, path); btrfs_free_path(path); - if (btrfs_test_opt(root, SSD) && !ret && !data) - info->last_alloc = ins->objectid + ins->offset; return ret; } /* diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b4a1bc62a784..3f64d0c7ddb9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -58,6 +58,7 @@ static int join_transaction(struct btrfs_root *root) root->fs_info->generation++; root->fs_info->running_transaction = cur_trans; root->fs_info->last_alloc = 0; + root->fs_info->last_data_alloc = 0; cur_trans->num_writers = 1; cur_trans->num_joined = 0; cur_trans->transid = root->fs_info->generation; From 80ea96b1f3bd2431e0d71c9df6ab45c3de0c5840 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Feb 2008 14:51:59 -0500 Subject: [PATCH 0410/2985] Btrfs: Add a lookup cache to the extent state tree Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 57 +++++++++++++++++++++++++++++++------------- fs/btrfs/extent_io.h | 1 + 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 624aabc3e6f2..5f2fbf2054f2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -90,6 +90,7 @@ void extent_io_tree_init(struct extent_io_tree *tree, tree->mapping = mapping; INIT_LIST_HEAD(&tree->buffer_lru); tree->lru_size = 0; + tree->last = NULL; } EXPORT_SYMBOL(extent_io_tree_init); @@ -158,16 +159,23 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, return NULL; } -static struct rb_node *__tree_search(struct rb_root *root, u64 offset, +static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, struct rb_node **prev_ret, struct rb_node **next_ret) { + struct rb_root *root = &tree->state; struct rb_node * n = root->rb_node; struct rb_node *prev = NULL; struct rb_node *orig_prev = NULL; struct tree_entry *entry; struct tree_entry *prev_entry = NULL; + if (tree->last) { + struct extent_state *state; + state = tree->last; + if (state->start <= offset && offset <= state->end) + return &tree->last->rb_node; + } while(n) { entry = rb_entry(n, struct tree_entry, rb_node); prev = n; @@ -177,8 +185,10 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, n = n->rb_left; else if (offset > entry->end) n = n->rb_right; - else + else { + tree->last = rb_entry(n, struct extent_state, rb_node); return n; + } } if (prev_ret) { @@ -202,14 +212,20 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, return NULL; } -static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) +static inline struct rb_node *tree_search(struct extent_io_tree *tree, + u64 offset) { struct rb_node *prev = NULL; struct rb_node *ret; - ret = __tree_search(root, offset, &prev, NULL); - if (!ret) + ret = __etree_search(tree, offset, &prev, NULL); + if (!ret) { + if (prev) { + tree->last = rb_entry(prev, struct extent_state, + rb_node); + } return prev; + } return ret; } @@ -238,6 +254,8 @@ static int merge_state(struct extent_io_tree *tree, other->state == state->state) { state->start = other->start; other->tree = NULL; + if (tree->last == other) + tree->last = NULL; rb_erase(&other->rb_node, &tree->state); free_extent_state(other); } @@ -249,6 +267,8 @@ static int merge_state(struct extent_io_tree *tree, other->state == state->state) { other->start = state->start; state->tree = NULL; + if (tree->last == state) + tree->last = NULL; rb_erase(&state->rb_node, &tree->state); free_extent_state(state); } @@ -311,6 +331,7 @@ static int insert_state(struct extent_io_tree *tree, return -EEXIST; } state->tree = tree; + tree->last = state; merge_state(tree, state); return 0; } @@ -375,6 +396,8 @@ static int clear_state_bit(struct extent_io_tree *tree, wake_up(&state->wq); if (delete || state->state == 0) { if (state->tree) { + if (tree->last == state) + tree->last = NULL; rb_erase(&state->rb_node, &tree->state); state->tree = NULL; free_extent_state(state); @@ -422,7 +445,7 @@ again: * this search will find the extents that end after * our range starts */ - node = tree_search(&tree->state, start); + node = tree_search(tree, start); if (!node) goto out; state = rb_entry(node, struct extent_state, rb_node); @@ -533,7 +556,7 @@ again: * this search will find all the extents that end after * our range starts */ - node = tree_search(&tree->state, start); + node = tree_search(tree, start); if (!node) break; @@ -612,7 +635,7 @@ again: * this search will find all the extents that end after * our range starts. */ - node = tree_search(&tree->state, start); + node = tree_search(tree, start); if (!node) { err = insert_state(tree, prealloc, start, end, bits); prealloc = NULL; @@ -915,7 +938,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, * this search will find all the extents that end after * our range starts. */ - node = tree_search(&tree->state, start); + node = tree_search(tree, start); if (!node || IS_ERR(node)) { goto out; } @@ -953,7 +976,7 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree, * our range starts. */ search_again: - node = tree_search(&tree->state, cur_start); + node = tree_search(tree, cur_start); if (!node || IS_ERR(node)) { *end = (u64)-1; goto out; @@ -1041,7 +1064,7 @@ u64 count_range_bits(struct extent_io_tree *tree, * this search will find all the extents that end after * our range starts. */ - node = tree_search(&tree->state, cur_start); + node = tree_search(tree, cur_start); if (!node || IS_ERR(node)) { goto out; } @@ -1142,7 +1165,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) * this search will find all the extents that end after * our range starts. */ - node = tree_search(&tree->state, start); + node = tree_search(tree, start); if (!node || IS_ERR(node)) { ret = -ENOENT; goto out; @@ -1169,7 +1192,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) * this search will find all the extents that end after * our range starts. */ - node = tree_search(&tree->state, start); + node = tree_search(tree, start); if (!node || IS_ERR(node)) { ret = -ENOENT; goto out; @@ -1200,7 +1223,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned long flags; spin_lock_irqsave(&tree->lock, flags); - node = tree_search(&tree->state, start); + node = tree_search(tree, start); while (node && start <= end) { state = rb_entry(node, struct extent_state, rb_node); @@ -1348,7 +1371,7 @@ static int end_bio_extent_writepage(struct bio *bio, spin_lock_irqsave(&tree->lock, flags); if (!state || state->end != end) { state = NULL; - node = __tree_search(&tree->state, start, NULL, NULL); + node = __etree_search(tree, start, NULL, NULL); if (node) { state = rb_entry(node, struct extent_state, rb_node); @@ -1468,7 +1491,7 @@ static int end_bio_extent_readpage(struct bio *bio, spin_lock_irqsave(&tree->lock, flags); if (!state || state->end != end) { state = NULL; - node = __tree_search(&tree->state, start, NULL, NULL); + node = __etree_search(tree, start, NULL, NULL); if (node) { state = rb_entry(node, struct extent_state, rb_node); @@ -1631,7 +1654,7 @@ static int submit_one_bio(int rw, struct bio *bio) end = start + bvec->bv_len - 1; spin_lock_irq(&tree->lock); - node = __tree_search(&tree->state, start, NULL, NULL); + node = __etree_search(tree, start, NULL, NULL); BUG_ON(!node); state = rb_entry(node, struct extent_state, rb_node); while(state->end < end) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6fd5e2c90615..160da3f9d78e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -48,6 +48,7 @@ struct extent_io_tree { spinlock_t lru_lock; struct list_head buffer_lru; int lru_size; + struct extent_state *last; }; struct extent_state { From 47e4bb988c853d9af79d76fc5135aee9eeffed77 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Feb 2008 14:51:59 -0500 Subject: [PATCH 0411/2985] Btrfs: Insert extent record and the first backref in a single balance Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fa54ea590078..0fc8dfd58da9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1664,12 +1664,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 root_used; u64 search_start = 0; u64 new_hint; + u32 sizes[2]; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; - struct btrfs_extent_item extent_item; + struct btrfs_extent_item *extent_item; + struct btrfs_extent_ref *ref; struct btrfs_path *path; - - btrfs_set_stack_extent_refs(&extent_item, 1); + struct btrfs_key keys[2]; new_hint = max(hint_byte, root->fs_info->alloc_start); if (new_hint < btrfs_super_total_bytes(&info->super_copy)) @@ -1707,20 +1708,37 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, WARN_ON(trans->alloc_exclude_nr); trans->alloc_exclude_start = ins->objectid; trans->alloc_exclude_nr = ins->offset; - ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, - sizeof(extent_item)); - trans->alloc_exclude_start = 0; - trans->alloc_exclude_nr = 0; - BUG_ON(ret); + memcpy(&keys[0], ins, sizeof(*ins)); + keys[1].offset = hash_extent_ref(root_objectid, ref_generation, + owner, owner_offset); + keys[1].objectid = ins->objectid; + keys[1].type = BTRFS_EXTENT_REF_KEY; + sizes[0] = sizeof(*extent_item); + sizes[1] = sizeof(*ref); path = btrfs_alloc_path(); BUG_ON(!path); - ret = btrfs_insert_extent_backref(trans, extent_root, path, - ins->objectid, root_objectid, - ref_generation, owner, owner_offset); + + ret = btrfs_insert_empty_items(trans, extent_root, path, keys, + sizes, 2); BUG_ON(ret); + extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_item); + btrfs_set_extent_refs(path->nodes[0], extent_item, 1); + ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, + struct btrfs_extent_ref); + + btrfs_set_ref_root(path->nodes[0], ref, root_objectid); + btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); + btrfs_set_ref_objectid(path->nodes[0], ref, owner); + btrfs_set_ref_offset(path->nodes[0], ref, owner_offset); + + btrfs_mark_buffer_dirty(path->nodes[0]); + + trans->alloc_exclude_start = 0; + trans->alloc_exclude_nr = 0; btrfs_free_path(path); finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); From 21a4989d26d5ce43aac452fd67be592463a5996d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Feb 2008 14:51:59 -0500 Subject: [PATCH 0412/2985] Btrfs: Hash in the offset and owner for file extent backref keys This makes searches for backrefs and backref insertion much more efficient when there are many backrefs for a single extent Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0fc8dfd58da9..8761aec59e37 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -397,13 +397,12 @@ static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(ref_generation); low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); - -#if 0 - lenum = cpu_to_le64(owner); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); - lenum = cpu_to_le64(owner_offset); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); -#endif + if (owner >= BTRFS_FIRST_FREE_OBJECTID) { + lenum = cpu_to_le64(owner); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + lenum = cpu_to_le64(owner_offset); + low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + } return ((u64)high_crc << 32) | (u64)low_crc; } From 60cde612c8eb51ba79dabb4088f149c6a9802fdf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Feb 2008 14:51:59 -0500 Subject: [PATCH 0413/2985] Btrfs: Use last_alloc optimizations for metadata, even without -o ssd Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8761aec59e37..6f161abe2867 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1431,19 +1431,19 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_MIXED; } - /* for SSD, cluster allocations together as much as possible */ - if (btrfs_test_opt(root, SSD)) { - if (data) - last_ptr = &root->fs_info->last_data_alloc; - else - last_ptr = &root->fs_info->last_alloc; - if (*last_ptr) { + if (!data) + last_ptr = &root->fs_info->last_alloc; + + if (data && btrfs_test_opt(root, SSD)) + last_ptr = &root->fs_info->last_data_alloc; + + if (last_ptr) { + if (*last_ptr) hint_byte = *last_ptr; - } else { hint_byte = hint_byte & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); - empty_size += 16 * 1024 * 1024; + empty_size += 2 * 1024 * 1024; } } @@ -1473,9 +1473,8 @@ check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); - if (btrfs_test_opt(root, SSD) && *last_ptr && - search_start != *last_ptr) { - info->last_alloc = 0; + if (last_ptr && *last_ptr && search_start != *last_ptr) { + *last_ptr = 0; if (!empty_size) { empty_size += 16 * 1024 * 1024; total_needed += empty_size; @@ -1612,7 +1611,7 @@ check_pending: } ins->offset = num_bytes; btrfs_free_path(path); - if (btrfs_test_opt(root, SSD)) + if (last_ptr) *last_ptr = ins->objectid + ins->offset; return 0; @@ -1827,8 +1826,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, buf->start, buf->start + buf->len - 1, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; - if (!btrfs_test_opt(root, SSD)) - btrfs_set_buffer_defrag(buf); + btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; } From 2f375ab9c55bf22bf77ed9e3082b93e77ea10f55 Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 1 Feb 2008 14:58:07 -0500 Subject: [PATCH 0414/2985] Call btrfs_cow_block while lowering tree level. When freeing root block of a tree, btrfs_free_extent' parameter 'ref_generation' is from root block itseft. When freeing non-root block, 'ref_generation' is from its parent. so when converting a non-root block to root block, we must guarantee its generation is equal to its parent's generation. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index cf32651876bf..ef8809c1e69c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -720,6 +720,9 @@ static int balance_level(struct btrfs_trans_handle *trans, /* promote the child to a root */ child = read_node_slot(root, mid, 0); BUG_ON(!child); + ret = btrfs_cow_block(trans, root, child, mid, 0, &child); + BUG_ON(ret); + root->node = child; path->nodes[level] = NULL; clean_tree_block(trans, root, mid); From ae9d12853b44f4e0c06732166e8e3399ec01a680 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Feb 2008 15:42:15 -0500 Subject: [PATCH 0415/2985] Btrfs: Fix delalloc account on state deletion Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5f2fbf2054f2..5d2fb527fd96 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -396,6 +396,7 @@ static int clear_state_bit(struct extent_io_tree *tree, wake_up(&state->wq); if (delete || state->state == 0) { if (state->tree) { + clear_state_cb(tree, state, state->state); if (tree->last == state) tree->last = NULL; rb_erase(&state->rb_node, &tree->state); From 80b6794d1153ed91a040d873396efb9bd60969fd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Feb 2008 16:35:04 -0500 Subject: [PATCH 0416/2985] Btrfs: Lower stack usage in transaction.c Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3f64d0c7ddb9..e9a0983897f3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -33,7 +33,7 @@ static struct workqueue_struct *trans_wq; #define BTRFS_ROOT_TRANS_TAG 0 #define BTRFS_ROOT_DEFRAG_TAG 1 -static void put_transaction(struct btrfs_transaction *transaction) +static noinline void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(transaction->use_count == 0); transaction->use_count--; @@ -46,7 +46,7 @@ static void put_transaction(struct btrfs_transaction *transaction) } } -static int join_transaction(struct btrfs_root *root) +static noinline int join_transaction(struct btrfs_root *root) { struct btrfs_transaction *cur_trans; cur_trans = root->fs_info->running_transaction; @@ -82,7 +82,7 @@ static int join_transaction(struct btrfs_root *root) return 0; } -static int record_root_in_trans(struct btrfs_root *root) +static noinline int record_root_in_trans(struct btrfs_root *root) { u64 running_trans_id = root->fs_info->running_transaction->transid; if (root->ref_cows && root->last_trans < running_trans_id) { @@ -225,8 +225,8 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, return 0; } -static int wait_for_commit(struct btrfs_root *root, - struct btrfs_transaction *commit) +static noinline int wait_for_commit(struct btrfs_root *root, + struct btrfs_transaction *commit) { DEFINE_WAIT(wait); mutex_lock(&root->fs_info->trans_mutex); @@ -265,9 +265,9 @@ int btrfs_add_dead_root(struct btrfs_root *root, return 0; } -static int add_dirty_roots(struct btrfs_trans_handle *trans, - struct radix_tree_root *radix, - struct list_head *list) +static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, + struct radix_tree_root *radix, + struct list_head *list) { struct dirty_root *dirty; struct btrfs_root *gang[8]; @@ -406,8 +406,8 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) return err; } -static int drop_dirty_roots(struct btrfs_root *tree_root, - struct list_head *list) +static noinline int drop_dirty_roots(struct btrfs_root *tree_root, + struct list_head *list) { struct dirty_root *dirty; struct btrfs_trans_handle *trans; @@ -529,23 +529,28 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, return 0; } -static int create_pending_snapshot(struct btrfs_trans_handle *trans, +static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_pending_snapshot *pending) { struct btrfs_key key; - struct btrfs_root_item new_root_item; + struct btrfs_root_item *new_root_item; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root = pending->root; struct extent_buffer *tmp; int ret; u64 objectid; + new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); + if (!new_root_item) { + ret = -ENOMEM; + goto fail; + } ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); if (ret) goto fail; - memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); + memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); key.objectid = objectid; key.offset = 1; @@ -557,10 +562,10 @@ static int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_copy_root(trans, root, root->node, &tmp, objectid); - btrfs_set_root_bytenr(&new_root_item, tmp->start); - btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); + btrfs_set_root_bytenr(new_root_item, tmp->start); + btrfs_set_root_level(new_root_item, btrfs_header_level(tmp)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, - &new_root_item); + new_root_item); free_extent_buffer(tmp); if (ret) goto fail; @@ -581,11 +586,12 @@ static int create_pending_snapshot(struct btrfs_trans_handle *trans, pending->name, strlen(pending->name), objectid, root->fs_info->sb->s_root->d_inode->i_ino); fail: + kfree(new_root_item); return ret; } -static int create_pending_snapshots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { struct btrfs_pending_snapshot *pending; struct list_head *head = &trans->transaction->pending_snapshots; From c2e639f02cff721172569add21ba5d058fd1a81c Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 4 Feb 2008 08:57:25 -0500 Subject: [PATCH 0417/2985] Btrfs: Fix typo in extent_io.c --- Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 4 ++-- fs/btrfs/inode.c | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5d2fb527fd96..36a943e51808 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1416,7 +1416,7 @@ static int end_bio_extent_writepage(struct bio *bio, * to the correct offsets in the file */ if (state && (state->end + 1 != start || - !state->state & EXTENT_WRITEBACK)) { + !(state->state & EXTENT_WRITEBACK))) { state = NULL; } spin_unlock_irqrestore(&tree->lock, flags); @@ -1539,7 +1539,7 @@ static int end_bio_extent_readpage(struct bio *bio, * to the correct offsets in the file */ if (state && (state->end + 1 != start || - !state->state & EXTENT_WRITEBACK)) { + !(state->state & EXTENT_LOCKED))) { state = NULL; } spin_unlock_irqrestore(&tree->lock, flags); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f13b1dbc0619..44fc94fee2c4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -364,8 +364,7 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, if (btrfs_test_opt(root, NODATASUM) || btrfs_test_flag(inode, NODATASUM)) return 0; - - if (state->start == start) { + if (state && state->start == start) { private = state->private; ret = 0; } else { From f594706643607d7d24901479e985ec62e0ce129a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Feb 2008 10:10:13 -0500 Subject: [PATCH 0418/2985] Btrfs: Add debugging for block group update failure Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6f161abe2867..a340eeedd6c7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1751,7 +1751,11 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, update_block: ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, data); - BUG_ON(ret); + if (ret) { + printk("update block group failed for %Lu %Lu\n", + ins->objectid, ins->offset); + BUG(); + } return 0; } From 47b0c4f8c717890877058f30e07a30e05f74a7bb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Feb 2008 10:10:43 -0500 Subject: [PATCH 0419/2985] Btrfs: Update magic Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 35e9a7af10a5..dfb700efcd6b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -37,7 +37,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; -#define BTRFS_MAGIC "_B3RfS_M" +#define BTRFS_MAGIC "_B4RfS_M" #define BTRFS_MAX_LEVEL 8 #define BTRFS_ROOT_TREE_OBJECTID 1ULL From dacb473ad81d5c1650cb73d1cb9b866202f6bab0 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 5 Feb 2008 09:07:49 -0500 Subject: [PATCH 0420/2985] Btrfs: Fix hole creation in file_write Signed-off-by: Chris Mason --- fs/btrfs/file.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7cbf8d8d7e27..184bde1cfc3d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -265,16 +265,14 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); /* FIXME...EIEIO, ENOSPC and more */ - /* insert any holes we need to create */ - if (inode->i_size < start_pos) { + if (isize < end_pos) { u64 last_pos_in_file; u64 hole_size; u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (end_pos - last_pos_in_file + mask) & ~mask; - - if (last_pos_in_file < start_pos) { + if (last_pos_in_file < end_pos) { err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, From 068fe39fa18e8921062e2e16e6aa2b46768b2a35 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 5 Feb 2008 10:20:17 -0500 Subject: [PATCH 0421/2985] Btrfs: Add checks for last byte in disk to allocator grouping Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a340eeedd6c7..9e0869959cde 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1611,8 +1611,13 @@ check_pending: } ins->offset = num_bytes; btrfs_free_path(path); - if (last_ptr) + if (last_ptr) { *last_ptr = ins->objectid + ins->offset; + if (*last_ptr == + btrfs_super_total_bytes(&root->fs_info->super_copy)) { + *last_ptr = 0; + } + } return 0; new_group: From 7261009ceaee65e4683be6e474d1ea85a1b1b26c Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 5 Feb 2008 15:40:36 -0500 Subject: [PATCH 0422/2985] btrfs_drop_extents: handle BTRFS_INODE_REF_KEY types It's possible "key.type == BTRFS_INODE_REF_KEY" and "key.offset >= end". Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 184bde1cfc3d..bfa4149c053d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -507,11 +507,12 @@ next_slot: slot = path->slots[0]; ret = 0; btrfs_item_key_to_cpu(leaf, &key, slot); - - if (key.offset >= end || key.objectid != inode->i_ino) { + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY && + key.offset >= end) { goto out; } - if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) { + if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || + key.objectid != inode->i_ino) { goto out; } if (recow) { From 5d196fc15dbe1cc54b81801694aafe218121e054 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 6 Feb 2008 10:00:57 -0500 Subject: [PATCH 0423/2985] Btrfs: Use 2MB as the empty_size for clustered allocations Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9e0869959cde..220e9be9ad82 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1476,7 +1476,7 @@ check_failed: if (last_ptr && *last_ptr && search_start != *last_ptr) { *last_ptr = 0; if (!empty_size) { - empty_size += 16 * 1024 * 1024; + empty_size += 2 * 1024 * 1024; total_needed += empty_size; } search_start = find_search_start(root, &block_group, From 961d023213b94861480f5d6533f7adc8332eb527 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 6 Feb 2008 11:01:42 -0500 Subject: [PATCH 0424/2985] Btrfs: Don't case unsigned long to int in bio submission Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 36a943e51808..b68e57f66a96 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1705,7 +1705,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, return 0; } } - nr = min_t(int, max_pages, bio_get_nr_vecs(bdev)); + nr = bio_get_nr_vecs(bdev); bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); if (!bio) { printk("failed to allocate bio nr %d\n", nr); From a86c12c73d982c545a37a8ecdd66528ab260b770 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 7 Feb 2008 10:50:54 -0500 Subject: [PATCH 0425/2985] Btrfs: Create larger bios for btree blocks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 10 ++++++---- fs/btrfs/extent_io.c | 12 +++++++++--- fs/btrfs/extent_io.h | 3 ++- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dda8ad6c0077..6f0825b617d1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -197,7 +197,8 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) WARN_ON(1); } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1); + read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1, + btree_get_extent); btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { @@ -339,7 +340,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, 0, 0); + buf, 0, 0, btree_get_extent); free_extent_buffer(buf); return ret; } @@ -358,7 +359,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1, + btree_get_extent); if (buf->flags & EXTENT_CSUM) return buf; @@ -1009,7 +1011,7 @@ int btrfs_read_buffer(struct extent_buffer *buf) struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, 0, 1); + buf, 0, 1, btree_get_extent); } static struct extent_io_ops btree_extent_io_ops = { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b68e57f66a96..fcae999ab712 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2859,8 +2859,8 @@ EXPORT_SYMBOL(extent_buffer_uptodate); int read_extent_buffer_pages(struct extent_io_tree *tree, struct extent_buffer *eb, - u64 start, - int wait) + u64 start, int wait, + get_extent_t *get_extent) { unsigned long i; unsigned long start_i; @@ -2868,6 +2868,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, int err; int ret = 0; unsigned long num_pages; + struct bio *bio = NULL; + if (eb->flags & EXTENT_UPTODATE) return 0; @@ -2899,7 +2901,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, lock_page(page); } if (!PageUptodate(page)) { - err = page->mapping->a_ops->readpage(NULL, page); + err = __extent_read_full_page(tree, page, + get_extent, &bio); if (err) { ret = err; } @@ -2908,6 +2911,9 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, } } + if (bio) + submit_one_bio(READ, bio); + if (ret || !wait) { return ret; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 160da3f9d78e..151fdada4dc4 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -157,7 +157,8 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, gfp_t mask); void free_extent_buffer(struct extent_buffer *eb); int read_extent_buffer_pages(struct extent_io_tree *tree, - struct extent_buffer *eb, u64 start, int wait); + struct extent_buffer *eb, u64 start, int wait, + get_extent_t *get_extent); static inline void extent_buffer_get(struct extent_buffer *eb) { From 9069218d448ea547dbad5f1cbd537e88d6519d66 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 8 Feb 2008 13:49:28 -0500 Subject: [PATCH 0426/2985] Btrfs: Fix i_blocks accounting Now that delayed allocation accounting works, i_blocks accounting is changed to only modify i_blocks when extents inserted or removed. The fillattr call is changed to include the delayed allocation byte count in the i_blocks result. Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/ctree.h | 9 +++++++++ fs/btrfs/file.c | 17 +++++++++++------ fs/btrfs/inode.c | 43 ++++++++++++++++++++++++++++-------------- 4 files changed, 50 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 9a0647dc5a0b..9b9db9cbc019 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -36,6 +36,7 @@ struct btrfs_inode { * transid of the trans_handle that last modified this inode */ u64 last_trans; + u64 delalloc_bytes; u32 flags; }; static inline struct btrfs_inode *BTRFS_I(struct inode *inode) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index dfb700efcd6b..52144b04f1af 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1161,6 +1161,15 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +static inline void dec_i_blocks(struct inode *inode, u64 dec) +{ + dec = dec >> 9; + if (dec <= inode->i_blocks) + inode->i_blocks -= dec; + else + inode->i_blocks = 0; +} + unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bfa4149c053d..f89396082544 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -175,6 +175,7 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); + inode->i_blocks += (offset + size - found_end) >> 9; } if (found_end < offset) { ptr = btrfs_file_extent_inline_start(ei) + found_size; @@ -184,6 +185,7 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, insert: btrfs_release_path(root, path); datasize = offset + size - key.offset; + inode->i_blocks += datasize >> 9; datasize = btrfs_file_extent_calc_inline_size(datasize); ret = btrfs_insert_empty_item(trans, root, path, &key, datasize); @@ -256,7 +258,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, goto out_unlock; } btrfs_set_trans_block_group(trans, inode); - inode->i_blocks += num_bytes >> 9; hint_byte = 0; if ((end_of_last_block & 4095) == 0) { @@ -410,7 +411,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) if (found_key.type != BTRFS_EXTENT_DATA_KEY) goto out; - if (found_key.offset != last_offset) { + if (found_key.offset < last_offset) { WARN_ON(1); btrfs_print_leaf(root, leaf); printk("inode %lu found offset %Lu expected %Lu\n", @@ -435,7 +436,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) last_offset = extent_end; path->slots[0]++; } - if (last_offset < inode->i_size) { + if (0 && last_offset < inode->i_size) { WARN_ON(1); btrfs_print_leaf(root, leaf); printk("inode %lu found offset %Lu size %Lu\n", inode->i_ino, @@ -608,8 +609,7 @@ next_slot: extent); if (btrfs_file_extent_disk_bytenr(leaf, extent)) { - inode->i_blocks -= - (old_num - new_num) >> 9; + dec_i_blocks(inode, old_num - new_num); } btrfs_set_file_extent_num_bytes(leaf, extent, new_num); @@ -620,6 +620,8 @@ next_slot: u32 new_size; new_size = btrfs_file_extent_calc_inline_size( inline_limit - key.offset); + dec_i_blocks(inode, (extent_end - key.offset) - + (inline_limit - key.offset)); btrfs_truncate_item(trans, root, path, new_size, 1); } @@ -653,7 +655,7 @@ next_slot: btrfs_release_path(root, path); extent = NULL; if (found_extent && disk_bytenr != 0) { - inode->i_blocks -= extent_num_bytes >> 9; + dec_i_blocks(inode, extent_num_bytes); ret = btrfs_free_extent(trans, root, disk_bytenr, disk_num_bytes, @@ -674,6 +676,8 @@ next_slot: u32 new_size; new_size = btrfs_file_extent_calc_inline_size( extent_end - end); + dec_i_blocks(inode, (extent_end - key.offset) - + (extent_end - end)); btrfs_truncate_item(trans, root, path, new_size, 0); } /* create bookend, splitting the extent in two */ @@ -718,6 +722,7 @@ next_slot: } out: btrfs_free_path(path); + btrfs_check_file(root, inode); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 44fc94fee2c4..913ab128eee1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -134,6 +134,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start, ins.objectid, ins.offset, ins.offset); + inode->i_blocks += ins.offset >> 9; btrfs_check_file(root, inode); num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; @@ -142,6 +143,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) btrfs_drop_extent_cache(inode, orig_start, orig_start + orig_num_bytes - 1); btrfs_add_ordered_inode(inode); + btrfs_update_inode(trans, root, inode); out: btrfs_end_transaction(trans, root); return ret; @@ -265,6 +267,7 @@ int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; spin_lock(&root->fs_info->delalloc_lock); + BTRFS_I(inode)->delalloc_bytes += end - start + 1; root->fs_info->delalloc_bytes += end - start + 1; spin_unlock(&root->fs_info->delalloc_lock); } @@ -281,8 +284,10 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, printk("warning: delalloc account %Lu %Lu\n", end - start + 1, root->fs_info->delalloc_bytes); root->fs_info->delalloc_bytes = 0; + BTRFS_I(inode)->delalloc_bytes = 0; } else { root->fs_info->delalloc_bytes -= end - start + 1; + BTRFS_I(inode)->delalloc_bytes -= end - start + 1; } spin_unlock(&root->fs_info->delalloc_lock); } @@ -833,32 +838,37 @@ search_again: btrfs_set_file_extent_num_bytes(leaf, fi, extent_num_bytes); num_dec = (orig_num_bytes - - extent_num_bytes) >> 9; - if (extent_start != 0) { - inode->i_blocks -= num_dec; - } + extent_num_bytes); + if (extent_start != 0) + dec_i_blocks(inode, num_dec); btrfs_mark_buffer_dirty(leaf); } else { extent_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); /* FIXME blocksize != 4096 */ - num_dec = btrfs_file_extent_num_bytes(leaf, - fi) >> 9; + num_dec = btrfs_file_extent_num_bytes(leaf, fi); if (extent_start != 0) { found_extent = 1; - inode->i_blocks -= num_dec; + dec_i_blocks(inode, num_dec); } root_gen = btrfs_header_generation(leaf); root_owner = btrfs_header_owner(leaf); } - } else if (extent_type == BTRFS_FILE_EXTENT_INLINE && - !del_item) { - u32 newsize = inode->i_size - found_key.offset; - newsize = btrfs_file_extent_calc_inline_size(newsize); - ret = btrfs_truncate_item(trans, root, path, - newsize, 1); - BUG_ON(ret); + } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { + if (!del_item) { + u32 newsize = inode->i_size - found_key.offset; + dec_i_blocks(inode, item_end + 1 - + found_key.offset - newsize); + newsize = + btrfs_file_extent_calc_inline_size(newsize); + ret = btrfs_truncate_item(trans, root, path, + newsize, 1); + BUG_ON(ret); + } else { + dec_i_blocks(inode, item_end + 1 - + found_key.offset); + } } delete: if (del_item) { @@ -1222,6 +1232,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) struct btrfs_iget_args *args = p; inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; + BTRFS_I(inode)->delalloc_bytes = 0; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); @@ -1528,6 +1539,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->root = root; if (mode & S_IFDIR) @@ -1746,6 +1758,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -2797,6 +2810,7 @@ static int btrfs_getattr(struct vfsmount *mnt, struct inode *inode = dentry->d_inode; generic_fillattr(inode, stat); stat->blksize = PAGE_CACHE_SIZE; + stat->blocks = inode->i_blocks + (BTRFS_I(inode)->delalloc_bytes >> 9); return 0; } @@ -2912,6 +2926,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; From 39b5637f6f195852259004bb27b58e2dcf9fb378 Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 15 Feb 2008 10:40:50 -0500 Subject: [PATCH 0427/2985] Btrfs: Fix "no csum found for inode" issue. A few codes were not properly updated for changes of extent map. This may be the causes of "no csum found for inode" issue. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 5 +++-- fs/btrfs/file.c | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fcae999ab712..9262ab37a7cd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2417,12 +2417,13 @@ int try_release_extent_mapping(struct extent_map_tree *map, u64 end = start + PAGE_CACHE_SIZE - 1; u64 orig_start = start; int ret = 1; - if ((mask & __GFP_WAIT) && page->mapping->host->i_size > 16 * 1024 * 1024) { + u64 len; while (start <= end) { + len = end - start + 1; spin_lock(&map->lock); - em = lookup_extent_mapping(map, start, end); + em = lookup_extent_mapping(map, start, len); if (!em || IS_ERR(em)) { spin_unlock(&map->lock); break; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f89396082544..b0352b5958a5 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -357,10 +357,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) { struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + u64 len = end - start + 1; + if (end == (u64)-1) + len = (u64)-1; while(1) { spin_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, start, end); + em = lookup_extent_mapping(em_tree, start, len); if (!em) { spin_unlock(&em_tree->lock); break; From df68b8a7ad4a18c9e63f1c12015a59c3b7031adb Mon Sep 17 00:00:00 2001 From: David Miller Date: Fri, 15 Feb 2008 10:40:52 -0500 Subject: [PATCH 0428/2985] Btrfs: unaligned access fixes Btrfs set/get macros lose type information needed to avoid unaligned accesses on sparc64. ere is a patch for the kernel bits which fixes most of the unaligned accesses on sparc64. btrfs_name_hash is modified to return the hash value instead of getting a return location via a (potentially unaligned) pointer. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 17 ++++++----------- fs/btrfs/dir-item.c | 12 ++++-------- fs/btrfs/hash.c | 17 ++++++++--------- fs/btrfs/hash.h | 2 +- fs/btrfs/struct-funcs.c | 30 ++++++++++++++---------------- 5 files changed, 33 insertions(+), 45 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 52144b04f1af..086cb0525cd1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -495,22 +495,17 @@ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ { \ - char *kaddr = kmap_atomic(eb->first_page, KM_USER0); \ - unsigned long offset = offsetof(type, member); \ - u##bits res; \ - __le##bits *tmp = (__le##bits *)(kaddr + offset); \ - res = le##bits##_to_cpu(*tmp); \ - kunmap_atomic(kaddr, KM_USER0); \ + type *p = kmap_atomic(eb->first_page, KM_USER0); \ + u##bits res = le##bits##_to_cpu(p->member); \ + kunmap_atomic(p, KM_USER0); \ return res; \ } \ static inline void btrfs_set_##name(struct extent_buffer *eb, \ u##bits val) \ { \ - char *kaddr = kmap_atomic(eb->first_page, KM_USER0); \ - unsigned long offset = offsetof(type, member); \ - __le##bits *tmp = (__le##bits *)(kaddr + offset); \ - *tmp = cpu_to_le##bits(val); \ - kunmap_atomic(kaddr, KM_USER0); \ + type *p = kmap_atomic(eb->first_page, KM_USER0); \ + p->member = cpu_to_le##bits(val); \ + kunmap_atomic(p, KM_USER0); \ } #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 5247a9a41f08..7a73dc59dc4d 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -71,8 +71,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, key.objectid = dir; btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); - ret = btrfs_name_hash(name, name_len, &key.offset); - BUG_ON(ret); + key.offset = btrfs_name_hash(name, name_len); path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -125,8 +124,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = dir; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - ret = btrfs_name_hash(name, name_len, &key.offset); - BUG_ON(ret); + key.offset = btrfs_name_hash(name, name_len); path = btrfs_alloc_path(); data_size = sizeof(*dir_item) + name_len; dir_item = insert_with_overflow(trans, root, path, &key, data_size, @@ -199,8 +197,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, key.objectid = dir; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - ret = btrfs_name_hash(name, name_len, &key.offset); - BUG_ON(ret); + key.offset = btrfs_name_hash(name, name_len); ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) @@ -261,8 +258,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, key.objectid = dir; btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); - ret = btrfs_name_hash(name, name_len, &key.offset); - BUG_ON(ret); + key.offset = btrfs_name_hash(name, name_len); ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index e5c76903d722..d5252f5c9d01 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -76,19 +76,18 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) *buf++ = pad; } -int btrfs_name_hash(const char *name, int len, u64 *hash_result) +u64 btrfs_name_hash(const char *name, int len) { __u32 hash; __u32 minor_hash = 0; const char *p; __u32 in[8], buf[2]; + u64 hash_result; if (len == 1 && *name == '.') { - *hash_result = 1; - return 0; + return 1; } else if (len == 2 && name[0] == '.' && name[1] == '.') { - *hash_result = 2; - return 0; + return 2; } /* Initialize the default seed for the hash checksum functions */ @@ -106,8 +105,8 @@ int btrfs_name_hash(const char *name, int len, u64 *hash_result) } hash = buf[0]; minor_hash = buf[1]; - *hash_result = buf[0]; - *hash_result <<= 32; - *hash_result |= buf[1]; - return 0; + hash_result = buf[0]; + hash_result <<= 32; + hash_result |= buf[1]; + return hash_result; } diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index d3be02670585..868ee17ca77a 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h @@ -18,5 +18,5 @@ #ifndef __HASH__ #define __HASH__ -int btrfs_name_hash(const char *name, int len, u64 *hash_result); +u64 btrfs_name_hash(const char *name, int len); #endif diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index c5715a60554c..ad03a32d1116 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -21,16 +21,15 @@ u##bits btrfs_##name(struct extent_buffer *eb, \ type *s) \ { \ - unsigned long offset = (unsigned long)s + \ - offsetof(type, member); \ - __le##bits *tmp; \ + unsigned long part_offset = (unsigned long)s; \ + unsigned long offset = part_offset + offsetof(type, member); \ + type *p; \ /* ugly, but we want the fast path here */ \ if (eb->map_token && offset >= eb->map_start && \ offset + sizeof(((type *)0)->member) <= eb->map_start + \ eb->map_len) { \ - tmp = (__le##bits *)(eb->kaddr + offset - \ - eb->map_start); \ - return le##bits##_to_cpu(*tmp); \ + p = (type *)(eb->kaddr + part_offset - eb->map_start); \ + return le##bits##_to_cpu(p->member); \ } \ { \ int err; \ @@ -48,8 +47,8 @@ u##bits btrfs_##name(struct extent_buffer *eb, \ read_eb_member(eb, s, type, member, &res); \ return le##bits##_to_cpu(res); \ } \ - tmp = (__le##bits *)(kaddr + offset - map_start); \ - res = le##bits##_to_cpu(*tmp); \ + p = (type *)(kaddr + part_offset - map_start); \ + res = le##bits##_to_cpu(p->member); \ if (unmap_on_exit) \ unmap_extent_buffer(eb, map_token, KM_USER1); \ return res; \ @@ -58,16 +57,15 @@ u##bits btrfs_##name(struct extent_buffer *eb, \ void btrfs_set_##name(struct extent_buffer *eb, \ type *s, u##bits val) \ { \ - unsigned long offset = (unsigned long)s + \ - offsetof(type, member); \ - __le##bits *tmp; \ + unsigned long part_offset = (unsigned long)s; \ + unsigned long offset = part_offset + offsetof(type, member); \ + type *p; \ /* ugly, but we want the fast path here */ \ if (eb->map_token && offset >= eb->map_start && \ offset + sizeof(((type *)0)->member) <= eb->map_start + \ eb->map_len) { \ - tmp = (__le##bits *)(eb->kaddr + offset - \ - eb->map_start); \ - *tmp = cpu_to_le##bits(val); \ + p = (type *)(eb->kaddr + part_offset - eb->map_start); \ + p->member = cpu_to_le##bits(val); \ return; \ } \ { \ @@ -86,8 +84,8 @@ void btrfs_set_##name(struct extent_buffer *eb, \ write_eb_member(eb, s, type, member, &val); \ return; \ } \ - tmp = (__le##bits *)(kaddr + offset - map_start); \ - *tmp = cpu_to_le##bits(val); \ + p = (type *)(kaddr + part_offset - map_start); \ + p->member = cpu_to_le##bits(val); \ if (unmap_on_exit) \ unmap_extent_buffer(eb, map_token, KM_USER1); \ } \ From 9afbb0b752ef30a429c45b9de6706e28ad1a36e1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Feb 2008 13:19:35 -0500 Subject: [PATCH 0429/2985] Btrfs: Disable tree defrag in SSD mode Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 ++- fs/btrfs/tree-defrag.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 220e9be9ad82..af5d4a085280 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1835,7 +1835,8 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, buf->start, buf->start + buf->len - 1, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; - btrfs_set_buffer_defrag(buf); + if (!btrfs_test_opt(root, SSD)) + btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; } diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 5c58630dce03..5935cbd8f2b8 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -179,6 +179,9 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (root->ref_cows == 0 && !is_extent) goto out; + if (btrfs_test_opt(root, SSD)) + goto out; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; From d7fc640e6fed46932f7c74e14f9b58b8637c66cf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 18 Feb 2008 12:12:38 -0500 Subject: [PATCH 0430/2985] Btrfs: Allocator improvements Reduce CPU time searching for free blocks by optimizing find_first_extent_bit Fix find_free_extent to make better use of the last_alloc hint. Before it was often finding blocks just before the hint. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 34 ++++++++++++++++++++++++---------- fs/btrfs/extent_io.c | 38 ++++++++++++++++++++++++++++++++++---- fs/btrfs/extent_io.h | 11 +++++++++++ 4 files changed, 70 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6f0825b617d1..88e21bdbc478 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -951,7 +951,7 @@ void btrfs_throttle(struct btrfs_root *root) void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { balance_dirty_pages_ratelimited_nr( - root->fs_info->btree_inode->i_mapping, 1); + root->fs_info->btree_inode->i_mapping, 1); } void btrfs_set_buffer_defrag(struct extent_buffer *buf) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index af5d4a085280..239e9d8669cb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -188,9 +188,10 @@ static u64 noinline find_search_start(struct btrfs_root *root, { int ret; struct btrfs_block_group_cache *cache = *cache_ret; + struct extent_io_tree *free_space_cache; + struct extent_state *state; u64 last; u64 start = 0; - u64 end = 0; u64 cache_miss = 0; u64 total_fs_bytes; int wrapped = 0; @@ -199,6 +200,8 @@ static u64 noinline find_search_start(struct btrfs_root *root, goto out; } total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); + free_space_cache = &root->fs_info->free_space_cache; + again: ret = cache_block_group(root, cache); if (ret) @@ -206,22 +209,27 @@ again: last = max(search_start, cache->key.objectid); + spin_lock_irq(&free_space_cache->lock); + state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY); while(1) { - ret = find_first_extent_bit(&root->fs_info->free_space_cache, - last, &start, &end, EXTENT_DIRTY); - if (ret) { + if (!state) { if (!cache_miss) cache_miss = last; + spin_unlock_irq(&free_space_cache->lock); goto new_group; } - start = max(last, start); - last = end + 1; + start = max(last, state->start); + last = state->end + 1; if (last - start < num) { if (last == cache->key.objectid + cache->key.offset) cache_miss = start; + do { + state = extent_state_next(state); + } while(state && !(state->state & EXTENT_DIRTY)); continue; } + spin_unlock_irq(&free_space_cache->lock); if (data != BTRFS_BLOCK_GROUP_MIXED && start + num > cache->key.objectid + cache->key.offset) goto new_group; @@ -1420,6 +1428,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; + int empty_cluster; u64 cached_start; WARN_ON(num_bytes < root->sectorsize); @@ -1431,11 +1440,15 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_MIXED; } - if (!data) + if (!data) { last_ptr = &root->fs_info->last_alloc; + empty_cluster = 128 * 1024; + } - if (data && btrfs_test_opt(root, SSD)) + if (data && btrfs_test_opt(root, SSD)) { last_ptr = &root->fs_info->last_data_alloc; + empty_cluster = 2 * 1024 * 1024; + } if (last_ptr) { if (*last_ptr) @@ -1443,8 +1456,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, else { hint_byte = hint_byte & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); - empty_size += 2 * 1024 * 1024; + empty_size += empty_cluster; } + search_start = max(search_start, hint_byte); } search_end = min(search_end, @@ -1476,7 +1490,7 @@ check_failed: if (last_ptr && *last_ptr && search_start != *last_ptr) { *last_ptr = 0; if (!empty_size) { - empty_size += 2 * 1024 * 1024; + empty_size += empty_cluster; total_needed += empty_size; } search_start = find_search_start(root, &block_group, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9262ab37a7cd..fb6400895ed6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -255,7 +255,7 @@ static int merge_state(struct extent_io_tree *tree, state->start = other->start; other->tree = NULL; if (tree->last == other) - tree->last = NULL; + tree->last = state; rb_erase(&other->rb_node, &tree->state); free_extent_state(other); } @@ -268,7 +268,7 @@ static int merge_state(struct extent_io_tree *tree, other->start = state->start; state->tree = NULL; if (tree->last == state) - tree->last = NULL; + tree->last = other; rb_erase(&state->rb_node, &tree->state); free_extent_state(state); } @@ -397,8 +397,9 @@ static int clear_state_bit(struct extent_io_tree *tree, if (delete || state->state == 0) { if (state->tree) { clear_state_cb(tree, state, state->state); - if (tree->last == state) - tree->last = NULL; + if (tree->last == state) { + tree->last = extent_state_next(state); + } rb_erase(&state->rb_node, &tree->state); state->tree = NULL; free_extent_state(state); @@ -962,6 +963,35 @@ out: } EXPORT_SYMBOL(find_first_extent_bit); +struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, + u64 start, int bits) +{ + struct rb_node *node; + struct extent_state *state; + + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(tree, start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->end >= start && (state->state & bits)) { + return state; + } + node = rb_next(node); + if (!node) + break; + } +out: + return NULL; +} +EXPORT_SYMBOL(find_first_extent_bit_state); + u64 find_lock_delalloc_range(struct extent_io_tree *tree, u64 *start, u64 *end, u64 max_bytes) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 151fdada4dc4..fcc4bb078c24 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -81,6 +81,15 @@ struct extent_buffer { struct extent_map_tree; +static inline struct extent_state *extent_state_next(struct extent_state *state) +{ + struct rb_node *node; + node = rb_next(&state->rb_node); + if (!node) + return NULL; + return rb_entry(node, struct extent_state, rb_node); +} + typedef struct extent_map *(get_extent_t)(struct inode *inode, struct page *page, size_t page_offset, @@ -122,6 +131,8 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, int bits); +struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, + u64 start, int bits); int extent_invalidatepage(struct extent_io_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_io_tree *tree, struct page *page, From 952fccac50350481742425cac0c80f36ba8b83f2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 18 Feb 2008 16:33:44 -0500 Subject: [PATCH 0431/2985] Btrfs: Remove extent back refs in batches, and avoid duplicate searches Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 63 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 239e9d8669cb..8f441783a6ed 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1251,6 +1251,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_root *extent_root = info->extent_root; struct extent_buffer *leaf; int ret; + int extent_slot = 0; + int found_extent = 0; + int num_to_del = 1; struct btrfs_extent_item *ei; u32 refs; @@ -1267,7 +1270,24 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ref_generation, owner_objectid, owner_offset, 1); if (ret == 0) { - ret = btrfs_del_item(trans, extent_root, path); + struct btrfs_key found_key; + extent_slot = path->slots[0]; + while(extent_slot > 0) { + extent_slot--; + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + extent_slot); + if (found_key.objectid != bytenr) + break; + if (found_key.type == BTRFS_EXTENT_ITEM_KEY && + found_key.offset == num_bytes) { + found_extent = 1; + break; + } + if (path->slots[0] - extent_slot > 5) + break; + } + if (!found_extent) + ret = btrfs_del_item(trans, extent_root, path); } else { btrfs_print_leaf(extent_root, path->nodes[0]); WARN_ON(1); @@ -1276,21 +1296,46 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root root_objectid, ref_generation, owner_objectid, owner_offset); } - btrfs_release_path(extent_root, path); - ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); - if (ret < 0) - return ret; - BUG_ON(ret); + if (!found_extent) { + btrfs_release_path(extent_root, path); + ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); + if (ret < 0) + return ret; + BUG_ON(ret); + extent_slot = path->slots[0]; + } leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, path->slots[0], + ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, ei); BUG_ON(refs == 0); refs -= 1; btrfs_set_extent_refs(leaf, ei, refs); + btrfs_mark_buffer_dirty(leaf); + if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { + /* if the back ref and the extent are next to each other + * they get deleted below in one shot + */ + path->slots[0] = extent_slot; + num_to_del = 2; + } else if (found_extent) { + /* otherwise delete the extent back ref */ + ret = btrfs_del_item(trans, extent_root, path); + BUG_ON(ret); + /* if refs are 0, we need to setup the path for deletion */ + if (refs == 0) { + btrfs_release_path(extent_root, path); + ret = btrfs_search_slot(trans, extent_root, &key, path, + -1, 1); + if (ret < 0) + return ret; + BUG_ON(ret); + } + } + if (refs == 0) { u64 super_used; u64 root_used; @@ -1311,8 +1356,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root root_used = btrfs_root_used(&root->root_item); btrfs_set_root_used(&root->root_item, root_used - num_bytes); - - ret = btrfs_del_item(trans, extent_root, path); + ret = btrfs_del_items(trans, extent_root, path, path->slots[0], + num_to_del); if (ret) { return ret; } From 0762704b196d41941a9b439e9165efaf85c6609e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 19 Feb 2008 11:29:24 -0500 Subject: [PATCH 0432/2985] Btrfs: Properly clear dirty and delalloc extent bits while preparing the file for write Yan Zheng noticed that we don't clear the extent state tree dirty and delalloc bits when we clear the dirty bits on the page during file write. This leads to csum errors later on. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b0352b5958a5..1a47251a9d4d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -762,6 +762,13 @@ static int prepare_pages(struct btrfs_root *root, struct file *file, set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); } + if (start_pos < inode->i_size) { + u64 last_pos; + last_pos = (index + num_pages) << PAGE_CACHE_SHIFT; + clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, + last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, + GFP_NOFS); + } return 0; } From 5e591a0703bc03047d37938586f97fd8ca960c08 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Tue, 19 Feb 2008 11:41:02 -0500 Subject: [PATCH 0433/2985] Btrfs: Fix looping on readdir of the subvol roots Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 913ab128eee1..3e79572a5cb1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1465,7 +1465,10 @@ read_dir_items: di = (struct btrfs_dir_item *)((char *)di + di_len); } } - filp->f_pos = INT_LIMIT(typeof(filp->f_pos)); + if (key_type == BTRFS_DIR_INDEX_KEY) + filp->f_pos = INT_LIMIT(typeof(filp->f_pos)); + else + filp->f_pos++; nopos: ret = 0; err: From d99cb30a11ea117ad0f58cda09aa43438ebbf81c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 19 Feb 2008 12:55:05 -0500 Subject: [PATCH 0434/2985] Btrfs: Take the extent lock before dropping the delalloc bits Signed-off-by: Chris Mason --- fs/btrfs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1a47251a9d4d..51466dcd44e0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -765,9 +765,13 @@ static int prepare_pages(struct btrfs_root *root, struct file *file, if (start_pos < inode->i_size) { u64 last_pos; last_pos = (index + num_pages) << PAGE_CACHE_SHIFT; + lock_extent(&BTRFS_I(inode)->io_tree, + start_pos, last_pos - 1, GFP_NOFS); clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, GFP_NOFS); + unlock_extent(&BTRFS_I(inode)->io_tree, + start_pos, last_pos - 1, GFP_NOFS); } return 0; } From 0740c82bc7585a23e100c9a59d5e2abbf99459fb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 19 Feb 2008 16:24:18 -0500 Subject: [PATCH 0435/2985] Btrfs: Properly cast before shifting Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 51466dcd44e0..5fa450452f9b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -764,7 +764,7 @@ static int prepare_pages(struct btrfs_root *root, struct file *file, } if (start_pos < inode->i_size) { u64 last_pos; - last_pos = (index + num_pages) << PAGE_CACHE_SHIFT; + last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; lock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, From 065631f6dccea07bfad48d8981369f6d9cfd6e2b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 20 Feb 2008 12:07:25 -0500 Subject: [PATCH 0436/2985] Btrfs: checksum file data at bio submission time instead of during writepage When we checkum file data during writepage, the checksumming is done one page at a time, making it difficult to do bulk metadata modifications to insert checksums for large ranges of the file at once. This patch changes btrfs to checksum on a per-bio basis instead. The bios are checksummed before they are handed off to the block layer, so each bio is contiguous and only has pages from the same inode. Checksumming on a bio basis allows us to insert and modify the file checksum items in large groups. It also allows the checksumming to be done more easily by async worker threads. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 8 +++---- fs/btrfs/extent_io.c | 2 ++ fs/btrfs/extent_io.h | 1 + fs/btrfs/file-item.c | 50 ++++++++++++++++++++++++++++++++++---------- fs/btrfs/inode.c | 32 ++++++++++++++++++++++++++-- 5 files changed, 75 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 086cb0525cd1..92d892f92075 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1142,11 +1142,9 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, u64 bytenr, int mod); -int btrfs_csum_file_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u64 objectid, u64 offset, - char *data, size_t len); +int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + struct bio *bio); struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fb6400895ed6..e8130c876330 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1705,6 +1705,8 @@ static int submit_one_bio(int rw, struct bio *bio) (unsigned long long)bio->bi_sector); WARN_ON(1); } + if (tree->ops && tree->ops->submit_bio_hook) + tree->ops->submit_bio_hook(rw, bio); submit_bio(rw, bio); if (bio_flagged(bio, BIO_EOPNOTSUPP)) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index fcc4bb078c24..9d6654667089 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -28,6 +28,7 @@ struct extent_state; struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); + int (*submit_bio_hook)(int rw, struct bio *bio); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 3ebbc058d082..3f0e71b0e5d9 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -16,6 +16,9 @@ * Boston, MA 021110-1307, USA. */ +#include +#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -131,28 +134,35 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } -int btrfs_csum_file_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u64 objectid, u64 offset, - char *data, size_t len) +int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + struct bio *bio) { + u64 objectid = inode->i_ino; + u64 offset; int ret; struct btrfs_key file_key; struct btrfs_key found_key; - u64 next_offset = (u64)-1; - int found_next = 0; + u64 next_offset; + int found_next; struct btrfs_path *path; struct btrfs_csum_item *item; + struct btrfs_csum_item *item_end; struct extent_buffer *leaf = NULL; u64 csum_offset; - u32 csum_result = ~(u32)0; + u32 csum_result; u32 nritems; u32 ins_size; + int bio_index = 0; + struct bio_vec *bvec = bio->bi_io_vec; + char *data; path = btrfs_alloc_path(); BUG_ON(!path); - +again: + next_offset = (u64)-1; + found_next = 0; + offset = page_offset(bvec->bv_page) + bvec->bv_offset; file_key.objectid = objectid; file_key.offset = offset; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); @@ -259,7 +269,15 @@ csum: item = (struct btrfs_csum_item *)((unsigned char *)item + csum_offset * BTRFS_CRC32_SIZE); found: - csum_result = btrfs_csum_data(root, data, csum_result, len); + item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); + item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + + btrfs_item_size_nr(leaf, path->slots[0])); +next_bvec: + data = kmap_atomic(bvec->bv_page, KM_IRQ0); + csum_result = ~(u32)0; + csum_result = btrfs_csum_data(root, data + bvec->bv_offset, + csum_result, bvec->bv_len); + kunmap_atomic(data, KM_IRQ0); btrfs_csum_final(csum_result, (char *)&csum_result); if (csum_result == 0) { printk("csum result is 0 for inode %lu offset %Lu\n", inode->i_ino, offset); @@ -267,9 +285,19 @@ found: write_extent_buffer(leaf, &csum_result, (unsigned long)item, BTRFS_CRC32_SIZE); + bio_index++; + bvec++; + if (bio_index < bio->bi_vcnt) { + item = (struct btrfs_csum_item *)((char *)item + BTRFS_CRC32_SIZE); + if (item < item_end) + goto next_bvec; + } btrfs_mark_buffer_dirty(path->nodes[0]); + if (bio_index < bio->bi_vcnt) { + btrfs_release_path(root, path); + goto again; + } fail: - btrfs_release_path(root, path); btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3e79572a5cb1..bbb71fad861a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include @@ -294,6 +295,32 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, return 0; } +int btrfs_submit_bio_hook(int rw, struct bio *bio) +{ + // struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct bio_vec *bvec = bio->bi_io_vec; + struct inode *inode = bvec->bv_page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + int ret = 0; + + if (rw != WRITE) + return 0; + + if (btrfs_test_opt(root, NODATASUM) || + btrfs_test_flag(inode, NODATASUM)) + return 0; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + btrfs_csum_file_blocks(trans, root, inode, bio); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} +#if 0 int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) { struct inode *inode = page->mapping->host; @@ -318,7 +345,7 @@ int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) mutex_unlock(&root->fs_info->fs_mutex); return ret; } - +#endif int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) { int ret = 0; @@ -3022,7 +3049,8 @@ static struct file_operations btrfs_dir_file_operations = { static struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, - .writepage_io_hook = btrfs_writepage_io_hook, + // .writepage_io_hook = btrfs_writepage_io_hook, + .submit_bio_hook = btrfs_submit_bio_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, .set_bit_hook = btrfs_set_bit_hook, From 6e92f5e651a34f24ab31ebdf3f113c7d23a36000 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 20 Feb 2008 12:07:25 -0500 Subject: [PATCH 0437/2985] Btrfs: While doing checksums on bios, cache the extent_buffer mapping Signed-off-by: Chris Mason --- fs/btrfs/file-item.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 3f0e71b0e5d9..ee25e50e7c04 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -156,6 +156,11 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, int bio_index = 0; struct bio_vec *bvec = bio->bi_io_vec; char *data; + char *eb_map; + char *eb_token; + unsigned long map_len; + unsigned long map_start; + path = btrfs_alloc_path(); BUG_ON(!path); @@ -272,6 +277,7 @@ found: item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + btrfs_item_size_nr(leaf, path->slots[0])); + eb_token = NULL; next_bvec: data = kmap_atomic(bvec->bv_page, KM_IRQ0); csum_result = ~(u32)0; @@ -283,15 +289,39 @@ next_bvec: printk("csum result is 0 for inode %lu offset %Lu\n", inode->i_ino, offset); } - write_extent_buffer(leaf, &csum_result, (unsigned long)item, - BTRFS_CRC32_SIZE); + if (!eb_token || + (unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) { + int err; + + if (eb_token) + unmap_extent_buffer(leaf, eb_token, KM_IRQ1); + eb_token = NULL; + err = map_private_extent_buffer(leaf, (unsigned long)item, + BTRFS_CRC32_SIZE, + &eb_token, &eb_map, + &map_start, &map_len, KM_IRQ1); + if (err) + eb_token = NULL; + } + if (eb_token) { + memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), + &csum_result, BTRFS_CRC32_SIZE); + } else { + write_extent_buffer(leaf, &csum_result, (unsigned long)item, + BTRFS_CRC32_SIZE); + } bio_index++; bvec++; if (bio_index < bio->bi_vcnt) { - item = (struct btrfs_csum_item *)((char *)item + BTRFS_CRC32_SIZE); + item = (struct btrfs_csum_item *)((char *)item + + BTRFS_CRC32_SIZE); if (item < item_end) goto next_bvec; } + if (eb_token) { + unmap_extent_buffer(leaf, eb_token, KM_IRQ1); + eb_token = NULL; + } btrfs_mark_buffer_dirty(path->nodes[0]); if (bio_index < bio->bi_vcnt) { btrfs_release_path(root, path); From e3fe4e7120bc753552b071773022efcff704e34b Mon Sep 17 00:00:00 2001 From: Greg KH Date: Wed, 20 Feb 2008 14:14:16 -0500 Subject: [PATCH 0438/2985] btrfs: fixes for kobject changes in mainline Here's a patch against the unstable tree that gets the code to build against Linus's current tree (2.6.24-git12). This is needed as the kobject/kset api has changed there. I tried to make the smallest changes needed, and it builds and loads successfully, but I don't have a btrfs volume anywhere (yet) to try to see if things still work properly :) Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chris Mason --- fs/btrfs/sysfs.c | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index cd673ca9bb98..973d56e05698 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -184,7 +184,8 @@ static struct kobj_type btrfs_super_ktype = { .release = btrfs_super_release, }; -static struct kset btrfs_kset; +/* /sys/fs/btrfs/ entry */ +static struct kset *btrfs_kset; int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) { @@ -208,14 +209,9 @@ int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) } name[len] = '\0'; - fs->super_kobj.kset = &btrfs_kset; - fs->super_kobj.ktype = &btrfs_super_ktype; - - error = kobject_set_name(&fs->super_kobj, "%s", name); - if (error) - goto fail; - - error = kobject_register(&fs->super_kobj); + fs->super_kobj.kset = btrfs_kset; + error = kobject_init_and_add(&fs->super_kobj, &btrfs_super_ktype, + NULL, "%s", name); if (error) goto fail; @@ -232,15 +228,9 @@ int btrfs_sysfs_add_root(struct btrfs_root *root) { int error; - root->root_kobj.ktype = &btrfs_root_ktype; - root->root_kobj.parent = &root->fs_info->super_kobj; - - error = kobject_set_name(&root->root_kobj, "%s", root->name); - if (error) { - goto fail; - } - - error = kobject_register(&root->root_kobj); + error = kobject_init_and_add(&root->root_kobj, &btrfs_root_ktype, + &root->fs_info->super_kobj, + "%s", root->name); if (error) goto fail; @@ -253,24 +243,25 @@ fail: void btrfs_sysfs_del_root(struct btrfs_root *root) { - kobject_unregister(&root->root_kobj); + kobject_put(&root->root_kobj); wait_for_completion(&root->kobj_unregister); } void btrfs_sysfs_del_super(struct btrfs_fs_info *fs) { - kobject_unregister(&fs->super_kobj); + kobject_put(&fs->super_kobj); wait_for_completion(&fs->kobj_unregister); } int btrfs_init_sysfs() { - kobj_set_kset_s(&btrfs_kset, fs_subsys); - kobject_set_name(&btrfs_kset.kobj, "btrfs"); - return kset_register(&btrfs_kset); + btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); + if (!btrfs_kset) + return -ENOMEM; + return 0; } void btrfs_exit_sysfs() { - kset_unregister(&btrfs_kset); + kset_unregister(btrfs_kset); } From 6885f308b557020efce6ef9c1fcc96adaabecbb6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 20 Feb 2008 16:11:05 -0500 Subject: [PATCH 0439/2985] Btrfs: Misc 2.6.25 updates Remove the btrfs read_inode method, and use save_mount_options Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 29 +---------------------------- fs/btrfs/super.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 29 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bbb71fad861a..315dd550d560 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -297,7 +297,6 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, int btrfs_submit_bio_hook(int rw, struct bio *bio) { - // struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec = bio->bi_io_vec; struct inode *inode = bvec->bv_page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -320,32 +319,7 @@ int btrfs_submit_bio_hook(int rw, struct bio *bio) mutex_unlock(&root->fs_info->fs_mutex); return ret; } -#if 0 -int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) -{ - struct inode *inode = page->mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; - char *kaddr; - int ret = 0; - u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; - size_t offset = start - page_start; - if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) - return 0; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - kaddr = kmap(page); - btrfs_csum_file_block(trans, root, inode, inode->i_ino, - start, kaddr + offset, end - start + 1); - kunmap(page); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); - return ret; -} -#endif + int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) { int ret = 0; @@ -3049,7 +3023,6 @@ static struct file_operations btrfs_dir_file_operations = { static struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, - // .writepage_io_hook = btrfs_writepage_io_hook, .submit_bio_hook = btrfs_submit_bio_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a46300c4753a..4423a91206a1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -272,6 +272,11 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) sb->s_root = root_dentry; btrfs_transaction_queue_work(tree_root, HZ * 30); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) + save_mount_options(sb, data); +#endif + return 0; fail_close: @@ -462,9 +467,13 @@ static struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, .put_inode = btrfs_put_inode, .put_super = btrfs_put_super, - .read_inode = btrfs_read_locked_inode, .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) + .read_inode = btrfs_read_locked_inode, +#else + .show_options = generic_show_options, +#endif .write_inode = btrfs_write_inode, .dirty_inode = btrfs_dirty_inode, .alloc_inode = btrfs_alloc_inode, From 2e1a992e3103624af48f1501aaad4e4d8317f88c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 20 Feb 2008 15:44:32 -0500 Subject: [PATCH 0440/2985] Btrfs: Make sure bio pages are adjacent during bulk csumming Signed-off-by: Chris Mason --- fs/btrfs/file-item.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ee25e50e7c04..e17cce33fe95 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -161,7 +161,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, unsigned long map_len; unsigned long map_start; - path = btrfs_alloc_path(); BUG_ON(!path); again: @@ -315,8 +314,11 @@ next_bvec: if (bio_index < bio->bi_vcnt) { item = (struct btrfs_csum_item *)((char *)item + BTRFS_CRC32_SIZE); - if (item < item_end) + if (item < item_end && offset + PAGE_CACHE_SIZE == + page_offset(bvec->bv_page)) { + offset = page_offset(bvec->bv_page); goto next_bvec; + } } if (eb_token) { unmap_extent_buffer(leaf, eb_token, KM_IRQ1); From 55d474148982785dd8995dc690febad3a16a2c46 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 20 Feb 2008 16:02:51 -0500 Subject: [PATCH 0441/2985] Btrfs: Disable sysfs files on older kernels Signed-off-by: Chris Mason --- fs/btrfs/sysfs.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 973d56e05698..1cf4f4f8d2ce 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -28,6 +28,7 @@ #include "disk-io.h" #include "transaction.h" +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", @@ -265,3 +266,36 @@ void btrfs_exit_sysfs() { kset_unregister(btrfs_kset); } + +#else + +int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) +{ + return 0; +} + +int btrfs_sysfs_add_root(struct btrfs_root *root) +{ + return 0; +} + +void btrfs_sysfs_del_root(struct btrfs_root *root) +{ + return; +} + +void btrfs_sysfs_del_super(struct btrfs_fs_info *fs) +{ + return; +} + +int btrfs_init_sysfs() +{ + return 0; +} + +void btrfs_exit_sysfs() +{ + return; +} +#endif From eb20978f318ab5e360ef9c1b24b5dea14d0fee6a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 21 Feb 2008 09:30:08 -0500 Subject: [PATCH 0442/2985] Btrfs: Use KM_USERN instead of KM_IRQ during data summing Signed-off-by: Chris Mason --- fs/btrfs/file-item.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index e17cce33fe95..374d0245b29d 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -278,11 +278,11 @@ found: btrfs_item_size_nr(leaf, path->slots[0])); eb_token = NULL; next_bvec: - data = kmap_atomic(bvec->bv_page, KM_IRQ0); + data = kmap_atomic(bvec->bv_page, KM_USER0); csum_result = ~(u32)0; csum_result = btrfs_csum_data(root, data + bvec->bv_offset, csum_result, bvec->bv_len); - kunmap_atomic(data, KM_IRQ0); + kunmap_atomic(data, KM_USER0); btrfs_csum_final(csum_result, (char *)&csum_result); if (csum_result == 0) { printk("csum result is 0 for inode %lu offset %Lu\n", inode->i_ino, offset); @@ -293,12 +293,12 @@ next_bvec: int err; if (eb_token) - unmap_extent_buffer(leaf, eb_token, KM_IRQ1); + unmap_extent_buffer(leaf, eb_token, KM_USER1); eb_token = NULL; err = map_private_extent_buffer(leaf, (unsigned long)item, BTRFS_CRC32_SIZE, &eb_token, &eb_map, - &map_start, &map_len, KM_IRQ1); + &map_start, &map_len, KM_USER1); if (err) eb_token = NULL; } @@ -321,7 +321,7 @@ next_bvec: } } if (eb_token) { - unmap_extent_buffer(leaf, eb_token, KM_IRQ1); + unmap_extent_buffer(leaf, eb_token, KM_USER1); eb_token = NULL; } btrfs_mark_buffer_dirty(path->nodes[0]); From 7f93bf8d27653726e3721c01fefc523487ecf2af Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:01:28 -0400 Subject: [PATCH 0443/2985] Match the extent tree code to btrfs-progs for multi-device merging Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 59 +++++------------------------------------- 1 file changed, 7 insertions(+), 52 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8f441783a6ed..ebfd304138ca 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -209,27 +209,22 @@ again: last = max(search_start, cache->key.objectid); - spin_lock_irq(&free_space_cache->lock); - state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY); while(1) { - if (!state) { + ret = find_first_extent_bit(&root->fs_info->free_space_cache, + last, &start, &end, EXTENT_DIRTY); + if (ret) { if (!cache_miss) cache_miss = last; - spin_unlock_irq(&free_space_cache->lock); goto new_group; } - start = max(last, state->start); - last = state->end + 1; + start = max(last, start); + last = end + 1; if (last - start < num) { if (last == cache->key.objectid + cache->key.offset) cache_miss = start; - do { - state = extent_state_next(state); - } while(state && !(state->state & EXTENT_DIRTY)); continue; } - spin_unlock_irq(&free_space_cache->lock); if (data != BTRFS_BLOCK_GROUP_MIXED && start + num > cache->key.objectid + cache->key.offset) goto new_group; @@ -1485,29 +1480,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_MIXED; } - if (!data) { - last_ptr = &root->fs_info->last_alloc; - empty_cluster = 128 * 1024; - } - - if (data && btrfs_test_opt(root, SSD)) { - last_ptr = &root->fs_info->last_data_alloc; - empty_cluster = 2 * 1024 * 1024; - } - - if (last_ptr) { - if (*last_ptr) - hint_byte = *last_ptr; - else { - hint_byte = hint_byte & - ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); - empty_size += empty_cluster; - } - search_start = max(search_start, hint_byte); - } - - search_end = min(search_end, - btrfs_super_total_bytes(&info->super_copy)); + if (search_end == (u64)-1) + search_end = btrfs_super_total_bytes(&info->super_copy); if (hint_byte) { block_group = btrfs_lookup_block_group(info, hint_byte); if (!block_group) @@ -1531,18 +1505,6 @@ check_failed: } search_start = find_search_start(root, &block_group, search_start, total_needed, data); - - if (last_ptr && *last_ptr && search_start != *last_ptr) { - *last_ptr = 0; - if (!empty_size) { - empty_size += empty_cluster; - total_needed += empty_size; - } - search_start = find_search_start(root, &block_group, - search_start, total_needed, - data); - } - search_start = stripe_align(root, search_start); cached_start = search_start; btrfs_init_path(path); @@ -1670,13 +1632,6 @@ check_pending: } ins->offset = num_bytes; btrfs_free_path(path); - if (last_ptr) { - *last_ptr = ins->objectid + ins->offset; - if (*last_ptr == - btrfs_super_total_bytes(&root->fs_info->super_copy)) { - *last_ptr = 0; - } - } return 0; new_group: From 0b86a832a1f38abec695864ec2eaedc9d2383f1b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:01:56 -0400 Subject: [PATCH 0444/2985] Btrfs: Add support for multiple devices per filesystem Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 38 +- fs/btrfs/ctree.h | 313 +++++++++++++-- fs/btrfs/disk-io.c | 97 ++++- fs/btrfs/disk-io.h | 2 + fs/btrfs/extent-tree.c | 439 ++++++--------------- fs/btrfs/extent_io.c | 6 +- fs/btrfs/extent_io.h | 2 +- fs/btrfs/inode.c | 26 +- fs/btrfs/print-tree.c | 48 +++ fs/btrfs/transaction.c | 54 ++- fs/btrfs/volumes.c | 852 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.h | 78 ++++ 13 files changed, 1570 insertions(+), 387 deletions(-) create mode 100644 fs/btrfs/volumes.c create mode 100644 fs/btrfs/volumes.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9c61609f3714..5ac6f2946c21 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,7 +6,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ - extent_io.o + extent_io.o volumes.o ifeq ($(CONFIG_FS_POSIX_ACL),y) btrfs-y += acl.o diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ef8809c1e69c..70b6ddfe15a1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -70,6 +70,14 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } +static void add_root_to_dirty_list(struct btrfs_root *root) +{ + if (root->track_dirty && list_empty(&root->dirty_list)) { + list_add(&root->dirty_list, + &root->fs_info->dirty_cowonly_roots); + } +} + int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -196,6 +204,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, root_gen, 0, 0, 1); } free_extent_buffer(buf); + add_root_to_dirty_list(root); } else { root_gen = btrfs_header_generation(parent); btrfs_set_node_blockptr(parent, parent_slot, @@ -241,7 +250,7 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } - search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); + search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); return ret; @@ -724,6 +733,7 @@ static int balance_level(struct btrfs_trans_handle *trans, BUG_ON(ret); root->node = child; + add_root_to_dirty_list(root); path->nodes[level] = NULL; clean_tree_block(trans, root, mid); wait_on_tree_block_writeback(root, mid); @@ -1369,6 +1379,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, /* the super has an extra ref to root->node */ free_extent_buffer(root->node); root->node = c; + add_root_to_dirty_list(root); extent_buffer_get(c); path->nodes[level] = c; path->slots[level] = 0; @@ -2777,3 +2788,28 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) } return 0; } + +int btrfs_previous_item(struct btrfs_root *root, + struct btrfs_path *path, u64 min_objectid, + int type) +{ + struct btrfs_key found_key; + struct extent_buffer *leaf; + int ret; + + while(1) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret != 0) + return ret; + } else { + path->slots[0]--; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.type == type) + return 0; + } + return 1; +} + diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 92d892f92075..1453d995fef6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -40,12 +40,44 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_B4RfS_M" #define BTRFS_MAX_LEVEL 8 + +/* holds pointers to all of the tree roots */ #define BTRFS_ROOT_TREE_OBJECTID 1ULL + +/* stores information about which extents are in use, and reference counts */ #define BTRFS_EXTENT_TREE_OBJECTID 2ULL + +/* one per subvolume, storing files and directories */ #define BTRFS_FS_TREE_OBJECTID 3ULL + +/* directory objectid inside the root tree */ #define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL + + +/* + * chunk tree stores translations from logical -> physical block numbering + * the super block points to the chunk tree + */ +#define BTRFS_CHUNK_TREE_OBJECTID 5ULL + +/* + * stores information about which areas of a given device are in use. + * one per device. The tree of tree roots points to the device tree + */ +#define BTRFS_DEV_TREE_OBJECTID 6ULL + +/* + * All files have objectids higher than this. + */ #define BTRFS_FIRST_FREE_OBJECTID 256ULL + +/* + * the device items go into the chunk tree. The key is in the form + * [ 1 BTRFS_DEV_ITEM_KEY device_id ] + */ +#define BTRFS_DEV_ITEMS_OBJECTID 1ULL + /* * we can actually store much bigger names, but lets not confuse the rest * of linux @@ -95,6 +127,81 @@ struct btrfs_key { u64 offset; } __attribute__ ((__packed__)); +struct btrfs_mapping_tree { + struct extent_map_tree map_tree; +}; + +#define BTRFS_DEV_UUID_SIZE 16 +struct btrfs_dev_item { + /* the internal btrfs device id */ + __le64 devid; + + /* size of the device */ + __le64 total_bytes; + + /* bytes used */ + __le64 bytes_used; + + /* optimal io alignment for this device */ + __le32 io_align; + + /* optimal io width for this device */ + __le32 io_width; + + /* minimal io size for this device */ + __le32 sector_size; + + /* the kernel device number */ + __le64 rdev; + + /* type and info about this device */ + __le64 type; + + /* partition number, 0 for whole dev */ + __le32 partition; + + /* length of the name data at the end of the item */ + __le16 name_len; + + /* physical drive uuid (or lvm uuid) */ + u8 uuid[BTRFS_DEV_UUID_SIZE]; + /* name goes here */ +} __attribute__ ((__packed__)); + +struct btrfs_stripe { + __le64 devid; + __le64 offset; +} __attribute__ ((__packed__)); + +struct btrfs_chunk { + __le64 owner; + __le64 stripe_len; + __le64 type; + + /* optimal io alignment for this chunk */ + __le32 io_align; + + /* optimal io width for this chunk */ + __le32 io_width; + + /* minimal io size for this chunk */ + __le32 sector_size; + + /* 2^16 stripes is quite a lot, a second limit is the size of a single + * item in the btree + */ + __le16 num_stripes; + struct btrfs_stripe stripe; + /* additional stripes go here */ +} __attribute__ ((__packed__)); + +static inline unsigned long btrfs_chunk_item_size(int num_stripes) +{ + BUG_ON(num_stripes == 0); + return sizeof(struct btrfs_chunk) + + sizeof(struct btrfs_stripe) * (num_stripes - 1); +} + #define BTRFS_FSID_SIZE 16 /* * every tree block (leaf or node) starts with this header. @@ -119,6 +226,13 @@ struct btrfs_header { sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) + +/* + * this is a very generous portion of the super block, giving us + * room to translate 14 chunks with 3 stripes each. + */ +#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 + /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -131,6 +245,7 @@ struct btrfs_super_block { __le64 magic; __le64 generation; __le64 root; + __le64 chunk_root; __le64 total_bytes; __le64 bytes_used; __le64 root_dir_objectid; @@ -138,7 +253,10 @@ struct btrfs_super_block { __le32 nodesize; __le32 leafsize; __le32 stripesize; + __le32 sys_chunk_array_size; u8 root_level; + u8 chunk_root_level; + u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; } __attribute__ ((__packed__)); /* @@ -208,12 +326,22 @@ struct btrfs_extent_ref { __le64 offset; } __attribute__ ((__packed__)); +/* dev extents record free space on individual devices. The owner + * field points back to the chunk allocation mapping tree that allocated + * the extent + */ +struct btrfs_dev_extent { + __le64 owner; + __le64 length; +} __attribute__ ((__packed__)); + + struct btrfs_inode_ref { __le16 name_len; /* name goes here */ } __attribute__ ((__packed__)); -struct btrfs_inode_timespec { +struct btrfs_timespec { __le64 sec; __le32 nsec; } __attribute__ ((__packed__)); @@ -231,13 +359,13 @@ struct btrfs_inode_item { __le32 uid; __le32 gid; __le32 mode; - __le32 rdev; + __le64 rdev; __le16 flags; __le16 compat_flags; - struct btrfs_inode_timespec atime; - struct btrfs_inode_timespec ctime; - struct btrfs_inode_timespec mtime; - struct btrfs_inode_timespec otime; + struct btrfs_timespec atime; + struct btrfs_timespec ctime; + struct btrfs_timespec mtime; + struct btrfs_timespec otime; } __attribute__ ((__packed__)); struct btrfs_dir_item { @@ -290,29 +418,34 @@ struct btrfs_csum_item { u8 csum; } __attribute__ ((__packed__)); -/* tag for the radix tree of block groups in ram */ -#define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) +/* different types of block groups (and chunks) */ +#define BTRFS_BLOCK_GROUP_DATA (1 << 0) +#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) +#define BTRFS_BLOCK_GROUP_METADATA (1 << 2) -#define BTRFS_BLOCK_GROUP_DATA 1 -#define BTRFS_BLOCK_GROUP_MIXED 2 - struct btrfs_block_group_item { __le64 used; - u8 flags; + __le64 chunk_tree; + __le64 chunk_objectid; + __le64 flags; } __attribute__ ((__packed__)); struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; - int data; - int cached; u64 pinned; + u64 flags; + int cached; }; + +struct btrfs_device; struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *extent_root; struct btrfs_root *tree_root; + struct btrfs_root *chunk_root; + struct btrfs_root *dev_root; struct radix_tree_root fs_roots_radix; struct extent_io_tree free_space_cache; @@ -321,6 +454,9 @@ struct btrfs_fs_info { struct extent_io_tree pending_del; struct extent_io_tree extent_ins; + /* logical->physical extent mapping */ + struct btrfs_mapping_tree mapping_tree; + u64 generation; u64 last_trans_committed; unsigned long mount_opt; @@ -330,6 +466,7 @@ struct btrfs_fs_info { struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; struct extent_buffer *sb_buffer; + struct block_device *__bdev; struct super_block *sb; struct inode *btree_inode; spinlock_t hash_lock; @@ -350,12 +487,17 @@ struct btrfs_fs_info { unsigned long throttles; u64 total_pinned; + struct list_head dirty_cowonly_roots; + + struct list_head devices; + struct list_head *last_device; spinlock_t delalloc_lock; spinlock_t new_trans_lock; u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; }; + /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. @@ -387,14 +529,19 @@ struct btrfs_root { u64 highest_inode; u64 last_inode_alloc; int ref_cows; + int track_dirty; struct btrfs_key defrag_progress; int defrag_running; int defrag_level; char *name; int in_sysfs; + + /* the dirty list is only used by non-reference counted roots */ + struct list_head dirty_list; }; /* + * inode items have the data typically returned from stat and store other * info about object characteristics. There is one for every file and dir in * the FS @@ -439,6 +586,10 @@ struct btrfs_root { */ #define BTRFS_BLOCK_GROUP_ITEM_KEY 50 +#define BTRFS_DEV_EXTENT_KEY 75 +#define BTRFS_DEV_ITEM_KEY 76 +#define BTRFS_CHUNK_ITEM_KEY 77 + /* * string items are for debugging. They just store a short string of * data in the FS @@ -518,13 +669,104 @@ static inline void btrfs_set_##name(type *s, u##bits val) \ s->member = cpu_to_le##bits(val); \ } +BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64); +BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64); +BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64); +BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32); +BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); +BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); +BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_FUNCS(device_rdev, struct btrfs_dev_item, rdev, 64); +BTRFS_SETGET_FUNCS(device_partition, struct btrfs_dev_item, partition, 32); +BTRFS_SETGET_FUNCS(device_name_len, struct btrfs_dev_item, name_len, 16); + +static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) +{ + return (char *)d + offsetof(struct btrfs_dev_item, uuid); +} + +static inline char *btrfs_device_name(struct btrfs_dev_item *d) +{ + return (char *)(d + 1); +} + +BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); +BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32); +BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32); +BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32); +BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); +BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk, + stripe_len, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk, + io_align, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk, + io_width, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk, + sector_size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk, + num_stripes, 16); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64); + +static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c, + int nr) +{ + unsigned long offset = (unsigned long)c; + offset += offsetof(struct btrfs_chunk, stripe); + offset += nr * sizeof(struct btrfs_stripe); + return (struct btrfs_stripe *)offset; +} + +static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); +} + +static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr, + u64 val) +{ + btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val); +} + +static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); +} + +static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr, + u64 val) +{ + btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val); +} + /* struct btrfs_block_group_item */ BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, used, 64); BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, used, 64); -BTRFS_SETGET_FUNCS(disk_block_group_flags, struct btrfs_block_group_item, - flags, 8); +BTRFS_SETGET_STACK_FUNCS(block_group_chunk_tree, struct btrfs_block_group_item, + chunk_tree, 64); +BTRFS_SETGET_FUNCS(disk_block_group_chunk_tree, struct btrfs_block_group_item, + chunk_tree, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid, + struct btrfs_block_group_item, chunk_objectid, 64); +BTRFS_SETGET_FUNCS(disk_block_group_chunk_objecitd, + struct btrfs_block_group_item, chunk_objectid, 64); +BTRFS_SETGET_FUNCS(disk_block_group_flags, + struct btrfs_block_group_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_flags, + struct btrfs_block_group_item, flags, 64); /* struct btrfs_inode_ref */ BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); @@ -538,49 +780,53 @@ BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); -BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 32); +BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16); BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item, compat_flags, 16); -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_atime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, atime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_mtime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, mtime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_ctime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, ctime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_otime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, otime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_inode_timespec, sec, 64); -BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32); +BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); +BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); /* struct btrfs_extent_item */ BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); +/* struct btrfs_dev_extent */ +BTRFS_SETGET_FUNCS(dev_extent_owner, struct btrfs_dev_extent, owner, 64); +BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); + /* struct btrfs_extent_ref */ BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); @@ -846,8 +1092,14 @@ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, generation, 64); BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); +BTRFS_SETGET_STACK_FUNCS(super_sys_array_size, + struct btrfs_super_block, sys_chunk_array_size, 32); BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, + chunk_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, + chunk_root_level, 64); BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, total_bytes, 64); BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, @@ -1009,7 +1261,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytes_used, + u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 size); /* ctree.c */ +int btrfs_previous_item(struct btrfs_root *root, + struct btrfs_path *path, u64 min_objectid, + int type); int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 88e21bdbc478..8e37fa120cc8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,6 +28,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "volumes.h" #include "print-tree.h" #if 0 @@ -234,6 +235,19 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) return 0; } +static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 offset; + offset = bio->bi_sector << 9; + if (offset == BTRFS_SUPER_INFO_OFFSET) { + bio->bi_bdev = root->fs_info->sb->s_bdev; + submit_bio(rw, bio); + return 0; + } + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_io_tree *tree; @@ -345,6 +359,23 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) return ret; } +static int close_all_devices(struct btrfs_fs_info *fs_info) +{ + struct list_head *list; + struct list_head *next; + struct btrfs_device *device; + + list = &fs_info->devices; + while(!list_empty(list)) { + next = list->next; + list_del(next); + device = list_entry(next, struct btrfs_device, dev_list); + kfree(device->name); + kfree(device); + } + return 0; +} + struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -420,6 +451,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->leafsize = leafsize; root->stripesize = stripesize; root->ref_cows = 0; + root->track_dirty = 0; + root->fs_info = fs_info; root->objectid = objectid; root->last_trans = 0; @@ -427,6 +460,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->last_inode_alloc = 0; root->name = NULL; root->in_sysfs = 0; + + INIT_LIST_HEAD(&root->dirty_list); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -634,6 +669,10 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); + struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); int ret; int err = -EIO; struct btrfs_super_block *disk_super; @@ -657,6 +696,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->last_trans_committed = 0; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; + fs_info->chunk_root = chunk_root; + fs_info->dev_root = dev_root; + INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); + INIT_LIST_HEAD(&fs_info->devices); + btrfs_mapping_init(&fs_info->mapping_tree); + fs_info->last_device = &fs_info->devices; fs_info->sb = sb; fs_info->throttles = 0; fs_info->mount_opt = 0; @@ -714,12 +759,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) goto fail_iput; } #endif - __setup_root(512, 512, 512, 512, tree_root, + __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); fs_info->sb_buffer = read_tree_block(tree_root, BTRFS_SUPER_INFO_OFFSET, - 512); + 4096); if (!fs_info->sb_buffer) goto fail_iput; @@ -730,6 +775,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), BTRFS_FSID_SIZE); + disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; @@ -753,23 +799,47 @@ struct btrfs_root *open_ctree(struct super_block *sb) goto fail_sb_buffer; } + mutex_lock(&fs_info->fs_mutex); + ret = btrfs_read_sys_array(tree_root); + BUG_ON(ret); + + blocksize = btrfs_level_size(tree_root, + btrfs_super_chunk_root_level(disk_super)); + + __setup_root(nodesize, leafsize, sectorsize, stripesize, + chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); + + chunk_root->node = read_tree_block(chunk_root, + btrfs_super_chunk_root(disk_super), + blocksize); + BUG_ON(!chunk_root->node); + + ret = btrfs_read_chunk_tree(chunk_root); + BUG_ON(ret); + blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), blocksize); if (!tree_root->node) goto fail_sb_buffer; - mutex_lock(&fs_info->fs_mutex); ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); - if (ret) { - mutex_unlock(&fs_info->fs_mutex); + if (ret) goto fail_tree_root; - } + extent_root->track_dirty = 1; + + ret = find_and_setup_root(tree_root, fs_info, + BTRFS_DEV_TREE_OBJECTID, dev_root); + dev_root->track_dirty = 1; + + if (ret) + goto fail_extent_root; btrfs_read_block_groups(extent_root); @@ -777,7 +847,10 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_unlock(&fs_info->fs_mutex); return tree_root; +fail_extent_root: + free_extent_buffer(extent_root->node); fail_tree_root: + mutex_unlock(&fs_info->fs_mutex); free_extent_buffer(tree_root->node); fail_sb_buffer: free_extent_buffer(fs_info->sb_buffer); @@ -874,6 +947,12 @@ int close_ctree(struct btrfs_root *root) if (fs_info->tree_root->node) free_extent_buffer(fs_info->tree_root->node); + if (root->fs_info->chunk_root->node); + free_extent_buffer(root->fs_info->chunk_root->node); + + if (root->fs_info->dev_root->node); + free_extent_buffer(root->fs_info->dev_root->node); + free_extent_buffer(fs_info->sb_buffer); btrfs_free_block_groups(root->fs_info); @@ -901,8 +980,13 @@ int close_ctree(struct btrfs_root *root) kfree(hasher); } #endif + close_all_devices(fs_info); + btrfs_mapping_tree_free(&fs_info->mapping_tree); + kfree(fs_info->extent_root); kfree(fs_info->tree_root); + kfree(fs_info->chunk_root); + kfree(fs_info->dev_root); return 0; } @@ -1016,4 +1100,5 @@ int btrfs_read_buffer(struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, + .submit_bio_hook = btree_submit_bio_hook, }; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 828f3a2081b6..206cb48638f7 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -20,6 +20,7 @@ #define __DISKIO__ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) +struct btrfs_device; struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); @@ -65,4 +66,5 @@ int btrfs_read_buffer(struct extent_buffer *buf); u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); void btrfs_throttle(struct btrfs_root *root); +int btrfs_open_device(struct btrfs_device *dev); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ebfd304138ca..2cd957d6e8d8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -24,37 +24,19 @@ #include "disk-io.h" #include "print-tree.h" #include "transaction.h" +#include "volumes.h" -#define BLOCK_GROUP_DATA EXTENT_WRITEBACK +#define BLOCK_GROUP_DATA EXTENT_WRITEBACK #define BLOCK_GROUP_METADATA EXTENT_UPTODATE +#define BLOCK_GROUP_SYSTEM EXTENT_NEW + #define BLOCK_GROUP_DIRTY EXTENT_DIRTY static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int find_previous_extent(struct btrfs_root *root, - struct btrfs_path *path) -{ - struct btrfs_key found_key; - struct extent_buffer *leaf; - int ret; - while(1) { - if (path->slots[0] == 0) { - ret = btrfs_prev_leaf(root, path); - if (ret != 0) - return ret; - } else { - path->slots[0]--; - } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.type == BTRFS_EXTENT_ITEM_KEY) - return 0; - } - return 1; -} static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) @@ -91,7 +73,7 @@ static int cache_block_group(struct btrfs_root *root, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) return ret; - ret = find_previous_extent(root, path); + ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); if (ret < 0) return ret; if (ret == 0) { @@ -168,7 +150,8 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct block_group_cache = &info->block_group_cache; ret = find_first_extent_bit(block_group_cache, bytenr, &start, &end, - BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA); + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | + BLOCK_GROUP_SYSTEM); if (ret) { return NULL; } @@ -182,23 +165,38 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct return block_group; return NULL; } -static u64 noinline find_search_start(struct btrfs_root *root, + +static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) +{ + if ((bits & BLOCK_GROUP_DATA) && + (cache->flags & BTRFS_BLOCK_GROUP_DATA)) + return 1; + if ((bits & BLOCK_GROUP_METADATA) && + (cache->flags & BTRFS_BLOCK_GROUP_METADATA)) + return 1; + if ((bits & BLOCK_GROUP_SYSTEM) && + (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM)) + return 1; + return 0; +} + +static int noinline find_search_start(struct btrfs_root *root, struct btrfs_block_group_cache **cache_ret, - u64 search_start, int num, int data) + u64 *start_ret, int num, int data) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; struct extent_io_tree *free_space_cache; - struct extent_state *state; u64 last; u64 start = 0; + u64 end = 0; u64 cache_miss = 0; u64 total_fs_bytes; + u64 search_start = *start_ret; int wrapped = 0; - if (!cache) { + if (!cache) goto out; - } total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); free_space_cache = &root->fs_info->free_space_cache; @@ -208,6 +206,9 @@ again: goto out; last = max(search_start, cache->key.objectid); + if (!block_group_bits(cache, data)) { + goto new_group; + } while(1) { ret = find_first_extent_bit(&root->fs_info->free_space_cache, @@ -225,22 +226,20 @@ again: cache_miss = start; continue; } - if (data != BTRFS_BLOCK_GROUP_MIXED && - start + num > cache->key.objectid + cache->key.offset) + if (start + num > cache->key.objectid + cache->key.offset) goto new_group; if (start + num > total_fs_bytes) goto new_group; - return start; + *start_ret = start; + return 0; } out: cache = btrfs_lookup_block_group(root->fs_info, search_start); if (!cache) { - printk("Unable to find block group for %Lu\n", - search_start); + printk("Unable to find block group for %Lu\n", search_start); WARN_ON(1); - return search_start; } - return search_start; + return -ENOSPC; new_group: last = cache->key.objectid + cache->key.offset; @@ -251,7 +250,6 @@ no_cache: if (!wrapped) { wrapped = 1; last = search_start; - data = BTRFS_BLOCK_GROUP_MIXED; goto wrapped; } goto out; @@ -299,7 +297,6 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int ret; int full_search = 0; int factor = 8; - int data_swap = 0; block_group_cache = &info->block_group_cache; total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); @@ -307,19 +304,12 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (!owner) factor = 8; - if (data == BTRFS_BLOCK_GROUP_MIXED) { - bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; - factor = 10; - } else if (data) - bit = BLOCK_GROUP_DATA; - else - bit = BLOCK_GROUP_METADATA; + bit = data; if (search_start && search_start < total_fs_bytes) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_block_group(info, search_start); - if (shint && (shint->data == data || - shint->data == BTRFS_BLOCK_GROUP_MIXED)) { + if (shint && block_group_bits(shint, data)) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < div_factor(shint->key.offset, factor)) { @@ -327,8 +317,8 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } } } - if (hint && hint->key.objectid < total_fs_bytes && - (hint->data == data || hint->data == BTRFS_BLOCK_GROUP_MIXED)) { + if (hint && block_group_bits(hint, data) && + hint->key.objectid < total_fs_bytes) { used = btrfs_block_group_used(&hint->item); if (used + hint->pinned < div_factor(hint->key.offset, factor)) { @@ -379,12 +369,6 @@ again: full_search = 1; goto again; } - if (!data_swap) { - data_swap = 1; - bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; - last = search_start; - goto again; - } found: return found_group; } @@ -1002,7 +986,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, - int mark_free, int data) + int mark_free) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; @@ -1027,41 +1011,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { - if (cache->data != data && - old_val < (cache->key.offset >> 1)) { - int bit_to_clear; - int bit_to_set; - cache->data = data; - if (data) { - bit_to_clear = BLOCK_GROUP_METADATA; - bit_to_set = BLOCK_GROUP_DATA; - cache->item.flags &= - ~BTRFS_BLOCK_GROUP_MIXED; - cache->item.flags |= - BTRFS_BLOCK_GROUP_DATA; - } else { - bit_to_clear = BLOCK_GROUP_DATA; - bit_to_set = BLOCK_GROUP_METADATA; - cache->item.flags &= - ~BTRFS_BLOCK_GROUP_MIXED; - cache->item.flags &= - ~BTRFS_BLOCK_GROUP_DATA; - } - clear_extent_bits(&info->block_group_cache, - start, end, bit_to_clear, - GFP_NOFS); - set_extent_bits(&info->block_group_cache, - start, end, bit_to_set, - GFP_NOFS); - } else if (cache->data != data && - cache->data != BTRFS_BLOCK_GROUP_MIXED) { - cache->data = BTRFS_BLOCK_GROUP_MIXED; - set_extent_bits(&info->block_group_cache, - start, end, - BLOCK_GROUP_DATA | - BLOCK_GROUP_METADATA, - GFP_NOFS); - } old_val += num_bytes; } else { old_val -= num_bytes; @@ -1357,7 +1306,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } ret = update_block_group(trans, root, bytenr, num_bytes, 0, - mark_free, 0); + mark_free); BUG_ON(ret); } btrfs_free_path(path); @@ -1450,38 +1399,21 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, u64 exclude_start, u64 exclude_nr, int data) { - struct btrfs_path *path; - struct btrfs_key key; - u64 hole_size = 0; - u64 aligned; int ret; - int slot = 0; - u64 last_byte = 0; - u64 *last_ptr = NULL; u64 orig_search_start = search_start; - int start_found; - struct extent_buffer *l; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; u64 total_needed = num_bytes; - int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; - int empty_cluster; - u64 cached_start; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); - level = btrfs_header_level(root->node); - - if (num_bytes >= 32 * 1024 * 1024 && hint_byte) { - data = BTRFS_BLOCK_GROUP_MIXED; - } - if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); + if (hint_byte) { block_group = btrfs_lookup_block_group(info, hint_byte); if (!block_group) @@ -1495,7 +1427,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, } total_needed += empty_size; - path = btrfs_alloc_path(); + check_failed: if (!block_group) { block_group = btrfs_lookup_block_group(info, search_start); @@ -1503,135 +1435,49 @@ check_failed: block_group = btrfs_lookup_block_group(info, orig_search_start); } - search_start = find_search_start(root, &block_group, search_start, - total_needed, data); + ret = find_search_start(root, &block_group, &search_start, + total_needed, data); + if (ret) + goto error; + search_start = stripe_align(root, search_start); - cached_start = search_start; - btrfs_init_path(path); ins->objectid = search_start; - ins->offset = 0; - start_found = 0; - path->reada = 2; - - ret = btrfs_search_slot(trans, root, ins, path, 0, 0); - if (ret < 0) - goto error; - ret = find_previous_extent(root, path); - if (ret < 0) - goto error; - l = path->nodes[0]; - btrfs_item_key_to_cpu(l, &key, path->slots[0]); - while (1) { - l = path->nodes[0]; - slot = path->slots[0]; - if (slot >= btrfs_header_nritems(l)) { - ret = btrfs_next_leaf(root, path); - if (ret == 0) - continue; - if (ret < 0) - goto error; - - search_start = max(search_start, - block_group->key.objectid); - if (!start_found) { - aligned = stripe_align(root, search_start); - ins->objectid = aligned; - if (aligned >= search_end) { - ret = -ENOSPC; - goto error; - } - ins->offset = search_end - aligned; - start_found = 1; - goto check_pending; - } - ins->objectid = stripe_align(root, - last_byte > search_start ? - last_byte : search_start); - if (search_end <= ins->objectid) { - ret = -ENOSPC; - goto error; - } - ins->offset = search_end - ins->objectid; - BUG_ON(ins->objectid >= search_end); - goto check_pending; - } - btrfs_item_key_to_cpu(l, &key, slot); - - if (key.objectid >= search_start && key.objectid > last_byte && - start_found) { - if (last_byte < search_start) - last_byte = search_start; - aligned = stripe_align(root, last_byte); - hole_size = key.objectid - aligned; - if (key.objectid > aligned && hole_size >= num_bytes) { - ins->objectid = aligned; - ins->offset = hole_size; - goto check_pending; - } - } - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { - if (!start_found && btrfs_key_type(&key) == - BTRFS_BLOCK_GROUP_ITEM_KEY) { - last_byte = key.objectid; - start_found = 1; - } - goto next; - } - - - start_found = 1; - last_byte = key.objectid + key.offset; - - if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && - last_byte >= block_group->key.objectid + - block_group->key.offset) { - btrfs_release_path(root, path); - search_start = block_group->key.objectid + - block_group->key.offset; - goto new_group; - } -next: - path->slots[0]++; - cond_resched(); - } -check_pending: - /* we have to make sure we didn't find an extent that has already - * been allocated by the map tree or the original allocation - */ - btrfs_release_path(root, path); - BUG_ON(ins->objectid < search_start); + ins->offset = num_bytes; if (ins->objectid + num_bytes >= search_end) goto enospc; - if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && - ins->objectid + num_bytes > block_group-> - key.objectid + block_group->key.offset) { + + if (ins->objectid + num_bytes > + block_group->key.objectid + block_group->key.offset) { search_start = block_group->key.objectid + block_group->key.offset; goto new_group; } + if (test_range_bit(&info->extent_ins, ins->objectid, ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { search_start = ins->objectid + num_bytes; goto new_group; } + if (test_range_bit(&info->pinned_extents, ins->objectid, ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { search_start = ins->objectid + num_bytes; goto new_group; } + if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; goto new_group; } - if (!data) { + + if (!(data & BLOCK_GROUP_DATA)) { block_group = btrfs_lookup_block_group(info, ins->objectid); if (block_group) trans->block_group = block_group; } ins->offset = num_bytes; - btrfs_free_path(path); return 0; new_group: @@ -1646,7 +1492,6 @@ enospc: if (!full_scan) total_needed -= empty_size; full_scan = 1; - data = BTRFS_BLOCK_GROUP_MIXED; } else wrapped = 1; } @@ -1657,8 +1502,6 @@ enospc: goto check_failed; error: - btrfs_release_path(root, path); - btrfs_free_path(path); return ret; } /* @@ -1689,6 +1532,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key keys[2]; + if (data) + data = BLOCK_GROUP_DATA; + else if (root == root->fs_info->chunk_root) + data = BLOCK_GROUP_SYSTEM; + else + data = BLOCK_GROUP_METADATA; + new_hint = max(hint_byte, root->fs_info->alloc_start); if (new_hint < btrfs_super_total_bytes(&info->super_copy)) hint_byte = new_hint; @@ -1718,7 +1568,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, EXTENT_LOCKED, GFP_NOFS); - WARN_ON(data == 1); goto update_block; } @@ -1768,8 +1617,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } update_block: - ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, - data); + ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); if (ret) { printk("update block group failed for %Lu %Lu\n", ins->objectid, ins->offset); @@ -2457,7 +2305,7 @@ again: if (ret < 0) goto out; - ret = find_previous_extent(root, path); + ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); if (ret < 0) goto out; if (ret == 0) { @@ -2604,95 +2452,48 @@ out: int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 new_size) { - struct btrfs_path *path; - u64 nr = 0; - u64 cur_byte; - u64 old_size; - unsigned long rem; - struct btrfs_block_group_cache *cache; - struct btrfs_block_group_item *item; - struct btrfs_fs_info *info = root->fs_info; - struct extent_io_tree *block_group_cache; - struct btrfs_key key; - struct extent_buffer *leaf; - int ret; - int bit; - - old_size = btrfs_super_total_bytes(&info->super_copy); - block_group_cache = &info->block_group_cache; - - root = info->extent_root; - - cache = btrfs_lookup_block_group(root->fs_info, old_size - 1); - - cur_byte = cache->key.objectid + cache->key.offset; - if (cur_byte >= new_size) - goto set_size; - - key.offset = BTRFS_BLOCK_GROUP_SIZE; - btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - while(cur_byte < new_size) { - key.objectid = cur_byte; - ret = btrfs_insert_empty_item(trans, root, path, &key, - sizeof(struct btrfs_block_group_item)); - BUG_ON(ret); - leaf = path->nodes[0]; - item = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_block_group_item); - - btrfs_set_disk_block_group_used(leaf, item, 0); - div_long_long_rem(nr, 3, &rem); - if (rem) { - btrfs_set_disk_block_group_flags(leaf, item, - BTRFS_BLOCK_GROUP_DATA); - } else { - btrfs_set_disk_block_group_flags(leaf, item, 0); - } - nr++; - - cache = kmalloc(sizeof(*cache), GFP_NOFS); - BUG_ON(!cache); - - read_extent_buffer(leaf, &cache->item, (unsigned long)item, - sizeof(cache->item)); - - memcpy(&cache->key, &key, sizeof(key)); - cache->cached = 0; - cache->pinned = 0; - cur_byte = key.objectid + key.offset; - btrfs_release_path(root, path); - - if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { - bit = BLOCK_GROUP_DATA; - cache->data = BTRFS_BLOCK_GROUP_DATA; - } else { - bit = BLOCK_GROUP_METADATA; - cache->data = 0; - } - - /* use EXTENT_LOCKED to prevent merging */ - set_extent_bits(block_group_cache, key.objectid, - key.objectid + key.offset - 1, - bit | EXTENT_LOCKED, GFP_NOFS); - set_state_private(block_group_cache, key.objectid, - (unsigned long)cache); - } - btrfs_free_path(path); -set_size: - btrfs_set_super_total_bytes(&info->super_copy, new_size); + btrfs_set_super_total_bytes(&root->fs_info->super_copy, new_size); return 0; } +int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *key) +{ + int ret; + struct btrfs_key found_key; + struct extent_buffer *leaf; + int slot; + + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret < 0) + return ret; + while(1) { + slot = path->slots[0]; + leaf = path->nodes[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; + break; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + if (found_key.objectid >= key->objectid && + found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) + return 0; + path->slots[0]++; + } + ret = -ENOENT; +error: + return ret; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; int ret; - int err = 0; int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; @@ -2702,28 +2503,28 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct extent_buffer *leaf; block_group_cache = &info->block_group_cache; - root = info->extent_root; key.objectid = 0; - key.offset = BTRFS_BLOCK_GROUP_SIZE; + key.offset = 0; btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); - path = btrfs_alloc_path(); if (!path) return -ENOMEM; while(1) { - ret = btrfs_search_slot(NULL, info->extent_root, - &key, path, 0, 0); - if (ret != 0) { - err = ret; - break; + ret = find_first_block_group(root, path, &key); + if (ret > 0) { + ret = 0; + goto error; } + if (ret != 0) + goto error; + leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); cache = kmalloc(sizeof(*cache), GFP_NOFS); if (!cache) { - err = -1; + ret = -ENOMEM; break; } @@ -2733,18 +2534,17 @@ int btrfs_read_block_groups(struct btrfs_root *root) memcpy(&cache->key, &found_key, sizeof(found_key)); cache->cached = 0; cache->pinned = 0; + key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); - - if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) { - bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; - cache->data = BTRFS_BLOCK_GROUP_MIXED; - } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { + cache->flags = btrfs_block_group_flags(&cache->item); + bit = 0; + if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { bit = BLOCK_GROUP_DATA; - cache->data = BTRFS_BLOCK_GROUP_DATA; - } else { + } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { + bit = BLOCK_GROUP_SYSTEM; + } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { bit = BLOCK_GROUP_METADATA; - cache->data = 0; } /* use EXTENT_LOCKED to prevent merging */ @@ -2758,7 +2558,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) btrfs_super_total_bytes(&info->super_copy)) break; } - + ret = 0; +error: btrfs_free_path(path); - return 0; + return ret; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e8130c876330..7e3a1ebde9fc 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1706,9 +1706,9 @@ static int submit_one_bio(int rw, struct bio *bio) WARN_ON(1); } if (tree->ops && tree->ops->submit_bio_hook) - tree->ops->submit_bio_hook(rw, bio); - - submit_bio(rw, bio); + tree->ops->submit_bio_hook(page->mapping->host, rw, bio); + else + submit_bio(rw, bio); if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; bio_put(bio); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 9d6654667089..0dca89328f98 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -28,7 +28,7 @@ struct extent_state; struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); - int (*submit_bio_hook)(int rw, struct bio *bio); + int (*submit_bio_hook)(struct inode *inode, int rw, struct bio *bio); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 315dd550d560..17063cd2cb73 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -40,6 +40,7 @@ #include "btrfs_inode.h" #include "ioctl.h" #include "print-tree.h" +#include "volumes.h" struct btrfs_iget_args { u64 ino; @@ -295,20 +296,20 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, return 0; } -int btrfs_submit_bio_hook(int rw, struct bio *bio) +int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) { - struct bio_vec *bvec = bio->bi_io_vec; - struct inode *inode = bvec->bv_page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; int ret = 0; - if (rw != WRITE) - return 0; + if (rw != WRITE) { + goto mapit; + } if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) - return 0; + btrfs_test_flag(inode, NODATASUM)) { + goto mapit; + } mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -317,7 +318,8 @@ int btrfs_submit_bio_hook(int rw, struct bio *bio) ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - return ret; +mapit: + return btrfs_map_bio(root, rw, bio); } int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) @@ -406,7 +408,7 @@ void btrfs_read_locked_inode(struct inode *inode) struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_inode_item *inode_item; - struct btrfs_inode_timespec *tspec; + struct btrfs_timespec *tspec; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; u64 alloc_group_block; @@ -455,7 +457,8 @@ void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); if (!BTRFS_I(inode)->block_group) { BTRFS_I(inode)->block_group = btrfs_find_block_group(root, - NULL, 0, 0, 0); + NULL, 0, + BTRFS_BLOCK_GROUP_METADATA, 0); } btrfs_free_path(path); inode_item = NULL; @@ -1550,7 +1553,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 0; else owner = 1; - group = btrfs_find_block_group(root, group, 0, 0, owner); + group = btrfs_find_block_group(root, group, 0, + BTRFS_BLOCK_GROUP_METADATA, owner); BTRFS_I(inode)->block_group = group; BTRFS_I(inode)->flags = 0; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index da0b4dcf3617..9c1335dad40c 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -20,6 +20,40 @@ #include "disk-io.h" #include "print-tree.h" +static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) +{ + int num_stripes = btrfs_chunk_num_stripes(eb, chunk); + int i; + printk("\t\tchunk owner %llu type %llu num_stripes %d\n", + (unsigned long long)btrfs_chunk_owner(eb, chunk), + (unsigned long long)btrfs_chunk_type(eb, chunk), + num_stripes); + for (i = 0 ; i < num_stripes ; i++) { + printk("\t\t\tstripe %d devid %llu offset %llu\n", i, + (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i), + (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i)); + } +} +static void print_dev_item(struct extent_buffer *eb, + struct btrfs_dev_item *dev_item) +{ + char *name; + int name_len; + + name_len = btrfs_device_name_len(eb, dev_item); + name = kmalloc(name_len, GFP_NOFS); + if (name) { + read_extent_buffer(eb, name, + (unsigned long)btrfs_device_name(dev_item), + name_len); + } + printk("\t\tdev item name %.*s devid %llu " + "total_bytes %llu bytes used %Lu\n", name_len, name, + (unsigned long long)btrfs_device_id(eb, dev_item), + (unsigned long long)btrfs_device_total_bytes(eb, dev_item), + (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); + kfree(name); +} void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; @@ -34,6 +68,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_key key; struct btrfs_key found_key; struct btrfs_extent_ref *ref; + struct btrfs_dev_extent *dev_extent; u32 type; printk("leaf %llu total ptrs %d free space %d\n", @@ -106,6 +141,19 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) printk("\t\tblock group used %llu\n", (unsigned long long)btrfs_disk_block_group_used(l, bi)); break; + case BTRFS_CHUNK_ITEM_KEY: + print_chunk(l, btrfs_item_ptr(l, i, struct btrfs_chunk)); + break; + case BTRFS_DEV_ITEM_KEY: + print_dev_item(l, btrfs_item_ptr(l, i, + struct btrfs_dev_item)); + break; + case BTRFS_DEV_EXTENT_KEY: + dev_extent = btrfs_item_ptr(l, i, + struct btrfs_dev_extent); + printk("\t\tdev extent owner %llu length %llu\n", + (unsigned long long)btrfs_dev_extent_owner(l, dev_extent), + (unsigned long long)btrfs_dev_extent_length(l, dev_extent)); }; } } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e9a0983897f3..5e9f69244f9f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -198,29 +198,42 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, return werr; } +static int update_cowonly_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + u64 old_root_bytenr; + struct btrfs_root *tree_root = root->fs_info->tree_root; + + btrfs_write_dirty_block_groups(trans, root); + while(1) { + old_root_bytenr = btrfs_root_bytenr(&root->root_item); + if (old_root_bytenr == root->node->start) + break; + btrfs_set_root_bytenr(&root->root_item, + root->node->start); + btrfs_set_root_level(&root->root_item, + btrfs_header_level(root->node)); + ret = btrfs_update_root(trans, tree_root, + &root->root_key, + &root->root_item); + BUG_ON(ret); + btrfs_write_dirty_block_groups(trans, root); + } + return 0; +} + int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - int ret; - u64 old_extent_block; struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_root *tree_root = fs_info->tree_root; - struct btrfs_root *extent_root = fs_info->extent_root; + struct list_head *next; - btrfs_write_dirty_block_groups(trans, extent_root); - while(1) { - old_extent_block = btrfs_root_bytenr(&extent_root->root_item); - if (old_extent_block == extent_root->node->start) - break; - btrfs_set_root_bytenr(&extent_root->root_item, - extent_root->node->start); - btrfs_set_root_level(&extent_root->root_item, - btrfs_header_level(extent_root->node)); - ret = btrfs_update_root(trans, tree_root, - &extent_root->root_key, - &extent_root->root_item); - BUG_ON(ret); - btrfs_write_dirty_block_groups(trans, extent_root); + while(!list_empty(&fs_info->dirty_cowonly_roots)) { + next = fs_info->dirty_cowonly_roots.next; + list_del_init(next); + root = list_entry(next, struct btrfs_root, dirty_list); + update_cowonly_root(trans, root); } return 0; } @@ -616,6 +629,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, unsigned long timeout = 1; struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; + struct btrfs_root *chunk_root = root->fs_info->chunk_root; struct list_head dirty_fs_roots; struct extent_io_tree *pinned_copy; DEFINE_WAIT(wait); @@ -714,6 +728,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_super_root_level(&root->fs_info->super_copy, btrfs_header_level(root->fs_info->tree_root->node)); + btrfs_set_super_chunk_root(&root->fs_info->super_copy, + chunk_root->node->start); + btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, + btrfs_header_level(chunk_root->node)); write_extent_buffer(root->fs_info->sb_buffer, &root->fs_info->super_copy, 0, sizeof(root->fs_info->super_copy)); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c new file mode 100644 index 000000000000..90a8d45dc6d7 --- /dev/null +++ b/fs/btrfs/volumes.c @@ -0,0 +1,852 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#include +#include +#include "ctree.h" +#include "extent_map.h" +#include "disk-io.h" +#include "transaction.h" +#include "print-tree.h" +#include "volumes.h" + +struct map_lookup { + struct btrfs_device *dev; + u64 physical; +}; + +/* + * this uses a pretty simple search, the expectation is that it is + * called very infrequently and that a given device has a small number + * of extents + */ +static int find_free_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + struct btrfs_path *path, + u64 num_bytes, u64 *start) +{ + struct btrfs_key key; + struct btrfs_root *root = device->dev_root; + struct btrfs_dev_extent *dev_extent = NULL; + u64 hole_size = 0; + u64 last_byte = 0; + u64 search_start = 0; + u64 search_end = device->total_bytes; + int ret; + int slot = 0; + int start_found; + struct extent_buffer *l; + + start_found = 0; + path->reada = 2; + + /* FIXME use last free of some kind */ + + key.objectid = device->devid; + key.offset = search_start; + key.type = BTRFS_DEV_EXTENT_KEY; + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + if (ret < 0) + goto error; + ret = btrfs_previous_item(root, path, 0, key.type); + if (ret < 0) + goto error; + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &key, path->slots[0]); + while (1) { + l = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(l)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; +no_more_items: + if (!start_found) { + if (search_start >= search_end) { + ret = -ENOSPC; + goto error; + } + *start = search_start; + start_found = 1; + goto check_pending; + } + *start = last_byte > search_start ? + last_byte : search_start; + if (search_end <= *start) { + ret = -ENOSPC; + goto error; + } + goto check_pending; + } + btrfs_item_key_to_cpu(l, &key, slot); + + if (key.objectid < device->devid) + goto next; + + if (key.objectid > device->devid) + goto no_more_items; + + if (key.offset >= search_start && key.offset > last_byte && + start_found) { + if (last_byte < search_start) + last_byte = search_start; + hole_size = key.offset - last_byte; + if (key.offset > last_byte && + hole_size >= num_bytes) { + *start = last_byte; + goto check_pending; + } + } + if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) { + goto next; + } + + start_found = 1; + dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); + last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); +next: + path->slots[0]++; + cond_resched(); + } +check_pending: + /* we have to make sure we didn't find an extent that has already + * been allocated by the map tree or the original allocation + */ + btrfs_release_path(root, path); + BUG_ON(*start < search_start); + + if (*start + num_bytes >= search_end) { + ret = -ENOSPC; + goto error; + } + /* check for pending inserts here */ + return 0; + +error: + btrfs_release_path(root, path); + return ret; +} + +int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + u64 owner, u64 num_bytes, u64 *start) +{ + int ret; + struct btrfs_path *path; + struct btrfs_root *root = device->dev_root; + struct btrfs_dev_extent *extent; + struct extent_buffer *leaf; + struct btrfs_key key; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = find_free_dev_extent(trans, device, path, num_bytes, start); + if (ret) + goto err; + + key.objectid = device->devid; + key.offset = *start; + key.type = BTRFS_DEV_EXTENT_KEY; + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(*extent)); + BUG_ON(ret); + + leaf = path->nodes[0]; + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_extent); + btrfs_set_dev_extent_owner(leaf, extent, owner); + btrfs_set_dev_extent_length(leaf, extent, num_bytes); + btrfs_mark_buffer_dirty(leaf); +err: + btrfs_free_path(path); + return ret; +} + +static int find_next_chunk(struct btrfs_root *root, u64 *objectid) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct btrfs_key found_key; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + key.objectid = (u64)-1; + key.offset = (u64)-1; + key.type = BTRFS_CHUNK_ITEM_KEY; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto error; + + BUG_ON(ret == 0); + + ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); + if (ret) { + *objectid = 0; + } else { + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + *objectid = found_key.objectid + found_key.offset; + } + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + +static struct btrfs_device *next_device(struct list_head *head, + struct list_head *last) +{ + struct list_head *next = last->next; + struct btrfs_device *dev; + + if (list_empty(head)) + return NULL; + + if (next == head) + next = next->next; + + dev = list_entry(next, struct btrfs_device, dev_list); + return dev; +} + +static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path, + u64 *objectid) +{ + int ret; + struct btrfs_key key; + struct btrfs_key found_key; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto error; + + BUG_ON(ret == 0); + + ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, + BTRFS_DEV_ITEM_KEY); + if (ret) { + *objectid = 1; + } else { + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + *objectid = found_key.offset + 1; + } + ret = 0; +error: + btrfs_release_path(root, path); + return ret; +} + +/* + * the device information is stored in the chunk root + * the btrfs_device struct should be fully filled in + */ +int btrfs_add_device(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_device *device) +{ + int ret; + struct btrfs_path *path; + struct btrfs_dev_item *dev_item; + struct extent_buffer *leaf; + struct btrfs_key key; + unsigned long ptr; + u64 free_devid; + + root = root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = find_next_devid(root, path, &free_devid); + if (ret) + goto out; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = free_devid; + + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(*dev_item) + device->name_len); + if (ret) + goto out; + + leaf = path->nodes[0]; + dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); + + btrfs_set_device_id(leaf, dev_item, device->devid); + btrfs_set_device_type(leaf, dev_item, device->type); + btrfs_set_device_io_align(leaf, dev_item, device->io_align); + btrfs_set_device_io_width(leaf, dev_item, device->io_width); + btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); + btrfs_set_device_rdev(leaf, dev_item, device->rdev); + btrfs_set_device_partition(leaf, dev_item, device->partition); + btrfs_set_device_name_len(leaf, dev_item, device->name_len); + btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); + btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); + + ptr = (unsigned long)btrfs_device_name(dev_item); + write_extent_buffer(leaf, device->name, ptr, device->name_len); + + ptr = (unsigned long)btrfs_device_uuid(dev_item); + write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); + btrfs_mark_buffer_dirty(leaf); + ret = 0; + +out: + btrfs_free_path(path); + return ret; +} +int btrfs_update_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device) +{ + int ret; + struct btrfs_path *path; + struct btrfs_root *root; + struct btrfs_dev_item *dev_item; + struct extent_buffer *leaf; + struct btrfs_key key; + + root = device->dev_root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = device->devid; + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) + goto out; + + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + leaf = path->nodes[0]; + dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); + + btrfs_set_device_id(leaf, dev_item, device->devid); + btrfs_set_device_type(leaf, dev_item, device->type); + btrfs_set_device_io_align(leaf, dev_item, device->io_align); + btrfs_set_device_io_width(leaf, dev_item, device->io_width); + btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); + btrfs_set_device_rdev(leaf, dev_item, device->rdev); + btrfs_set_device_partition(leaf, dev_item, device->partition); + btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); + btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); + btrfs_mark_buffer_dirty(leaf); + +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *key, + struct btrfs_chunk *chunk, int item_size) +{ + struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_disk_key disk_key; + u32 array_size; + u8 *ptr; + + array_size = btrfs_super_sys_array_size(super_copy); + if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) + return -EFBIG; + + ptr = super_copy->sys_chunk_array + array_size; + btrfs_cpu_key_to_disk(&disk_key, key); + memcpy(ptr, &disk_key, sizeof(disk_key)); + ptr += sizeof(disk_key); + memcpy(ptr, chunk, item_size); + item_size += sizeof(disk_key); + btrfs_set_super_sys_array_size(super_copy, array_size + item_size); + return 0; +} + +int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 *start, + u64 *num_bytes, u32 type) +{ + u64 dev_offset; + struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; + struct btrfs_stripe *stripes; + struct btrfs_device *device = NULL; + struct btrfs_chunk *chunk; + struct list_head *dev_list = &extent_root->fs_info->devices; + struct list_head *last_dev = extent_root->fs_info->last_device; + struct extent_map_tree *em_tree; + struct map_lookup *map; + struct extent_map *em; + u64 physical; + u64 calc_size = 1024 * 1024 * 1024; + int num_stripes; + int ret; + int index = 0; + struct btrfs_key key; + + + ret = find_next_chunk(chunk_root, &key.objectid); + if (ret) + return ret; + + num_stripes = 1; + chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); + if (!chunk) + return -ENOMEM; + + stripes = &chunk->stripe; + + *num_bytes = calc_size; + while(index < num_stripes) { + device = next_device(dev_list, last_dev); + BUG_ON(!device); + last_dev = &device->dev_list; + extent_root->fs_info->last_device = last_dev; + + ret = btrfs_alloc_dev_extent(trans, device, + key.objectid, + calc_size, &dev_offset); + BUG_ON(ret); + + device->bytes_used += calc_size; + ret = btrfs_update_device(trans, device); + BUG_ON(ret); + + btrfs_set_stack_stripe_devid(stripes + index, device->devid); + btrfs_set_stack_stripe_offset(stripes + index, dev_offset); + physical = dev_offset; + index++; + } + + /* key.objectid was set above */ + key.offset = *num_bytes; + key.type = BTRFS_CHUNK_ITEM_KEY; + btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); + btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024); + btrfs_set_stack_chunk_type(chunk, type); + btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); + btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize); + btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize); + btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); + + ret = btrfs_insert_item(trans, chunk_root, &key, chunk, + btrfs_chunk_item_size(num_stripes)); + BUG_ON(ret); + *start = key.objectid; + + em = alloc_extent_map(GFP_NOFS); + if (!em) + return -ENOMEM; + map = kmalloc(sizeof(*map), GFP_NOFS); + if (!map) { + free_extent_map(em); + return -ENOMEM; + } + + em->bdev = (struct block_device *)map; + em->start = key.objectid; + em->len = key.offset; + em->block_start = 0; + + map->physical = physical; + map->dev = device; + + if (!map->dev) { + kfree(map); + free_extent_map(em); + return -EIO; + } + kfree(chunk); + + em_tree = &extent_root->fs_info->mapping_tree.map_tree; + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em); + BUG_ON(ret); + spin_unlock(&em_tree->lock); + free_extent_map(em); + return ret; +} + +void btrfs_mapping_init(struct btrfs_mapping_tree *tree) +{ + extent_map_tree_init(&tree->map_tree, GFP_NOFS); +} + +void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) +{ + struct extent_map *em; + + while(1) { + spin_lock(&tree->map_tree.lock); + em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); + if (em) + remove_extent_mapping(&tree->map_tree, em); + spin_unlock(&tree->map_tree.lock); + if (!em) + break; + kfree(em->bdev); + /* once for us */ + free_extent_map(em); + /* once for the tree */ + free_extent_map(em); + } +} + +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, + u64 logical, u64 *phys, u64 *length, + struct btrfs_device **dev) +{ + struct extent_map *em; + struct map_lookup *map; + struct extent_map_tree *em_tree = &map_tree->map_tree; + u64 offset; + + + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, logical, *length); + BUG_ON(!em); + + BUG_ON(em->start > logical || em->start + em->len < logical); + map = (struct map_lookup *)em->bdev; + offset = logical - em->start; + *phys = map->physical + offset; + *length = em->len - offset; + *dev = map->dev; + free_extent_map(em); + spin_unlock(&em_tree->lock); + return 0; +} + +int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) +{ + struct btrfs_mapping_tree *map_tree; + struct btrfs_device *dev; + u64 logical = bio->bi_sector << 9; + u64 physical; + u64 length = 0; + u64 map_length; + struct bio_vec *bvec; + int i; + int ret; + + bio_for_each_segment(bvec, bio, i) { + length += bvec->bv_len; + } + map_tree = &root->fs_info->mapping_tree; + map_length = length; + ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); + BUG_ON(map_length < length); + bio->bi_sector = physical >> 9; + bio->bi_bdev = dev->bdev; + submit_bio(rw, bio); + return 0; +} + +struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid) +{ + struct btrfs_device *dev; + struct list_head *cur = root->fs_info->devices.next; + struct list_head *head = &root->fs_info->devices; + + while(cur != head) { + dev = list_entry(cur, struct btrfs_device, dev_list); + if (dev->devid == devid) + return dev; + cur = cur->next; + } + return NULL; +} + +static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk) +{ + struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; + struct map_lookup *map; + struct extent_map *em; + u64 logical; + u64 length; + u64 devid; + int ret; + + logical = key->objectid; + length = key->offset; + spin_lock(&map_tree->map_tree.lock); + em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); + + /* already mapped? */ + if (em && em->start <= logical && em->start + em->len > logical) { + free_extent_map(em); + spin_unlock(&map_tree->map_tree.lock); + return 0; + } else if (em) { + free_extent_map(em); + } + spin_unlock(&map_tree->map_tree.lock); + + map = kzalloc(sizeof(*map), GFP_NOFS); + if (!map) + return -ENOMEM; + + em = alloc_extent_map(GFP_NOFS); + if (!em) + return -ENOMEM; + map = kmalloc(sizeof(*map), GFP_NOFS); + if (!map) { + free_extent_map(em); + return -ENOMEM; + } + + em->bdev = (struct block_device *)map; + em->start = logical; + em->len = length; + em->block_start = 0; + + map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0); + devid = btrfs_stripe_devid_nr(leaf, chunk, 0); + map->dev = btrfs_find_device(root, devid); + if (!map->dev) { + kfree(map); + free_extent_map(em); + return -EIO; + } + + spin_lock(&map_tree->map_tree.lock); + ret = add_extent_mapping(&map_tree->map_tree, em); + BUG_ON(ret); + spin_unlock(&map_tree->map_tree.lock); + free_extent_map(em); + + return 0; +} + +static int fill_device_from_item(struct extent_buffer *leaf, + struct btrfs_dev_item *dev_item, + struct btrfs_device *device) +{ + unsigned long ptr; + char *name; + + device->devid = btrfs_device_id(leaf, dev_item); + device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); + device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); + device->type = btrfs_device_type(leaf, dev_item); + device->io_align = btrfs_device_io_align(leaf, dev_item); + device->io_width = btrfs_device_io_width(leaf, dev_item); + device->sector_size = btrfs_device_sector_size(leaf, dev_item); + device->rdev = btrfs_device_rdev(leaf, dev_item); + device->partition = btrfs_device_partition(leaf, dev_item); + device->name_len = btrfs_device_name_len(leaf, dev_item); + + ptr = (unsigned long)btrfs_device_uuid(dev_item); + read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); + + name = kmalloc(device->name_len + 1, GFP_NOFS); + if (!name) + return -ENOMEM; + device->name = name; + ptr = (unsigned long)btrfs_device_name(dev_item); + read_extent_buffer(leaf, name, ptr, device->name_len); + name[device->name_len] = '\0'; + return 0; +} + +static int read_one_dev(struct btrfs_root *root, struct btrfs_key *key, + struct extent_buffer *leaf, + struct btrfs_dev_item *dev_item) +{ + struct btrfs_device *device; + u64 devid; + int ret; + + devid = btrfs_device_id(leaf, dev_item); + if (btrfs_find_device(root, devid)) + return 0; + + device = kmalloc(sizeof(*device), GFP_NOFS); + if (!device) + return -ENOMEM; + + fill_device_from_item(leaf, dev_item, device); + device->dev_root = root->fs_info->dev_root; + device->bdev = root->fs_info->sb->s_bdev; + list_add(&device->dev_list, &root->fs_info->devices); + memcpy(&device->dev_key, key, sizeof(*key)); + ret = 0; +#if 0 + ret = btrfs_open_device(device); + if (ret) { + kfree(device); + } +#endif + return ret; +} + +int btrfs_read_sys_array(struct btrfs_root *root) +{ + struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct extent_buffer *sb = root->fs_info->sb_buffer; + struct btrfs_disk_key *disk_key; + struct btrfs_dev_item *dev_item; + struct btrfs_chunk *chunk; + struct btrfs_key key; + u32 num_stripes; + u32 array_size; + u32 len = 0; + u8 *ptr; + unsigned long sb_ptr; + u32 cur; + int ret; + int dev_only = 1; + + array_size = btrfs_super_sys_array_size(super_copy); + + /* + * we do this loop twice, once for the device items and + * once for all of the chunks. This way there are device + * structs filled in for every chunk + */ +again: + ptr = super_copy->sys_chunk_array; + sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); + cur = 0; + + while (cur < array_size) { + disk_key = (struct btrfs_disk_key *)ptr; + btrfs_disk_key_to_cpu(&key, disk_key); + + len = sizeof(*disk_key); + ptr += len; + sb_ptr += len; + cur += len; + + if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID && + key.type == BTRFS_DEV_ITEM_KEY) { + dev_item = (struct btrfs_dev_item *)sb_ptr; + if (dev_only) { + ret = read_one_dev(root, &key, sb, dev_item); + BUG_ON(ret); + } + len = sizeof(*dev_item); + len += btrfs_device_name_len(sb, dev_item); + } else if (key.type == BTRFS_CHUNK_ITEM_KEY) { + + chunk = (struct btrfs_chunk *)sb_ptr; + if (!dev_only) { + ret = read_one_chunk(root, &key, sb, chunk); + BUG_ON(ret); + } + num_stripes = btrfs_chunk_num_stripes(sb, chunk); + len = btrfs_chunk_item_size(num_stripes); + } else { + BUG(); + } + ptr += len; + sb_ptr += len; + cur += len; + } + if (dev_only == 1) { + dev_only = 0; + goto again; + } + return 0; +} + +int btrfs_read_chunk_tree(struct btrfs_root *root) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_key key; + struct btrfs_key found_key; + int ret; + int slot; + + root = root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* first we search for all of the device items, and then we + * read in all of the chunk items. This way we can create chunk + * mappings that reference all of the devices that are afound + */ + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.offset = 0; + key.type = 0; +again: + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + while(1) { + leaf = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto error; + break; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { + if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID) + break; + if (found_key.type == BTRFS_DEV_ITEM_KEY) { + struct btrfs_dev_item *dev_item; + dev_item = btrfs_item_ptr(leaf, slot, + struct btrfs_dev_item); + ret = read_one_dev(root, &found_key, leaf, + dev_item); + BUG_ON(ret); + } + } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { + struct btrfs_chunk *chunk; + chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); + ret = read_one_chunk(root, &found_key, leaf, chunk); + } + path->slots[0]++; + } + if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { + key.objectid = 0; + btrfs_release_path(root, path); + goto again; + } + + btrfs_free_path(path); + ret = 0; +error: + return ret; +} + diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h new file mode 100644 index 000000000000..4a47dcb00a73 --- /dev/null +++ b/fs/btrfs/volumes.h @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_VOLUMES_ +#define __BTRFS_VOLUMES_ +struct btrfs_device { + struct list_head dev_list; + struct btrfs_root *dev_root; + struct btrfs_key dev_key; + + struct block_device *bdev; + + /* the internal btrfs device id */ + u64 devid; + + /* size of the device */ + u64 total_bytes; + + /* bytes used */ + u64 bytes_used; + + /* optimal io alignment for this device */ + u32 io_align; + + /* optimal io width for this device */ + u32 io_width; + + /* minimal io size for this device */ + u32 sector_size; + + /* the kernel device number */ + u64 rdev; + + /* type and info about this device */ + u64 type; + + /* partition number, 0 for whole dev */ + int partition; + + /* length of the name data at the end of the item */ + int name_len; + + /* physical drive uuid (or lvm uuid) */ + u8 uuid[BTRFS_DEV_UUID_SIZE]; + + char *name; +}; + +int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + u64 owner, u64 num_bytes, u64 *start); +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, + u64 logical, u64 *phys, u64 *length, + struct btrfs_device **dev); +int btrfs_read_sys_array(struct btrfs_root *root); +int btrfs_read_chunk_tree(struct btrfs_root *root); +int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 *start, + u64 *num_bytes, u32 type); +void btrfs_mapping_init(struct btrfs_mapping_tree *tree); +void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); +int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio); +#endif From 6324fbf334f4586325057197da7752f4ffa409d3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:01:59 -0400 Subject: [PATCH 0445/2985] Btrfs: Dynamic chunk and block group allocation Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 12 ++- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 208 +++++++++++++++++++++++++++++++++++++---- fs/btrfs/inode.c | 9 +- fs/btrfs/volumes.c | 87 ++++++++++------- fs/btrfs/volumes.h | 2 +- 6 files changed, 263 insertions(+), 57 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1453d995fef6..a877105f5c47 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -431,9 +431,19 @@ struct btrfs_block_group_item { __le64 flags; } __attribute__ ((__packed__)); +struct btrfs_space_info { + u64 flags; + u64 total_bytes; + u64 bytes_used; + u64 bytes_pinned; + int full; + struct list_head list; +}; + struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; + struct btrfs_space_info *space_info; u64 pinned; u64 flags; int cached; @@ -490,7 +500,7 @@ struct btrfs_fs_info { struct list_head dirty_cowonly_roots; struct list_head devices; - struct list_head *last_device; + struct list_head space_info; spinlock_t delalloc_lock; spinlock_t new_trans_lock; u64 delalloc_bytes; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e37fa120cc8..2a239ae49f78 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -700,8 +700,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->dev_root = dev_root; INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->devices); + INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); - fs_info->last_device = &fs_info->devices; fs_info->sb = sb; fs_info->throttles = 0; fs_info->mount_opt = 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2cd957d6e8d8..15082b1087be 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -36,6 +36,10 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytes_used, + u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 size); static int cache_block_group(struct btrfs_root *root, @@ -168,16 +172,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) { - if ((bits & BLOCK_GROUP_DATA) && - (cache->flags & BTRFS_BLOCK_GROUP_DATA)) - return 1; - if ((bits & BLOCK_GROUP_METADATA) && - (cache->flags & BTRFS_BLOCK_GROUP_METADATA)) - return 1; - if ((bits & BLOCK_GROUP_SYSTEM) && - (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM)) - return 1; - return 0; + return (cache->flags & bits); } static int noinline find_search_start(struct btrfs_root *root, @@ -276,6 +271,18 @@ static u64 div_factor(u64 num, int factor) return num; } +static int block_group_state_bits(u64 flags) +{ + int bits = 0; + if (flags & BTRFS_BLOCK_GROUP_DATA) + bits |= BLOCK_GROUP_DATA; + if (flags & BTRFS_BLOCK_GROUP_METADATA) + bits |= BLOCK_GROUP_METADATA; + if (flags & BTRFS_BLOCK_GROUP_SYSTEM) + bits |= BLOCK_GROUP_SYSTEM; + return bits; +} + struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, @@ -304,7 +311,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (!owner) factor = 8; - bit = data; + bit = block_group_state_bits(data); if (search_start && search_start < total_fs_bytes) { struct btrfs_block_group_cache *shint; @@ -358,10 +365,15 @@ again: free_check = cache->key.offset; else free_check = div_factor(cache->key.offset, factor); + if (used + cache->pinned < free_check) { found_group = cache; goto found; } + if (full_search) { + printk("failed on cache %Lu used %Lu total %Lu\n", + cache->key.objectid, used, cache->key.offset); + } cond_resched(); } if (!full_search) { @@ -983,6 +995,58 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, return werr; } +static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, + u64 flags) +{ + struct list_head *head = &info->space_info; + struct list_head *cur; + struct btrfs_space_info *found; + list_for_each(cur, head) { + found = list_entry(cur, struct btrfs_space_info, list); + if (found->flags == flags) + return found; + } + return NULL; + +} + +static int do_chunk_alloc(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 alloc_bytes, + u64 flags) +{ + struct btrfs_space_info *space_info; + u64 thresh; + u64 start; + u64 num_bytes; + int ret; + + space_info = __find_space_info(extent_root->fs_info, flags); + BUG_ON(!space_info); + + if (space_info->full) + return 0; + + thresh = div_factor(space_info->total_bytes, 7); + if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < + thresh) + return 0; + + ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags); + if (ret == -ENOSPC) { +printk("space info full %Lu\n", flags); + space_info->full = 1; + return 0; + } + + BUG_ON(ret); + + ret = btrfs_make_block_group(trans, extent_root, 0, flags, + extent_root->fs_info->chunk_root->root_key.objectid, + start, num_bytes); + BUG_ON(ret); + return 0; +} + static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, @@ -1012,8 +1076,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { old_val += num_bytes; + cache->space_info->bytes_used += num_bytes; } else { old_val -= num_bytes; + cache->space_info->bytes_used -= num_bytes; if (mark_free) { set_extent_dirty(&info->free_space_cache, bytenr, bytenr + num_bytes - 1, @@ -1026,6 +1092,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, } return 0; } + static int update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin) { @@ -1047,9 +1114,11 @@ static int update_pinned_extents(struct btrfs_root *root, (bytenr - cache->key.objectid)); if (pin) { cache->pinned += len; + cache->space_info->bytes_pinned += len; fs_info->total_pinned += len; } else { cache->pinned -= len; + cache->space_info->bytes_pinned -= len; fs_info->total_pinned -= len; } bytenr += len; @@ -1472,7 +1541,7 @@ check_failed: goto new_group; } - if (!(data & BLOCK_GROUP_DATA)) { + if (!(data & BTRFS_BLOCK_GROUP_DATA)) { block_group = btrfs_lookup_block_group(info, ins->objectid); if (block_group) trans->block_group = block_group; @@ -1532,12 +1601,25 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key keys[2]; - if (data) - data = BLOCK_GROUP_DATA; - else if (root == root->fs_info->chunk_root) - data = BLOCK_GROUP_SYSTEM; - else - data = BLOCK_GROUP_METADATA; + if (data) { + data = BTRFS_BLOCK_GROUP_DATA; + } else if (root == root->fs_info->chunk_root) { + data = BTRFS_BLOCK_GROUP_SYSTEM; + } else { + data = BTRFS_BLOCK_GROUP_METADATA; + } + + if (root->ref_cows) { + if (data != BTRFS_BLOCK_GROUP_METADATA) { + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes, + BTRFS_BLOCK_GROUP_METADATA); + BUG_ON(ret); + } + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes, data); + BUG_ON(ret); + } new_hint = max(hint_byte, root->fs_info->alloc_start); if (new_hint < btrfs_super_total_bytes(&info->super_copy)) @@ -2490,6 +2572,34 @@ error: return ret; } +static int update_space_info(struct btrfs_fs_info *info, u64 flags, + u64 total_bytes, u64 bytes_used, + struct btrfs_space_info **space_info) +{ + struct btrfs_space_info *found; + + found = __find_space_info(info, flags); + if (found) { + found->total_bytes += total_bytes; + found->bytes_used += bytes_used; + WARN_ON(found->total_bytes < found->bytes_used); + *space_info = found; + return 0; + } + found = kmalloc(sizeof(*found), GFP_NOFS); + if (!found) + return -ENOMEM; + + list_add(&found->list, &info->space_info); + found->flags = flags; + found->total_bytes = total_bytes; + found->bytes_used = bytes_used; + found->bytes_pinned = 0; + found->full = 0; + *space_info = found; + return 0; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -2497,6 +2607,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *space_info; struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; @@ -2547,6 +2658,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) bit = BLOCK_GROUP_METADATA; } + ret = update_space_info(info, cache->flags, found_key.offset, + btrfs_block_group_used(&cache->item), + &space_info); + BUG_ON(ret); + cache->space_info = space_info; + /* use EXTENT_LOCKED to prevent merging */ set_extent_bits(block_group_cache, found_key.objectid, found_key.objectid + found_key.offset - 1, @@ -2563,3 +2680,58 @@ error: btrfs_free_path(path); return ret; } + +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytes_used, + u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 size) +{ + int ret; + int bit = 0; + struct btrfs_root *extent_root; + struct btrfs_block_group_cache *cache; + struct extent_io_tree *block_group_cache; + + extent_root = root->fs_info->extent_root; + block_group_cache = &root->fs_info->block_group_cache; + + cache = kmalloc(sizeof(*cache), GFP_NOFS); + BUG_ON(!cache); + cache->key.objectid = chunk_objectid; + cache->key.offset = size; + cache->cached = 0; + cache->pinned = 0; + btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); + memset(&cache->item, 0, sizeof(cache->item)); + btrfs_set_block_group_used(&cache->item, bytes_used); + btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree); + btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); + cache->flags = type; + btrfs_set_block_group_flags(&cache->item, type); + + ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, + &cache->space_info); + BUG_ON(ret); + + if (type & BTRFS_BLOCK_GROUP_DATA) { + bit = BLOCK_GROUP_DATA; + } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { + bit = BLOCK_GROUP_SYSTEM; + } else if (type & BTRFS_BLOCK_GROUP_METADATA) { + bit = BLOCK_GROUP_METADATA; + } + set_extent_bits(block_group_cache, chunk_objectid, + chunk_objectid + size - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, chunk_objectid, + (unsigned long)cache); + + ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, + sizeof(cache->item)); + BUG_ON(ret); + + finish_current_insert(trans, extent_root); + ret = del_pending_extents(trans, extent_root); + BUG_ON(ret); + return 0; +} diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 17063cd2cb73..109576b57f69 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1527,6 +1527,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, { struct inode *inode; struct btrfs_inode_item *inode_item; + struct btrfs_block_group_cache *new_inode_group; struct btrfs_key *location; struct btrfs_path *path; struct btrfs_inode_ref *ref; @@ -1553,9 +1554,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 0; else owner = 1; - group = btrfs_find_block_group(root, group, 0, + new_inode_group = btrfs_find_block_group(root, group, 0, BTRFS_BLOCK_GROUP_METADATA, owner); - BTRFS_I(inode)->block_group = group; + if (!new_inode_group) { + printk("find_block group failed\n"); + new_inode_group = group; + } + BTRFS_I(inode)->block_group = new_inode_group; BTRFS_I(inode)->flags = 0; key[0].objectid = objectid; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 90a8d45dc6d7..a52a13f365d6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -131,7 +131,7 @@ check_pending: btrfs_release_path(root, path); BUG_ON(*start < search_start); - if (*start + num_bytes >= search_end) { + if (*start + num_bytes > search_end) { ret = -ENOSPC; goto error; } @@ -159,8 +159,9 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, return -ENOMEM; ret = find_free_dev_extent(trans, device, path, num_bytes, start); - if (ret) + if (ret) { goto err; + } key.objectid = device->devid; key.offset = *start; @@ -214,22 +215,6 @@ error: return ret; } -static struct btrfs_device *next_device(struct list_head *head, - struct list_head *last) -{ - struct list_head *next = last->next; - struct btrfs_device *dev; - - if (list_empty(head)) - return NULL; - - if (next == head) - next = next->next; - - dev = list_entry(next, struct btrfs_device, dev_list); - return dev; -} - static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path, u64 *objectid) { @@ -397,31 +382,63 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 *start, - u64 *num_bytes, u32 type) + u64 *num_bytes, u64 type) { u64 dev_offset; struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; struct btrfs_stripe *stripes; struct btrfs_device *device = NULL; struct btrfs_chunk *chunk; + struct list_head private_devs; struct list_head *dev_list = &extent_root->fs_info->devices; - struct list_head *last_dev = extent_root->fs_info->last_device; + struct list_head *cur; struct extent_map_tree *em_tree; struct map_lookup *map; struct extent_map *em; u64 physical; u64 calc_size = 1024 * 1024 * 1024; - int num_stripes; + u64 avail; + u64 max_avail = 0; + int num_stripes = 1; + int looped = 0; int ret; - int index = 0; + int index; struct btrfs_key key; + if (list_empty(dev_list)) + return -ENOSPC; +again: + INIT_LIST_HEAD(&private_devs); + cur = dev_list->next; + index = 0; + /* build a private list of devices we will allocate from */ + while(index < num_stripes) { + device = list_entry(cur, struct btrfs_device, dev_list); + avail = device->total_bytes - device->bytes_used; + cur = cur->next; + if (avail > max_avail) + max_avail = avail; + if (avail >= calc_size) { + list_move_tail(&device->dev_list, &private_devs); + index++; + } + if (cur == dev_list) + break; + } + if (index < num_stripes) { + list_splice(&private_devs, dev_list); + if (!looped && max_avail > 0) { + looped = 1; + calc_size = max_avail; + goto again; + } + return -ENOSPC; + } ret = find_next_chunk(chunk_root, &key.objectid); if (ret) return ret; - num_stripes = 1; chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); if (!chunk) return -ENOMEM; @@ -429,11 +446,12 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, stripes = &chunk->stripe; *num_bytes = calc_size; + index = 0; while(index < num_stripes) { - device = next_device(dev_list, last_dev); - BUG_ON(!device); - last_dev = &device->dev_list; - extent_root->fs_info->last_device = last_dev; + BUG_ON(list_empty(&private_devs)); + cur = private_devs.next; + device = list_entry(cur, struct btrfs_device, dev_list); + list_move_tail(&device->dev_list, dev_list); ret = btrfs_alloc_dev_extent(trans, device, key.objectid, @@ -449,6 +467,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, physical = dev_offset; index++; } + BUG_ON(!list_empty(&private_devs)); /* key.objectid was set above */ key.offset = *num_bytes; @@ -692,17 +711,17 @@ static int read_one_dev(struct btrfs_root *root, struct btrfs_key *key, int ret; devid = btrfs_device_id(leaf, dev_item); - if (btrfs_find_device(root, devid)) - return 0; - - device = kmalloc(sizeof(*device), GFP_NOFS); - if (!device) - return -ENOMEM; + device = btrfs_find_device(root, devid); + if (!device) { + device = kmalloc(sizeof(*device), GFP_NOFS); + if (!device) + return -ENOMEM; + list_add(&device->dev_list, &root->fs_info->devices); + } fill_device_from_item(leaf, dev_item, device); device->dev_root = root->fs_info->dev_root; device->bdev = root->fs_info->sb->s_bdev; - list_add(&device->dev_list, &root->fs_info->devices); memcpy(&device->dev_key, key, sizeof(*key)); ret = 0; #if 0 diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 4a47dcb00a73..4ab9891878a8 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -71,7 +71,7 @@ int btrfs_read_sys_array(struct btrfs_root *root); int btrfs_read_chunk_tree(struct btrfs_root *root); int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 *start, - u64 *num_bytes, u32 type); + u64 *num_bytes, u64 type); void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio); From 7d1660d411a24f9b5a2f38e41c9896d7151c934f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:03 -0400 Subject: [PATCH 0446/2985] Btrfs: Bring back find_free_extent CPU usage optimizations Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 15082b1087be..5e392804ab15 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -182,9 +182,9 @@ static int noinline find_search_start(struct btrfs_root *root, int ret; struct btrfs_block_group_cache *cache = *cache_ret; struct extent_io_tree *free_space_cache; + struct extent_state *state; u64 last; u64 start = 0; - u64 end = 0; u64 cache_miss = 0; u64 total_fs_bytes; u64 search_start = *start_ret; @@ -205,30 +205,34 @@ again: goto new_group; } + spin_lock_irq(&free_space_cache->lock); + state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY); while(1) { - ret = find_first_extent_bit(&root->fs_info->free_space_cache, - last, &start, &end, EXTENT_DIRTY); - if (ret) { + if (!state) { if (!cache_miss) cache_miss = last; + spin_unlock_irq(&free_space_cache->lock); goto new_group; } - start = max(last, start); - last = end + 1; + start = max(last, state->start); + last = state->end + 1; if (last - start < num) { if (last == cache->key.objectid + cache->key.offset) cache_miss = start; + do { + state = extent_state_next(state); + } while(state && !(state->state & EXTENT_DIRTY)); continue; } + spin_unlock_irq(&free_space_cache->lock); if (start + num > cache->key.objectid + cache->key.offset) goto new_group; if (start + num > total_fs_bytes) goto new_group; *start_ret = start; return 0; - } -out: + } out: cache = btrfs_lookup_block_group(root->fs_info, search_start); if (!cache) { printk("Unable to find block group for %Lu\n", search_start); From a9218f6b00ada101c9772ee9c44af04fa5125d2a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:04 -0400 Subject: [PATCH 0447/2985] Add /dev/btrfs-control for device scanning ioctls Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ---- fs/btrfs/super.c | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5e392804ab15..14eb8fc87015 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -374,10 +374,6 @@ again: found_group = cache; goto found; } - if (full_search) { - printk("failed on cache %Lu used %Lu total %Lu\n", - cache->key.objectid, used, cache->key.offset); - } cond_resched(); } if (!full_search) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4423a91206a1..67ed216df475 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -444,6 +445,13 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static long btrfs_control_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + printk("btrfs control ioctl %d\n", cmd); + return 0; +} + static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -451,6 +459,7 @@ static struct file_system_type btrfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; + static void btrfs_write_super_lockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); @@ -482,6 +491,30 @@ static struct super_operations btrfs_super_ops = { .write_super_lockfs = btrfs_write_super_lockfs, .unlockfs = btrfs_unlockfs, }; + +static const struct file_operations btrfs_ctl_fops = { + .unlocked_ioctl = btrfs_control_ioctl, + .compat_ioctl = btrfs_control_ioctl, + .owner = THIS_MODULE, +}; + +static struct miscdevice btrfs_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "btrfs-control", + .fops = &btrfs_ctl_fops +}; + +static int btrfs_interface_init(void) +{ + return misc_register(&btrfs_misc); +} + +void btrfs_interface_exit(void) +{ + if (misc_deregister(&btrfs_misc) < 0) + printk("misc_deregister failed for control device"); +} + static int __init init_btrfs_fs(void) { int err; @@ -503,11 +536,16 @@ static int __init init_btrfs_fs(void) if (err) goto free_extent_io; - err = register_filesystem(&btrfs_fs_type); + err = btrfs_interface_init(); if (err) goto free_extent_map; + err = register_filesystem(&btrfs_fs_type); + if (err) + goto unregister_ioctl; return 0; +unregister_ioctl: + btrfs_interface_exit(); free_extent_map: extent_map_exit(); free_extent_io: @@ -526,6 +564,7 @@ static void __exit exit_btrfs_fs(void) btrfs_destroy_cachep(); extent_map_exit(); extent_io_exit(); + btrfs_interface_exit(); unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); } From e085def2c4cc2d7c0c316376b4b66b86b10e3a4b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:07 -0400 Subject: [PATCH 0448/2985] Btrfs: Make the FS tree the last objectid in the tree of tree roots Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a877105f5c47..285fb7e46106 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -47,24 +47,23 @@ extern struct kmem_cache *btrfs_path_cachep; /* stores information about which extents are in use, and reference counts */ #define BTRFS_EXTENT_TREE_OBJECTID 2ULL -/* one per subvolume, storing files and directories */ -#define BTRFS_FS_TREE_OBJECTID 3ULL - -/* directory objectid inside the root tree */ -#define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL - - /* * chunk tree stores translations from logical -> physical block numbering * the super block points to the chunk tree */ -#define BTRFS_CHUNK_TREE_OBJECTID 5ULL +#define BTRFS_CHUNK_TREE_OBJECTID 3ULL /* * stores information about which areas of a given device are in use. * one per device. The tree of tree roots points to the device tree */ -#define BTRFS_DEV_TREE_OBJECTID 6ULL +#define BTRFS_DEV_TREE_OBJECTID 4ULL + +/* one per subvolume, storing files and directories */ +#define BTRFS_FS_TREE_OBJECTID 5ULL + +/* directory objectid inside the root tree */ +#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL /* * All files have objectids higher than this. From 0d81ba5dbedef0c3970d6aa318aa84920943e6e3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:07 -0400 Subject: [PATCH 0449/2985] Btrfs: Move device information into the super block so it can be scanned Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 21 ++------------- fs/btrfs/disk-io.c | 4 ++- fs/btrfs/print-tree.c | 15 ++--------- fs/btrfs/volumes.c | 61 +++++++++++-------------------------------- fs/btrfs/volumes.h | 13 +-------- 5 files changed, 23 insertions(+), 91 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 285fb7e46106..96a493217860 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -150,21 +150,11 @@ struct btrfs_dev_item { /* minimal io size for this device */ __le32 sector_size; - /* the kernel device number */ - __le64 rdev; - /* type and info about this device */ __le64 type; - /* partition number, 0 for whole dev */ - __le32 partition; - - /* length of the name data at the end of the item */ - __le16 name_len; - - /* physical drive uuid (or lvm uuid) */ + /* btrfs generated uuid for this device */ u8 uuid[BTRFS_DEV_UUID_SIZE]; - /* name goes here */ } __attribute__ ((__packed__)); struct btrfs_stripe { @@ -255,6 +245,7 @@ struct btrfs_super_block { __le32 sys_chunk_array_size; u8 root_level; u8 chunk_root_level; + struct btrfs_dev_item dev_item; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; } __attribute__ ((__packed__)); @@ -685,20 +676,12 @@ BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32); BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); -BTRFS_SETGET_FUNCS(device_rdev, struct btrfs_dev_item, rdev, 64); -BTRFS_SETGET_FUNCS(device_partition, struct btrfs_dev_item, partition, 32); -BTRFS_SETGET_FUNCS(device_name_len, struct btrfs_dev_item, name_len, 16); static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) { return (char *)d + offsetof(struct btrfs_dev_item, uuid); } -static inline char *btrfs_device_name(struct btrfs_dev_item *d) -{ - return (char *)(d + 1); -} - BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2a239ae49f78..26185d46712c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -370,7 +370,6 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) next = list->next; list_del(next); device = list_entry(next, struct btrfs_device, dev_list); - kfree(device->name); kfree(device); } return 0; @@ -800,6 +799,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_lock(&fs_info->fs_mutex); + ret = btrfs_read_super_device(tree_root, fs_info->sb_buffer); + BUG_ON(ret); + ret = btrfs_read_sys_array(tree_root); BUG_ON(ret); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 9c1335dad40c..ee0de112cf5a 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -37,22 +37,11 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) static void print_dev_item(struct extent_buffer *eb, struct btrfs_dev_item *dev_item) { - char *name; - int name_len; - - name_len = btrfs_device_name_len(eb, dev_item); - name = kmalloc(name_len, GFP_NOFS); - if (name) { - read_extent_buffer(eb, name, - (unsigned long)btrfs_device_name(dev_item), - name_len); - } - printk("\t\tdev item name %.*s devid %llu " - "total_bytes %llu bytes used %Lu\n", name_len, name, + printk("\t\tdev item devid %llu " + "total_bytes %llu bytes used %Lu\n", (unsigned long long)btrfs_device_id(eb, dev_item), (unsigned long long)btrfs_device_total_bytes(eb, dev_item), (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); - kfree(name); } void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a52a13f365d6..ae22d01ecf54 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -278,7 +278,7 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, key.offset = free_devid; ret = btrfs_insert_empty_item(trans, root, path, &key, - sizeof(*dev_item) + device->name_len); + sizeof(*dev_item)); if (ret) goto out; @@ -290,15 +290,9 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, btrfs_set_device_io_align(leaf, dev_item, device->io_align); btrfs_set_device_io_width(leaf, dev_item, device->io_width); btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); - btrfs_set_device_rdev(leaf, dev_item, device->rdev); - btrfs_set_device_partition(leaf, dev_item, device->partition); - btrfs_set_device_name_len(leaf, dev_item, device->name_len); btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); - ptr = (unsigned long)btrfs_device_name(dev_item); - write_extent_buffer(leaf, device->name, ptr, device->name_len); - ptr = (unsigned long)btrfs_device_uuid(dev_item); write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); @@ -345,8 +339,6 @@ int btrfs_update_device(struct btrfs_trans_handle *trans, btrfs_set_device_io_align(leaf, dev_item, device->io_align); btrfs_set_device_io_width(leaf, dev_item, device->io_width); btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); - btrfs_set_device_rdev(leaf, dev_item, device->rdev); - btrfs_set_device_partition(leaf, dev_item, device->partition); btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); btrfs_mark_buffer_dirty(leaf); @@ -676,7 +668,6 @@ static int fill_device_from_item(struct extent_buffer *leaf, struct btrfs_device *device) { unsigned long ptr; - char *name; device->devid = btrfs_device_id(leaf, dev_item); device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); @@ -685,24 +676,14 @@ static int fill_device_from_item(struct extent_buffer *leaf, device->io_align = btrfs_device_io_align(leaf, dev_item); device->io_width = btrfs_device_io_width(leaf, dev_item); device->sector_size = btrfs_device_sector_size(leaf, dev_item); - device->rdev = btrfs_device_rdev(leaf, dev_item); - device->partition = btrfs_device_partition(leaf, dev_item); - device->name_len = btrfs_device_name_len(leaf, dev_item); ptr = (unsigned long)btrfs_device_uuid(dev_item); read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); - name = kmalloc(device->name_len + 1, GFP_NOFS); - if (!name) - return -ENOMEM; - device->name = name; - ptr = (unsigned long)btrfs_device_name(dev_item); - read_extent_buffer(leaf, name, ptr, device->name_len); - name[device->name_len] = '\0'; return 0; } -static int read_one_dev(struct btrfs_root *root, struct btrfs_key *key, +static int read_one_dev(struct btrfs_root *root, struct extent_buffer *leaf, struct btrfs_dev_item *dev_item) { @@ -722,7 +703,6 @@ static int read_one_dev(struct btrfs_root *root, struct btrfs_key *key, fill_device_from_item(leaf, dev_item, device); device->dev_root = root->fs_info->dev_root; device->bdev = root->fs_info->sb->s_bdev; - memcpy(&device->dev_key, key, sizeof(*key)); ret = 0; #if 0 ret = btrfs_open_device(device); @@ -733,12 +713,20 @@ static int read_one_dev(struct btrfs_root *root, struct btrfs_key *key, return ret; } +int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf) +{ + struct btrfs_dev_item *dev_item; + + dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block, + dev_item); + return read_one_dev(root, buf, dev_item); +} + int btrfs_read_sys_array(struct btrfs_root *root) { struct btrfs_super_block *super_copy = &root->fs_info->super_copy; struct extent_buffer *sb = root->fs_info->sb_buffer; struct btrfs_disk_key *disk_key; - struct btrfs_dev_item *dev_item; struct btrfs_chunk *chunk; struct btrfs_key key; u32 num_stripes; @@ -748,7 +736,6 @@ int btrfs_read_sys_array(struct btrfs_root *root) unsigned long sb_ptr; u32 cur; int ret; - int dev_only = 1; array_size = btrfs_super_sys_array_size(super_copy); @@ -757,7 +744,6 @@ int btrfs_read_sys_array(struct btrfs_root *root) * once for all of the chunks. This way there are device * structs filled in for every chunk */ -again: ptr = super_copy->sys_chunk_array; sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); cur = 0; @@ -771,22 +757,10 @@ again: sb_ptr += len; cur += len; - if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID && - key.type == BTRFS_DEV_ITEM_KEY) { - dev_item = (struct btrfs_dev_item *)sb_ptr; - if (dev_only) { - ret = read_one_dev(root, &key, sb, dev_item); - BUG_ON(ret); - } - len = sizeof(*dev_item); - len += btrfs_device_name_len(sb, dev_item); - } else if (key.type == BTRFS_CHUNK_ITEM_KEY) { - + if (key.type == BTRFS_CHUNK_ITEM_KEY) { chunk = (struct btrfs_chunk *)sb_ptr; - if (!dev_only) { - ret = read_one_chunk(root, &key, sb, chunk); - BUG_ON(ret); - } + ret = read_one_chunk(root, &key, sb, chunk); + BUG_ON(ret); num_stripes = btrfs_chunk_num_stripes(sb, chunk); len = btrfs_chunk_item_size(num_stripes); } else { @@ -796,10 +770,6 @@ again: sb_ptr += len; cur += len; } - if (dev_only == 1) { - dev_only = 0; - goto again; - } return 0; } @@ -846,8 +816,7 @@ again: struct btrfs_dev_item *dev_item; dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); - ret = read_one_dev(root, &found_key, leaf, - dev_item); + ret = read_one_dev(root, leaf, dev_item); BUG_ON(ret); } } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 4ab9891878a8..77fa6efd79cf 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -21,7 +21,6 @@ struct btrfs_device { struct list_head dev_list; struct btrfs_root *dev_root; - struct btrfs_key dev_key; struct block_device *bdev; @@ -43,22 +42,11 @@ struct btrfs_device { /* minimal io size for this device */ u32 sector_size; - /* the kernel device number */ - u64 rdev; - /* type and info about this device */ u64 type; - /* partition number, 0 for whole dev */ - int partition; - - /* length of the name data at the end of the item */ - int name_len; - /* physical drive uuid (or lvm uuid) */ u8 uuid[BTRFS_DEV_UUID_SIZE]; - - char *name; }; int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, @@ -75,4 +63,5 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio); +int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); #endif From 239b14b32dc39232ebf9cce29ff77c4c564355fd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:07 -0400 Subject: [PATCH 0450/2985] Btrfs: Bring back mount -o ssd optimizations Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +++ fs/btrfs/disk-io.c | 2 ++ fs/btrfs/extent-tree.c | 59 ++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/extent_io.c | 2 ++ fs/btrfs/extent_io.h | 2 ++ fs/btrfs/inode.c | 29 +++++++++++++++++++++ fs/btrfs/volumes.c | 5 ++++ fs/btrfs/volumes.h | 3 +++ 8 files changed, 103 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 96a493217860..acf22ad6115c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1405,6 +1405,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +int btrfs_merge_bio_hook(struct page *page, unsigned long offset, + size_t size, struct bio *bio); + static inline void dec_i_blocks(struct inode *inode, u64 dec) { dec = dec >> 9; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 26185d46712c..4890151cd68d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1103,4 +1103,6 @@ int btrfs_read_buffer(struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, .submit_bio_hook = btree_submit_bio_hook, + /* note we're sharing with inode.c for the merge bio hook */ + .merge_bio_hook = btrfs_merge_bio_hook, }; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 14eb8fc87015..e9ef644ff56f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1473,13 +1473,31 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; u64 total_needed = num_bytes; + u64 *last_ptr = NULL; struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; + int empty_cluster = 2 * 1024 * 1024; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + if (data & BTRFS_BLOCK_GROUP_METADATA) { + last_ptr = &root->fs_info->last_alloc; + } + + if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { + last_ptr = &root->fs_info->last_data_alloc; + } + + if (last_ptr) { + if (*last_ptr) + hint_byte = *last_ptr; + else { + empty_size += empty_cluster; + } + } + if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); @@ -1489,11 +1507,14 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, hint_byte = search_start; block_group = btrfs_find_block_group(root, block_group, hint_byte, data, 1); + if (last_ptr && *last_ptr == 0 && block_group) + hint_byte = block_group->key.objectid; } else { block_group = btrfs_find_block_group(root, trans->block_group, search_start, data, 1); } + search_start = max(search_start, hint_byte); total_needed += empty_size; @@ -1506,9 +1527,36 @@ check_failed: } ret = find_search_start(root, &block_group, &search_start, total_needed, data); + if (ret == -ENOSPC && last_ptr && *last_ptr) { + *last_ptr = 0; + block_group = btrfs_lookup_block_group(info, + orig_search_start); + search_start = orig_search_start; + ret = find_search_start(root, &block_group, &search_start, + total_needed, data); + } + if (ret == -ENOSPC) + goto enospc; if (ret) goto error; + if (last_ptr && *last_ptr && search_start != *last_ptr) { + *last_ptr = 0; + if (!empty_size) { + empty_size += empty_cluster; + total_needed += empty_size; + } + block_group = btrfs_lookup_block_group(info, + orig_search_start); + search_start = orig_search_start; + ret = find_search_start(root, &block_group, + &search_start, total_needed, data); + if (ret == -ENOSPC) + goto enospc; + if (ret) + goto error; + } + search_start = stripe_align(root, search_start); ins->objectid = search_start; ins->offset = num_bytes; @@ -1547,6 +1595,13 @@ check_failed: trans->block_group = block_group; } ins->offset = num_bytes; + if (last_ptr) { + *last_ptr = ins->objectid + ins->offset; + if (*last_ptr == + btrfs_super_total_bytes(&root->fs_info->super_copy)) { + *last_ptr = 0; + } + } return 0; new_group: @@ -1612,12 +1667,12 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (root->ref_cows) { if (data != BTRFS_BLOCK_GROUP_METADATA) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes, + 2 * 1024 * 1024, BTRFS_BLOCK_GROUP_METADATA); BUG_ON(ret); } ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes, data); + num_bytes + 2 * 1024 * 1024, data); BUG_ON(ret); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7e3a1ebde9fc..6dab664529c1 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1730,6 +1730,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, if (bio_ret && *bio_ret) { bio = *bio_ret; if (bio->bi_sector + (bio->bi_size >> 9) != sector || + (tree->ops && tree->ops->merge_bio_hook && + tree->ops->merge_bio_hook(page, offset, size, bio)) || bio_add_page(bio, page, size, offset) < size) { ret = submit_one_bio(rw, bio); bio = NULL; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 0dca89328f98..8b5319db2516 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -29,6 +29,8 @@ struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); int (*submit_bio_hook)(struct inode *inode, int rw, struct bio *bio); + int (*merge_bio_hook)(struct page *page, unsigned long offset, + size_t size, struct bio *bio); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 109576b57f69..5140d6801846 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -296,6 +296,34 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, return 0; } +int btrfs_merge_bio_hook(struct page *page, unsigned long offset, + size_t size, struct bio *bio) +{ + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + struct btrfs_mapping_tree *map_tree; + struct btrfs_device *dev; + u64 logical = bio->bi_sector << 9; + u64 physical; + u64 length = 0; + u64 map_length; + struct bio_vec *bvec; + int i; + int ret; + + bio_for_each_segment(bvec, bio, i) { + length += bvec->bv_len; + } + map_tree = &root->fs_info->mapping_tree; + map_length = length; + ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); + if (map_length < length + size) { + printk("merge bio hook logical %Lu bio len %Lu physical %Lu " + "len %Lu\n", logical, length, physical, map_length); + return 1; + } + return 0; +} + int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -3033,6 +3061,7 @@ static struct file_operations btrfs_dir_file_operations = { static struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, .submit_bio_hook = btrfs_submit_bio_hook, + .merge_bio_hook = btrfs_merge_bio_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, .set_bit_hook = btrfs_set_bit_hook, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ae22d01ecf54..16fb6bbe6e28 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -578,6 +578,11 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) map_tree = &root->fs_info->mapping_tree; map_length = length; ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); + if (map_length < length) { + printk("mapping failed logical %Lu bio len %Lu physical %Lu " + "len %Lu\n", logical, length, physical, map_length); + BUG(); + } BUG_ON(map_length < length); bio->bi_sector = physical >> 9; bio->bi_bdev = dev->bdev; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 77fa6efd79cf..20259128152e 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -64,4 +64,7 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio); int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, + u64 logical, u64 *phys, u64 *length, + struct btrfs_device **dev); #endif From 8a4b83cc8bd75fca29ac68615896d9e92820e7c2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:07 -0400 Subject: [PATCH 0451/2985] Btrfs: Add support for device scanning and detection ioctls Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 21 +++- fs/btrfs/disk-io.c | 24 +++-- fs/btrfs/disk-io.h | 4 +- fs/btrfs/ioctl.h | 6 +- fs/btrfs/super.c | 61 ++++++++---- fs/btrfs/volumes.c | 236 ++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/volumes.h | 25 +++++ 7 files changed, 333 insertions(+), 44 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index acf22ad6115c..7556f8319c60 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -37,7 +37,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; -#define BTRFS_MAGIC "_B4RfS_M" +#define BTRFS_MAGIC "_B5RfS_M" #define BTRFS_MAX_LEVEL 8 @@ -238,6 +238,7 @@ struct btrfs_super_block { __le64 total_bytes; __le64 bytes_used; __le64 root_dir_objectid; + __le64 num_devices; __le32 sectorsize; __le32 nodesize; __le32 leafsize; @@ -440,6 +441,7 @@ struct btrfs_block_group_cache { }; struct btrfs_device; +struct btrfs_fs_devices; struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *extent_root; @@ -489,7 +491,7 @@ struct btrfs_fs_info { u64 total_pinned; struct list_head dirty_cowonly_roots; - struct list_head devices; + struct btrfs_fs_devices *fs_devices; struct list_head space_info; spinlock_t delalloc_lock; spinlock_t new_trans_lock; @@ -677,6 +679,19 @@ BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item, + io_align, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item, + io_width, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item, + sector_size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64); + static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) { return (char *)d + offsetof(struct btrfs_dev_item, uuid); @@ -1106,6 +1121,8 @@ BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, stripesize, 32); BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, root_dir_objectid, 64); +BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block, + num_devices, 64); static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4890151cd68d..f971a29e4f20 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -365,12 +365,12 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) struct list_head *next; struct btrfs_device *device; - list = &fs_info->devices; - while(!list_empty(list)) { - next = list->next; - list_del(next); + list = &fs_info->fs_devices->devices; + list_for_each(next, list) { device = list_entry(next, struct btrfs_device, dev_list); - kfree(device); + if (device->bdev && device->bdev != fs_info->sb->s_bdev) + close_bdev_excl(device->bdev); + device->bdev = NULL; } return 0; } @@ -655,7 +655,8 @@ static int add_hasher(struct btrfs_fs_info *info, char *type) { return 0; } #endif -struct btrfs_root *open_ctree(struct super_block *sb) +struct btrfs_root *open_ctree(struct super_block *sb, + struct btrfs_fs_devices *fs_devices) { u32 sectorsize; u32 nodesize; @@ -697,8 +698,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; fs_info->dev_root = dev_root; + fs_info->fs_devices = fs_devices; INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); - INIT_LIST_HEAD(&fs_info->devices); INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); fs_info->sb = sb; @@ -779,6 +780,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; + if (btrfs_super_num_devices(disk_super) != fs_devices->num_devices) { + printk("Btrfs: wanted %llu devices, but found %llu\n", + (unsigned long long)btrfs_super_num_devices(disk_super), + (unsigned long long)fs_devices->num_devices); + goto fail_sb_buffer; + } nodesize = btrfs_super_nodesize(disk_super); leafsize = btrfs_super_leafsize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); @@ -799,8 +806,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_lock(&fs_info->fs_mutex); - ret = btrfs_read_super_device(tree_root, fs_info->sb_buffer); - BUG_ON(ret); ret = btrfs_read_sys_array(tree_root); BUG_ON(ret); @@ -859,6 +864,7 @@ fail_sb_buffer: fail_iput: iput(fs_info->btree_inode); fail: + close_all_devices(fs_info); kfree(extent_root); kfree(tree_root); kfree(fs_info); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 206cb48638f7..b7cbc58a5553 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -21,6 +21,7 @@ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) struct btrfs_device; +struct btrfs_fs_devices; struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); @@ -29,7 +30,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); -struct btrfs_root *open_ctree(struct super_block *sb); +struct btrfs_root *open_ctree(struct super_block *sb, + struct btrfs_fs_devices *fs_devices); int close_ctree(struct btrfs_root *root); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 8c6290665d49..4551e82013c8 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -22,8 +22,10 @@ #define BTRFS_IOCTL_MAGIC 0x94 #define BTRFS_VOL_NAME_MAX 255 +#define BTRFS_PATH_NAME_MAX 4095 + struct btrfs_ioctl_vol_args { - char name[BTRFS_VOL_NAME_MAX + 1]; + char name[BTRFS_PATH_NAME_MAX + 1]; }; #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ @@ -32,4 +34,6 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 67ed216df475..9624923a33dc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -44,6 +44,7 @@ #include "ioctl.h" #include "print-tree.h" #include "xattr.h" +#include "volumes.h" #define BTRFS_SUPER_MAGIC 0x9123683E @@ -216,7 +217,9 @@ static int parse_options (char * options, return 1; } -static int btrfs_fill_super(struct super_block * sb, void * data, int silent) +static int btrfs_fill_super(struct super_block * sb, + struct btrfs_fs_devices *fs_devices, + void * data, int silent) { struct inode * inode; struct dentry * root_dentry; @@ -231,7 +234,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; - tree_root = open_ctree(sb); + tree_root = open_ctree(sb, fs_devices); if (!tree_root || IS_ERR(tree_root)) { printk("btrfs: open_ctree failed\n"); @@ -334,18 +337,23 @@ static int test_bdev_super(struct super_block *s, void *data) int btrfs_get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int), struct vfsmount *mnt, const char *subvol) { struct block_device *bdev = NULL; struct super_block *s; struct dentry *root; + struct btrfs_fs_devices *fs_devices = NULL; int error = 0; - bdev = open_bdev_excl(dev_name, flags, fs_type); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); + error = btrfs_scan_one_device(dev_name, flags, fs_type, &fs_devices); + if (error) + return error; + error = btrfs_open_devices(fs_devices, flags, fs_type); + if (error) + return error; + + bdev = fs_devices->lowest_bdev; /* * once the super is inserted into the list by sget, s_umount * will protect the lockfs code from trying to start a snapshot @@ -372,7 +380,8 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, s->s_flags = flags; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); - error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); + error = btrfs_fill_super(s, fs_devices, data, + flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); @@ -408,7 +417,7 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, error_s: error = PTR_ERR(s); error_bdev: - close_bdev_excl(bdev); + btrfs_close_devices(fs_devices); error: return error; } @@ -421,8 +430,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, char *subvol_name = NULL; parse_options((char *)data, NULL, &subvol_name); - ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, - btrfs_fill_super, mnt, + ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, mnt, subvol_name ? subvol_name : "default"); if (subvol_name) kfree(subvol_name); @@ -445,13 +453,6 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static long btrfs_control_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - printk("btrfs control ioctl %d\n", cmd); - return 0; -} - static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -460,6 +461,31 @@ static struct file_system_type btrfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +static long btrfs_control_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct btrfs_ioctl_vol_args *vol; + struct btrfs_fs_devices *fs_devices; + int ret; + int len; + + vol = kmalloc(sizeof(*vol), GFP_KERNEL); + if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { + ret = -EFAULT; + goto out; + } + len = strnlen(vol->name, BTRFS_PATH_NAME_MAX); + switch (cmd) { + case BTRFS_IOC_SCAN_DEV: + ret = btrfs_scan_one_device(vol->name, MS_RDONLY, + &btrfs_fs_type, &fs_devices); + break; + } +out: + kfree(vol); + return 0; +} + static void btrfs_write_super_lockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); @@ -567,6 +593,7 @@ static void __exit exit_btrfs_fs(void) btrfs_interface_exit(); unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); + btrfs_cleanup_fs_uuids(); } module_init(init_btrfs_fs) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 16fb6bbe6e28..263f01cc3db4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -17,6 +17,7 @@ */ #include #include +#include #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -28,6 +29,215 @@ struct map_lookup { struct btrfs_device *dev; u64 physical; }; +static DEFINE_MUTEX(uuid_mutex); +static LIST_HEAD(fs_uuids); + +int btrfs_cleanup_fs_uuids(void) +{ + struct btrfs_fs_devices *fs_devices; + struct list_head *uuid_cur; + struct list_head *devices_cur; + struct btrfs_device *dev; + + list_for_each(uuid_cur, &fs_uuids) { + fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices, + list); + while(!list_empty(&fs_devices->devices)) { + devices_cur = fs_devices->devices.next; + dev = list_entry(devices_cur, struct btrfs_device, + dev_list); + printk("uuid cleanup finds %s\n", dev->name); + if (dev->bdev) { + printk("closing\n"); + close_bdev_excl(dev->bdev); + } + list_del(&dev->dev_list); + kfree(dev); + } + } + return 0; +} + +static struct btrfs_device *__find_device(struct list_head *head, u64 devid) +{ + struct btrfs_device *dev; + struct list_head *cur; + + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); + if (dev->devid == devid) + return dev; + } + return NULL; +} + +static struct btrfs_fs_devices *find_fsid(u8 *fsid) +{ + struct list_head *cur; + struct btrfs_fs_devices *fs_devices; + + list_for_each(cur, &fs_uuids) { + fs_devices = list_entry(cur, struct btrfs_fs_devices, list); + if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) + return fs_devices; + } + return NULL; +} + +static int device_list_add(const char *path, + struct btrfs_super_block *disk_super, + u64 devid, struct btrfs_fs_devices **fs_devices_ret) +{ + struct btrfs_device *device; + struct btrfs_fs_devices *fs_devices; + u64 found_transid = btrfs_super_generation(disk_super); + + fs_devices = find_fsid(disk_super->fsid); + if (!fs_devices) { + fs_devices = kmalloc(sizeof(*fs_devices), GFP_NOFS); + if (!fs_devices) + return -ENOMEM; + INIT_LIST_HEAD(&fs_devices->devices); + list_add(&fs_devices->list, &fs_uuids); + memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); + fs_devices->latest_devid = devid; + fs_devices->latest_trans = found_transid; + fs_devices->lowest_devid = (u64)-1; + fs_devices->num_devices = 0; + device = NULL; + } else { + device = __find_device(&fs_devices->devices, devid); + } + if (!device) { + device = kzalloc(sizeof(*device), GFP_NOFS); + if (!device) { + /* we can safely leave the fs_devices entry around */ + return -ENOMEM; + } + device->devid = devid; + device->name = kstrdup(path, GFP_NOFS); + if (!device->name) { + kfree(device); + return -ENOMEM; + } + list_add(&device->dev_list, &fs_devices->devices); + fs_devices->num_devices++; + } + + if (found_transid > fs_devices->latest_trans) { + fs_devices->latest_devid = devid; + fs_devices->latest_trans = found_transid; + } + if (fs_devices->lowest_devid > devid) { + fs_devices->lowest_devid = devid; + printk("lowest devid now %Lu\n", devid); + } + *fs_devices_ret = fs_devices; + return 0; +} + +int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) +{ + struct list_head *head = &fs_devices->devices; + struct list_head *cur; + struct btrfs_device *device; + + mutex_lock(&uuid_mutex); + list_for_each(cur, head) { + device = list_entry(cur, struct btrfs_device, dev_list); + if (device->bdev) { + close_bdev_excl(device->bdev); + printk("close devices closes %s\n", device->name); + } + device->bdev = NULL; + } + mutex_unlock(&uuid_mutex); + return 0; +} + +int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, + int flags, void *holder) +{ + struct block_device *bdev; + struct list_head *head = &fs_devices->devices; + struct list_head *cur; + struct btrfs_device *device; + int ret; + + mutex_lock(&uuid_mutex); + list_for_each(cur, head) { + device = list_entry(cur, struct btrfs_device, dev_list); + bdev = open_bdev_excl(device->name, flags, holder); +printk("opening %s devid %Lu\n", device->name, device->devid); + if (IS_ERR(bdev)) { + printk("open %s failed\n", device->name); + ret = PTR_ERR(bdev); + goto fail; + } + if (device->devid == fs_devices->latest_devid) + fs_devices->latest_bdev = bdev; + if (device->devid == fs_devices->lowest_devid) { + fs_devices->lowest_bdev = bdev; +printk("lowest bdev %s\n", device->name); + } + device->bdev = bdev; + } + mutex_unlock(&uuid_mutex); + return 0; +fail: + mutex_unlock(&uuid_mutex); + btrfs_close_devices(fs_devices); + return ret; +} + +int btrfs_scan_one_device(const char *path, int flags, void *holder, + struct btrfs_fs_devices **fs_devices_ret) +{ + struct btrfs_super_block *disk_super; + struct block_device *bdev; + struct buffer_head *bh; + int ret; + u64 devid; + + mutex_lock(&uuid_mutex); + + printk("scan one opens %s\n", path); + bdev = open_bdev_excl(path, flags, holder); + + if (IS_ERR(bdev)) { + printk("open failed\n"); + ret = PTR_ERR(bdev); + goto error; + } + + ret = set_blocksize(bdev, 4096); + if (ret) + goto error_close; + bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + if (!bh) { + ret = -EIO; + goto error_close; + } + disk_super = (struct btrfs_super_block *)bh->b_data; + if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, + sizeof(disk_super->magic))) { + printk("no btrfs found on %s\n", path); + ret = -ENOENT; + goto error_brelse; + } + devid = le64_to_cpu(disk_super->dev_item.devid); + printk("found device %Lu on %s\n", devid, path); + ret = device_list_add(path, disk_super, devid, fs_devices_ret); + +error_brelse: + brelse(bh); +error_close: + close_bdev_excl(bdev); + printk("scan one closes bdev %s\n", path); +error: + mutex_unlock(&uuid_mutex); + return ret; +} /* * this uses a pretty simple search, the expectation is that it is @@ -56,6 +266,10 @@ static int find_free_dev_extent(struct btrfs_trans_handle *trans, /* FIXME use last free of some kind */ + /* we don't want to overwrite the superblock on the drive, + * so we make sure to start at an offset of at least 1MB + */ + search_start = max((u64)1024 * 1024, search_start); key.objectid = device->devid; key.offset = search_start; key.type = BTRFS_DEV_EXTENT_KEY; @@ -285,6 +499,7 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); + device->devid = free_devid; btrfs_set_device_id(leaf, dev_item, device->devid); btrfs_set_device_type(leaf, dev_item, device->type); btrfs_set_device_io_align(leaf, dev_item, device->io_align); @@ -382,7 +597,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_device *device = NULL; struct btrfs_chunk *chunk; struct list_head private_devs; - struct list_head *dev_list = &extent_root->fs_info->devices; + struct list_head *dev_list = &extent_root->fs_info->fs_devices->devices; struct list_head *cur; struct extent_map_tree *em_tree; struct map_lookup *map; @@ -449,7 +664,7 @@ again: key.objectid, calc_size, &dev_offset); BUG_ON(ret); - +printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); device->bytes_used += calc_size; ret = btrfs_update_device(trans, device); BUG_ON(ret); @@ -592,17 +807,9 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid) { - struct btrfs_device *dev; - struct list_head *cur = root->fs_info->devices.next; - struct list_head *head = &root->fs_info->devices; + struct list_head *head = &root->fs_info->fs_devices->devices; - while(cur != head) { - dev = list_entry(cur, struct btrfs_device, dev_list); - if (dev->devid == devid) - return dev; - cur = cur->next; - } - return NULL; + return __find_device(head, devid); } static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, @@ -699,15 +906,16 @@ static int read_one_dev(struct btrfs_root *root, devid = btrfs_device_id(leaf, dev_item); device = btrfs_find_device(root, devid); if (!device) { + printk("warning devid %Lu not found already\n", devid); device = kmalloc(sizeof(*device), GFP_NOFS); if (!device) return -ENOMEM; - list_add(&device->dev_list, &root->fs_info->devices); + list_add(&device->dev_list, + &root->fs_info->fs_devices->devices); } fill_device_from_item(leaf, dev_item, device); device->dev_root = root->fs_info->dev_root; - device->bdev = root->fs_info->sb->s_bdev; ret = 0; #if 0 ret = btrfs_open_device(device); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 20259128152e..12f297eb0559 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -24,6 +24,8 @@ struct btrfs_device { struct block_device *bdev; + char *name; + /* the internal btrfs device id */ u64 devid; @@ -49,6 +51,20 @@ struct btrfs_device { u8 uuid[BTRFS_DEV_UUID_SIZE]; }; +struct btrfs_fs_devices { + u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ + + /* the device with this id has the most recent coyp of the super */ + u64 latest_devid; + u64 latest_trans; + u64 lowest_devid; + u64 num_devices; + struct block_device *latest_bdev; + struct block_device *lowest_bdev; + struct list_head devices; + struct list_head list; +}; + int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 owner, u64 num_bytes, u64 *start); @@ -67,4 +83,13 @@ int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); int btrfs_map_block(struct btrfs_mapping_tree *map_tree, u64 logical, u64 *phys, u64 *length, struct btrfs_device **dev); +int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, + int flags, void *holder); +int btrfs_scan_one_device(const char *path, int flags, void *holder, + struct btrfs_fs_devices **fs_devices_ret); +int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); +int btrfs_add_device(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_device *device); +int btrfs_cleanup_fs_uuids(void); #endif From 593060d756e0c2382d59cf00d5f3b9e3a336c408 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 25 Mar 2008 16:50:33 -0400 Subject: [PATCH 0452/2985] Btrfs: Implement raid0 when multiple devices are present Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 + fs/btrfs/disk-io.c | 2 + fs/btrfs/extent-tree.c | 102 +++++++++++++++++++++----------- fs/btrfs/inode.c | 2 - fs/btrfs/volumes.c | 130 +++++++++++++++++++++++++++++++---------- 5 files changed, 173 insertions(+), 66 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7556f8319c60..f43dfe7ffe6d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -413,6 +413,7 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_DATA (1 << 0) #define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) #define BTRFS_BLOCK_GROUP_METADATA (1 << 2) +#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) struct btrfs_block_group_item { @@ -498,6 +499,8 @@ struct btrfs_fs_info { u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; + int extra_data_alloc_bits; + int extra_alloc_bits; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f971a29e4f20..d8474796dea2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -736,6 +736,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->total_pinned = 0; fs_info->last_alloc = 0; fs_info->last_data_alloc = 0; + fs_info->extra_alloc_bits = 0; + fs_info->extra_data_alloc_bits = 0; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e9ef644ff56f..c226656f29b7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -172,7 +172,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) { - return (cache->flags & bits); + return (cache->flags & bits) == bits; } static int noinline find_search_start(struct btrfs_root *root, @@ -1010,6 +1010,35 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, } +static int update_space_info(struct btrfs_fs_info *info, u64 flags, + u64 total_bytes, u64 bytes_used, + struct btrfs_space_info **space_info) +{ + struct btrfs_space_info *found; + + found = __find_space_info(info, flags); + if (found) { + found->total_bytes += total_bytes; + found->bytes_used += bytes_used; + WARN_ON(found->total_bytes < found->bytes_used); + *space_info = found; + return 0; + } + found = kmalloc(sizeof(*found), GFP_NOFS); + if (!found) + return -ENOMEM; + + list_add(&found->list, &info->space_info); + found->flags = flags; + found->total_bytes = total_bytes; + found->bytes_used = bytes_used; + found->bytes_pinned = 0; + found->full = 0; + *space_info = found; + return 0; +} + + static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags) @@ -1021,6 +1050,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, int ret; space_info = __find_space_info(extent_root->fs_info, flags); + if (!space_info) { + ret = update_space_info(extent_root->fs_info, flags, + 0, 0, &space_info); + BUG_ON(ret); + } BUG_ON(!space_info); if (space_info->full) @@ -1044,6 +1078,17 @@ printk("space info full %Lu\n", flags); extent_root->fs_info->chunk_root->root_key.objectid, start, num_bytes); BUG_ON(ret); + + if (flags & BTRFS_BLOCK_GROUP_RAID0) { + if (flags & BTRFS_BLOCK_GROUP_DATA) { + extent_root->fs_info->extra_data_alloc_bits = + BTRFS_BLOCK_GROUP_RAID0; + } + if (flags & BTRFS_BLOCK_GROUP_METADATA) { + extent_root->fs_info->extra_alloc_bits = + BTRFS_BLOCK_GROUP_RAID0; + } + } return 0; } @@ -1655,24 +1700,31 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_extent_ref *ref; struct btrfs_path *path; struct btrfs_key keys[2]; + int extra_chunk_alloc_bits = 0; if (data) { - data = BTRFS_BLOCK_GROUP_DATA; + data = BTRFS_BLOCK_GROUP_DATA | info->extra_data_alloc_bits; } else if (root == root->fs_info->chunk_root) { data = BTRFS_BLOCK_GROUP_SYSTEM; } else { - data = BTRFS_BLOCK_GROUP_METADATA; + data = BTRFS_BLOCK_GROUP_METADATA | info->extra_alloc_bits; } + if (btrfs_super_num_devices(&info->super_copy) > 1 && + !(data & BTRFS_BLOCK_GROUP_SYSTEM)) + extra_chunk_alloc_bits = BTRFS_BLOCK_GROUP_RAID0; if (root->ref_cows) { - if (data != BTRFS_BLOCK_GROUP_METADATA) { + if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, - BTRFS_BLOCK_GROUP_METADATA); + BTRFS_BLOCK_GROUP_METADATA | + info->extra_alloc_bits | + extra_chunk_alloc_bits); BUG_ON(ret); } ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data); + num_bytes + 2 * 1024 * 1024, data | + extra_chunk_alloc_bits); BUG_ON(ret); } @@ -2627,34 +2679,6 @@ error: return ret; } -static int update_space_info(struct btrfs_fs_info *info, u64 flags, - u64 total_bytes, u64 bytes_used, - struct btrfs_space_info **space_info) -{ - struct btrfs_space_info *found; - - found = __find_space_info(info, flags); - if (found) { - found->total_bytes += total_bytes; - found->bytes_used += bytes_used; - WARN_ON(found->total_bytes < found->bytes_used); - *space_info = found; - return 0; - } - found = kmalloc(sizeof(*found), GFP_NOFS); - if (!found) - return -ENOMEM; - - list_add(&found->list, &info->space_info); - found->flags = flags; - found->total_bytes = total_bytes; - found->bytes_used = bytes_used; - found->bytes_pinned = 0; - found->full = 0; - *space_info = found; - return 0; -} - int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -2712,6 +2736,16 @@ int btrfs_read_block_groups(struct btrfs_root *root) } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { bit = BLOCK_GROUP_METADATA; } + if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) { + if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { + info->extra_data_alloc_bits = + BTRFS_BLOCK_GROUP_RAID0; + } + if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { + info->extra_alloc_bits = + BTRFS_BLOCK_GROUP_RAID0; + } + } ret = update_space_info(info, cache->flags, found_key.offset, btrfs_block_group_used(&cache->item), diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5140d6801846..db60d85598ce 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -317,8 +317,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, map_length = length; ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); if (map_length < length + size) { - printk("merge bio hook logical %Lu bio len %Lu physical %Lu " - "len %Lu\n", logical, length, physical, map_length); return 1; } return 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 263f01cc3db4..d8fce32a3bbc 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -25,10 +26,24 @@ #include "print-tree.h" #include "volumes.h" -struct map_lookup { +struct stripe { struct btrfs_device *dev; u64 physical; }; + +struct map_lookup { + u64 type; + int io_align; + int io_width; + int stripe_len; + int sector_size; + int num_stripes; + struct stripe stripes[]; +}; + +#define map_lookup_size(n) (sizeof(struct map_lookup) + \ + (sizeof(struct stripe) * (n))) + static DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); @@ -592,6 +607,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 *num_bytes, u64 type) { u64 dev_offset; + struct btrfs_fs_info *info = extent_root->fs_info; struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; struct btrfs_stripe *stripes; struct btrfs_device *device = NULL; @@ -610,10 +626,18 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int looped = 0; int ret; int index; + int stripe_len = 64 * 1024; struct btrfs_key key; if (list_empty(dev_list)) return -ENOSPC; + + if (type & BTRFS_BLOCK_GROUP_RAID0) + num_stripes = btrfs_super_num_devices(&info->super_copy); + if (type & BTRFS_BLOCK_GROUP_DATA) + stripe_len = 64 * 1024; + if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) + stripe_len = 32 * 1024; again: INIT_LIST_HEAD(&private_devs); cur = dev_list->next; @@ -650,9 +674,15 @@ again: if (!chunk) return -ENOMEM; + map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); + if (!map) { + kfree(chunk); + return -ENOMEM; + } + stripes = &chunk->stripe; - *num_bytes = calc_size; + *num_bytes = calc_size * num_stripes; index = 0; while(index < num_stripes) { BUG_ON(list_empty(&private_devs)); @@ -669,6 +699,8 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); ret = btrfs_update_device(trans, device); BUG_ON(ret); + map->stripes[index].dev = device; + map->stripes[index].physical = dev_offset; btrfs_set_stack_stripe_devid(stripes + index, device->devid); btrfs_set_stack_stripe_offset(stripes + index, dev_offset); physical = dev_offset; @@ -680,12 +712,18 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); key.offset = *num_bytes; key.type = BTRFS_CHUNK_ITEM_KEY; btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); - btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024); + btrfs_set_stack_chunk_stripe_len(chunk, stripe_len); btrfs_set_stack_chunk_type(chunk, type); btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); - btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize); - btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize); + btrfs_set_stack_chunk_io_align(chunk, stripe_len); + btrfs_set_stack_chunk_io_width(chunk, stripe_len); btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); + map->sector_size = extent_root->sectorsize; + map->stripe_len = stripe_len; + map->io_align = stripe_len; + map->io_width = stripe_len; + map->type = type; + map->num_stripes = num_stripes; ret = btrfs_insert_item(trans, chunk_root, &key, chunk, btrfs_chunk_item_size(num_stripes)); @@ -695,25 +733,11 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); em = alloc_extent_map(GFP_NOFS); if (!em) return -ENOMEM; - map = kmalloc(sizeof(*map), GFP_NOFS); - if (!map) { - free_extent_map(em); - return -ENOMEM; - } - em->bdev = (struct block_device *)map; em->start = key.objectid; em->len = key.offset; em->block_start = 0; - map->physical = physical; - map->dev = device; - - if (!map->dev) { - kfree(map); - free_extent_map(em); - return -EIO; - } kfree(chunk); em_tree = &extent_root->fs_info->mapping_tree.map_tree; @@ -758,6 +782,9 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, struct map_lookup *map; struct extent_map_tree *em_tree = &map_tree->map_tree; u64 offset; + u64 stripe_offset; + u64 stripe_nr; + int stripe_index; spin_lock(&em_tree->lock); @@ -767,9 +794,40 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, BUG_ON(em->start > logical || em->start + em->len < logical); map = (struct map_lookup *)em->bdev; offset = logical - em->start; - *phys = map->physical + offset; - *length = em->len - offset; - *dev = map->dev; + + stripe_nr = offset; + /* + * stripe_nr counts the total number of stripes we have to stride + * to get to this block + */ + do_div(stripe_nr, map->stripe_len); + + stripe_offset = stripe_nr * map->stripe_len; + BUG_ON(offset < stripe_offset); + + /* stripe_offset is the offset of this block in its stripe*/ + stripe_offset = offset - stripe_offset; + + /* + * after this do_div call, stripe_nr is the number of stripes + * on this device we have to walk to find the data, and + * stripe_index is the number of our device in the stripe array + */ + stripe_index = do_div(stripe_nr, map->num_stripes); + + BUG_ON(stripe_index >= map->num_stripes); + + *phys = map->stripes[stripe_index].physical + stripe_offset + + stripe_nr * map->stripe_len; + + if (map->type & BTRFS_BLOCK_GROUP_RAID0) { + /* we limit the length of each bio to what fits in a stripe */ + *length = min_t(u64, em->len - offset, + map->stripe_len - stripe_offset); + } else { + *length = em->len - offset; + } + *dev = map->stripes[stripe_index].dev; free_extent_map(em); spin_unlock(&em_tree->lock); return 0; @@ -822,7 +880,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, u64 logical; u64 length; u64 devid; + int num_stripes; int ret; + int i; logical = key->objectid; length = key->offset; @@ -846,7 +906,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, em = alloc_extent_map(GFP_NOFS); if (!em) return -ENOMEM; - map = kmalloc(sizeof(*map), GFP_NOFS); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); if (!map) { free_extent_map(em); return -ENOMEM; @@ -857,13 +918,22 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, em->len = length; em->block_start = 0; - map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0); - devid = btrfs_stripe_devid_nr(leaf, chunk, 0); - map->dev = btrfs_find_device(root, devid); - if (!map->dev) { - kfree(map); - free_extent_map(em); - return -EIO; + map->num_stripes = num_stripes; + map->io_width = btrfs_chunk_io_width(leaf, chunk); + map->io_align = btrfs_chunk_io_align(leaf, chunk); + map->sector_size = btrfs_chunk_sector_size(leaf, chunk); + map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + map->type = btrfs_chunk_type(leaf, chunk); + for (i = 0; i < num_stripes; i++) { + map->stripes[i].physical = + btrfs_stripe_offset_nr(leaf, chunk, i); + devid = btrfs_stripe_devid_nr(leaf, chunk, i); + map->stripes[i].dev = btrfs_find_device(root, devid); + if (!map->stripes[i].dev) { + kfree(map); + free_extent_map(em); + return -EIO; + } } spin_lock(&map_tree->map_tree.lock); From 04160088141a6be06d5cb9e53e34076b51803ffc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 26 Mar 2008 10:28:07 -0400 Subject: [PATCH 0453/2985] Create a btrfs backing dev info This allows intelligent versions of unplug and congestion functions Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/disk-io.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/inode.c | 5 +++++ 3 files changed, 60 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f43dfe7ffe6d..fec96ba7c23b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "bit-radix.h" #include "extent_io.h" @@ -472,6 +473,7 @@ struct btrfs_fs_info { struct block_device *__bdev; struct super_block *sb; struct inode *btree_inode; + struct backing_dev_info bdi; spinlock_t hash_lock; struct mutex trans_mutex; struct mutex fs_mutex; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d8474796dea2..6a70e4916ab4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -655,6 +655,55 @@ static int add_hasher(struct btrfs_fs_info *info, char *type) { return 0; } #endif + +static int btrfs_congested_fn(void *congested_data, int bdi_bits) +{ + struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; + int ret = 0; + struct list_head *cur; + struct btrfs_device *device; + struct backing_dev_info *bdi; + + list_for_each(cur, &info->fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); + bdi = blk_get_backing_dev_info(device->bdev); + if (bdi && bdi_congested(bdi, bdi_bits)) { + ret = 1; + break; + } + } + return ret; +} + +void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) +{ + struct list_head *cur; + struct btrfs_device *device; + struct btrfs_fs_info *info; + + info = (struct btrfs_fs_info *)bdi->unplug_io_data; + list_for_each(cur, &info->fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); + bdi = blk_get_backing_dev_info(device->bdev); + if (bdi->unplug_io_fn) { + bdi->unplug_io_fn(bdi, page); + } + } +} + +static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) +{ + bdi_init(bdi); + bdi->ra_pages = default_backing_dev_info.ra_pages; + bdi->state = 0; + bdi->capabilities = default_backing_dev_info.capabilities; + bdi->unplug_io_fn = btrfs_unplug_io_fn; + bdi->unplug_io_data = info; + bdi->congested_fn = btrfs_congested_fn; + bdi->congested_data = info; + return 0; +} + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices) { @@ -708,11 +757,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; fs_info->delalloc_bytes = 0; + setup_bdi(fs_info, &fs_info->bdi); fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; + extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -992,6 +1044,7 @@ int close_ctree(struct btrfs_root *root) #endif close_all_devices(fs_info); btrfs_mapping_tree_free(&fs_info->mapping_tree); + bdi_destroy(&fs_info->bdi); kfree(fs_info->extent_root); kfree(fs_info->tree_root); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index db60d85598ce..0fa7cf227f1a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -494,6 +494,7 @@ void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; + inode->i_mapping->backing_dev_info = &root->fs_info->bdi; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; @@ -508,6 +509,7 @@ void btrfs_read_locked_inode(struct inode *inode) case S_IFLNK: inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; + inode->i_mapping->backing_dev_info = &root->fs_info->bdi; break; default: init_special_inode(inode, inode->i_mode, rdev); @@ -1792,6 +1794,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, drop_inode = 1; else { inode->i_mapping->a_ops = &btrfs_aops; + inode->i_mapping->backing_dev_info = &root->fs_info->bdi; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); @@ -2960,6 +2963,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, drop_inode = 1; else { inode->i_mapping->a_ops = &btrfs_aops; + inode->i_mapping->backing_dev_info = &root->fs_info->bdi; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); @@ -2999,6 +3003,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; + inode->i_mapping->backing_dev_info = &root->fs_info->bdi; inode->i_size = name_len - 1; err = btrfs_update_inode(trans, root, inode); if (err) From 83041add611056e830e29fda913029e37e857239 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 26 Mar 2008 12:02:55 -0400 Subject: [PATCH 0454/2985] Btrfs: Use a higher default ra pages Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6a70e4916ab4..134b722587ec 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -694,7 +694,7 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { bdi_init(bdi); - bdi->ra_pages = default_backing_dev_info.ra_pages; + bdi->ra_pages = default_backing_dev_info.ra_pages * 4; bdi->state = 0; bdi->capabilities = default_backing_dev_info.capabilities; bdi->unplug_io_fn = btrfs_unplug_io_fn; From 2d2ae547979854d10b75d557b3abdb3eb7511bbc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 26 Mar 2008 16:24:23 -0400 Subject: [PATCH 0455/2985] Btrfs: Add leak debugging for extent_buffer and extent_state This also fixes one leak around the super block when failing to mount the FS. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ fs/btrfs/extent_io.c | 28 ++++++++++++++++++++++++++-- fs/btrfs/extent_io.h | 3 ++- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 134b722587ec..1c5e097a4df2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -915,12 +915,14 @@ fail_tree_root: free_extent_buffer(tree_root->node); fail_sb_buffer: free_extent_buffer(fs_info->sb_buffer); + extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); fail_iput: iput(fs_info->btree_inode); fail: close_all_devices(fs_info); kfree(extent_root); kfree(tree_root); + bdi_destroy(&fs_info->bdi); kfree(fs_info); return ERR_PTR(err); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6dab664529c1..b99cfabd008e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -26,6 +26,7 @@ static struct kmem_cache *extent_buffer_cache; static LIST_HEAD(buffers); static LIST_HEAD(states); +static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED; #define BUFFER_LRU_MAX 64 @@ -64,15 +65,22 @@ free_state_cache: void extent_io_exit(void) { struct extent_state *state; + struct extent_buffer *eb; while (!list_empty(&states)) { - state = list_entry(states.next, struct extent_state, list); + state = list_entry(states.next, struct extent_state, leak_list); printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs)); - list_del(&state->list); + list_del(&state->leak_list); kmem_cache_free(extent_state_cache, state); } + while (!list_empty(&buffers)) { + eb = list_entry(buffers.next, struct extent_buffer, leak_list); + printk("buffer leak start %Lu len %lu refs %d\n", eb->start, eb->len, atomic_read(&eb->refs)); + list_del(&eb->leak_list); + kmem_cache_free(extent_buffer_cache, eb); + } if (extent_state_cache) kmem_cache_destroy(extent_state_cache); if (extent_buffer_cache) @@ -109,6 +117,7 @@ EXPORT_SYMBOL(extent_io_tree_empty_lru); struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; + unsigned long flags; state = kmem_cache_alloc(extent_state_cache, mask); if (!state || IS_ERR(state)) @@ -116,6 +125,9 @@ struct extent_state *alloc_extent_state(gfp_t mask) state->state = 0; state->private = 0; state->tree = NULL; + spin_lock_irqsave(&leak_lock, flags); + list_add(&state->leak_list, &states); + spin_unlock_irqrestore(&leak_lock, flags); atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); @@ -128,7 +140,11 @@ void free_extent_state(struct extent_state *state) if (!state) return; if (atomic_dec_and_test(&state->refs)) { + unsigned long flags; WARN_ON(state->tree); + spin_lock_irqsave(&leak_lock, flags); + list_del(&state->leak_list); + spin_unlock_irqrestore(&leak_lock, flags); kmem_cache_free(extent_state_cache, state); } } @@ -2582,6 +2598,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, gfp_t mask) { struct extent_buffer *eb = NULL; + unsigned long flags; spin_lock(&tree->lru_lock); eb = find_lru(tree, start, len); @@ -2594,6 +2611,9 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, INIT_LIST_HEAD(&eb->lru); eb->start = start; eb->len = len; + spin_lock_irqsave(&leak_lock, flags); + list_add(&eb->leak_list, &buffers); + spin_unlock_irqrestore(&leak_lock, flags); atomic_set(&eb->refs, 1); return eb; @@ -2601,6 +2621,10 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, static void __free_extent_buffer(struct extent_buffer *eb) { + unsigned long flags; + spin_lock_irqsave(&leak_lock, flags); + list_del(&eb->leak_list); + spin_unlock_irqrestore(&leak_lock, flags); kmem_cache_free(extent_buffer_cache, eb); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 8b5319db2516..16d67a61a25c 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -66,7 +66,7 @@ struct extent_state { /* for use by the FS */ u64 private; - struct list_head list; + struct list_head leak_list; }; struct extent_buffer { @@ -80,6 +80,7 @@ struct extent_buffer { struct list_head lru; atomic_t refs; int flags; + struct list_head leak_list; }; struct extent_map_tree; From 63b10fc4874a014e22bc4c64e3d92b71180661fe Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 1 Apr 2008 11:21:32 -0400 Subject: [PATCH 0456/2985] Reorder the flags field in struct btrfs_header and record a flag on writeout This allows detection of blocks that have already been written in the running transaction so they can be recowed instead of modified again. It is step one in trusting the transid field of the block pointers. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 10 ++++++++-- fs/btrfs/ctree.h | 28 +++++++++++++++++++++++++--- fs/btrfs/disk-io.c | 13 +++++++++---- 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 70b6ddfe15a1..6ba5394834be 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -124,6 +124,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); btrfs_set_header_owner(cow, new_root_objectid); + btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); WARN_ON(btrfs_header_generation(buf) > trans->transid); ret = btrfs_inc_ref(trans, new_root, buf); @@ -183,6 +184,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); btrfs_set_header_owner(cow, root->root_key.objectid); + btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); WARN_ON(btrfs_header_generation(buf) > trans->transid); if (btrfs_header_generation(buf) != trans->transid) { @@ -245,11 +247,14 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, } header_trans = btrfs_header_generation(buf); - if (header_trans == trans->transid) { + spin_lock(&root->fs_info->hash_lock); + if (header_trans == trans->transid && + !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { *cow_ret = buf; + spin_unlock(&root->fs_info->hash_lock); return 0; } - + spin_unlock(&root->fs_info->hash_lock); search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); @@ -1494,6 +1499,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_bytenr(split, split->start); btrfs_set_header_generation(split, trans->transid); btrfs_set_header_owner(split, root->root_key.objectid); + btrfs_set_header_flags(split, 0); write_extent_buffer(split, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(split), BTRFS_FSID_SIZE); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fec96ba7c23b..67d533cf8f47 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -193,6 +193,8 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) } #define BTRFS_FSID_SIZE 16 +#define BTRFS_HEADER_FLAG_WRITTEN (1 << 0) + /* * every tree block (leaf or node) starts with this header. */ @@ -200,10 +202,10 @@ struct btrfs_header { u8 csum[BTRFS_CSUM_SIZE]; u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ __le64 bytenr; /* which block this node is supposed to live in */ + __le64 flags; __le64 generation; __le64 owner; __le32 nritems; - __le16 flags; u8 level; } __attribute__ ((__packed__)); @@ -229,9 +231,10 @@ struct btrfs_header { */ struct btrfs_super_block { u8 csum[BTRFS_CSUM_SIZE]; - /* the first 3 fields must match struct btrfs_header */ + /* the first 4 fields must match struct btrfs_header */ u8 fsid[16]; /* FS specific uuid */ __le64 bytenr; /* this block number */ + __le64 flags; __le64 magic; __le64 generation; __le64 root; @@ -1045,9 +1048,28 @@ BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, generation, 64); BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); -BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 16); +BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64); BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); +static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag) +{ + return (btrfs_header_flags(eb) & flag) == flag; +} + +static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag) +{ + u64 flags = btrfs_header_flags(eb); + btrfs_set_header_flags(eb, flags | flag); + return (flags & flag) == flag; +} + +static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag) +{ + u64 flags = btrfs_header_flags(eb); + btrfs_set_header_flags(eb, flags & ~flag); + return (flags & flag) == flag; +} + static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) { unsigned long ptr = offsetof(struct btrfs_header, fsid); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1c5e097a4df2..709e33d08bb5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -159,17 +159,19 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, from_this_trans = 1; /* FIXME, this is not good */ - if (from_this_trans == 0 && - memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { + if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { u32 val; u32 found = 0; memcpy(&found, result, BTRFS_CRC32_SIZE); read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE); + WARN_ON(1); printk("btrfs: %s checksum verify failed on %llu " - "wanted %X found %X from_this_trans %d\n", + "wanted %X found %X from_this_trans %d " + "level %d\n", root->fs_info->sb->s_id, - buf->start, val, found, from_this_trans); + buf->start, val, found, from_this_trans, + btrfs_header_level(buf)); return 1; } } else { @@ -220,6 +222,9 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) goto err; } found_level = btrfs_header_level(eb); + spin_lock(&root->fs_info->hash_lock); + btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + spin_unlock(&root->fs_info->hash_lock); csum_tree_block(root, eb, 0); err: free_extent_buffer(eb); From e58ca0203d32869a01540a293df40ddc480dc378 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 1 Apr 2008 11:21:34 -0400 Subject: [PATCH 0457/2985] Fix btrfs_fill_super to return -EINVAL when no FS found Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +-- fs/btrfs/super.c | 4 ++-- fs/btrfs/volumes.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 709e33d08bb5..5547607681f4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -728,9 +728,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); int ret; - int err = -EIO; + int err = -EINVAL; struct btrfs_super_block *disk_super; - if (!extent_root || !tree_root || !fs_info) { err = -ENOMEM; goto fail; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9624923a33dc..892864906880 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -236,9 +236,9 @@ static int btrfs_fill_super(struct super_block * sb, tree_root = open_ctree(sb, fs_devices); - if (!tree_root || IS_ERR(tree_root)) { + if (IS_ERR(tree_root)) { printk("btrfs: open_ctree failed\n"); - return -EIO; + return PTR_ERR(tree_root); } sb->s_fs_info = tree_root; disk_super = &tree_root->fs_info->super_copy; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d8fce32a3bbc..169be0f7285b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -237,7 +237,7 @@ int btrfs_scan_one_device(const char *path, int flags, void *holder, if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { printk("no btrfs found on %s\n", path); - ret = -ENOENT; + ret = -EINVAL; goto error_brelse; } devid = le64_to_cpu(disk_super->dev_item.devid); From 2b114d1d33551ab1dcaa1d0b3a956df3776fb767 Mon Sep 17 00:00:00 2001 From: Peter Date: Tue, 1 Apr 2008 11:21:40 -0400 Subject: [PATCH 0458/2985] Btrfs: Correct usage of IS_ERR() in extent_io.c Signed-off-by: Peter Teoh Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b99cfabd008e..776c33f1e6fe 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -120,7 +120,7 @@ struct extent_state *alloc_extent_state(gfp_t mask) unsigned long flags; state = kmem_cache_alloc(extent_state_cache, mask); - if (!state || IS_ERR(state)) + if (!state) return state; state->state = 0; state->private = 0; @@ -957,7 +957,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, * our range starts. */ node = tree_search(tree, start); - if (!node || IS_ERR(node)) { + if (!node) { goto out; } @@ -990,7 +990,7 @@ struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, * our range starts. */ node = tree_search(tree, start); - if (!node || IS_ERR(node)) { + if (!node) { goto out; } @@ -1024,7 +1024,7 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree, */ search_again: node = tree_search(tree, cur_start); - if (!node || IS_ERR(node)) { + if (!node) { *end = (u64)-1; goto out; } @@ -1112,7 +1112,7 @@ u64 count_range_bits(struct extent_io_tree *tree, * our range starts. */ node = tree_search(tree, cur_start); - if (!node || IS_ERR(node)) { + if (!node) { goto out; } @@ -1213,7 +1213,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) * our range starts. */ node = tree_search(tree, start); - if (!node || IS_ERR(node)) { + if (!node) { ret = -ENOENT; goto out; } @@ -1240,7 +1240,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) * our range starts. */ node = tree_search(tree, start); - if (!node || IS_ERR(node)) { + if (!node) { ret = -ENOENT; goto out; } @@ -2642,7 +2642,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, int uptodate = 1; eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb || IS_ERR(eb)) + if (!eb) return NULL; if (eb->flags & EXTENT_BUFFER_FILLED) @@ -2717,7 +2717,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, int uptodate = 1; eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb || IS_ERR(eb)) + if (!eb) return NULL; if (eb->flags & EXTENT_BUFFER_FILLED) From 140dfd00ae544cdc0c9951b80ad6df61e62859e2 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Tue, 1 Apr 2008 11:21:40 -0400 Subject: [PATCH 0459/2985] btrfs-progs: Stop stomping on 'name' input parameter In btrfs_name_hash, Local variable 'buf' is declared as __u32 buf[2]; but we then try to do this: buf[0] = 0x67452301; buf[1] = 0xefcdab89; buf[2] = 0x98badcfe; buf[3] = 0x10325476; Oops. Fix buf to be the proper size. Signed-off-by: Alex Chiang Signed-off-by: Chris Mason --- fs/btrfs/hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index d5252f5c9d01..21037cfca9bd 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -81,7 +81,7 @@ u64 btrfs_name_hash(const char *name, int len) __u32 hash; __u32 minor_hash = 0; const char *p; - __u32 in[8], buf[2]; + __u32 in[8], buf[4]; u64 hash_result; if (len == 1 && *name == '.') { From ecbe2402cb4e4e7413544dc392c1a78d0f290292 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 1 Apr 2008 11:26:07 -0400 Subject: [PATCH 0460/2985] Btrfs: Keep fs_mutex during reads done by snapshot deletion There was an optimization to drop the fs_mutex when doing snapshot deletion reads, but this can lead to false positives on checksumming errors. Keep the lock for now. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c226656f29b7..cf283b0271ac 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2046,11 +2046,12 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); reada_walk_down(root, cur, path->slots[*level]); - mutex_unlock(&root->fs_info->fs_mutex); next = read_tree_block(root, bytenr, blocksize); - mutex_lock(&root->fs_info->fs_mutex); - /* we dropped the lock, check one more time */ + /* we used to drop the lock above, keep the + * code to double check so that we won't forget + * when we drop the lock again in the future + */ ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); From 0999df54f850fe1aba29b10d5c869493af107478 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 1 Apr 2008 13:48:14 -0400 Subject: [PATCH 0461/2985] Btrfs: Verify checksums on tree blocks found without read_tree_block Checksums were only verified by btrfs_read_tree_block, which meant the functions to probe the page cache for blocks were not validating checksums. Normally this is fine because the buffers will only be in cache if they have already been validated. But, there is a window while the buffer is being read from disk where it could be up to date in the cache but not yet verified. This patch makes sure all buffers go through checksum verification before they are used. This is safer, and it prevents modification of buffers before they go through the csum code. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 ++ fs/btrfs/disk-io.c | 102 +++++++++++++++++++++++------------------ fs/btrfs/disk-io.h | 2 + fs/btrfs/extent-tree.c | 2 + fs/btrfs/tree-defrag.c | 1 + 5 files changed, 68 insertions(+), 44 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6ba5394834be..df090bf2eec0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -158,6 +158,8 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, } else { root_gen = 0; } + if (!(buf->flags & EXTENT_CSUM)) + WARN_ON(1); WARN_ON(root->ref_cows && trans->transid != root->fs_info->running_transaction->transid); @@ -245,6 +247,8 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, root->fs_info->generation); WARN_ON(1); } + if (!(buf->flags & EXTENT_CSUM)) + WARN_ON(1); header_trans = btrfs_header_generation(buf); spin_lock(&root->fs_info->hash_lock); @@ -396,6 +400,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (search_start == 0) search_start = last_block; + btrfs_verify_block_csum(root, cur); err = __btrfs_cow_block(trans, root, cur, parent, i, &tmp, search_start, min(16 * blocksize, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5547607681f4..e40fb318ad99 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -46,27 +46,6 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops; -struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 bytenr, u32 blocksize) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_buffer *eb; - eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, - bytenr, blocksize, GFP_NOFS); - return eb; -} - -struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 bytenr, u32 blocksize) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_buffer *eb; - - eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, - bytenr, blocksize, NULL, GFP_NOFS); - return eb; -} - struct extent_map *btree_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 len, int create) @@ -380,13 +359,69 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) return 0; } +int btrfs_verify_block_csum(struct btrfs_root *root, + struct extent_buffer *buf) +{ + struct extent_io_tree *io_tree; + u64 end; + int ret; + + io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; + if (buf->flags & EXTENT_CSUM) + return 0; + + end = min_t(u64, buf->len, PAGE_CACHE_SIZE); + end = buf->start + end - 1; + if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { + buf->flags |= EXTENT_CSUM; + return 0; + } + + lock_extent(io_tree, buf->start, end, GFP_NOFS); + + if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { + buf->flags |= EXTENT_CSUM; + ret = 0; + goto out_unlock; + } + + ret = csum_tree_block(root, buf, 1); + set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); + buf->flags |= EXTENT_CSUM; + +out_unlock: + unlock_extent(io_tree, buf->start, end, GFP_NOFS); + return ret; +} + +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 bytenr, u32 blocksize) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_buffer *eb; + eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, + bytenr, blocksize, GFP_NOFS); + return eb; +} + +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 bytenr, u32 blocksize) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_buffer *eb; + + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, + bytenr, blocksize, NULL, GFP_NOFS); + return eb; +} + + struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; struct extent_io_tree *io_tree; - u64 end; int ret; io_tree = &BTRFS_I(btree_inode)->io_tree; @@ -397,28 +432,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1, btree_get_extent); - if (buf->flags & EXTENT_CSUM) - return buf; - - end = buf->start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { - buf->flags |= EXTENT_CSUM; - return buf; - } - - lock_extent(io_tree, buf->start, end, GFP_NOFS); - - if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { - buf->flags |= EXTENT_CSUM; - goto out_unlock; - } - - ret = csum_tree_block(root, buf, 1); - set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); - buf->flags |= EXTENT_CSUM; - -out_unlock: - unlock_extent(io_tree, buf->start, end, GFP_NOFS); + ret = btrfs_verify_block_csum(root, buf); return buf; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b7cbc58a5553..05b88d0e75eb 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -69,4 +69,6 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); void btrfs_throttle(struct btrfs_root *root); int btrfs_open_device(struct btrfs_device *dev); +int btrfs_verify_block_csum(struct btrfs_root *root, + struct extent_buffer *buf); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cf283b0271ac..a34c289aec21 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2069,6 +2069,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, BUG_ON(ret); continue; } + } else if (next) { + btrfs_verify_block_csum(root, next); } WARN_ON(*level <= 0); if (path->nodes[*level-1]) diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 5935cbd8f2b8..256af1870eef 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -101,6 +101,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, path->slots[*level]++; continue; } + btrfs_verify_block_csum(root, next); } else { next = read_tree_block(root, bytenr, btrfs_level_size(root, *level - 1)); From 0ef8b2428a339e83fe225e82ef8bd0ea3f62b180 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Apr 2008 16:29:02 -0400 Subject: [PATCH 0462/2985] Btrfs: Properly dirty buffers in the split corner cases Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index df090bf2eec0..3c7891cf0d72 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2052,6 +2052,7 @@ again: path->nodes[0] = right; path->slots[0] = 0; path->slots[1] += 1; + btrfs_mark_buffer_dirty(right); return ret; } mid = slot; @@ -2082,6 +2083,7 @@ again: if (wret) ret = wret; } + btrfs_mark_buffer_dirty(right); return ret; } else if (extend && slot == 0) { mid = 1; From 8790d502e4401a4a3a4175b83a3a47e8d595c771 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Apr 2008 16:29:03 -0400 Subject: [PATCH 0463/2985] Btrfs: Add support for mirroring across drives Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 9 ++- fs/btrfs/disk-io.c | 20 ++---- fs/btrfs/extent-tree.c | 95 ++++++++++++++------------ fs/btrfs/inode.c | 4 +- fs/btrfs/volumes.c | 152 +++++++++++++++++++++++++++++++++-------- fs/btrfs/volumes.h | 11 +-- 6 files changed, 198 insertions(+), 93 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 67d533cf8f47..0a207861472e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -418,6 +418,7 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) #define BTRFS_BLOCK_GROUP_METADATA (1 << 2) #define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) +#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) struct btrfs_block_group_item { @@ -504,8 +505,12 @@ struct btrfs_fs_info { u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; - int extra_data_alloc_bits; - int extra_alloc_bits; + int avail_data_alloc_bits; + int avail_metadata_alloc_bits; + int avail_system_alloc_bits; + int data_alloc_profile; + int metadata_alloc_profile; + int system_alloc_profile; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e40fb318ad99..ff75ad586767 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -735,7 +735,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), + struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), GFP_NOFS); struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); @@ -744,6 +744,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, int ret; int err = -EINVAL; struct btrfs_super_block *disk_super; + if (!extent_root || !tree_root || !fs_info) { err = -ENOMEM; goto fail; @@ -756,11 +757,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); - memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); init_completion(&fs_info->kobj_unregister); sb_set_blocksize(sb, 4096); - fs_info->running_transaction = NULL; - fs_info->last_trans_committed = 0; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; @@ -770,11 +768,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); fs_info->sb = sb; - fs_info->throttles = 0; - fs_info->mount_opt = 0; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; - fs_info->delalloc_bytes = 0; setup_bdi(fs_info, &fs_info->bdi); fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -802,12 +797,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, extent_io_tree_init(&fs_info->extent_ins, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; - fs_info->closing = 0; - fs_info->total_pinned = 0; - fs_info->last_alloc = 0; - fs_info->last_data_alloc = 0; - fs_info->extra_alloc_bits = 0; - fs_info->extra_data_alloc_bits = 0; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); @@ -923,6 +912,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; + if (btrfs_super_num_devices(disk_super) > 0) { + fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; + fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1; + fs_info->system_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; + } mutex_unlock(&fs_info->fs_mutex); return tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a34c289aec21..4ab98d8b73fa 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -230,9 +230,13 @@ again: goto new_group; if (start + num > total_fs_bytes) goto new_group; + if (!block_group_bits(cache, data)) { + printk("block group bits don't match %Lu %Lu\n", cache->flags, data); + } *start_ret = start; return 0; - } out: + } +out: cache = btrfs_lookup_block_group(root->fs_info, search_start); if (!cache) { printk("Unable to find block group for %Lu\n", search_start); @@ -365,14 +369,17 @@ again: if (cache->key.objectid > total_fs_bytes) break; - if (full_search) - free_check = cache->key.offset; - else - free_check = div_factor(cache->key.offset, factor); + if (block_group_bits(cache, data)) { + if (full_search) + free_check = cache->key.offset; + else + free_check = div_factor(cache->key.offset, + factor); - if (used + cache->pinned < free_check) { - found_group = cache; - goto found; + if (used + cache->pinned < free_check) { + found_group = cache; + goto found; + } } cond_resched(); } @@ -1038,6 +1045,19 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, return 0; } +static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) +{ + u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1); + if (extra_flags) { + if (flags & BTRFS_BLOCK_GROUP_DATA) + fs_info->avail_data_alloc_bits |= extra_flags; + if (flags & BTRFS_BLOCK_GROUP_METADATA) + fs_info->avail_metadata_alloc_bits |= extra_flags; + if (flags & BTRFS_BLOCK_GROUP_SYSTEM) + fs_info->avail_system_alloc_bits |= extra_flags; + } +} static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, @@ -1060,7 +1080,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, if (space_info->full) return 0; - thresh = div_factor(space_info->total_bytes, 7); + thresh = div_factor(space_info->total_bytes, 6); if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < thresh) return 0; @@ -1079,16 +1099,7 @@ printk("space info full %Lu\n", flags); start, num_bytes); BUG_ON(ret); - if (flags & BTRFS_BLOCK_GROUP_RAID0) { - if (flags & BTRFS_BLOCK_GROUP_DATA) { - extent_root->fs_info->extra_data_alloc_bits = - BTRFS_BLOCK_GROUP_RAID0; - } - if (flags & BTRFS_BLOCK_GROUP_METADATA) { - extent_root->fs_info->extra_alloc_bits = - BTRFS_BLOCK_GROUP_RAID0; - } - } + set_avail_alloc_bits(extent_root->fs_info, flags); return 0; } @@ -1529,6 +1540,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (data & BTRFS_BLOCK_GROUP_METADATA) { last_ptr = &root->fs_info->last_alloc; + empty_cluster = 256 * 1024; } if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { @@ -1693,6 +1705,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 root_used; u64 search_start = 0; u64 new_hint; + u64 alloc_profile; u32 sizes[2]; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -1700,31 +1713,32 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_extent_ref *ref; struct btrfs_path *path; struct btrfs_key keys[2]; - int extra_chunk_alloc_bits = 0; if (data) { - data = BTRFS_BLOCK_GROUP_DATA | info->extra_data_alloc_bits; + alloc_profile = info->avail_data_alloc_bits & + info->data_alloc_profile; + data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; } else if (root == root->fs_info->chunk_root) { - data = BTRFS_BLOCK_GROUP_SYSTEM; + alloc_profile = info->avail_system_alloc_bits & + info->system_alloc_profile; + data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; } else { - data = BTRFS_BLOCK_GROUP_METADATA | info->extra_alloc_bits; + alloc_profile = info->avail_metadata_alloc_bits & + info->metadata_alloc_profile; + data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } - if (btrfs_super_num_devices(&info->super_copy) > 1 && - !(data & BTRFS_BLOCK_GROUP_SYSTEM)) - extra_chunk_alloc_bits = BTRFS_BLOCK_GROUP_RAID0; if (root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, BTRFS_BLOCK_GROUP_METADATA | - info->extra_alloc_bits | - extra_chunk_alloc_bits); + (info->metadata_alloc_profile & + info->avail_metadata_alloc_bits)); BUG_ON(ret); } ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data | - extra_chunk_alloc_bits); + num_bytes + 2 * 1024 * 1024, data); BUG_ON(ret); } @@ -2046,12 +2060,12 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); reada_walk_down(root, cur, path->slots[*level]); - next = read_tree_block(root, bytenr, blocksize); - /* we used to drop the lock above, keep the - * code to double check so that we won't forget - * when we drop the lock again in the future - */ + mutex_unlock(&root->fs_info->fs_mutex); + next = read_tree_block(root, bytenr, blocksize); + mutex_lock(&root->fs_info->fs_mutex); + + /* we've dropped the lock, double check */ ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); @@ -2739,16 +2753,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { bit = BLOCK_GROUP_METADATA; } - if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) { - if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { - info->extra_data_alloc_bits = - BTRFS_BLOCK_GROUP_RAID0; - } - if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { - info->extra_alloc_bits = - BTRFS_BLOCK_GROUP_RAID0; - } - } + set_avail_alloc_bits(info, cache->flags); ret = update_space_info(info, cache->flags, found_key.offset, btrfs_block_group_used(&cache->item), diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0fa7cf227f1a..a8ae68c6fbb8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -306,6 +306,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, u64 physical; u64 length = 0; u64 map_length; + int total_devs; struct bio_vec *bvec; int i; int ret; @@ -315,7 +316,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, } map_tree = &root->fs_info->mapping_tree; map_length = length; - ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); + ret = btrfs_map_block(map_tree, READ, 0, logical, &physical, + &map_length, &dev, &total_devs); if (map_length < length + size) { return 1; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 169be0f7285b..bc3c0b97588e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -31,6 +31,13 @@ struct stripe { u64 physical; }; +struct multi_bio { + atomic_t stripes; + bio_end_io_t *end_io; + void *private; + int error; +}; + struct map_lookup { u64 type; int io_align; @@ -632,12 +639,12 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (list_empty(dev_list)) return -ENOSPC; - if (type & BTRFS_BLOCK_GROUP_RAID0) + if (type & (BTRFS_BLOCK_GROUP_RAID0)) num_stripes = btrfs_super_num_devices(&info->super_copy); - if (type & BTRFS_BLOCK_GROUP_DATA) - stripe_len = 64 * 1024; - if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) - stripe_len = 32 * 1024; + if (type & (BTRFS_BLOCK_GROUP_RAID1)) { + num_stripes = min_t(u64, 2, + btrfs_super_num_devices(&info->super_copy)); + } again: INIT_LIST_HEAD(&private_devs); cur = dev_list->next; @@ -682,7 +689,11 @@ again: stripes = &chunk->stripe; - *num_bytes = calc_size * num_stripes; + if (type & BTRFS_BLOCK_GROUP_RAID1) + *num_bytes = calc_size; + else + *num_bytes = calc_size * num_stripes; + index = 0; while(index < num_stripes) { BUG_ON(list_empty(&private_devs)); @@ -694,7 +705,7 @@ again: key.objectid, calc_size, &dev_offset); BUG_ON(ret); -printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); +printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.objectid, calc_size, device->devid, type); device->bytes_used += calc_size; ret = btrfs_update_device(trans, device); BUG_ON(ret); @@ -774,9 +785,9 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) } } -int btrfs_map_block(struct btrfs_mapping_tree *map_tree, - u64 logical, u64 *phys, u64 *length, - struct btrfs_device **dev) +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, + int dev_nr, u64 logical, u64 *phys, u64 *length, + struct btrfs_device **dev, int *total_devs) { struct extent_map *em; struct map_lookup *map; @@ -808,19 +819,39 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, /* stripe_offset is the offset of this block in its stripe*/ stripe_offset = offset - stripe_offset; - /* - * after this do_div call, stripe_nr is the number of stripes - * on this device we have to walk to find the data, and - * stripe_index is the number of our device in the stripe array - */ - stripe_index = do_div(stripe_nr, map->num_stripes); + if (map->type & BTRFS_BLOCK_GROUP_RAID1) { + stripe_index = dev_nr; + if (rw & (1 << BIO_RW)) + *total_devs = map->num_stripes; + else { + int i; + u64 least = (u64)-1; + struct btrfs_device *cur; + for (i = 0; i < map->num_stripes; i++) { + cur = map->stripes[i].dev; + spin_lock(&cur->io_lock); + if (cur->total_ios < least) { + least = cur->total_ios; + stripe_index = i; + } + spin_unlock(&cur->io_lock); + } + *total_devs = 1; + } + } else { + /* + * after this do_div call, stripe_nr is the number of stripes + * on this device we have to walk to find the data, and + * stripe_index is the number of our device in the stripe array + */ + stripe_index = do_div(stripe_nr, map->num_stripes); + } BUG_ON(stripe_index >= map->num_stripes); - *phys = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; - if (map->type & BTRFS_BLOCK_GROUP_RAID0) { + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) { /* we limit the length of each bio to what fits in a stripe */ *length = min_t(u64, em->len - offset, map->stripe_len - stripe_offset); @@ -833,33 +864,98 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, return 0; } +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_multi_stripe(struct bio *bio, int err) +#else +static int end_bio_multi_stripe(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + struct multi_bio *multi = bio->bi_private; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + if (err) + multi->error = err; + + if (atomic_dec_and_test(&multi->stripes)) { + bio->bi_private = multi->private; + bio->bi_end_io = multi->end_io; + + if (!err && multi->error) + err = multi->error; + kfree(multi); + + bio_endio(bio, err); + } else { + bio_put(bio); + } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) { struct btrfs_mapping_tree *map_tree; struct btrfs_device *dev; + struct bio *first_bio = bio; u64 logical = bio->bi_sector << 9; u64 physical; u64 length = 0; u64 map_length; struct bio_vec *bvec; + struct multi_bio *multi = NULL; int i; int ret; + int dev_nr = 0; + int total_devs = 1; bio_for_each_segment(bvec, bio, i) { length += bvec->bv_len; } + map_tree = &root->fs_info->mapping_tree; map_length = length; - ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); - if (map_length < length) { - printk("mapping failed logical %Lu bio len %Lu physical %Lu " - "len %Lu\n", logical, length, physical, map_length); - BUG(); + while(dev_nr < total_devs) { + ret = btrfs_map_block(map_tree, rw, dev_nr, logical, + &physical, &map_length, &dev, + &total_devs); + if (map_length < length) { + printk("mapping failed logical %Lu bio len %Lu physical %Lu " + "len %Lu\n", logical, length, physical, map_length); + BUG(); + } + BUG_ON(map_length < length); + if (total_devs > 1) { + if (!multi) { + multi = kmalloc(sizeof(*multi), GFP_NOFS); + atomic_set(&multi->stripes, 1); + multi->end_io = bio->bi_end_io; + multi->private = first_bio->bi_private; + multi->error = 0; + } else { + atomic_inc(&multi->stripes); + } + if (dev_nr < total_devs - 1) { + bio = bio_clone(first_bio, GFP_NOFS); + BUG_ON(!bio); + } else { + bio = first_bio; + } + bio->bi_private = multi; + bio->bi_end_io = end_bio_multi_stripe; + } + bio->bi_sector = physical >> 9; + bio->bi_bdev = dev->bdev; + spin_lock(&dev->io_lock); + dev->total_ios++; + spin_unlock(&dev->io_lock); + submit_bio(rw, bio); + dev_nr++; } - BUG_ON(map_length < length); - bio->bi_sector = physical >> 9; - bio->bi_bdev = dev->bdev; - submit_bio(rw, bio); return 0; } @@ -982,6 +1078,8 @@ static int read_one_dev(struct btrfs_root *root, return -ENOMEM; list_add(&device->dev_list, &root->fs_info->fs_devices->devices); + device->total_ios = 0; + spin_lock_init(&device->io_lock); } fill_device_from_item(leaf, dev_item, device); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 12f297eb0559..35dec3efd78c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -18,12 +18,16 @@ #ifndef __BTRFS_VOLUMES_ #define __BTRFS_VOLUMES_ + struct btrfs_device { struct list_head dev_list; struct btrfs_root *dev_root; + spinlock_t io_lock; struct block_device *bdev; + u64 total_ios; + char *name; /* the internal btrfs device id */ @@ -68,9 +72,9 @@ struct btrfs_fs_devices { int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 owner, u64 num_bytes, u64 *start); -int btrfs_map_block(struct btrfs_mapping_tree *map_tree, +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int stripe_nr, u64 logical, u64 *phys, u64 *length, - struct btrfs_device **dev); + struct btrfs_device **dev, int *total_stripes); int btrfs_read_sys_array(struct btrfs_root *root); int btrfs_read_chunk_tree(struct btrfs_root *root); int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, @@ -80,9 +84,6 @@ void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio); int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); -int btrfs_map_block(struct btrfs_mapping_tree *map_tree, - u64 logical, u64 *phys, u64 *length, - struct btrfs_device **dev); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int flags, void *holder); int btrfs_scan_one_device(const char *path, int flags, void *holder, From 611f0e00a27fe0e5a571194a12443ecdc99a43ef Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Apr 2008 16:29:03 -0400 Subject: [PATCH 0464/2985] Btrfs: Add support for duplicate blocks on a single spindle Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 8 +++++--- fs/btrfs/extent-tree.c | 5 +++-- fs/btrfs/volumes.c | 32 ++++++++++++++++++++++++++++---- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0a207861472e..72deae63ec28 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -419,6 +419,7 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_METADATA (1 << 2) #define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) +#define BTRFS_BLOCK_GROUP_DUP (1 << 5) struct btrfs_block_group_item { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ff75ad586767..42522232fde4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -913,9 +913,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->generation = btrfs_super_generation(disk_super) + 1; if (btrfs_super_num_devices(disk_super) > 0) { - fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; - fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1; - fs_info->system_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; + fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1; + fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP; + fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; } mutex_unlock(&fs_info->fs_mutex); return tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4ab98d8b73fa..1885ec4280c8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -231,7 +231,7 @@ again: if (start + num > total_fs_bytes) goto new_group; if (!block_group_bits(cache, data)) { - printk("block group bits don't match %Lu %Lu\n", cache->flags, data); + printk("block group bits don't match %Lu %d\n", cache->flags, data); } *start_ret = start; return 0; @@ -1048,7 +1048,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) { u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID1); + BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP); if (extra_flags) { if (flags & BTRFS_BLOCK_GROUP_DATA) fs_info->avail_data_alloc_bits |= extra_flags; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bc3c0b97588e..b9294e3c05f0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -627,6 +627,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct extent_map *em; u64 physical; u64 calc_size = 1024 * 1024 * 1024; + u64 min_free = calc_size; u64 avail; u64 max_avail = 0; int num_stripes = 1; @@ -641,6 +642,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & (BTRFS_BLOCK_GROUP_RAID0)) num_stripes = btrfs_super_num_devices(&info->super_copy); + if (type & (BTRFS_BLOCK_GROUP_DUP)) + num_stripes = 2; if (type & (BTRFS_BLOCK_GROUP_RAID1)) { num_stripes = min_t(u64, 2, btrfs_super_num_devices(&info->super_copy)); @@ -649,16 +652,23 @@ again: INIT_LIST_HEAD(&private_devs); cur = dev_list->next; index = 0; + + if (type & BTRFS_BLOCK_GROUP_DUP) + min_free = calc_size * 2; + /* build a private list of devices we will allocate from */ while(index < num_stripes) { device = list_entry(cur, struct btrfs_device, dev_list); + avail = device->total_bytes - device->bytes_used; cur = cur->next; if (avail > max_avail) max_avail = avail; - if (avail >= calc_size) { + if (avail >= min_free) { list_move_tail(&device->dev_list, &private_devs); index++; + if (type & BTRFS_BLOCK_GROUP_DUP) + index++; } if (cur == dev_list) break; @@ -689,17 +699,22 @@ again: stripes = &chunk->stripe; - if (type & BTRFS_BLOCK_GROUP_RAID1) + if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) *num_bytes = calc_size; else *num_bytes = calc_size * num_stripes; index = 0; +printk("new chunk type %Lu start %Lu size %Lu\n", type, key.objectid, *num_bytes); while(index < num_stripes) { BUG_ON(list_empty(&private_devs)); cur = private_devs.next; device = list_entry(cur, struct btrfs_device, dev_list); - list_move_tail(&device->dev_list, dev_list); + + /* loop over this device again if we're doing a dup group */ + if (!(type & BTRFS_BLOCK_GROUP_DUP) || + (index == num_stripes - 1)) + list_move_tail(&device->dev_list, dev_list); ret = btrfs_alloc_dev_extent(trans, device, key.objectid, @@ -839,6 +854,14 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, } *total_devs = 1; } + } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { + if (rw == WRITE) { + *total_devs = map->num_stripes; + stripe_index = dev_nr; + } else { + stripe_index = 0; + *total_devs = 1; + } } else { /* * after this do_div call, stripe_nr is the number of stripes @@ -851,7 +874,8 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, *phys = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; - if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1)) { + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP)) { /* we limit the length of each bio to what fits in a stripe */ *length = min_t(u64, em->len - offset, map->stripe_len - stripe_offset); From 6bc34676c0b5836655ec1c7998e2647cabb933ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 4 Apr 2008 15:40:00 -0400 Subject: [PATCH 0465/2985] Btrfs: Don't allow written blocks from this transaction to be reallocated When a block is freed, it can be immediately reused if it is from the current transaction. But, an extra check is required to make sure the block had not been written yet. If it were reused after being written, the transid in the block header might match the transid of the next time the block was allocated. The parent node records the transaction ID of the block it is pointing to, and this is used as part of validating the block on reads. So, there can only be one version of a block per transaction. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1885ec4280c8..0c0dde6d9614 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1288,7 +1288,9 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, root->fs_info->running_transaction->transid; u64 header_transid = btrfs_header_generation(buf); - if (header_transid == transid) { + if (header_transid == transid && + !btrfs_header_flag(buf, + BTRFS_HEADER_FLAG_WRITTEN)) { clean_tree_block(NULL, root, buf); free_extent_buffer(buf); return 1; From d18a2c447524751137a12cc8ccaf9d1e0b7fa1b3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 4 Apr 2008 15:40:00 -0400 Subject: [PATCH 0466/2985] Btrfs: Fix allocation profile init Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 13 +++++++------ fs/btrfs/disk-io.c | 11 ++++------- fs/btrfs/extent-tree.c | 10 ++-------- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 72deae63ec28..4b3b20459f6e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -506,12 +506,13 @@ struct btrfs_fs_info { u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; - int avail_data_alloc_bits; - int avail_metadata_alloc_bits; - int avail_system_alloc_bits; - int data_alloc_profile; - int metadata_alloc_profile; - int system_alloc_profile; + + u64 avail_data_alloc_bits; + u64 avail_metadata_alloc_bits; + u64 avail_system_alloc_bits; + u64 data_alloc_profile; + u64 metadata_alloc_profile; + u64 system_alloc_profile; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 42522232fde4..bc5bd5abb28a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -912,13 +912,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; - if (btrfs_super_num_devices(disk_super) > 0) { - fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID1; - fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_DUP; - fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; - } + fs_info->data_alloc_profile = (u64)-1; + fs_info->metadata_alloc_profile = (u64)-1; + fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; + mutex_unlock(&fs_info->fs_mutex); return tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0c0dde6d9614..0cb742884cdf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1100,7 +1100,6 @@ printk("space info full %Lu\n", flags); start, num_bytes); BUG_ON(ret); - set_avail_alloc_bits(extent_root->fs_info, flags); return 0; } @@ -2813,13 +2812,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, &cache->space_info); BUG_ON(ret); - if (type & BTRFS_BLOCK_GROUP_DATA) { - bit = BLOCK_GROUP_DATA; - } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { - bit = BLOCK_GROUP_SYSTEM; - } else if (type & BTRFS_BLOCK_GROUP_METADATA) { - bit = BLOCK_GROUP_METADATA; - } + bit = block_group_state_bits(type); set_extent_bits(block_group_cache, chunk_objectid, chunk_objectid + size - 1, bit | EXTENT_LOCKED, GFP_NOFS); @@ -2833,5 +2826,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, finish_current_insert(trans, extent_root); ret = del_pending_extents(trans, extent_root); BUG_ON(ret); + set_avail_alloc_bits(extent_root->fs_info, type); return 0; } From cea9e4452ebaf18dd0951e90dc84d82a5dee40b2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Apr 2008 16:28:12 -0400 Subject: [PATCH 0467/2985] Change btrfs_map_block to return a structure with mappings for all stripes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 4 +- fs/btrfs/inode.c | 8 +-- fs/btrfs/volumes.c | 133 +++++++++++++++++++++++++-------------------- fs/btrfs/volumes.h | 25 ++++++++- 4 files changed, 102 insertions(+), 68 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3c7891cf0d72..c92c6b0ee582 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2025,8 +2025,10 @@ again: root->root_key.objectid, root_gen, disk_key.objectid, 0, l->start, 0); - if (IS_ERR(right)) + if (IS_ERR(right)) { + BUG_ON(1); return PTR_ERR(right); + } memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); btrfs_set_header_bytenr(right, right->start); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a8ae68c6fbb8..7ae677d8a6de 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -301,12 +301,9 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, { struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; struct btrfs_mapping_tree *map_tree; - struct btrfs_device *dev; u64 logical = bio->bi_sector << 9; - u64 physical; u64 length = 0; u64 map_length; - int total_devs; struct bio_vec *bvec; int i; int ret; @@ -316,8 +313,9 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, } map_tree = &root->fs_info->mapping_tree; map_length = length; - ret = btrfs_map_block(map_tree, READ, 0, logical, &physical, - &map_length, &dev, &total_devs); + ret = btrfs_map_block(map_tree, READ, logical, + &map_length, NULL); + if (map_length < length + size) { return 1; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b9294e3c05f0..008d3640e8c2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -26,18 +26,6 @@ #include "print-tree.h" #include "volumes.h" -struct stripe { - struct btrfs_device *dev; - u64 physical; -}; - -struct multi_bio { - atomic_t stripes; - bio_end_io_t *end_io; - void *private; - int error; -}; - struct map_lookup { u64 type; int io_align; @@ -45,11 +33,11 @@ struct map_lookup { int stripe_len; int sector_size; int num_stripes; - struct stripe stripes[]; + struct btrfs_bio_stripe stripes[]; }; #define map_lookup_size(n) (sizeof(struct map_lookup) + \ - (sizeof(struct stripe) * (n))) + (sizeof(struct btrfs_bio_stripe) * (n))) static DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); @@ -801,8 +789,8 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) } int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, - int dev_nr, u64 logical, u64 *phys, u64 *length, - struct btrfs_device **dev, int *total_devs) + u64 logical, u64 *length, + struct btrfs_multi_bio **multi_ret) { struct extent_map *em; struct map_lookup *map; @@ -810,8 +798,21 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 offset; u64 stripe_offset; u64 stripe_nr; + int stripes_allocated = 8; int stripe_index; + int i; + struct btrfs_multi_bio *multi = NULL; + if (multi_ret && !(rw & (1 << BIO_RW))) { + stripes_allocated = 1; + } +again: + if (multi_ret) { + multi = kzalloc(btrfs_multi_bio_size(stripes_allocated), + GFP_NOFS); + if (!multi) + return -ENOMEM; + } spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, *length); @@ -821,6 +822,17 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, map = (struct map_lookup *)em->bdev; offset = logical - em->start; + /* if our multi bio struct is too small, back off and try again */ + if (multi_ret && (rw & (1 << BIO_RW)) && + stripes_allocated < map->num_stripes && + ((map->type & BTRFS_BLOCK_GROUP_RAID1) || + (map->type & BTRFS_BLOCK_GROUP_DUP))) { + stripes_allocated = map->num_stripes; + spin_unlock(&em_tree->lock); + free_extent_map(em); + kfree(multi); + goto again; + } stripe_nr = offset; /* * stripe_nr counts the total number of stripes we have to stride @@ -834,10 +846,22 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, /* stripe_offset is the offset of this block in its stripe*/ stripe_offset = offset - stripe_offset; + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP)) { + /* we limit the length of each bio to what fits in a stripe */ + *length = min_t(u64, em->len - offset, + map->stripe_len - stripe_offset); + } else { + *length = em->len - offset; + } + if (!multi_ret) + goto out; + + multi->num_stripes = 1; + stripe_index = 0; if (map->type & BTRFS_BLOCK_GROUP_RAID1) { - stripe_index = dev_nr; if (rw & (1 << BIO_RW)) - *total_devs = map->num_stripes; + multi->num_stripes = map->num_stripes; else { int i; u64 least = (u64)-1; @@ -852,16 +876,10 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, } spin_unlock(&cur->io_lock); } - *total_devs = 1; } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { - if (rw == WRITE) { - *total_devs = map->num_stripes; - stripe_index = dev_nr; - } else { - stripe_index = 0; - *total_devs = 1; - } + if (rw & (1 << BIO_RW)) + multi->num_stripes = map->num_stripes; } else { /* * after this do_div call, stripe_nr is the number of stripes @@ -871,18 +889,17 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, stripe_index = do_div(stripe_nr, map->num_stripes); } BUG_ON(stripe_index >= map->num_stripes); - *phys = map->stripes[stripe_index].physical + stripe_offset + - stripe_nr * map->stripe_len; + BUG_ON(stripe_index != 0 && multi->num_stripes > 1); - if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_DUP)) { - /* we limit the length of each bio to what fits in a stripe */ - *length = min_t(u64, em->len - offset, - map->stripe_len - stripe_offset); - } else { - *length = em->len - offset; + for (i = 0; i < multi->num_stripes; i++) { + multi->stripes[i].physical = + map->stripes[stripe_index].physical + stripe_offset + + stripe_nr * map->stripe_len; + multi->stripes[i].dev = map->stripes[stripe_index].dev; + stripe_index++; } - *dev = map->stripes[stripe_index].dev; + *multi_ret = multi; +out: free_extent_map(em); spin_unlock(&em_tree->lock); return 0; @@ -895,7 +912,7 @@ static int end_bio_multi_stripe(struct bio *bio, unsigned int bytes_done, int err) #endif { - struct multi_bio *multi = bio->bi_private; + struct btrfs_multi_bio *multi = bio->bi_private; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) if (bio->bi_size) @@ -904,7 +921,7 @@ static int end_bio_multi_stripe(struct bio *bio, if (err) multi->error = err; - if (atomic_dec_and_test(&multi->stripes)) { + if (atomic_dec_and_test(&multi->stripes_pending)) { bio->bi_private = multi->private; bio->bi_end_io = multi->end_io; @@ -927,11 +944,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) struct btrfs_device *dev; struct bio *first_bio = bio; u64 logical = bio->bi_sector << 9; - u64 physical; u64 length = 0; u64 map_length; struct bio_vec *bvec; - struct multi_bio *multi = NULL; + struct btrfs_multi_bio *multi = NULL; int i; int ret; int dev_nr = 0; @@ -943,26 +959,22 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) map_tree = &root->fs_info->mapping_tree; map_length = length; + + ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi); + BUG_ON(ret); + + total_devs = multi->num_stripes; + if (map_length < length) { + printk("mapping failed logical %Lu bio len %Lu " + "len %Lu\n", logical, length, map_length); + BUG(); + } + multi->end_io = first_bio->bi_end_io; + multi->private = first_bio->bi_private; + atomic_set(&multi->stripes_pending, multi->num_stripes); + while(dev_nr < total_devs) { - ret = btrfs_map_block(map_tree, rw, dev_nr, logical, - &physical, &map_length, &dev, - &total_devs); - if (map_length < length) { - printk("mapping failed logical %Lu bio len %Lu physical %Lu " - "len %Lu\n", logical, length, physical, map_length); - BUG(); - } - BUG_ON(map_length < length); if (total_devs > 1) { - if (!multi) { - multi = kmalloc(sizeof(*multi), GFP_NOFS); - atomic_set(&multi->stripes, 1); - multi->end_io = bio->bi_end_io; - multi->private = first_bio->bi_private; - multi->error = 0; - } else { - atomic_inc(&multi->stripes); - } if (dev_nr < total_devs - 1) { bio = bio_clone(first_bio, GFP_NOFS); BUG_ON(!bio); @@ -972,7 +984,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) bio->bi_private = multi; bio->bi_end_io = end_bio_multi_stripe; } - bio->bi_sector = physical >> 9; + bio->bi_sector = multi->stripes[dev_nr].physical >> 9; + dev = multi->stripes[dev_nr].dev; bio->bi_bdev = dev->bdev; spin_lock(&dev->io_lock); dev->total_ios++; @@ -980,6 +993,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) submit_bio(rw, bio); dev_nr++; } + if (total_devs == 1) + kfree(multi); return 0; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 35dec3efd78c..10ca0104750c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -19,6 +19,8 @@ #ifndef __BTRFS_VOLUMES_ #define __BTRFS_VOLUMES_ +#include + struct btrfs_device { struct list_head dev_list; struct btrfs_root *dev_root; @@ -69,12 +71,29 @@ struct btrfs_fs_devices { struct list_head list; }; +struct btrfs_bio_stripe { + struct btrfs_device *dev; + u64 physical; +}; + +struct btrfs_multi_bio { + atomic_t stripes_pending; + bio_end_io_t *end_io; + void *private; + int error; + int num_stripes; + struct btrfs_bio_stripe stripes[]; +}; + +#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ + (sizeof(struct btrfs_bio_stripe) * (n))) + int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 owner, u64 num_bytes, u64 *start); -int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int stripe_nr, - u64 logical, u64 *phys, u64 *length, - struct btrfs_device **dev, int *total_stripes); +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, + u64 logical, u64 *length, + struct btrfs_multi_bio **multi_ret); int btrfs_read_sys_array(struct btrfs_root *root); int btrfs_read_chunk_tree(struct btrfs_root *root); int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, From 728131d8e40c2a47c59ca91a806299c4708029f9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Apr 2008 16:28:12 -0400 Subject: [PATCH 0468/2985] Btrfs: Add additional debugging for metadata checksum failures Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- fs/btrfs/extent_io.c | 54 +++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/extent_io.h | 2 ++ 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bc5bd5abb28a..e444b99e02da 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -283,6 +283,7 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) map = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(map, tree, page, gfp_flags); if (ret == 1) { + invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -376,7 +377,6 @@ int btrfs_verify_block_csum(struct btrfs_root *root, buf->flags |= EXTENT_CSUM; return 0; } - lock_extent(io_tree, buf->start, end, GFP_NOFS); if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { @@ -384,6 +384,7 @@ int btrfs_verify_block_csum(struct btrfs_root *root, ret = 0; goto out_unlock; } +WARN_ON(buf->flags & EXTENT_CSUM); ret = csum_tree_block(root, buf, 1); set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 776c33f1e6fe..13cc2360e375 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2592,6 +2592,33 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, return p; } +int invalidate_extent_lru(struct extent_io_tree *tree, u64 start, + unsigned long len) +{ + struct list_head *lru = &tree->buffer_lru; + struct list_head *cur = lru->next; + struct extent_buffer *eb; + int found = 0; + + spin_lock(&tree->lru_lock); + if (list_empty(lru)) + goto out; + + do { + eb = list_entry(cur, struct extent_buffer, lru); + if (eb->start <= start && eb->start + eb->len > start) { + eb->flags &= ~EXTENT_UPTODATE; + } + if (eb->start == start) { + eb->flags &= ~EXTENT_CSUM; + } + cur = cur->next; + } while (cur != lru); +out: + spin_unlock(&tree->lru_lock); + return found; +} + static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len, @@ -2909,10 +2936,32 @@ EXPORT_SYMBOL(set_extent_buffer_uptodate); int extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb) { + int ret = 0; + int ret2; + int num_pages; + int i; + struct page *page; + int pg_uptodate = 1; + if (eb->flags & EXTENT_UPTODATE) - return 1; - return test_range_bit(tree, eb->start, eb->start + eb->len - 1, + ret = 1; + + ret2 = test_range_bit(tree, eb->start, eb->start + eb->len - 1, EXTENT_UPTODATE, 1); + + num_pages = num_extent_pages(eb->start, eb->len); + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if (!PageUptodate(page)) { + pg_uptodate = 0; + break; + } + } + if ((ret || ret2) && !pg_uptodate) { +printk("uptodate error2 eb %Lu ret %d ret2 %d pg_uptodate %d\n", eb->start, ret, ret2, pg_uptodate); + WARN_ON(1); + } + return (ret || ret2); } EXPORT_SYMBOL(extent_buffer_uptodate); @@ -2929,7 +2978,6 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, unsigned long num_pages; struct bio *bio = NULL; - if (eb->flags & EXTENT_UPTODATE) return 0; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 16d67a61a25c..d9f5bc4dbd7f 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -216,4 +216,6 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long *map_start, unsigned long *map_len, int km); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); +int invalidate_extent_lru(struct extent_io_tree *tree, u64 start, + unsigned long len); #endif From ce9adaa5a792c2099a83246265eb4055bc38b6b8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Apr 2008 16:28:12 -0400 Subject: [PATCH 0469/2985] Btrfs: Do metadata checksums for reads via a workqueue Before, metadata checksumming was done by the callers of read_tree_block, which would set EXTENT_CSUM bits in the extent tree to show that a given range of pages was already checksummed and didn't need to be verified again. But, those bits could go away via try_to_releasepage, and the end result was bogus checksum failures on pages that never left the cache. The new code validates checksums when the page is read. It is a little tricky because metadata blocks can span pages and a single read may end up going via multiple bios. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 - fs/btrfs/ctree.h | 4 + fs/btrfs/disk-io.c | 258 +++++++++++++++++++++++++++++++++++------ fs/btrfs/extent-tree.c | 4 - fs/btrfs/extent_io.c | 100 ++++++++++++---- fs/btrfs/extent_io.h | 4 +- fs/btrfs/transaction.c | 2 +- 7 files changed, 310 insertions(+), 67 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c92c6b0ee582..efce173a935c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -158,9 +158,6 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, } else { root_gen = 0; } - if (!(buf->flags & EXTENT_CSUM)) - WARN_ON(1); - WARN_ON(root->ref_cows && trans->transid != root->fs_info->running_transaction->transid); WARN_ON(root->ref_cows && trans->transid != root->last_trans); @@ -247,8 +244,6 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, root->fs_info->generation); WARN_ON(1); } - if (!(buf->flags & EXTENT_CSUM)) - WARN_ON(1); header_trans = btrfs_header_generation(buf); spin_lock(&root->fs_info->hash_lock); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4b3b20459f6e..e803c4daad21 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -485,6 +485,10 @@ struct btrfs_fs_info { struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; + struct list_head end_io_work_list; + struct work_struct end_io_work; + spinlock_t end_io_work_lock; + #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) struct work_struct trans_work; #else diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e444b99e02da..82109204788c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -24,6 +24,7 @@ #include #include #include // for block_sync_page +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -45,6 +46,16 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) #endif static struct extent_io_ops btree_extent_io_ops; +static struct workqueue_struct *end_io_workqueue; + +struct end_io_wq { + struct bio *bio; + bio_end_io_t *end_io; + void *private; + struct btrfs_fs_info *info; + int error; + struct list_head list; +}; struct extent_map *btree_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 len, @@ -219,11 +230,108 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) return 0; } +int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, + struct extent_state *state) +{ + struct extent_io_tree *tree; + u64 found_start; + int found_level; + unsigned long len; + struct extent_buffer *eb; + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + int ret; + + tree = &BTRFS_I(page->mapping->host)->io_tree; + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + if (!page->private) + goto out; + len = page->private >> 2; + if (len == 0) { + WARN_ON(1); + } + eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); + read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1, + btree_get_extent); + btrfs_clear_buffer_defrag(eb); + found_start = btrfs_header_bytenr(eb); + if (found_start != start) { + printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", + start, found_start, len); + WARN_ON(1); + goto err; + } + if (eb->first_page != page) { + printk("bad first page %lu %lu\n", eb->first_page->index, + page->index); + WARN_ON(1); + goto err; + } + found_level = btrfs_header_level(eb); + + ret = csum_tree_block(root, eb, 1); + + end = min_t(u64, eb->len, PAGE_CACHE_SIZE); + end = eb->start + end - 1; + release_extent_buffer_tail_pages(eb); +err: + free_extent_buffer(eb); +out: + return 0; +} + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_workqueue_bio(struct bio *bio, int err) +#else +static int end_workqueue_bio(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + struct end_io_wq *end_io_wq = bio->bi_private; + struct btrfs_fs_info *fs_info; + unsigned long flags; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + fs_info = end_io_wq->info; + spin_lock_irqsave(&fs_info->end_io_work_lock, flags); + end_io_wq->error = err; + list_add_tail(&end_io_wq->list, &fs_info->end_io_work_list); + spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags); + queue_work(end_io_workqueue, &fs_info->end_io_work); + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) { struct btrfs_root *root = BTRFS_I(inode)->root; + struct end_io_wq *end_io_wq; u64 offset; offset = bio->bi_sector << 9; + + if (rw & (1 << BIO_RW)) { + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); + } + + end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); + if (!end_io_wq) + return -ENOMEM; + + end_io_wq->private = bio->bi_private; + end_io_wq->end_io = bio->bi_end_io; + end_io_wq->info = root->fs_info; + end_io_wq->error = 0; + end_io_wq->bio = bio; + + bio->bi_private = end_io_wq; + bio->bi_end_io = end_workqueue_bio; + if (offset == BTRFS_SUPER_INFO_OFFSET) { bio->bi_bdev = root->fs_info->sb->s_bdev; submit_bio(rw, bio); @@ -363,36 +471,7 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) int btrfs_verify_block_csum(struct btrfs_root *root, struct extent_buffer *buf) { - struct extent_io_tree *io_tree; - u64 end; - int ret; - - io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; - if (buf->flags & EXTENT_CSUM) - return 0; - - end = min_t(u64, buf->len, PAGE_CACHE_SIZE); - end = buf->start + end - 1; - if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { - buf->flags |= EXTENT_CSUM; - return 0; - } - lock_extent(io_tree, buf->start, end, GFP_NOFS); - - if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { - buf->flags |= EXTENT_CSUM; - ret = 0; - goto out_unlock; - } -WARN_ON(buf->flags & EXTENT_CSUM); - - ret = csum_tree_block(root, buf, 1); - set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); - buf->flags |= EXTENT_CSUM; - -out_unlock: - unlock_extent(io_tree, buf->start, end, GFP_NOFS); - return ret; + return btrfs_buffer_uptodate(buf); } struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, @@ -430,11 +509,15 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1, - btree_get_extent); - ret = btrfs_verify_block_csum(root, buf); + ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, + 1, btree_get_extent); + + if (ret == 0) { + buf->flags |= EXTENT_UPTODATE; + } return buf; + } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -724,6 +807,99 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) return 0; } +static int bio_ready_for_csum(struct bio *bio) +{ + u64 length = 0; + u64 buf_len = 0; + u64 start = 0; + struct page *page; + struct extent_io_tree *io_tree = NULL; + struct btrfs_fs_info *info = NULL; + struct bio_vec *bvec; + int i; + int ret; + + bio_for_each_segment(bvec, bio, i) { + page = bvec->bv_page; + if (page->private == EXTENT_PAGE_PRIVATE) { + length += bvec->bv_len; + continue; + } + if (!page->private) { + length += bvec->bv_len; + continue; + } + length = bvec->bv_len; + buf_len = page->private >> 2; + start = page_offset(page) + bvec->bv_offset; + io_tree = &BTRFS_I(page->mapping->host)->io_tree; + info = BTRFS_I(page->mapping->host)->root->fs_info; + } + /* are we fully contained in this bio? */ + if (buf_len <= length) + return 1; + + ret = extent_range_uptodate(io_tree, start + length, + start + buf_len - 1); + if (ret == 1) + return ret; + return ret; +} + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +void btrfs_end_io_csum(void *p) +#else +void btrfs_end_io_csum(struct work_struct *work) +#endif +{ +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + struct btrfs_fs_info *fs_info = p; +#else + struct btrfs_fs_info *fs_info = container_of(work, + struct btrfs_fs_info, + end_io_work); +#endif + unsigned long flags; + struct end_io_wq *end_io_wq; + struct bio *bio; + struct list_head *next; + int error; + int was_empty; + + while(1) { + spin_lock_irqsave(&fs_info->end_io_work_lock, flags); + if (list_empty(&fs_info->end_io_work_list)) { + spin_unlock_irqrestore(&fs_info->end_io_work_lock, + flags); + return; + } + next = fs_info->end_io_work_list.next; + list_del(next); + spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags); + + end_io_wq = list_entry(next, struct end_io_wq, list); + + bio = end_io_wq->bio; + if (!bio_ready_for_csum(bio)) { + spin_lock_irqsave(&fs_info->end_io_work_lock, flags); + was_empty = list_empty(&fs_info->end_io_work_list); + list_add_tail(&end_io_wq->list, + &fs_info->end_io_work_list); + spin_unlock_irqrestore(&fs_info->end_io_work_lock, + flags); + if (was_empty) + return; + continue; + } + error = end_io_wq->error; + bio->bi_private = end_io_wq->private; + bio->bi_end_io = end_io_wq->end_io; + kfree(end_io_wq); + bio_endio(bio, error); + } +} + + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices) { @@ -750,11 +926,16 @@ struct btrfs_root *open_ctree(struct super_block *sb, err = -ENOMEM; goto fail; } + end_io_workqueue = create_workqueue("btrfs-end-io"); + BUG_ON(!end_io_workqueue); + INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); + INIT_LIST_HEAD(&fs_info->end_io_work_list); spin_lock_init(&fs_info->hash_lock); + spin_lock_init(&fs_info->end_io_work_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); @@ -799,6 +980,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; + INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); #else @@ -1044,6 +1226,8 @@ int close_ctree(struct btrfs_root *root) extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + flush_workqueue(end_io_workqueue); + destroy_workqueue(end_io_workqueue); iput(fs_info->btree_inode); #if 0 @@ -1171,12 +1355,18 @@ int btrfs_read_buffer(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, + int ret; + ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1, btree_get_extent); + if (ret == 0) { + buf->flags |= EXTENT_UPTODATE; + } + return ret; } static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, + .readpage_end_io_hook = btree_readpage_end_io_hook, .submit_bio_hook = btree_submit_bio_hook, /* note we're sharing with inode.c for the merge bio hook */ .merge_bio_hook = btrfs_merge_bio_hook, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0cb742884cdf..283b08a32a43 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1898,10 +1898,6 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_CSUM, GFP_NOFS); - buf->flags |= EXTENT_CSUM; if (!btrfs_test_opt(root, SSD)) btrfs_set_buffer_defrag(buf); trans->blocks_used++; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 13cc2360e375..cfc383c17a3a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2592,6 +2592,22 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, return p; } +int release_extent_buffer_tail_pages(struct extent_buffer *eb) +{ + unsigned long num_pages = num_extent_pages(eb->start, eb->len); + struct page *page; + unsigned long i; + + if (num_pages == 1) + return 0; + for (i = 1; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + page_cache_release(page); + } + return 0; +} + + int invalidate_extent_lru(struct extent_io_tree *tree, u64 start, unsigned long len) { @@ -2609,9 +2625,6 @@ int invalidate_extent_lru(struct extent_io_tree *tree, u64 start, if (eb->start <= start && eb->start + eb->len > start) { eb->flags &= ~EXTENT_UPTODATE; } - if (eb->start == start) { - eb->flags &= ~EXTENT_CSUM; - } cur = cur->next; } while (cur != lru); out: @@ -2682,7 +2695,6 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, page_cache_get(page0); mark_page_accessed(page0); set_page_extent_mapped(page0); - WARN_ON(!PageUptodate(page0)); set_page_extent_head(page0, len); } else { i = 0; @@ -2933,13 +2945,39 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, } EXPORT_SYMBOL(set_extent_buffer_uptodate); +int extent_range_uptodate(struct extent_io_tree *tree, + u64 start, u64 end) +{ + struct page *page; + int ret; + int pg_uptodate = 1; + int uptodate; + unsigned long index; + + ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); + if (ret) + return 1; + while(start <= end) { + index = start >> PAGE_CACHE_SHIFT; + page = find_get_page(tree->mapping, index); + uptodate = PageUptodate(page); + page_cache_release(page); + if (!uptodate) { + pg_uptodate = 0; + break; + } + start += PAGE_CACHE_SIZE; + } + return pg_uptodate; +} + int extent_buffer_uptodate(struct extent_io_tree *tree, - struct extent_buffer *eb) + struct extent_buffer *eb) { int ret = 0; int ret2; - int num_pages; - int i; + unsigned long num_pages; + unsigned long i; struct page *page; int pg_uptodate = 1; @@ -2975,13 +3013,16 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, struct page *page; int err; int ret = 0; + int locked_pages = 0; + int all_uptodate = 1; + int inc_all_pages = 0; unsigned long num_pages; struct bio *bio = NULL; if (eb->flags & EXTENT_UPTODATE) return 0; - if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1, + if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, EXTENT_UPTODATE, 1)) { return 0; } @@ -2997,17 +3038,30 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); - if (PageUptodate(page)) { - continue; - } if (!wait) { - if (TestSetPageLocked(page)) { - continue; - } + if (TestSetPageLocked(page)) + goto unlock_exit; } else { lock_page(page); } + locked_pages++; if (!PageUptodate(page)) { + all_uptodate = 0; + } + } + if (all_uptodate) { + if (start_i == 0) + eb->flags |= EXTENT_UPTODATE; + goto unlock_exit; + } + + for (i = start_i; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if (inc_all_pages) + page_cache_get(page); + if (!PageUptodate(page)) { + if (start_i == 0) + inc_all_pages = 1; err = __extent_read_full_page(tree, page, get_extent, &bio); if (err) { @@ -3034,6 +3088,16 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (!ret) eb->flags |= EXTENT_UPTODATE; return ret; + +unlock_exit: + i = start_i; + while(locked_pages > 0) { + page = extent_buffer_page(eb, i); + i++; + unlock_page(page); + locked_pages--; + } + return ret; } EXPORT_SYMBOL(read_extent_buffer_pages); @@ -3048,7 +3112,6 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, char *dst = (char *)dstv; size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long num_pages = num_extent_pages(eb->start, eb->len); WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); @@ -3057,11 +3120,6 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, while(len > 0) { page = extent_buffer_page(eb, i); - if (!PageUptodate(page)) { - printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); - WARN_ON(1); - } - WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); kaddr = kmap_atomic(page, KM_USER1); @@ -3105,7 +3163,6 @@ printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, } p = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(p)); kaddr = kmap_atomic(p, km); *token = kaddr; *map = kaddr + offset; @@ -3165,7 +3222,6 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, while(len > 0) { page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); cur = min(len, (PAGE_CACHE_SIZE - offset)); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index d9f5bc4dbd7f..9d2991d1d3ce 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -13,7 +13,6 @@ #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) #define EXTENT_BUFFER_FILLED (1 << 8) -#define EXTENT_CSUM (1 << 9) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* @@ -218,4 +217,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); int invalidate_extent_lru(struct extent_io_tree *tree, u64 start, unsigned long len); +int release_extent_buffer_tail_pages(struct extent_buffer *eb); +int extent_range_uptodate(struct extent_io_tree *tree, + u64 start, u64 end); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5e9f69244f9f..1ed179c020af 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -843,7 +843,7 @@ void btrfs_transaction_flush_work(struct btrfs_root *root) void __init btrfs_init_transaction_sys(void) { - trans_wq = create_workqueue("btrfs"); + trans_wq = create_workqueue("btrfs-transaction"); } void btrfs_exit_transaction_sys(void) From 22c599485b1fdd95e4476a4752596a6cf6c6629a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Apr 2008 16:28:12 -0400 Subject: [PATCH 0470/2985] Btrfs: Handle data block end_io through the async work queue Before it was done by the bio end_io routine, the work queue code is able to scale much better with faster IO subsystems. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 34 +++++++++++++++++++++++----------- fs/btrfs/disk-io.h | 2 ++ fs/btrfs/inode.c | 4 +++- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 82109204788c..94ff87d0eae4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -54,6 +54,7 @@ struct end_io_wq { void *private; struct btrfs_fs_info *info; int error; + int metadata; struct list_head list; }; @@ -308,29 +309,40 @@ static int end_workqueue_bio(struct bio *bio, #endif } -static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) +int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, + int metadata) { - struct btrfs_root *root = BTRFS_I(inode)->root; struct end_io_wq *end_io_wq; - u64 offset; - offset = bio->bi_sector << 9; - - if (rw & (1 << BIO_RW)) { - return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); - } - end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); if (!end_io_wq) return -ENOMEM; end_io_wq->private = bio->bi_private; end_io_wq->end_io = bio->bi_end_io; - end_io_wq->info = root->fs_info; + end_io_wq->info = info; end_io_wq->error = 0; end_io_wq->bio = bio; + end_io_wq->metadata = metadata; bio->bi_private = end_io_wq; bio->bi_end_io = end_workqueue_bio; + return 0; +} + +static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 offset; + int ret; + + offset = bio->bi_sector << 9; + + if (rw & (1 << BIO_RW)) { + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); + } + + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); + BUG_ON(ret); if (offset == BTRFS_SUPER_INFO_OFFSET) { bio->bi_bdev = root->fs_info->sb->s_bdev; @@ -880,7 +892,7 @@ void btrfs_end_io_csum(struct work_struct *work) end_io_wq = list_entry(next, struct end_io_wq, list); bio = end_io_wq->bio; - if (!bio_ready_for_csum(bio)) { + if (end_io_wq->metadata && !bio_ready_for_csum(bio)) { spin_lock_irqsave(&fs_info->end_io_work_lock, flags); was_empty = list_empty(&fs_info->end_io_work_list); list_add_tail(&end_io_wq->list, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 05b88d0e75eb..4fac0ccbf8f8 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -71,4 +71,6 @@ void btrfs_throttle(struct btrfs_root *root); int btrfs_open_device(struct btrfs_device *dev); int btrfs_verify_block_csum(struct btrfs_root *root, struct extent_buffer *buf); +int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, + int metadata); #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7ae677d8a6de..e1ef1acdb350 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -328,7 +328,9 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) struct btrfs_trans_handle *trans; int ret = 0; - if (rw != WRITE) { + if (!(rw & (1 << BIO_RW))) { + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + BUG_ON(ret); goto mapit; } From f188591e987e21b6f7f8864c66a02858b95b530e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Apr 2008 16:28:12 -0400 Subject: [PATCH 0471/2985] Btrfs: Retry metadata reads in the face of checksum failures Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 11 ++++++- fs/btrfs/disk-io.c | 72 ++++++++++++++++++++++++++++++++------------ fs/btrfs/extent_io.c | 50 +++++++++++++++++------------- fs/btrfs/extent_io.h | 5 +-- fs/btrfs/inode.c | 7 +++-- fs/btrfs/volumes.c | 39 +++++++++++++++++++++--- fs/btrfs/volumes.h | 6 ++-- 7 files changed, 138 insertions(+), 52 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index efce173a935c..ff4e9c6859d1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -569,7 +569,16 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, static int noinline check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { - return 0; + u64 found_start; + if (btrfs_header_level(path->nodes[level]) != level) + printk("warning: bad level %Lu wanted %d found %d\n", + path->nodes[level]->start, level, + btrfs_header_level(path->nodes[level])); + found_start = btrfs_header_bytenr(path->nodes[level]); + if (found_start != path->nodes[level]->start) { + printk("warning: bad bytentr %Lu found %Lu\n", + path->nodes[level]->start, found_start); + } #if 0 struct extent_buffer *buf = path->nodes[level]; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 94ff87d0eae4..59bdf0474be3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -156,7 +156,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, memcpy(&found, result, BTRFS_CRC32_SIZE); read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE); - WARN_ON(1); printk("btrfs: %s checksum verify failed on %llu " "wanted %X found %X from_this_trans %d " "level %d\n", @@ -171,6 +170,40 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, return 0; } +static int btree_read_extent_buffer_pages(struct btrfs_root *root, + struct extent_buffer *eb, + u64 start) +{ + struct extent_io_tree *io_tree; + int ret; + int num_copies = 0; + int mirror_num = 0; + + io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; + while (1) { + ret = read_extent_buffer_pages(io_tree, eb, start, 1, + btree_get_extent, mirror_num); + if (!ret) { + if (mirror_num) +printk("good read %Lu mirror %d total %d\n", eb->start, mirror_num, num_copies); + return ret; + } + num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, + eb->start, eb->len); +printk("failed to read %Lu mirror %d total %d\n", eb->start, mirror_num, num_copies); + if (num_copies == 1) { +printk("reading %Lu failed only one copy\n", eb->start); + return ret; + } + mirror_num++; + if (mirror_num > num_copies) { +printk("bailing at mirror %d of %d\n", mirror_num, num_copies); + return ret; + } + } +printk("read extent buffer page last\n"); + return -EIO; +} int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { @@ -180,6 +213,8 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) int found_level; unsigned long len; struct extent_buffer *eb; + int ret; + tree = &BTRFS_I(page->mapping->host)->io_tree; if (page->private == EXTENT_PAGE_PRIVATE) @@ -191,8 +226,8 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) WARN_ON(1); } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1, - btree_get_extent); + ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE); + BUG_ON(ret); btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { @@ -240,7 +275,7 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, unsigned long len; struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; - int ret; + int ret = 0; tree = &BTRFS_I(page->mapping->host)->io_tree; if (page->private == EXTENT_PAGE_PRIVATE) @@ -252,25 +287,26 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, WARN_ON(1); } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1, - btree_get_extent); + btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { - printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", - start, found_start, len); - WARN_ON(1); +printk("bad start on %Lu found %Lu\n", eb->start, found_start); + ret = -EIO; goto err; } if (eb->first_page != page) { printk("bad first page %lu %lu\n", eb->first_page->index, page->index); WARN_ON(1); + ret = -EIO; goto err; } found_level = btrfs_header_level(eb); ret = csum_tree_block(root, eb, 1); + if (ret) + ret = -EIO; end = min_t(u64, eb->len, PAGE_CACHE_SIZE); end = eb->start + end - 1; @@ -278,7 +314,7 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, err: free_extent_buffer(eb); out: - return 0; + return ret; } #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) @@ -329,7 +365,8 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } -static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) +static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, + int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 offset; @@ -338,7 +375,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) offset = bio->bi_sector << 9; if (rw & (1 << BIO_RW)) { - return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); } ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); @@ -349,7 +386,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) submit_bio(rw, bio); return 0; } - return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); } static int btree_writepage(struct page *page, struct writeback_control *wbc) @@ -459,7 +496,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, 0, 0, btree_get_extent); + buf, 0, 0, btree_get_extent, 0); free_extent_buffer(buf); return ret; } @@ -522,8 +559,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, if (!buf) return NULL; - ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, - 1, btree_get_extent); + ret = btree_read_extent_buffer_pages(root, buf, 0); if (ret == 0) { buf->flags |= EXTENT_UPTODATE; @@ -1366,10 +1402,8 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) int btrfs_read_buffer(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; int ret; - ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, 0, 1, btree_get_extent); + ret = btree_read_extent_buffer_pages(root, buf, 0); if (ret == 0) { buf->flags |= EXTENT_UPTODATE; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index cfc383c17a3a..2f159375c878 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1546,7 +1546,7 @@ static int end_bio_extent_readpage(struct bio *bio, !(state->state & EXTENT_LOCKED)) state = NULL; } - if (!state) { + if (!state && uptodate) { spin_unlock_irqrestore(&tree->lock, flags); set_extent_uptodate(tree, start, end, GFP_ATOMIC); @@ -1567,8 +1567,10 @@ static int end_bio_extent_readpage(struct bio *bio, } else { state = NULL; } - set_state_cb(tree, clear, EXTENT_UPTODATE); - clear->state |= EXTENT_UPTODATE; + if (uptodate) { + set_state_cb(tree, clear, EXTENT_UPTODATE); + clear->state |= EXTENT_UPTODATE; + } clear_state_bit(tree, clear, EXTENT_LOCKED, 1, 0); if (cur == start) @@ -1685,7 +1687,7 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, return bio; } -static int submit_one_bio(int rw, struct bio *bio) +static int submit_one_bio(int rw, struct bio *bio, int mirror_num) { u64 maxsector; int ret = 0; @@ -1722,7 +1724,8 @@ static int submit_one_bio(int rw, struct bio *bio) WARN_ON(1); } if (tree->ops && tree->ops->submit_bio_hook) - tree->ops->submit_bio_hook(page->mapping->host, rw, bio); + tree->ops->submit_bio_hook(page->mapping->host, rw, bio, + mirror_num); else submit_bio(rw, bio); if (bio_flagged(bio, BIO_EOPNOTSUPP)) @@ -1737,7 +1740,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, struct block_device *bdev, struct bio **bio_ret, unsigned long max_pages, - bio_end_io_t end_io_func) + bio_end_io_t end_io_func, + int mirror_num) { int ret = 0; struct bio *bio; @@ -1749,7 +1753,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, (tree->ops && tree->ops->merge_bio_hook && tree->ops->merge_bio_hook(page, offset, size, bio)) || bio_add_page(bio, page, size, offset) < size) { - ret = submit_one_bio(rw, bio); + ret = submit_one_bio(rw, bio, mirror_num); bio = NULL; } else { return 0; @@ -1769,7 +1773,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, if (bio_ret) { *bio_ret = bio; } else { - ret = submit_one_bio(rw, bio); + ret = submit_one_bio(rw, bio, mirror_num); } return ret; @@ -1798,7 +1802,7 @@ void set_page_extent_head(struct page *page, unsigned long len) static int __extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, - struct bio **bio) + struct bio **bio, int mirror_num) { struct inode *inode = page->mapping->host; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; @@ -1901,7 +1905,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, ret = submit_extent_page(READ, tree, page, sector, iosize, page_offset, bdev, bio, nr, - end_bio_extent_readpage); + end_bio_extent_readpage, mirror_num); } if (ret) SetPageError(page); @@ -1923,9 +1927,9 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, struct bio *bio = NULL; int ret; - ret = __extent_read_full_page(tree, page, get_extent, &bio); + ret = __extent_read_full_page(tree, page, get_extent, &bio, 0); if (bio) - submit_one_bio(READ, bio); + submit_one_bio(READ, bio, 0); return ret; } EXPORT_SYMBOL(extent_read_full_page); @@ -2077,7 +2081,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, ret = submit_extent_page(WRITE, tree, page, sector, iosize, page_offset, bdev, &epd->bio, max_nr, - end_bio_extent_writepage); + end_bio_extent_writepage, 0); if (ret) SetPageError(page); } @@ -2244,7 +2248,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); if (epd.bio) { - submit_one_bio(WRITE, epd.bio); + submit_one_bio(WRITE, epd.bio, 0); } return ret; } @@ -2265,7 +2269,7 @@ int extent_writepages(struct extent_io_tree *tree, ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); if (epd.bio) { - submit_one_bio(WRITE, epd.bio); + submit_one_bio(WRITE, epd.bio, 0); } return ret; } @@ -2297,7 +2301,8 @@ int extent_readpages(struct extent_io_tree *tree, page_cache_get(page); if (!pagevec_add(&pvec, page)) __pagevec_lru_add(&pvec); - __extent_read_full_page(tree, page, get_extent, &bio); + __extent_read_full_page(tree, page, get_extent, + &bio, 0); } page_cache_release(page); } @@ -2305,7 +2310,7 @@ int extent_readpages(struct extent_io_tree *tree, __pagevec_lru_add(&pvec); BUG_ON(!list_empty(pages)); if (bio) - submit_one_bio(READ, bio); + submit_one_bio(READ, bio, 0); return 0; } EXPORT_SYMBOL(extent_readpages); @@ -2430,7 +2435,7 @@ int extent_prepare_write(struct extent_io_tree *tree, ret = submit_extent_page(READ, tree, page, sector, iosize, page_offset, em->bdev, NULL, 1, - end_bio_extent_preparewrite); + end_bio_extent_preparewrite, 0); iocount++; block_start = block_start + iosize; } else { @@ -2696,6 +2701,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, mark_page_accessed(page0); set_page_extent_mapped(page0); set_page_extent_head(page0, len); + uptodate = PageUptodate(page0); } else { i = 0; } @@ -3006,7 +3012,7 @@ EXPORT_SYMBOL(extent_buffer_uptodate); int read_extent_buffer_pages(struct extent_io_tree *tree, struct extent_buffer *eb, u64 start, int wait, - get_extent_t *get_extent) + get_extent_t *get_extent, int mirror_num) { unsigned long i; unsigned long start_i; @@ -3062,8 +3068,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (!PageUptodate(page)) { if (start_i == 0) inc_all_pages = 1; + ClearPageError(page); err = __extent_read_full_page(tree, page, - get_extent, &bio); + get_extent, &bio, + mirror_num); if (err) { ret = err; } @@ -3073,7 +3081,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, } if (bio) - submit_one_bio(READ, bio); + submit_one_bio(READ, bio, mirror_num); if (ret || !wait) { return ret; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 9d2991d1d3ce..8d6b8a14cc30 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -27,7 +27,8 @@ struct extent_state; struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); - int (*submit_bio_hook)(struct inode *inode, int rw, struct bio *bio); + int (*submit_bio_hook)(struct inode *inode, int rw, struct bio *bio, + int mirror_num); int (*merge_bio_hook)(struct page *page, unsigned long offset, size_t size, struct bio *bio); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); @@ -172,7 +173,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, void free_extent_buffer(struct extent_buffer *eb); int read_extent_buffer_pages(struct extent_io_tree *tree, struct extent_buffer *eb, u64 start, int wait, - get_extent_t *get_extent); + get_extent_t *get_extent, int mirror_num); static inline void extent_buffer_get(struct extent_buffer *eb) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e1ef1acdb350..8c2d5d036bd6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -314,7 +314,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, map_tree = &root->fs_info->mapping_tree; map_length = length; ret = btrfs_map_block(map_tree, READ, logical, - &map_length, NULL); + &map_length, NULL, 0); if (map_length < length + size) { return 1; @@ -322,7 +322,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, return 0; } -int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) +int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, + int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; @@ -347,7 +348,7 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); mapit: - return btrfs_map_bio(root, rw, bio); + return btrfs_map_bio(root, rw, bio, mirror_num); } int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 008d3640e8c2..3b927f698320 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -788,9 +788,31 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) } } +int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) +{ + struct extent_map *em; + struct map_lookup *map; + struct extent_map_tree *em_tree = &map_tree->map_tree; + int ret; + + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, logical, len); + BUG_ON(!em); + + BUG_ON(em->start > logical || em->start + em->len < logical); + map = (struct map_lookup *)em->bdev; + if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1)) + ret = map->num_stripes; + else + ret = 1; + free_extent_map(em); + spin_unlock(&em_tree->lock); + return ret; +} + int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, - struct btrfs_multi_bio **multi_ret) + struct btrfs_multi_bio **multi_ret, int mirror_num) { struct extent_map *em; struct map_lookup *map; @@ -822,6 +844,9 @@ again: map = (struct map_lookup *)em->bdev; offset = logical - em->start; + if (mirror_num > map->num_stripes) + mirror_num = 0; + /* if our multi bio struct is too small, back off and try again */ if (multi_ret && (rw & (1 << BIO_RW)) && stripes_allocated < map->num_stripes && @@ -862,7 +887,9 @@ again: if (map->type & BTRFS_BLOCK_GROUP_RAID1) { if (rw & (1 << BIO_RW)) multi->num_stripes = map->num_stripes; - else { + else if (mirror_num) { + stripe_index = mirror_num - 1; + } else { int i; u64 least = (u64)-1; struct btrfs_device *cur; @@ -880,6 +907,8 @@ again: } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { if (rw & (1 << BIO_RW)) multi->num_stripes = map->num_stripes; + else if (mirror_num) + stripe_index = mirror_num - 1; } else { /* * after this do_div call, stripe_nr is the number of stripes @@ -938,7 +967,8 @@ static int end_bio_multi_stripe(struct bio *bio, #endif } -int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) +int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, + int mirror_num) { struct btrfs_mapping_tree *map_tree; struct btrfs_device *dev; @@ -960,7 +990,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) map_tree = &root->fs_info->mapping_tree; map_length = length; - ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi); + ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi, + mirror_num); BUG_ON(ret); total_devs = multi->num_stripes; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 10ca0104750c..3d5d0a9cb827 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -93,7 +93,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, u64 owner, u64 num_bytes, u64 *start); int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, - struct btrfs_multi_bio **multi_ret); + struct btrfs_multi_bio **multi_ret, int mirror_num); int btrfs_read_sys_array(struct btrfs_root *root); int btrfs_read_chunk_tree(struct btrfs_root *root); int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, @@ -101,7 +101,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 *num_bytes, u64 type); void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); -int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio); +int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, + int mirror_num); int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int flags, void *holder); @@ -112,4 +113,5 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device); int btrfs_cleanup_fs_uuids(void); +int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); #endif From 7e38326f5b125288a2baea1d815c500502ab9fc0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Apr 2008 16:28:12 -0400 Subject: [PATCH 0472/2985] Btrfs: Handle checksumming errors while reading data blocks Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/extent_io.c | 9 ++++ fs/btrfs/extent_io.h | 3 ++ fs/btrfs/inode.c | 114 ++++++++++++++++++++++++++++++++++++++++- 4 files changed, 126 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 9b9db9cbc019..fe6ef8e34166 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -29,6 +29,7 @@ struct btrfs_inode { struct btrfs_key location; struct extent_map_tree extent_tree; struct extent_io_tree io_tree; + struct extent_io_tree io_failure_tree; struct inode vfs_inode; u64 ordered_trans; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2f159375c878..866460c3d72c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1534,6 +1534,15 @@ static int end_bio_extent_readpage(struct bio *bio, if (ret) uptodate = 0; } + if (!uptodate && tree->ops && + tree->ops->readpage_io_failed_hook) { + ret = tree->ops->readpage_io_failed_hook(bio, page, + start, end, state); + if (ret == 0) { + state = NULL; + continue; + } + } spin_lock_irqsave(&tree->lock, flags); if (!state || state->end != end) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 8d6b8a14cc30..b47859ccd78a 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -32,6 +32,9 @@ struct extent_io_ops { int (*merge_bio_hook)(struct page *page, unsigned long offset, size_t size, struct bio *bio); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, + u64 start, u64 end, + struct extent_state *state); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8c2d5d036bd6..48f1d1b96450 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -385,6 +385,86 @@ out: return ret; } +struct io_failure_record { + struct page *page; + u64 start; + u64 len; + u64 logical; + int last_mirror; +}; + +int btrfs_readpage_io_failed_hook(struct bio *failed_bio, + struct page *page, u64 start, u64 end, + struct extent_state *state) +{ + struct io_failure_record *failrec = NULL; + u64 private; + struct extent_map *em; + struct inode *inode = page->mapping->host; + struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; + struct bio *bio; + int num_copies; + int ret; + u64 logical; + + ret = get_state_private(failure_tree, start, &private); + if (ret) { + size_t pg_offset = start - page_offset(page); + failrec = kmalloc(sizeof(*failrec), GFP_NOFS); + if (!failrec) + return -ENOMEM; + failrec->start = start; + failrec->len = end - start + 1; + failrec->last_mirror = 0; + + em = btrfs_get_extent(inode, NULL, pg_offset, start, + failrec->len, 0); + + if (!em || IS_ERR(em)) { + kfree(failrec); + return -EIO; + } + logical = start - em->start; + logical = em->block_start + logical; + failrec->logical = logical; + free_extent_map(em); + set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | + EXTENT_DIRTY, GFP_NOFS); + set_state_private(failure_tree, start, (u64)failrec); + } else { + failrec = (struct io_failure_record *)private; + } + num_copies = btrfs_num_copies( + &BTRFS_I(inode)->root->fs_info->mapping_tree, + failrec->logical, failrec->len); + failrec->last_mirror++; + if (!state) { + spin_lock_irq(&BTRFS_I(inode)->io_tree.lock); + state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, + failrec->start, + EXTENT_LOCKED); + if (state && state->start != failrec->start) + state = NULL; + spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock); + } + if (!state || failrec->last_mirror > num_copies) { + set_state_private(failure_tree, failrec->start, 0); + clear_extent_bits(failure_tree, failrec->start, + failrec->start + failrec->len - 1, + EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); + kfree(failrec); + return -EIO; + } + bio = bio_alloc(GFP_NOFS, 1); + bio->bi_private = state; + bio->bi_end_io = failed_bio->bi_end_io; + bio->bi_sector = failrec->logical >> 9; + bio->bi_bdev = failed_bio->bi_bdev; + bio_add_page(bio, page, failrec->len, start - page_offset(page)); + btrfs_submit_bio_hook(inode, READ, bio, failrec->last_mirror); + return 0; +} + int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { @@ -419,6 +499,29 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, } kunmap_atomic(kaddr, KM_IRQ0); local_irq_restore(flags); + + /* if the io failure tree for this inode is non-empty, + * check to see if we've recovered from a failed IO + */ + private = 0; + if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, + (u64)-1, 1, EXTENT_DIRTY)) { + u64 private_failure; + struct io_failure_record *failure; + ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, + start, &private_failure); + if (ret == 0) { + failure = (struct io_failure_record *)private_failure; + set_state_private(&BTRFS_I(inode)->io_failure_tree, + failure->start, 0); + clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, + failure->start, + failure->start + failure->len - 1, + EXTENT_DIRTY | EXTENT_LOCKED, + GFP_NOFS); + kfree(failure); + } + } return 0; zeroit: @@ -429,7 +532,7 @@ zeroit: flush_dcache_page(page); kunmap_atomic(kaddr, KM_IRQ0); local_irq_restore(flags); - return 0; + return -EIO; } void btrfs_read_locked_inode(struct inode *inode) @@ -1271,6 +1374,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, + inode->i_mapping, GFP_NOFS); return 0; } @@ -1578,6 +1683,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, + inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->root = root; @@ -1803,6 +1910,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, + inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } @@ -2972,6 +3081,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, + inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } @@ -3070,6 +3181,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { .merge_bio_hook = btrfs_merge_bio_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, + .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, }; From 85d824c4a407ba8dbd897f4e05bbe21bf24cc071 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 10 Apr 2008 10:23:19 -0400 Subject: [PATCH 0473/2985] Btrfs: Disable extra debugging checks on tree blocks Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ff4e9c6859d1..e8bf6c221e4e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -570,6 +570,7 @@ static int noinline check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { u64 found_start; + return 0; if (btrfs_header_level(path->nodes[level]) != level) printk("warning: bad level %Lu wanted %d found %d\n", path->nodes[level]->start, level, From 16432985920f3c45af82da214e2498f3e2f9066b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 10 Apr 2008 10:23:21 -0400 Subject: [PATCH 0474/2985] Btrfs: Add O_DIRECT read and write (writes == buffered + cache flush) This adds basic O_DIRECT read and write support. In the write case, we just do a normal buffered write followed by a cache flush. O_DIRECT + O_SYNC are required to trigger metadata syncs. In the read case, there is a basic btrfs_get_block call for use by the generic O_DIRECT code. This does honor multi-volume mapping rules but it skips all checksumming. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 11 ++++++-- fs/btrfs/inode.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5fa450452f9b..5b9386452218 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -796,8 +796,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, PAGE_CACHE_SIZE / (sizeof(struct page *))); pinned[0] = NULL; pinned[1] = NULL; - if (file->f_flags & O_DIRECT) - return -EINVAL; pos = *ppos; start_pos = pos; @@ -909,6 +907,15 @@ out_nolock: start_pos, num_written); if (err < 0) num_written = err; + } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { + do_sync_mapping_range(inode->i_mapping, start_pos, + start_pos + num_written - 1, + SYNC_FILE_RANGE_WRITE | + SYNC_FILE_RANGE_WAIT_AFTER); + + invalidate_mapping_pages(inode->i_mapping, + start_pos >> PAGE_CACHE_SHIFT, + (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; return num_written ? num_written : err; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 48f1d1b96450..32e778fb014e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2265,6 +2265,70 @@ out: return em; } +static int btrfs_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + struct extent_map *em; + u64 start = (u64)iblock << inode->i_blkbits; + struct btrfs_multi_bio *multi = NULL; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 len; + u64 logical; + u64 map_length; + int ret = 0; + + em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0); + + if (!em || IS_ERR(em)) + goto out; + + if (em->start > start || em->start + em->len <= start) + goto out; + + if (em->block_start == EXTENT_MAP_INLINE) { + ret = -EINVAL; + goto out; + } + + if (em->block_start == EXTENT_MAP_HOLE || + em->block_start == EXTENT_MAP_DELALLOC) { + goto out; + } + + len = em->start + em->len - start; + len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size))); + + logical = start - em->start; + logical = em->block_start + logical; + + map_length = len; + ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, + logical, &map_length, &multi, 0); + BUG_ON(ret); + bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits; + bh_result->b_size = min(map_length, len); + bh_result->b_bdev = multi->stripes[0].dev->bdev; + set_buffer_mapped(bh_result); + kfree(multi); +out: + free_extent_map(em); + return ret; +} + +static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + + if (rw == WRITE) + return -EINVAL; + + return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, btrfs_get_block, NULL); +} + static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) { return extent_bmap(mapping, iblock, btrfs_get_extent); @@ -3136,6 +3200,7 @@ out_fail: btrfs_throttle(root); return err; } + static int btrfs_permission(struct inode *inode, int mask, struct nameidata *nd) { @@ -3193,6 +3258,7 @@ static struct address_space_operations btrfs_aops = { .readpages = btrfs_readpages, .sync_page = block_sync_page, .bmap = btrfs_bmap, + .direct_IO = btrfs_direct_IO, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, .set_page_dirty = __set_page_dirty_nobuffers, From f29844623de29a12358d7fba35d0959465b64adf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 10 Apr 2008 16:19:33 -0400 Subject: [PATCH 0475/2985] Btrfs: Write out all super blocks on commit, and bring back proper barrier support Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 118 +++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/disk-io.h | 1 + fs/btrfs/volumes.c | 8 +-- fs/btrfs/volumes.h | 3 ++ 4 files changed, 122 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 59bdf0474be3..cf1de75f088a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -382,7 +382,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, BUG_ON(ret); if (offset == BTRFS_SUPER_INFO_OFFSET) { - bio->bi_bdev = root->fs_info->sb->s_bdev; + bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; submit_bio(rw, bio); return 0; } @@ -988,7 +988,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->new_trans_lock); init_completion(&fs_info->kobj_unregister); - sb_set_blocksize(sb, 4096); + sb_set_blocksize(sb, BTRFS_SUPER_INFO_SIZE); fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; @@ -1169,14 +1169,121 @@ fail: return ERR_PTR(err); } +static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) +{ + char b[BDEVNAME_SIZE]; + + if (uptodate) { + set_buffer_uptodate(bh); + } else { + if (!buffer_eopnotsupp(bh) && printk_ratelimit()) { + printk(KERN_WARNING "lost page write due to " + "I/O error on %s\n", + bdevname(bh->b_bdev, b)); + } + set_buffer_write_io_error(bh); + clear_buffer_uptodate(bh); + } + unlock_buffer(bh); + put_bh(bh); +} + +int write_all_supers(struct btrfs_root *root) +{ + struct list_head *cur; + struct list_head *head = &root->fs_info->fs_devices->devices; + struct btrfs_device *dev; + struct extent_buffer *sb; + struct btrfs_dev_item *dev_item; + struct buffer_head *bh; + int ret; + int do_barriers; + + do_barriers = !btrfs_test_opt(root, NOBARRIER); + + sb = root->fs_info->sb_buffer; + dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block, + dev_item); + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); + btrfs_set_device_type(sb, dev_item, dev->type); + btrfs_set_device_id(sb, dev_item, dev->devid); + btrfs_set_device_total_bytes(sb, dev_item, dev->total_bytes); + btrfs_set_device_bytes_used(sb, dev_item, dev->bytes_used); + btrfs_set_device_io_align(sb, dev_item, dev->io_align); + btrfs_set_device_io_width(sb, dev_item, dev->io_width); + btrfs_set_device_sector_size(sb, dev_item, dev->sector_size); + write_extent_buffer(sb, dev->uuid, + (unsigned long)btrfs_device_uuid(dev_item), + BTRFS_DEV_UUID_SIZE); + + btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN); + csum_tree_block(root, sb, 0); + + bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / + root->fs_info->sb->s_blocksize, + BTRFS_SUPER_INFO_SIZE); + + read_extent_buffer(sb, bh->b_data, 0, BTRFS_SUPER_INFO_SIZE); + dev->pending_io = bh; + + get_bh(bh); + set_buffer_uptodate(bh); + lock_buffer(bh); + bh->b_end_io = btrfs_end_buffer_write_sync; + + if (do_barriers && dev->barriers) { + ret = submit_bh(WRITE_BARRIER, bh); + if (ret == -EOPNOTSUPP) { + printk("btrfs: disabling barriers on dev %s\n", + dev->name); + set_buffer_uptodate(bh); + dev->barriers = 0; + get_bh(bh); + lock_buffer(bh); + ret = submit_bh(WRITE, bh); + } + } else { + ret = submit_bh(WRITE, bh); + } + BUG_ON(ret); + } + + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); + BUG_ON(!dev->pending_io); + bh = dev->pending_io; + wait_on_buffer(bh); + if (!buffer_uptodate(dev->pending_io)) { + if (do_barriers && dev->barriers) { + printk("btrfs: disabling barriers on dev %s\n", + dev->name); + set_buffer_uptodate(bh); + get_bh(bh); + lock_buffer(bh); + dev->barriers = 0; + ret = submit_bh(WRITE, bh); + BUG_ON(ret); + wait_on_buffer(bh); + BUG_ON(!buffer_uptodate(bh)); + } else { + BUG(); + } + + } + dev->pending_io = NULL; + brelse(bh); + } + return 0; +} + int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; - struct extent_buffer *super = root->fs_info->sb_buffer; - struct inode *btree_inode = root->fs_info->btree_inode; - struct super_block *sb = root->fs_info->sb; + ret = write_all_supers(root); +#if 0 if (!btrfs_test_opt(root, NOBARRIER)) blkdev_issue_flush(sb->s_bdev, NULL); set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super); @@ -1184,6 +1291,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root super->start, super->len); if (!btrfs_test_opt(root, NOBARRIER)) blkdev_issue_flush(sb->s_bdev, NULL); +#endif return ret; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 4fac0ccbf8f8..60b01902db79 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -20,6 +20,7 @@ #define __DISKIO__ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) +#define BTRFS_SUPER_INFO_SIZE 4096 struct btrfs_device; struct btrfs_fs_devices; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3b927f698320..07d43553141c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -125,6 +125,7 @@ static int device_list_add(const char *path, return -ENOMEM; } device->devid = devid; + device->barriers = 1; device->name = kstrdup(path, GFP_NOFS); if (!device->name) { kfree(device); @@ -208,6 +209,7 @@ int btrfs_scan_one_device(const char *path, int flags, void *holder, struct buffer_head *bh; int ret; u64 devid; + u64 transid; mutex_lock(&uuid_mutex); @@ -236,14 +238,14 @@ int btrfs_scan_one_device(const char *path, int flags, void *holder, goto error_brelse; } devid = le64_to_cpu(disk_super->dev_item.devid); - printk("found device %Lu on %s\n", devid, path); + transid = btrfs_super_generation(disk_super); + printk("found device %Lu transid %Lu on %s\n", devid, transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); error_brelse: brelse(bh); error_close: close_bdev_excl(bdev); - printk("scan one closes bdev %s\n", path); error: mutex_unlock(&uuid_mutex); return ret; @@ -1143,7 +1145,7 @@ static int read_one_dev(struct btrfs_root *root, device = btrfs_find_device(root, devid); if (!device) { printk("warning devid %Lu not found already\n", devid); - device = kmalloc(sizeof(*device), GFP_NOFS); + device = kzalloc(sizeof(*device), GFP_NOFS); if (!device) return -ENOMEM; list_add(&device->dev_list, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 3d5d0a9cb827..89548837a1cc 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -21,9 +21,12 @@ #include +struct buffer_head; struct btrfs_device { struct list_head dev_list; struct btrfs_root *dev_root; + struct buffer_head *pending_io; + int barriers; spinlock_t io_lock; struct block_device *bdev; From 3dd39914bc0737d055c780d29c20a1d117819cdb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 11 Apr 2008 10:51:07 -0400 Subject: [PATCH 0476/2985] Btrfs: Add extra checks to avoid removing extent_state from pages we can't free Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index cf1de75f088a..e22960e23501 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -436,6 +436,12 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) struct extent_map_tree *map; int ret; + if (page_count(page) > 3) { + /* once for page->private, once for the caller, once + * once for the page cache + */ + return 0; + } tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(map, tree, page, gfp_flags); From 587f77043a1c86e2a7900ff2ce86bef3c1f4e075 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 11 Apr 2008 12:16:46 -0400 Subject: [PATCH 0477/2985] Btrfs: Fixup a few u64<->pointer casts for 32 bit Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 32e778fb014e..46f54dfa2b0a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -430,9 +430,10 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, free_extent_map(em); set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); - set_state_private(failure_tree, start, (u64)failrec); + set_state_private(failure_tree, start, + (u64)(unsigned long)failrec); } else { - failrec = (struct io_failure_record *)private; + failrec = (struct io_failure_record *)(unsigned long)private; } num_copies = btrfs_num_copies( &BTRFS_I(inode)->root->fs_info->mapping_tree, @@ -511,7 +512,8 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start, &private_failure); if (ret == 0) { - failure = (struct io_failure_record *)private_failure; + failure = (struct io_failure_record *)(unsigned long) + private_failure; set_state_private(&BTRFS_I(inode)->io_failure_tree, failure->start, 0); clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, From a5eb62e345fc1818d0d8b6181463200a9e8dfe39 Mon Sep 17 00:00:00 2001 From: Miguel Date: Fri, 11 Apr 2008 15:45:51 -0400 Subject: [PATCH 0478/2985] Btrfs: Endianess bug fix for v0.13 with kernels Fix for a endianess BUG when using btrfs v0.13 with kernels older than 2.6.23 Problem: Has of v0.13, btrfs-progs is using crc32c.c equivalent to the one found on linux-2.6.23/lib/libcrc32c.c Since crc32c_le() changed in linux-2.6.23, when running btrfs v0.13 with older kernels we have a missmatch between the versions of crc32c_le() from btrfs-progs and libcrc32c in the kernel. This missmatch causes a bug when using btrfs on big endian machines. Solution: btrfs_crc32c() macro that when compiling for kernels older than 2.6.23, does endianess conversion to parameters and return value of crc32c(). This endianess conversion nullifies the differences in implementation of crc32c_le(). If kernel 2.6.23 or better, it calls crc32c(). Signed-off-by: Miguel Sousa Filipe --- Signed-off-by: Chris Mason --- fs/btrfs/crc32c.h | 17 +++++++++++++++++ fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/extent-tree.c | 12 +++++------- 3 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 fs/btrfs/crc32c.h diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h new file mode 100644 index 000000000000..a93255b4ee27 --- /dev/null +++ b/fs/btrfs/crc32c.h @@ -0,0 +1,17 @@ +#include +#include +#include + +/** + * implementation of crc32c_le() changed in linux-2.6.23, + * has of v0.13 btrfs-progs is using the latest version. + * We must workaround older implementations of crc32c_le() + * found on older kernel versions. + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) +#define btrfs_crc32c(seed, data, length) \ + __cpu_to_le32( crc32c( __le32_to_cpu(seed), data, length) ) +#else +#define btrfs_crc32c(seed, data, length) \ + crc32c(seed, data, length) +#endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e22960e23501..0bfcc31d94d7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -18,13 +18,13 @@ #include #include -#include #include #include #include #include #include // for block_sync_page #include +#include "crc32c.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -100,7 +100,7 @@ out: u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) { - return crc32c(seed, data, len); + return btrfs_crc32c(seed, data, len); } void btrfs_csum_final(u32 crc, char *result) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 283b08a32a43..a056a2df689a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -15,11 +15,10 @@ * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. */ - #include -#include #include #include "hash.h" +#include "crc32c.h" #include "ctree.h" #include "disk-io.h" #include "print-tree.h" @@ -398,16 +397,15 @@ static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, u32 high_crc = ~(u32)0; u32 low_crc = ~(u32)0; __le64 lenum; - lenum = cpu_to_le64(root_objectid); - high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); + high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(ref_generation); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); if (owner >= BTRFS_FIRST_FREE_OBJECTID) { lenum = cpu_to_le64(owner); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(owner_offset); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); } return ((u64)high_crc << 32) | (u64)low_crc; } From 594994aa3ea4cb52cba9c83a4b6f03e6ff3a646e Mon Sep 17 00:00:00 2001 From: Miguel Date: Fri, 11 Apr 2008 15:46:48 -0400 Subject: [PATCH 0479/2985] Btrfs: define write_cache_pages for linux kernel <= 2.6.20 instead write_cache_pages doesn't exist in linux 2.6.20, change the #if condition to match that. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 866460c3d72c..88322684be6a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2109,8 +2109,7 @@ done: return 0; } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,20) /* Taken directly from 2.6.23 for 2.6.18 back port */ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, void *data); From 73f61b2a6459df982cb4faf0e5cf5ac8b153aaff Mon Sep 17 00:00:00 2001 From: Miguel Date: Fri, 11 Apr 2008 15:50:59 -0400 Subject: [PATCH 0480/2985] Btrfs: bio_endio support for linux 2.6.23 and older. bio_endio() changed prototype on linux 2.6.24, support older kernels using the older prototype. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++++- fs/btrfs/volumes.c | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0bfcc31d94d7..aebe8c21ec80 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -949,11 +949,14 @@ void btrfs_end_io_csum(struct work_struct *work) bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; kfree(end_io_wq); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + bio_endio(bio, bio->bi_size, err); +#else bio_endio(bio, error); +#endif } } - struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 07d43553141c..82bc6cfc110f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -960,7 +960,11 @@ static int end_bio_multi_stripe(struct bio *bio, err = multi->error; kfree(multi); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + bio_endio(bio, bio->bi_size, err); +#else bio_endio(bio, err); +#endif } else { bio_put(bio); } From 98d20f67cf99ccda638dbcdf7b3a9ee0a428d932 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Apr 2008 09:46:10 -0400 Subject: [PATCH 0481/2985] Add a min size parameter to btrfs_alloc_extent On huge machines, delayed allocation may try to allocate massive extents. This change allows btrfs_alloc_extent to return something smaller than the caller asked for, and the data allocation routines will loop over the allocations until it fills the whole delayed alloc. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 ++- fs/btrfs/extent-tree.c | 12 +++++++++--- fs/btrfs/inode.c | 2 ++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e803c4daad21..09d614fcafb1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1286,7 +1286,8 @@ int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, u64 owner, u64 owner_offset); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 num_bytes, u64 root_objectid, u64 ref_generation, + u64 num_bytes, u64 min_bytes, + u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, int data); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a056a2df689a..e49147e767df 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1694,7 +1694,8 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 num_bytes, u64 root_objectid, u64 ref_generation, + u64 num_bytes, u64 min_alloc_size, + u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, int data) @@ -1727,7 +1728,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, info->metadata_alloc_profile; data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } - +again: if (root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, @@ -1751,6 +1752,11 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); + if (ret == -ENOSPC && num_bytes > min_alloc_size) { + num_bytes = num_bytes >> 1; + num_bytes = max(num_bytes, min_alloc_size); + goto again; + } BUG_ON(ret); if (ret) return ret; @@ -1869,7 +1875,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int ret; struct extent_buffer *buf; - ret = btrfs_alloc_extent(trans, root, blocksize, + ret = btrfs_alloc_extent(trans, root, blocksize, blocksize, root_objectid, ref_generation, level, first_objectid, empty_size, hint, (u64)-1, &ins, 0); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 46f54dfa2b0a..7869c0157a1f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -125,6 +125,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) while(num_bytes > 0) { cur_alloc_size = min(num_bytes, root->fs_info->max_extent); ret = btrfs_alloc_extent(trans, root, cur_alloc_size, + root->sectorsize, root->root_key.objectid, trans->transid, inode->i_ino, start, 0, @@ -133,6 +134,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) WARN_ON(1); goto out; } + cur_alloc_size = ins.offset; ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start, ins.objectid, ins.offset, ins.offset); From b248a4152956cbae1b23f4c70ef5b51d6ea2ecfb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Apr 2008 09:48:18 -0400 Subject: [PATCH 0482/2985] Btrfs: A few updates for 2.6.18 and versions older than 2.6.25 This includes fixing a missing spinlock init call that caused oops on mount for most kernels other than 2.6.25. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 12 ++++++++++-- fs/btrfs/file.c | 8 +++++++- fs/btrfs/volumes.c | 15 +++++++-------- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index aebe8c21ec80..9c94dddde704 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -850,7 +850,9 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) bdi_init(bdi); +#endif bdi->ra_pages = default_backing_dev_info.ra_pages * 4; bdi->state = 0; bdi->capabilities = default_backing_dev_info.capabilities; @@ -950,7 +952,7 @@ void btrfs_end_io_csum(struct work_struct *work) bio->bi_end_io = end_io_wq->end_io; kfree(end_io_wq); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - bio_endio(bio, bio->bi_size, err); + bio_endio(bio, bio->bi_size, error); #else bio_endio(bio, error); #endif @@ -1037,10 +1039,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; - INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum, fs_info); INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); #else + INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum); INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); #endif BTRFS_I(fs_info->btree_inode)->root = tree_root; @@ -1173,7 +1176,9 @@ fail: close_all_devices(fs_info); kfree(extent_root); kfree(tree_root); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); +#endif kfree(fs_info); return ERR_PTR(err); } @@ -1407,7 +1412,10 @@ int close_ctree(struct btrfs_root *root) #endif close_all_devices(fs_info); btrfs_mapping_tree_free(&fs_info->mapping_tree); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); +#endif kfree(fs_info->extent_root); kfree(fs_info->tree_root); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5b9386452218..9fbda6552069 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -908,11 +908,17 @@ out_nolock: if (err < 0) num_written = err; } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + do_sync_file_range(file, start_pos, + start_pos + num_written - 1, + SYNC_FILE_RANGE_WRITE | + SYNC_FILE_RANGE_WAIT_AFTER); +#else do_sync_mapping_range(inode->i_mapping, start_pos, start_pos + num_written - 1, SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER); - +#endif invalidate_mapping_pages(inode->i_mapping, start_pos >> PAGE_CACHE_SHIFT, (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 82bc6cfc110f..f81519f0e4a7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -126,6 +126,7 @@ static int device_list_add(const char *path, } device->devid = devid; device->barriers = 1; + spin_lock_init(&device->io_lock); device->name = kstrdup(path, GFP_NOFS); if (!device->name) { kfree(device); @@ -759,8 +760,8 @@ printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.objectid, c em_tree = &extent_root->fs_info->mapping_tree.map_tree; spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - BUG_ON(ret); spin_unlock(&em_tree->lock); + BUG_ON(ret); free_extent_map(em); return ret; } @@ -799,6 +800,7 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, len); + spin_unlock(&em_tree->lock); BUG_ON(!em); BUG_ON(em->start > logical || em->start + em->len < logical); @@ -808,7 +810,6 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) else ret = 1; free_extent_map(em); - spin_unlock(&em_tree->lock); return ret; } @@ -840,6 +841,7 @@ again: spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, *length); + spin_unlock(&em_tree->lock); BUG_ON(!em); BUG_ON(em->start > logical || em->start + em->len < logical); @@ -855,7 +857,6 @@ again: ((map->type & BTRFS_BLOCK_GROUP_RAID1) || (map->type & BTRFS_BLOCK_GROUP_DUP))) { stripes_allocated = map->num_stripes; - spin_unlock(&em_tree->lock); free_extent_map(em); kfree(multi); goto again; @@ -932,7 +933,6 @@ again: *multi_ret = multi; out: free_extent_map(em); - spin_unlock(&em_tree->lock); return 0; } @@ -1060,16 +1060,15 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, length = key->offset; spin_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); + spin_unlock(&map_tree->map_tree.lock); /* already mapped? */ if (em && em->start <= logical && em->start + em->len > logical) { free_extent_map(em); - spin_unlock(&map_tree->map_tree.lock); return 0; } else if (em) { free_extent_map(em); } - spin_unlock(&map_tree->map_tree.lock); map = kzalloc(sizeof(*map), GFP_NOFS); if (!map) @@ -1110,8 +1109,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, spin_lock(&map_tree->map_tree.lock); ret = add_extent_mapping(&map_tree->map_tree, em); - BUG_ON(ret); spin_unlock(&map_tree->map_tree.lock); + BUG_ON(ret); free_extent_map(em); return 0; @@ -1154,7 +1153,7 @@ static int read_one_dev(struct btrfs_root *root, return -ENOMEM; list_add(&device->dev_list, &root->fs_info->fs_devices->devices); - device->total_ios = 0; + device->barriers = 1; spin_lock_init(&device->io_lock); } From e17cade25ff8074101d653557a78df09c16ca276 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 15 Apr 2008 15:41:47 -0400 Subject: [PATCH 0483/2985] Btrfs: Add chunk uuids and update multi-device back references Block headers now store the chunk tree uuid Chunk items records the device uuid for each stripes Device extent items record better back refs to the chunk tree Block groups record better back refs to the chunk tree The chunk tree format has also changed. The objectid of BTRFS_CHUNK_ITEM_KEY used to be the logical offset of the chunk. Now it is a chunk tree id, with the logical offset being stored in the offset field of the key. This allows a single chunk tree to record multiple logical address spaces, upping the number of bytes indexed by a chunk tree from 2^64 to 2^128. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 12 +++++++ fs/btrfs/ctree.h | 80 +++++++++++++++++++++++++++++++++++------- fs/btrfs/disk-io.c | 6 +++- fs/btrfs/extent-tree.c | 22 +++++------- fs/btrfs/print-tree.c | 20 +++++++---- fs/btrfs/volumes.c | 76 +++++++++++++++++++++++++-------------- fs/btrfs/volumes.h | 6 ++-- 7 files changed, 160 insertions(+), 62 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e8bf6c221e4e..618e526c9046 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1382,6 +1382,11 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, write_extent_buffer(c, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(c), BTRFS_FSID_SIZE); + + write_extent_buffer(c, root->fs_info->chunk_tree_uuid, + (unsigned long)btrfs_header_chunk_tree_uuid(c), + BTRFS_UUID_SIZE); + btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); lower_gen = btrfs_header_generation(lower); @@ -1513,6 +1518,9 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root write_extent_buffer(split, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(split), BTRFS_FSID_SIZE); + write_extent_buffer(split, root->fs_info->chunk_tree_uuid, + (unsigned long)btrfs_header_chunk_tree_uuid(split), + BTRFS_UUID_SIZE); mid = (c_nritems + 1) / 2; @@ -2043,6 +2051,10 @@ again: write_extent_buffer(right, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(right), BTRFS_FSID_SIZE); + + write_extent_buffer(right, root->fs_info->chunk_tree_uuid, + (unsigned long)btrfs_header_chunk_tree_uuid(right), + BTRFS_UUID_SIZE); if (mid <= slot) { if (nritems == 1 || leaf_space_used(l, mid, nritems - mid) + space_needed > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 09d614fcafb1..82d67c3db8bc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -70,6 +70,7 @@ extern struct kmem_cache *btrfs_path_cachep; * All files have objectids higher than this. */ #define BTRFS_FIRST_FREE_OBJECTID 256ULL +#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL /* @@ -131,7 +132,7 @@ struct btrfs_mapping_tree { struct extent_map_tree map_tree; }; -#define BTRFS_DEV_UUID_SIZE 16 +#define BTRFS_UUID_SIZE 16 struct btrfs_dev_item { /* the internal btrfs device id */ __le64 devid; @@ -154,17 +155,32 @@ struct btrfs_dev_item { /* type and info about this device */ __le64 type; + /* grouping information for allocation decisions */ + __le32 dev_group; + + /* seek speed 0-100 where 100 is fastest */ + u8 seek_speed; + + /* bandwidth 0-100 where 100 is fastest */ + u8 bandwidth; + /* btrfs generated uuid for this device */ - u8 uuid[BTRFS_DEV_UUID_SIZE]; + u8 uuid[BTRFS_UUID_SIZE]; } __attribute__ ((__packed__)); struct btrfs_stripe { __le64 devid; __le64 offset; + u8 dev_uuid[BTRFS_UUID_SIZE]; } __attribute__ ((__packed__)); struct btrfs_chunk { + /* size of this chunk in bytes */ + __le64 length; + + /* objectid of the root referencing this chunk */ __le64 owner; + __le64 stripe_len; __le64 type; @@ -199,10 +215,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) * every tree block (leaf or node) starts with this header. */ struct btrfs_header { + /* these first four must match the super block */ u8 csum[BTRFS_CSUM_SIZE]; u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ __le64 bytenr; /* which block this node is supposed to live in */ __le64 flags; + + /* allowed to be different from the super from here on down */ + u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; __le64 generation; __le64 owner; __le32 nritems; @@ -235,6 +255,8 @@ struct btrfs_super_block { u8 fsid[16]; /* FS specific uuid */ __le64 bytenr; /* this block number */ __le64 flags; + + /* allowed to be different from the btrfs_header from here own down */ __le64 magic; __le64 generation; __le64 root; @@ -323,14 +345,16 @@ struct btrfs_extent_ref { /* dev extents record free space on individual devices. The owner * field points back to the chunk allocation mapping tree that allocated - * the extent + * the extent. The chunk tree uuid field is a way to double check the owner */ struct btrfs_dev_extent { - __le64 owner; + __le64 chunk_tree; + __le64 chunk_objectid; + __le64 chunk_offset; __le64 length; + u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; } __attribute__ ((__packed__)); - struct btrfs_inode_ref { __le16 name_len; /* name goes here */ @@ -424,7 +448,6 @@ struct btrfs_csum_item { struct btrfs_block_group_item { __le64 used; - __le64 chunk_tree; __le64 chunk_objectid; __le64 flags; } __attribute__ ((__packed__)); @@ -451,6 +474,7 @@ struct btrfs_device; struct btrfs_fs_devices; struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; + u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *chunk_root; @@ -697,6 +721,9 @@ BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32); BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32); +BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8); +BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8); BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64); BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item, @@ -710,12 +737,19 @@ BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item, BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item, sector_size, 32); BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item, + dev_group, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item, + seek_speed, 8); +BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item, + bandwidth, 8); static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) { return (char *)d + offsetof(struct btrfs_dev_item, uuid); } +BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32); @@ -726,6 +760,12 @@ BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); +static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s) +{ + return (char *)s + offsetof(struct btrfs_stripe, dev_uuid); +} + +BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64); BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64); BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); @@ -781,13 +821,10 @@ BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, used, 64); BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, used, 64); -BTRFS_SETGET_STACK_FUNCS(block_group_chunk_tree, struct btrfs_block_group_item, - chunk_tree, 64); -BTRFS_SETGET_FUNCS(disk_block_group_chunk_tree, struct btrfs_block_group_item, - chunk_tree, 64); BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid, struct btrfs_block_group_item, chunk_objectid, 64); -BTRFS_SETGET_FUNCS(disk_block_group_chunk_objecitd, + +BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid, struct btrfs_block_group_item, chunk_objectid, 64); BTRFS_SETGET_FUNCS(disk_block_group_flags, struct btrfs_block_group_item, flags, 64); @@ -850,9 +887,20 @@ BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); /* struct btrfs_dev_extent */ -BTRFS_SETGET_FUNCS(dev_extent_owner, struct btrfs_dev_extent, owner, 64); +BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, + chunk_tree, 64); +BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent, + chunk_objectid, 64); +BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent, + chunk_offset, 64); BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); +static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) +{ + unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid); + return (u8 *)((unsigned long)dev + ptr); +} + /* struct btrfs_extent_ref */ BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); @@ -1087,6 +1135,12 @@ static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) return (u8 *)ptr; } +static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) +{ + unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid); + return (u8 *)ptr; +} + static inline u8 *btrfs_super_fsid(struct extent_buffer *eb) { unsigned long ptr = offsetof(struct btrfs_super_block, fsid); @@ -1311,7 +1365,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, - u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 type, u64 chunk_objectid, u64 chunk_offset, u64 size); /* ctree.c */ int btrfs_previous_item(struct btrfs_root *root, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9c94dddde704..79c284c87286 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1125,6 +1125,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, blocksize); BUG_ON(!chunk_root->node); + read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, + (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), + BTRFS_UUID_SIZE); + ret = btrfs_read_chunk_tree(chunk_root); BUG_ON(ret); @@ -1229,7 +1233,7 @@ int write_all_supers(struct btrfs_root *root) btrfs_set_device_sector_size(sb, dev_item, dev->sector_size); write_extent_buffer(sb, dev->uuid, (unsigned long)btrfs_device_uuid(dev_item), - BTRFS_DEV_UUID_SIZE); + BTRFS_UUID_SIZE); btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN); csum_tree_block(root, sb, 0); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e49147e767df..71f045c63493 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -35,10 +35,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -int btrfs_make_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytes_used, - u64 type, u64 chunk_tree, u64 chunk_objectid, - u64 size); static int cache_block_group(struct btrfs_root *root, @@ -980,7 +976,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, ret = get_state_private(block_group_cache, start, &ptr); if (ret) break; - cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; err = write_one_cache_group(trans, root, path, cache); @@ -1094,8 +1089,7 @@ printk("space info full %Lu\n", flags); BUG_ON(ret); ret = btrfs_make_block_group(trans, extent_root, 0, flags, - extent_root->fs_info->chunk_root->root_key.objectid, - start, num_bytes); + BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); return 0; @@ -2782,7 +2776,7 @@ error: int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, - u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 type, u64 chunk_objectid, u64 chunk_offset, u64 size) { int ret; @@ -2796,14 +2790,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache = kmalloc(sizeof(*cache), GFP_NOFS); BUG_ON(!cache); - cache->key.objectid = chunk_objectid; + cache->key.objectid = chunk_offset; cache->key.offset = size; cache->cached = 0; cache->pinned = 0; + btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); memset(&cache->item, 0, sizeof(cache->item)); btrfs_set_block_group_used(&cache->item, bytes_used); - btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree); btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); cache->flags = type; btrfs_set_block_group_flags(&cache->item, type); @@ -2813,12 +2807,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, BUG_ON(ret); bit = block_group_state_bits(type); - set_extent_bits(block_group_cache, chunk_objectid, - chunk_objectid + size - 1, + set_extent_bits(block_group_cache, chunk_offset, + chunk_offset + size - 1, bit | EXTENT_LOCKED, GFP_NOFS); - set_state_private(block_group_cache, chunk_objectid, - (unsigned long)cache); + set_state_private(block_group_cache, chunk_offset, + (unsigned long)cache); ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, sizeof(cache->item)); BUG_ON(ret); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index ee0de112cf5a..e99f3249d05a 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -24,7 +24,8 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) { int num_stripes = btrfs_chunk_num_stripes(eb, chunk); int i; - printk("\t\tchunk owner %llu type %llu num_stripes %d\n", + printk("\t\tchunk length %llu owner %llu type %llu num_stripes %d\n", + (unsigned long long)btrfs_chunk_length(eb, chunk), (unsigned long long)btrfs_chunk_owner(eb, chunk), (unsigned long long)btrfs_chunk_type(eb, chunk), num_stripes); @@ -140,17 +141,24 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_DEV_EXTENT_KEY: dev_extent = btrfs_item_ptr(l, i, struct btrfs_dev_extent); - printk("\t\tdev extent owner %llu length %llu\n", - (unsigned long long)btrfs_dev_extent_owner(l, dev_extent), - (unsigned long long)btrfs_dev_extent_length(l, dev_extent)); + printk("\t\tdev extent chunk_tree %llu\n" + "\t\tchunk objectid %llu chunk offset %llu " + "length %llu\n", + (unsigned long long) + btrfs_dev_extent_chunk_tree(l, dev_extent), + (unsigned long long) + btrfs_dev_extent_chunk_objectid(l, dev_extent), + (unsigned long long) + btrfs_dev_extent_chunk_offset(l, dev_extent), + (unsigned long long) + btrfs_dev_extent_length(l, dev_extent)); }; } } void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) { - int i; - u32 nr; + int i; u32 nr; struct btrfs_key key; int level; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f81519f0e4a7..23ebd95b25e0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -180,7 +180,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, list_for_each(cur, head) { device = list_entry(cur, struct btrfs_device, dev_list); bdev = open_bdev_excl(device->name, flags, holder); -printk("opening %s devid %Lu\n", device->name, device->devid); + if (IS_ERR(bdev)) { printk("open %s failed\n", device->name); ret = PTR_ERR(bdev); @@ -190,7 +190,6 @@ printk("opening %s devid %Lu\n", device->name, device->devid); fs_devices->latest_bdev = bdev; if (device->devid == fs_devices->lowest_devid) { fs_devices->lowest_bdev = bdev; -printk("lowest bdev %s\n", device->name); } device->bdev = bdev; } @@ -372,7 +371,9 @@ error: int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, - u64 owner, u64 num_bytes, u64 *start) + u64 chunk_tree, u64 chunk_objectid, + u64 chunk_offset, + u64 num_bytes, u64 *start) { int ret; struct btrfs_path *path; @@ -400,7 +401,14 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); - btrfs_set_dev_extent_owner(leaf, extent, owner); + btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree); + btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid); + btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); + + write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, + (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent), + BTRFS_UUID_SIZE); + btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(leaf); err: @@ -408,17 +416,18 @@ err: return ret; } -static int find_next_chunk(struct btrfs_root *root, u64 *objectid) +static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset) { struct btrfs_path *path; int ret; struct btrfs_key key; + struct btrfs_chunk *chunk; struct btrfs_key found_key; path = btrfs_alloc_path(); BUG_ON(!path); - key.objectid = (u64)-1; + key.objectid = objectid; key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; @@ -430,11 +439,18 @@ static int find_next_chunk(struct btrfs_root *root, u64 *objectid) ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); if (ret) { - *objectid = 0; + *offset = 0; } else { btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); - *objectid = found_key.objectid + found_key.offset; + if (found_key.objectid != objectid) + *offset = 0; + else { + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_chunk); + *offset = found_key.offset + + btrfs_chunk_length(path->nodes[0], chunk); + } } ret = 0; error: @@ -520,9 +536,12 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); + btrfs_set_device_group(leaf, dev_item, 0); + btrfs_set_device_seek_speed(leaf, dev_item, 0); + btrfs_set_device_bandwidth(leaf, dev_item, 0); ptr = (unsigned long)btrfs_device_uuid(dev_item); - write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); + write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); ret = 0; @@ -674,7 +693,10 @@ again: return -ENOSPC; } - ret = find_next_chunk(chunk_root, &key.objectid); + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.type = BTRFS_CHUNK_ITEM_KEY; + ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, + &key.offset); if (ret) return ret; @@ -696,8 +718,9 @@ again: *num_bytes = calc_size * num_stripes; index = 0; -printk("new chunk type %Lu start %Lu size %Lu\n", type, key.objectid, *num_bytes); +printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes); while(index < num_stripes) { + struct btrfs_stripe *stripe; BUG_ON(list_empty(&private_devs)); cur = private_devs.next; device = list_entry(cur, struct btrfs_device, dev_list); @@ -708,26 +731,28 @@ printk("new chunk type %Lu start %Lu size %Lu\n", type, key.objectid, *num_bytes list_move_tail(&device->dev_list, dev_list); ret = btrfs_alloc_dev_extent(trans, device, - key.objectid, - calc_size, &dev_offset); + info->chunk_root->root_key.objectid, + BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset, + calc_size, &dev_offset); BUG_ON(ret); -printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.objectid, calc_size, device->devid, type); +printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.offset, calc_size, device->devid, type); device->bytes_used += calc_size; ret = btrfs_update_device(trans, device); BUG_ON(ret); map->stripes[index].dev = device; map->stripes[index].physical = dev_offset; - btrfs_set_stack_stripe_devid(stripes + index, device->devid); - btrfs_set_stack_stripe_offset(stripes + index, dev_offset); + stripe = stripes + index; + btrfs_set_stack_stripe_devid(stripe, device->devid); + btrfs_set_stack_stripe_offset(stripe, dev_offset); + memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); physical = dev_offset; index++; } BUG_ON(!list_empty(&private_devs)); - /* key.objectid was set above */ - key.offset = *num_bytes; - key.type = BTRFS_CHUNK_ITEM_KEY; + /* key was set above */ + btrfs_set_stack_chunk_length(chunk, *num_bytes); btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); btrfs_set_stack_chunk_stripe_len(chunk, stripe_len); btrfs_set_stack_chunk_type(chunk, type); @@ -745,14 +770,14 @@ printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.objectid, c ret = btrfs_insert_item(trans, chunk_root, &key, chunk, btrfs_chunk_item_size(num_stripes)); BUG_ON(ret); - *start = key.objectid; + *start = key.offset;; em = alloc_extent_map(GFP_NOFS); if (!em) return -ENOMEM; em->bdev = (struct block_device *)map; - em->start = key.objectid; - em->len = key.offset; + em->start = key.offset; + em->len = *num_bytes; em->block_start = 0; kfree(chunk); @@ -1056,8 +1081,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, int ret; int i; - logical = key->objectid; - length = key->offset; + logical = key->offset; + length = btrfs_chunk_length(leaf, chunk); spin_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); spin_unlock(&map_tree->map_tree.lock); @@ -1131,7 +1156,7 @@ static int fill_device_from_item(struct extent_buffer *leaf, device->sector_size = btrfs_device_sector_size(leaf, dev_item); ptr = (unsigned long)btrfs_device_uuid(dev_item); - read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); + read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); return 0; } @@ -1143,7 +1168,6 @@ static int read_one_dev(struct btrfs_root *root, struct btrfs_device *device; u64 devid; int ret; - devid = btrfs_device_id(leaf, dev_item); device = btrfs_find_device(root, devid); if (!device) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 89548837a1cc..f9cae3072171 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -57,7 +57,7 @@ struct btrfs_device { u64 type; /* physical drive uuid (or lvm uuid) */ - u8 uuid[BTRFS_DEV_UUID_SIZE]; + u8 uuid[BTRFS_UUID_SIZE]; }; struct btrfs_fs_devices { @@ -93,7 +93,9 @@ struct btrfs_multi_bio { int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, - u64 owner, u64 num_bytes, u64 *start); + u64 chunk_tree, u64 chunk_objectid, + u64 chunk_offset, + u64 num_bytes, u64 *start); int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, struct btrfs_multi_bio **multi_ret, int mirror_num); From 321aecc65671ae8136bd2ca6879b56f0221f8ac8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 16 Apr 2008 10:49:51 -0400 Subject: [PATCH 0484/2985] Btrfs: Add RAID10 support Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 7 +++++++ fs/btrfs/extent-tree.c | 1 + fs/btrfs/volumes.c | 46 +++++++++++++++++++++++++++++++++++++----- 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 82d67c3db8bc..a22edcf49174 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -197,6 +197,9 @@ struct btrfs_chunk { * item in the btree */ __le16 num_stripes; + + /* sub stripes only matter for raid10 */ + __le16 sub_stripes; struct btrfs_stripe stripe; /* additional stripes go here */ } __attribute__ ((__packed__)); @@ -444,6 +447,7 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) #define BTRFS_BLOCK_GROUP_DUP (1 << 5) +#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) struct btrfs_block_group_item { @@ -757,6 +761,7 @@ BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32); BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32); BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64); BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); +BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16); BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); @@ -778,6 +783,8 @@ BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk, BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64); BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk, + sub_stripes, 16); BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64); BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71f045c63493..4e5bd62e6e1a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1042,6 +1042,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) { u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_DUP); if (extra_flags) { if (flags & BTRFS_BLOCK_GROUP_DATA) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 23ebd95b25e0..e6417a573d44 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -33,6 +33,7 @@ struct map_lookup { int stripe_len; int sector_size; int num_stripes; + int sub_stripes; struct btrfs_bio_stripe stripes[]; }; @@ -641,6 +642,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 avail; u64 max_avail = 0; int num_stripes = 1; + int sub_stripes = 0; int looped = 0; int ret; int index; @@ -658,6 +660,13 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, num_stripes = min_t(u64, 2, btrfs_super_num_devices(&info->super_copy)); } + if (type & (BTRFS_BLOCK_GROUP_RAID10)) { + num_stripes = btrfs_super_num_devices(&info->super_copy); + if (num_stripes < 4) + return -ENOSPC; + num_stripes &= ~(u32)1; + sub_stripes = 2; + } again: INIT_LIST_HEAD(&private_devs); cur = dev_list->next; @@ -714,6 +723,8 @@ again: if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) *num_bytes = calc_size; + else if (type & BTRFS_BLOCK_GROUP_RAID10) + *num_bytes = calc_size * num_stripes / sub_stripes; else *num_bytes = calc_size * num_stripes; @@ -760,12 +771,14 @@ printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.offset, cal btrfs_set_stack_chunk_io_align(chunk, stripe_len); btrfs_set_stack_chunk_io_width(chunk, stripe_len); btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); + btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes); map->sector_size = extent_root->sectorsize; map->stripe_len = stripe_len; map->io_align = stripe_len; map->io_width = stripe_len; map->type = type; map->num_stripes = num_stripes; + map->sub_stripes = sub_stripes; ret = btrfs_insert_item(trans, chunk_root, &key, chunk, btrfs_chunk_item_size(num_stripes)); @@ -832,6 +845,8 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) map = (struct map_lookup *)em->bdev; if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1)) ret = map->num_stripes; + else if (map->type & BTRFS_BLOCK_GROUP_RAID10) + ret = map->sub_stripes; else ret = 1; free_extent_map(em); @@ -849,6 +864,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 stripe_offset; u64 stripe_nr; int stripes_allocated = 8; + int stripes_required = 1; int stripe_index; int i; struct btrfs_multi_bio *multi = NULL; @@ -877,10 +893,16 @@ again: mirror_num = 0; /* if our multi bio struct is too small, back off and try again */ - if (multi_ret && (rw & (1 << BIO_RW)) && - stripes_allocated < map->num_stripes && - ((map->type & BTRFS_BLOCK_GROUP_RAID1) || - (map->type & BTRFS_BLOCK_GROUP_DUP))) { + if (rw & (1 << BIO_RW)) { + if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP)) { + stripes_required = map->num_stripes; + } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { + stripes_required = map->sub_stripes; + } + } + if (multi_ret && rw == WRITE && + stripes_allocated < stripes_required) { stripes_allocated = map->num_stripes; free_extent_map(em); kfree(multi); @@ -900,6 +922,7 @@ again: stripe_offset = offset - stripe_offset; if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_DUP)) { /* we limit the length of each bio to what fits in a stripe */ *length = min_t(u64, em->len - offset, @@ -937,6 +960,19 @@ again: multi->num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; + } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { + int factor = map->num_stripes / map->sub_stripes; + int orig_stripe_nr = stripe_nr; + + stripe_index = do_div(stripe_nr, factor); + stripe_index *= map->sub_stripes; + + if (rw & (1 << BIO_RW)) + multi->num_stripes = map->sub_stripes; + else if (mirror_num) + stripe_index += mirror_num - 1; + else + stripe_index += orig_stripe_nr % map->sub_stripes; } else { /* * after this do_div call, stripe_nr is the number of stripes @@ -946,7 +982,6 @@ again: stripe_index = do_div(stripe_nr, map->num_stripes); } BUG_ON(stripe_index >= map->num_stripes); - BUG_ON(stripe_index != 0 && multi->num_stripes > 1); for (i = 0; i < multi->num_stripes; i++) { multi->stripes[i].physical = @@ -1120,6 +1155,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, map->sector_size = btrfs_chunk_sector_size(leaf, chunk); map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); map->type = btrfs_chunk_type(leaf, chunk); + map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); for (i = 0; i < num_stripes; i++) { map->stripes[i].physical = btrfs_stripe_offset_nr(leaf, chunk, i); From 44b8bd7edda4f63de180d0f7325c9fb704b3806b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 16 Apr 2008 11:14:51 -0400 Subject: [PATCH 0485/2985] Btrfs: Create a work queue for bio writes This allows checksumming to happen in parallel among many cpus, and keeps us from bogging down pdflush with the checksumming code. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 ++ fs/btrfs/disk-io.c | 93 ++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/disk-io.h | 3 ++ fs/btrfs/extent_io.h | 5 ++- fs/btrfs/inode.c | 28 +++++++++---- 5 files changed, 119 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a22edcf49174..ff15b8513f90 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -514,8 +514,11 @@ struct btrfs_fs_info { struct list_head hashers; struct list_head dead_roots; struct list_head end_io_work_list; + struct list_head async_submit_work_list; struct work_struct end_io_work; + struct work_struct async_submit_work; spinlock_t end_io_work_lock; + spinlock_t async_submit_work_lock; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) struct work_struct trans_work; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 79c284c87286..9e41ea93ebce 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -47,6 +47,7 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops; static struct workqueue_struct *end_io_workqueue; +static struct workqueue_struct *async_submit_workqueue; struct end_io_wq { struct bio *bio; @@ -58,6 +59,15 @@ struct end_io_wq { struct list_head list; }; +struct async_submit_bio { + struct inode *inode; + struct bio *bio; + struct list_head list; + extent_submit_bio_hook_t *submit_bio_hook; + int rw; + int mirror_num; +}; + struct extent_map *btree_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 len, int create) @@ -365,7 +375,31 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } -static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, +int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, + int rw, struct bio *bio, int mirror_num, + extent_submit_bio_hook_t *submit_bio_hook) +{ + struct async_submit_bio *async; + + async = kmalloc(sizeof(*async), GFP_NOFS); + if (!async) + return -ENOMEM; + + async->inode = inode; + async->rw = rw; + async->bio = bio; + async->mirror_num = mirror_num; + async->submit_bio_hook = submit_bio_hook; + + spin_lock(&fs_info->async_submit_work_lock); + list_add_tail(&async->list, &fs_info->async_submit_work_list); + spin_unlock(&fs_info->async_submit_work_lock); + + queue_work(async_submit_workqueue, &fs_info->async_submit_work); + return 0; +} + +static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -389,6 +423,17 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); } +static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, + int mirror_num) +{ + if (!(rw & (1 << BIO_RW))) { + return __btree_submit_bio_hook(inode, rw, bio, mirror_num); + } + return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, + inode, rw, bio, mirror_num, + __btree_submit_bio_hook); +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_io_tree *tree; @@ -903,9 +948,9 @@ static int bio_ready_for_csum(struct bio *bio) } #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -void btrfs_end_io_csum(void *p) +static void btrfs_end_io_csum(void *p) #else -void btrfs_end_io_csum(struct work_struct *work) +static void btrfs_end_io_csum(struct work_struct *work) #endif { #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) @@ -959,6 +1004,39 @@ void btrfs_end_io_csum(struct work_struct *work) } } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +static void btrfs_async_submit_work(void *p) +#else +static void btrfs_async_submit_work(struct work_struct *work) +#endif +{ +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + struct btrfs_fs_info *fs_info = p; +#else + struct btrfs_fs_info *fs_info = container_of(work, + struct btrfs_fs_info, + async_submit_work); +#endif + struct async_submit_bio *async; + struct list_head *next; + + while(1) { + spin_lock(&fs_info->async_submit_work_lock); + if (list_empty(&fs_info->async_submit_work_list)) { + spin_unlock(&fs_info->async_submit_work_lock); + return; + } + next = fs_info->async_submit_work_list.next; + list_del(next); + spin_unlock(&fs_info->async_submit_work_lock); + + async = list_entry(next, struct async_submit_bio, list); + async->submit_bio_hook(async->inode, async->rw, async->bio, + async->mirror_num); + kfree(async); + } +} + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices) { @@ -987,14 +1065,17 @@ struct btrfs_root *open_ctree(struct super_block *sb, } end_io_workqueue = create_workqueue("btrfs-end-io"); BUG_ON(!end_io_workqueue); + async_submit_workqueue = create_workqueue("btrfs-async-submit"); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); INIT_LIST_HEAD(&fs_info->end_io_work_list); + INIT_LIST_HEAD(&fs_info->async_submit_work_list); spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->end_io_work_lock); + spin_lock_init(&fs_info->async_submit_work_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); @@ -1041,9 +1122,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum, fs_info); + INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work, + fs_info); INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); #else INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum); + INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work); INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); #endif BTRFS_I(fs_info->btree_inode)->root = tree_root; @@ -1403,6 +1487,9 @@ int close_ctree(struct btrfs_root *root) flush_workqueue(end_io_workqueue); destroy_workqueue(end_io_workqueue); + flush_workqueue(async_submit_workqueue); + destroy_workqueue(async_submit_workqueue); + iput(fs_info->btree_inode); #if 0 while(!list_empty(&fs_info->hashers)) { diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 60b01902db79..5d5f6979c41c 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -74,4 +74,7 @@ int btrfs_verify_block_csum(struct btrfs_root *root, struct extent_buffer *buf); int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, int metadata); +int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, + int rw, struct bio *bio, int mirror_num, + extent_submit_bio_hook_t *submit_bio_hook); #endif diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index b47859ccd78a..232300d13be2 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -24,11 +24,12 @@ struct extent_state; +typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, + struct bio *bio, int mirror_num); struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); - int (*submit_bio_hook)(struct inode *inode, int rw, struct bio *bio, - int mirror_num); + extent_submit_bio_hook_t *submit_bio_hook; int (*merge_bio_hook)(struct page *page, unsigned long offset, size_t size, struct bio *bio); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7869c0157a1f..457daf725d29 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -324,13 +324,29 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, return 0; } -int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, +int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; int ret = 0; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + btrfs_csum_file_blocks(trans, root, inode, bio); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + return btrfs_map_bio(root, rw, bio, mirror_num); +} + +int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, + int mirror_num) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret = 0; + if (!(rw & (1 << BIO_RW))) { ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); @@ -342,13 +358,9 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, goto mapit; } - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - btrfs_csum_file_blocks(trans, root, inode, bio); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); + return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, + inode, rw, bio, mirror_num, + __btrfs_submit_bio_hook); mapit: return btrfs_map_bio(root, rw, bio, mirror_num); } From e015640f9c4fa2417dcc3bbbb3b2b61ad4059ab0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 16 Apr 2008 11:15:20 -0400 Subject: [PATCH 0486/2985] Btrfs: Write bio checksumming outside the FS mutex This significantly improves streaming write performance by allowing concurrency in the data checksumming. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 +++- fs/btrfs/file-item.c | 46 ++++++++++++++++++++++++++++++-------------- fs/btrfs/inode.c | 11 ++++++++++- 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ff15b8513f90..b56ae1950658 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1515,7 +1515,9 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, u64 bytenr, int mod); int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct bio *bio); + struct bio *bio, char *sums); +int btrfs_csum_one_bio(struct btrfs_root *root, + struct bio *bio, char **sums_ret); struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 374d0245b29d..9259aece6ed1 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -134,9 +134,36 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } +int btrfs_csum_one_bio(struct btrfs_root *root, + struct bio *bio, char **sums_ret) +{ + u32 *sums; + char *data; + struct bio_vec *bvec = bio->bi_io_vec; + int bio_index = 0; + + sums = kmalloc(bio->bi_vcnt * BTRFS_CRC32_SIZE, GFP_NOFS); + if (!sums) + return -ENOMEM; + *sums_ret = (char *)sums; + + while(bio_index < bio->bi_vcnt) { + data = kmap_atomic(bvec->bv_page, KM_USER0); + *sums = ~(u32)0; + *sums = btrfs_csum_data(root, data + bvec->bv_offset, + *sums, bvec->bv_len); + kunmap_atomic(data, KM_USER0); + btrfs_csum_final(*sums, (char *)sums); + sums++; + bio_index++; + bvec++; + } + return 0; +} + int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct bio *bio) + struct bio *bio, char *sums) { u64 objectid = inode->i_ino; u64 offset; @@ -150,12 +177,11 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_csum_item *item_end; struct extent_buffer *leaf = NULL; u64 csum_offset; - u32 csum_result; + u32 *sums32 = (u32 *)sums; u32 nritems; u32 ins_size; int bio_index = 0; struct bio_vec *bvec = bio->bi_io_vec; - char *data; char *eb_map; char *eb_token; unsigned long map_len; @@ -278,15 +304,6 @@ found: btrfs_item_size_nr(leaf, path->slots[0])); eb_token = NULL; next_bvec: - data = kmap_atomic(bvec->bv_page, KM_USER0); - csum_result = ~(u32)0; - csum_result = btrfs_csum_data(root, data + bvec->bv_offset, - csum_result, bvec->bv_len); - kunmap_atomic(data, KM_USER0); - btrfs_csum_final(csum_result, (char *)&csum_result); - if (csum_result == 0) { - printk("csum result is 0 for inode %lu offset %Lu\n", inode->i_ino, offset); - } if (!eb_token || (unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) { @@ -304,13 +321,14 @@ next_bvec: } if (eb_token) { memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), - &csum_result, BTRFS_CRC32_SIZE); + sums32, BTRFS_CRC32_SIZE); } else { - write_extent_buffer(leaf, &csum_result, (unsigned long)item, + write_extent_buffer(leaf, sums32, (unsigned long)item, BTRFS_CRC32_SIZE); } bio_index++; bvec++; + sums32++; if (bio_index < bio->bi_vcnt) { item = (struct btrfs_csum_item *)((char *)item + BTRFS_CRC32_SIZE); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 457daf725d29..3e1f21c632c1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -330,14 +330,23 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; int ret = 0; + char *sums = NULL; + + ret = btrfs_csum_one_bio(root, bio, &sums); + BUG_ON(ret); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); - btrfs_csum_file_blocks(trans, root, inode, bio); + btrfs_csum_file_blocks(trans, root, inode, bio, sums); + ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); + + kfree(sums); + return btrfs_map_bio(root, rw, bio, mirror_num); } From 699122f55904bec252ea4f8a64228ce98e3d9fda Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 16 Apr 2008 12:59:22 -0400 Subject: [PATCH 0487/2985] Btrfs: Don't wait on tree block writeback before freeing them anymore This isn't required anymore because we don't reallocate blocks that have already been written in this transaction. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 ----- fs/btrfs/extent-tree.c | 1 - fs/btrfs/inode.c | 2 ++ 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 618e526c9046..1c3d9d6fbdad 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -751,7 +751,6 @@ static int balance_level(struct btrfs_trans_handle *trans, add_root_to_dirty_list(root); path->nodes[level] = NULL; clean_tree_block(trans, root, mid); - wait_on_tree_block_writeback(root, mid); /* once for the path */ free_extent_buffer(mid); ret = btrfs_free_extent(trans, root, mid->start, mid->len, @@ -810,7 +809,6 @@ static int balance_level(struct btrfs_trans_handle *trans, u32 blocksize = right->len; clean_tree_block(trans, root, right); - wait_on_tree_block_writeback(root, right); free_extent_buffer(right); right = NULL; wret = del_ptr(trans, root, path, level + 1, pslot + @@ -854,7 +852,6 @@ static int balance_level(struct btrfs_trans_handle *trans, u64 bytenr = mid->start; u32 blocksize = mid->len; clean_tree_block(trans, root, mid); - wait_on_tree_block_writeback(root, mid); free_extent_buffer(mid); mid = NULL; wret = del_ptr(trans, root, path, level + 1, pslot); @@ -2638,7 +2635,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, } else { u64 root_gen = btrfs_header_generation(path->nodes[1]); clean_tree_block(trans, root, leaf); - wait_on_tree_block_writeback(root, leaf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; @@ -2690,7 +2686,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, path->nodes[1]); clean_tree_block(trans, root, leaf); - wait_on_tree_block_writeback(root, leaf); wret = del_ptr(trans, root, path, 1, slot); if (wret) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4e5bd62e6e1a..76fd5d7146e1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1887,7 +1887,6 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, } btrfs_set_header_generation(buf, trans->transid); clean_tree_block(trans, root, buf); - wait_on_tree_block_writeback(root, buf); btrfs_set_buffer_uptodate(buf); if (PageDirty(buf->first_page)) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3e1f21c632c1..5632ea760077 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -383,9 +383,11 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) struct btrfs_csum_item *item; struct btrfs_path *path = NULL; u32 csum; + if (btrfs_test_opt(root, NODATASUM) || btrfs_test_flag(inode, NODATASUM)) return 0; + mutex_lock(&root->fs_info->fs_mutex); path = btrfs_alloc_path(); item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); From 7b859fe7cd383fa5e7e379884a748680d0af7bcc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 16 Apr 2008 13:06:16 -0400 Subject: [PATCH 0488/2985] Btrfs: Only do async bio submission for pdflush Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9e41ea93ebce..76ee7a4ae00c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -381,6 +381,13 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, { struct async_submit_bio *async; + /* + * inline writerback should stay inline, only hop to the async + * queue if we're pdflush + */ + if (!current_is_pdflush()) + return submit_bio_hook(inode, rw, bio, mirror_num); + async = kmalloc(sizeof(*async), GFP_NOFS); if (!async) return -ENOMEM; From 3b951516ed703af0f6d82053937655ad69b60864 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Apr 2008 11:29:12 -0400 Subject: [PATCH 0489/2985] Btrfs: Use the extent map cache to find the logical disk block during data retries The data read retry code needs to find the logical disk block before it can resubmit new bios. But, finding this block isn't allowed to take the fs_mutex because that will deadlock with a number of different callers. This changes the retry code to use the extent map cache instead, but that requires the extent map cache to have the extent we're looking for. This is a problem because btrfs_drop_extent_cache just drops the entire extent instead of the little tiny part it is invalidating. The bulk of the code in this patch changes btrfs_drop_extent_cache to invalidate only a portion of the extent cache, and changes btrfs_get_extent to deal with the results. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 1 + fs/btrfs/extent_io.c | 12 +++-- fs/btrfs/file.c | 47 +++++++++++++++- fs/btrfs/inode.c | 120 +++++++++++++++++++++++++++++++++++++---- fs/btrfs/volumes.c | 3 ++ 5 files changed, 168 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 76fd5d7146e1..593011e5d455 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1747,6 +1747,7 @@ again: search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); + if (ret == -ENOSPC && num_bytes > min_alloc_size) { num_bytes = num_bytes >> 1; num_bytes = max(num_bytes, min_alloc_size); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 88322684be6a..21597bea21fd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1025,7 +1025,8 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree, search_again: node = tree_search(tree, cur_start); if (!node) { - *end = (u64)-1; + if (!found) + *end = (u64)-1; goto out; } @@ -1540,6 +1541,8 @@ static int end_bio_extent_readpage(struct bio *bio, start, end, state); if (ret == 0) { state = NULL; + uptodate = + test_bit(BIO_UPTODATE, &bio->bi_flags); continue; } } @@ -1555,10 +1558,11 @@ static int end_bio_extent_readpage(struct bio *bio, !(state->state & EXTENT_LOCKED)) state = NULL; } - if (!state && uptodate) { + if (!state) { spin_unlock_irqrestore(&tree->lock, flags); - set_extent_uptodate(tree, start, end, - GFP_ATOMIC); + if (uptodate) + set_extent_uptodate(tree, start, end, + GFP_ATOMIC); unlock_extent(tree, start, end, GFP_ATOMIC); goto next_io; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9fbda6552069..3f5525f0834c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -356,12 +356,23 @@ out_unlock: int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) { struct extent_map *em; + struct extent_map *split = NULL; + struct extent_map *split2 = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 len = end - start + 1; + int ret; + int testend = 1; - if (end == (u64)-1) + if (end == (u64)-1) { len = (u64)-1; + testend = 0; + } while(1) { + if (!split) + split = alloc_extent_map(GFP_NOFS); + if (!split2) + split2 = alloc_extent_map(GFP_NOFS); + spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (!em) { @@ -369,6 +380,36 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) break; } remove_extent_mapping(em_tree, em); + + if (em->block_start < EXTENT_MAP_LAST_BYTE && + em->start < start) { + split->start = em->start; + split->len = start - em->start; + split->block_start = em->block_start; + split->bdev = em->bdev; + split->flags = em->flags; + ret = add_extent_mapping(em_tree, split); + BUG_ON(ret); + free_extent_map(split); + split = split2; + split2 = NULL; + } + if (em->block_start < EXTENT_MAP_LAST_BYTE && + testend && em->start + em->len > start + len) { + u64 diff = start + len - em->start; + + split->start = start + len; + split->len = em->start + em->len - (start + len); + split->bdev = em->bdev; + split->flags = em->flags; + + split->block_start = em->block_start + diff; + + ret = add_extent_mapping(em_tree, split); + BUG_ON(ret); + free_extent_map(split); + split = NULL; + } spin_unlock(&em_tree->lock); /* once for us */ @@ -376,6 +417,10 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) /* once for the tree*/ free_extent_map(em); } + if (split) + free_extent_map(split); + if (split2) + free_extent_map(split2); return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5632ea760077..40f8da884090 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -122,6 +122,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) if (alloc_hint == EXTENT_MAP_INLINE) goto out; + BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); + while(num_bytes > 0) { cur_alloc_size = min(num_bytes, root->fs_info->max_extent); ret = btrfs_alloc_extent(trans, root, cur_alloc_size, @@ -140,6 +142,11 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) ins.offset); inode->i_blocks += ins.offset >> 9; btrfs_check_file(root, inode); + if (num_bytes < cur_alloc_size) { + printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, + cur_alloc_size); + break; + } num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; @@ -427,6 +434,7 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, struct extent_map *em; struct inode *inode = page->mapping->host; struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct bio *bio; int num_copies; int ret; @@ -434,7 +442,6 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, ret = get_state_private(failure_tree, start, &private); if (ret) { - size_t pg_offset = start - page_offset(page); failrec = kmalloc(sizeof(*failrec), GFP_NOFS); if (!failrec) return -ENOMEM; @@ -442,8 +449,13 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, failrec->len = end - start + 1; failrec->last_mirror = 0; - em = btrfs_get_extent(inode, NULL, pg_offset, start, - failrec->len, 0); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, failrec->len); + if (em->start > start || em->start + em->len < start) { + free_extent_map(em); + em = NULL; + } + spin_unlock(&em_tree->lock); if (!em || IS_ERR(em)) { kfree(failrec); @@ -559,6 +571,8 @@ zeroit: flush_dcache_page(page); kunmap_atomic(kaddr, KM_IRQ0); local_irq_restore(flags); + if (private == 0) + return 0; return -EIO; } @@ -908,8 +922,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int pending_del_nr = 0; int pending_del_slot = 0; int extent_type = -1; + u64 mask = root->sectorsize - 1; - btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1); + btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); path = btrfs_alloc_path(); path->reada = -1; BUG_ON(!path); @@ -1212,7 +1227,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) hole_start, 0, 0, hole_size); btrfs_drop_extent_cache(inode, hole_start, - hole_size - 1); + (u64)-1); btrfs_check_file(root, inode); } btrfs_end_transaction(trans, root); @@ -2083,6 +2098,68 @@ out_unlock: return err; } +static int merge_extent_mapping(struct extent_map_tree *em_tree, + struct extent_map *existing, + struct extent_map *em) +{ + u64 start_diff; + u64 new_end; + int ret = 0; + int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE; + + if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE) + goto invalid; + + if (!real_blocks && em->block_start != existing->block_start) + goto invalid; + + new_end = max(existing->start + existing->len, em->start + em->len); + + if (existing->start >= em->start) { + if (em->start + em->len < existing->start) + goto invalid; + + start_diff = existing->start - em->start; + if (real_blocks && em->block_start + start_diff != + existing->block_start) + goto invalid; + + em->len = new_end - em->start; + + remove_extent_mapping(em_tree, existing); + /* free for the tree */ + free_extent_map(existing); + ret = add_extent_mapping(em_tree, em); + + } else if (em->start > existing->start) { + + if (existing->start + existing->len < em->start) + goto invalid; + + start_diff = em->start - existing->start; + if (real_blocks && existing->block_start + start_diff != + em->block_start) + goto invalid; + + remove_extent_mapping(em_tree, existing); + em->block_start = existing->block_start; + em->start = existing->start; + em->len = new_end - existing->start; + free_extent_map(existing); + + ret = add_extent_mapping(em_tree, em); + } else { + goto invalid; + } + return ret; + +invalid: + printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n", + existing->start, existing->len, existing->block_start, + em->start, em->len, em->block_start); + return -EIO; +} + struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) @@ -2267,12 +2344,35 @@ insert: err = 0; spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); + + /* it is possible that someone inserted the extent into the tree + * while we had the lock dropped. It is also possible that + * an overlapping map exists in the tree + */ if (ret == -EEXIST) { - free_extent_map(em); - em = lookup_extent_mapping(em_tree, start, len); - if (!em) { - err = -EIO; - printk("failing to insert %Lu %Lu\n", start, len); + struct extent_map *existing; + existing = lookup_extent_mapping(em_tree, start, len); + if (!existing) { + existing = lookup_extent_mapping(em_tree, em->start, + em->len); + if (existing) { + err = merge_extent_mapping(em_tree, existing, + em); + free_extent_map(existing); + if (err) { + free_extent_map(em); + em = NULL; + } + } else { + err = -EIO; + printk("failing to insert %Lu %Lu\n", + start, len); + free_extent_map(em); + em = NULL; + } + } else { + free_extent_map(em); + em = existing; } } spin_unlock(&em_tree->lock); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e6417a573d44..0e658c1d8211 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -883,6 +883,9 @@ again: spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, *length); spin_unlock(&em_tree->lock); + if (!em) { + printk("unable to find logical %Lu\n", logical); + } BUG_ON(!em); BUG_ON(em->start > logical || em->start + em->len < logical); From 7bf3b490dfa57bdef5892dffa5021469a0cd2b7f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Apr 2008 11:58:30 -0400 Subject: [PATCH 0490/2985] Btrfs: Avoid 64 bit div for RAID10 Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0e658c1d8211..c11b6fd408a6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -724,7 +724,7 @@ again: if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) *num_bytes = calc_size; else if (type & BTRFS_BLOCK_GROUP_RAID10) - *num_bytes = calc_size * num_stripes / sub_stripes; + *num_bytes = calc_size * (num_stripes / sub_stripes); else *num_bytes = calc_size * num_stripes; From 41471e8341a6b476bcd8ecc765a8b297c22a74f9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Apr 2008 14:08:30 -0400 Subject: [PATCH 0491/2985] Btrfs: Remove bogus max_sector warnings from the extent_io code It was testing the bio before doing logical->physical mapping, so the test was always wrong. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 21597bea21fd..a984a7bc1197 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1702,7 +1702,6 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, static int submit_one_bio(int rw, struct bio *bio, int mirror_num) { - u64 maxsector; int ret = 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct page *page = bvec->bv_page; @@ -1730,12 +1729,6 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num) bio_get(bio); - maxsector = bio->bi_bdev->bd_inode->i_size >> 9; - if (maxsector < bio->bi_sector) { - printk("sector too large max %Lu got %llu\n", maxsector, - (unsigned long long)bio->bi_sector); - WARN_ON(1); - } if (tree->ops && tree->ops->submit_bio_hook) tree->ops->submit_bio_hook(page->mapping->host, rw, bio, mirror_num); From a443755f1ca3e190e12e3a845ddecb3ee1782512 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Apr 2008 10:29:38 -0400 Subject: [PATCH 0492/2985] Btrfs: Check device uuids along with devids Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 5 +++++ fs/btrfs/volumes.c | 30 +++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b56ae1950658..d119d95d139a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -800,6 +800,11 @@ static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c, return (struct btrfs_stripe *)offset; } +static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr)); +} + static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb, struct btrfs_chunk *c, int nr) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c11b6fd408a6..cdf0019cca2e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -69,15 +69,18 @@ int btrfs_cleanup_fs_uuids(void) return 0; } -static struct btrfs_device *__find_device(struct list_head *head, u64 devid) +static struct btrfs_device *__find_device(struct list_head *head, u64 devid, + u8 *uuid) { struct btrfs_device *dev; struct list_head *cur; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); - if (dev->devid == devid) + if (dev->devid == devid && + !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE)) { return dev; + } } return NULL; } @@ -117,7 +120,8 @@ static int device_list_add(const char *path, fs_devices->num_devices = 0; device = NULL; } else { - device = __find_device(&fs_devices->devices, devid); + device = __find_device(&fs_devices->devices, devid, + disk_super->dev_item.uuid); } if (!device) { device = kzalloc(sizeof(*device), GFP_NOFS); @@ -126,6 +130,8 @@ static int device_list_add(const char *path, return -ENOMEM; } device->devid = devid; + memcpy(device->uuid, disk_super->dev_item.uuid, + BTRFS_UUID_SIZE); device->barriers = 1; spin_lock_init(&device->io_lock); device->name = kstrdup(path, GFP_NOFS); @@ -1098,11 +1104,12 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, return 0; } -struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid) +struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, + u8 *uuid) { struct list_head *head = &root->fs_info->fs_devices->devices; - return __find_device(head, devid); + return __find_device(head, devid, uuid); } static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, @@ -1115,6 +1122,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, u64 logical; u64 length; u64 devid; + u8 uuid[BTRFS_UUID_SIZE]; int num_stripes; int ret; int i; @@ -1163,7 +1171,10 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, map->stripes[i].physical = btrfs_stripe_offset_nr(leaf, chunk, i); devid = btrfs_stripe_devid_nr(leaf, chunk, i); - map->stripes[i].dev = btrfs_find_device(root, devid); + read_extent_buffer(leaf, uuid, (unsigned long) + btrfs_stripe_dev_uuid_nr(chunk, i), + BTRFS_UUID_SIZE); + map->stripes[i].dev = btrfs_find_device(root, devid, uuid); if (!map->stripes[i].dev) { kfree(map); free_extent_map(em); @@ -1207,8 +1218,13 @@ static int read_one_dev(struct btrfs_root *root, struct btrfs_device *device; u64 devid; int ret; + u8 dev_uuid[BTRFS_UUID_SIZE]; + devid = btrfs_device_id(leaf, dev_item); - device = btrfs_find_device(root, devid); + read_extent_buffer(leaf, dev_uuid, + (unsigned long)btrfs_device_uuid(dev_item), + BTRFS_UUID_SIZE); + device = btrfs_find_device(root, devid, dev_uuid); if (!device) { printk("warning devid %Lu not found already\n", devid); device = kzalloc(sizeof(*device), GFP_NOFS); From 7ae9c09d8f001eb19ee2ba219dc5c3d4f6d60614 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Apr 2008 10:29:49 -0400 Subject: [PATCH 0493/2985] Btrfs: Add support for labels in the super block Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/volumes.c | 17 +++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d119d95d139a..f00c4be59ad6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -247,6 +247,7 @@ struct btrfs_header { * room to translate 14 chunks with 3 stripes each. */ #define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 +#define BTRFS_LABEL_SIZE 256 /* * the super block basically lists the main trees of the FS @@ -276,6 +277,7 @@ struct btrfs_super_block { u8 root_level; u8 chunk_root_level; struct btrfs_dev_item dev_item; + char label[BTRFS_LABEL_SIZE]; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; } __attribute__ ((__packed__)); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cdf0019cca2e..93aa36e2436e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -57,9 +57,7 @@ int btrfs_cleanup_fs_uuids(void) devices_cur = fs_devices->devices.next; dev = list_entry(devices_cur, struct btrfs_device, dev_list); - printk("uuid cleanup finds %s\n", dev->name); if (dev->bdev) { - printk("closing\n"); close_bdev_excl(dev->bdev); } list_del(&dev->dev_list); @@ -149,7 +147,6 @@ static int device_list_add(const char *path, } if (fs_devices->lowest_devid > devid) { fs_devices->lowest_devid = devid; - printk("lowest devid now %Lu\n", devid); } *fs_devices_ret = fs_devices; return 0; @@ -166,7 +163,6 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev) { close_bdev_excl(device->bdev); - printk("close devices closes %s\n", device->name); } device->bdev = NULL; } @@ -220,11 +216,9 @@ int btrfs_scan_one_device(const char *path, int flags, void *holder, mutex_lock(&uuid_mutex); - printk("scan one opens %s\n", path); bdev = open_bdev_excl(path, flags, holder); if (IS_ERR(bdev)) { - printk("open failed\n"); ret = PTR_ERR(bdev); goto error; } @@ -240,13 +234,20 @@ int btrfs_scan_one_device(const char *path, int flags, void *holder, disk_super = (struct btrfs_super_block *)bh->b_data; if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { - printk("no btrfs found on %s\n", path); ret = -EINVAL; goto error_brelse; } devid = le64_to_cpu(disk_super->dev_item.devid); transid = btrfs_super_generation(disk_super); - printk("found device %Lu transid %Lu on %s\n", devid, transid, path); + if (disk_super->label[0]) + printk("device label %s ", disk_super->label); + else { + /* FIXME, make a readl uuid parser */ + printk("device fsid %llx-%llx ", + *(unsigned long long *)disk_super->fsid, + *(unsigned long long *)(disk_super->fsid + 8)); + } + printk("devid %Lu transid %Lu %s\n", devid, transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); error_brelse: From 7b13b7b119c932a5eca486db4113f4c1fe3b97a8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Apr 2008 10:29:50 -0400 Subject: [PATCH 0494/2985] Btrfs: Don't drop extent_map cache during releasepage on the btree inode The btree inode should only have a single extent_map in the cache, it doesn't make sense to ever drop it. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 23 ++++++++++++++--------- fs/btrfs/extent_io.c | 38 +++++++++++++++++++++++++++----------- fs/btrfs/extent_io.h | 3 +++ 3 files changed, 44 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 76ee7a4ae00c..2de2b00afebc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -76,13 +76,12 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, struct extent_map *em; int ret; -again: spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); spin_unlock(&em_tree->lock); - if (em) { + if (em) goto out; - } + em = alloc_extent_map(GFP_NOFS); if (!em) { em = ERR_PTR(-ENOMEM); @@ -95,15 +94,21 @@ again: spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); - if (ret == -EEXIST) { free_extent_map(em); - em = NULL; - goto again; + em = lookup_extent_mapping(em_tree, start, len); + if (em) + ret = 0; + else + ret = -EIO; } else if (ret) { - em = ERR_PTR(ret); + free_extent_map(em); + em = NULL; } + spin_unlock(&em_tree->lock); + + if (ret) + em = ERR_PTR(ret); out: return em; } @@ -496,7 +501,7 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) } tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(map, tree, page, gfp_flags); + ret = try_release_extent_state(map, tree, page, gfp_flags); if (ret == 1) { invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); ClearPagePrivate(page); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a984a7bc1197..81f8b4fd069b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2463,6 +2463,31 @@ err: } EXPORT_SYMBOL(extent_prepare_write); +/* + * a helper for releasepage, this tests for areas of the page that + * are locked or under IO and drops the related state bits if it is safe + * to drop the page. + */ +int try_release_extent_state(struct extent_map_tree *map, + struct extent_io_tree *tree, struct page *page, + gfp_t mask) +{ + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + int ret = 1; + + if (test_range_bit(tree, start, end, EXTENT_IOBITS, 0)) + ret = 0; + else { + if ((mask & GFP_NOFS) == GFP_NOFS) + mask = GFP_NOFS; + clear_extent_bit(tree, start, end, EXTENT_UPTODATE, + 1, 1, mask); + } + return ret; +} +EXPORT_SYMBOL(try_release_extent_state); + /* * a helper for releasepage. As long as there are no locked extents * in the range corresponding to the page, both state records and extent @@ -2475,8 +2500,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, struct extent_map *em; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; - u64 orig_start = start; - int ret = 1; + if ((mask & __GFP_WAIT) && page->mapping->host->i_size > 16 * 1024 * 1024) { u64 len; @@ -2507,15 +2531,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, free_extent_map(em); } } - if (test_range_bit(tree, orig_start, end, EXTENT_IOBITS, 0)) - ret = 0; - else { - if ((mask & GFP_NOFS) == GFP_NOFS) - mask = GFP_NOFS; - clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, - 1, 1, mask); - } - return ret; + return try_release_extent_state(map, tree, page, mask); } EXPORT_SYMBOL(try_release_extent_mapping); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 232300d13be2..e48346147514 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -110,6 +110,9 @@ void extent_io_tree_empty_lru(struct extent_io_tree *tree); int try_release_extent_mapping(struct extent_map_tree *map, struct extent_io_tree *tree, struct page *page, gfp_t mask); +int try_release_extent_state(struct extent_map_tree *map, + struct extent_io_tree *tree, struct page *page, + gfp_t mask); int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int extent_read_full_page(struct extent_io_tree *tree, struct page *page, From 9b3f68b90674419add8be1c0aa740dcdf04f44cc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Apr 2008 10:29:51 -0400 Subject: [PATCH 0495/2985] Btrfs: Calculate appropriate chunk sizes for both small and large filesystems Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 68 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 93aa36e2436e..e3ddd7fb8edd 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -627,6 +627,27 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, return 0; } +static u64 div_factor(u64 num, int factor) +{ + if (factor == 10) + return num; + num *= factor; + do_div(num, 10); + return num; +} + +static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes, + int sub_stripes) +{ + if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) + return calc_size; + else if (type & BTRFS_BLOCK_GROUP_RAID10) + return calc_size * (num_stripes / sub_stripes); + else + return calc_size * num_stripes; +} + + int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 *start, u64 *num_bytes, u64 type) @@ -643,11 +664,14 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct extent_map_tree *em_tree; struct map_lookup *map; struct extent_map *em; + int min_chunk_size = 8 * 1024 * 1024; u64 physical; u64 calc_size = 1024 * 1024 * 1024; - u64 min_free = calc_size; + u64 max_chunk_size = calc_size; + u64 min_free; u64 avail; u64 max_avail = 0; + u64 percent_max; int num_stripes = 1; int sub_stripes = 0; int looped = 0; @@ -666,6 +690,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & (BTRFS_BLOCK_GROUP_RAID1)) { num_stripes = min_t(u64, 2, btrfs_super_num_devices(&info->super_copy)); + if (num_stripes < 2) + return -ENOSPC; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { num_stripes = btrfs_super_num_devices(&info->super_copy); @@ -674,13 +700,45 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, num_stripes &= ~(u32)1; sub_stripes = 2; } + + if (type & BTRFS_BLOCK_GROUP_DATA) { + max_chunk_size = 10 * calc_size; + min_chunk_size = 256 * 1024 * 1024; + } else if (type & BTRFS_BLOCK_GROUP_METADATA) { + max_chunk_size = 4 * calc_size; + min_chunk_size = 64 * 1024 * 1024; + } else { + min_chunk_size = 32 * 1024 * 1024; + } + + /* we don't want a chunk larger than 10% of the FS */ + percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1); + max_chunk_size = min(percent_max, max_chunk_size); + + if (calc_size * num_stripes > max_chunk_size) { + calc_size = max_chunk_size; + do_div(calc_size, num_stripes); + do_div(calc_size, stripe_len); + calc_size *= stripe_len; + } + /* we don't want tiny stripes */ + *num_bytes = chunk_bytes_by_type(type, calc_size, + num_stripes, sub_stripes); + calc_size = max_t(u64, chunk_bytes_by_type(type, min_chunk_size, + num_stripes, sub_stripes), calc_size); + again: + do_div(calc_size, stripe_len); + calc_size *= stripe_len; + INIT_LIST_HEAD(&private_devs); cur = dev_list->next; index = 0; if (type & BTRFS_BLOCK_GROUP_DUP) min_free = calc_size * 2; + else + min_free = calc_size; /* build a private list of devices we will allocate from */ while(index < num_stripes) { @@ -727,13 +785,9 @@ again: } stripes = &chunk->stripe; + *num_bytes = chunk_bytes_by_type(type, calc_size, + num_stripes, sub_stripes); - if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) - *num_bytes = calc_size; - else if (type & BTRFS_BLOCK_GROUP_RAID10) - *num_bytes = calc_size * (num_stripes / sub_stripes); - else - *num_bytes = calc_size * num_stripes; index = 0; printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes); From a40a90a0420abd5ff86a0917facd3293ebb6a9b6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Apr 2008 11:55:51 -0400 Subject: [PATCH 0496/2985] Btrfs: Fix chunk allocation when some devices don't have enough room for stripes Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e3ddd7fb8edd..fe5b00986d22 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -664,7 +664,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct extent_map_tree *em_tree; struct map_lookup *map; struct extent_map *em; - int min_chunk_size = 8 * 1024 * 1024; + int min_stripe_size = 1 * 1024 * 1024; u64 physical; u64 calc_size = 1024 * 1024 * 1024; u64 max_chunk_size = calc_size; @@ -673,6 +673,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 max_avail = 0; u64 percent_max; int num_stripes = 1; + int min_stripes = 1; int sub_stripes = 0; int looped = 0; int ret; @@ -683,15 +684,20 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (list_empty(dev_list)) return -ENOSPC; - if (type & (BTRFS_BLOCK_GROUP_RAID0)) + if (type & (BTRFS_BLOCK_GROUP_RAID0)) { num_stripes = btrfs_super_num_devices(&info->super_copy); - if (type & (BTRFS_BLOCK_GROUP_DUP)) + min_stripes = 2; + } + if (type & (BTRFS_BLOCK_GROUP_DUP)) { num_stripes = 2; + min_stripes = 2; + } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { num_stripes = min_t(u64, 2, btrfs_super_num_devices(&info->super_copy)); if (num_stripes < 2) return -ENOSPC; + min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { num_stripes = btrfs_super_num_devices(&info->super_copy); @@ -699,22 +705,26 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, return -ENOSPC; num_stripes &= ~(u32)1; sub_stripes = 2; + min_stripes = 4; } if (type & BTRFS_BLOCK_GROUP_DATA) { max_chunk_size = 10 * calc_size; - min_chunk_size = 256 * 1024 * 1024; + min_stripe_size = 64 * 1024 * 1024; } else if (type & BTRFS_BLOCK_GROUP_METADATA) { max_chunk_size = 4 * calc_size; - min_chunk_size = 64 * 1024 * 1024; - } else { - min_chunk_size = 32 * 1024 * 1024; + min_stripe_size = 32 * 1024 * 1024; + } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { + calc_size = 8 * 1024 * 1024; + max_chunk_size = calc_size * 2; + min_stripe_size = 1 * 1024 * 1024; } /* we don't want a chunk larger than 10% of the FS */ percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1); max_chunk_size = min(percent_max, max_chunk_size); +again: if (calc_size * num_stripes > max_chunk_size) { calc_size = max_chunk_size; do_div(calc_size, num_stripes); @@ -722,12 +732,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, calc_size *= stripe_len; } /* we don't want tiny stripes */ - *num_bytes = chunk_bytes_by_type(type, calc_size, - num_stripes, sub_stripes); - calc_size = max_t(u64, chunk_bytes_by_type(type, min_chunk_size, - num_stripes, sub_stripes), calc_size); + calc_size = max_t(u64, min_stripe_size, calc_size); -again: do_div(calc_size, stripe_len); calc_size *= stripe_len; @@ -746,19 +752,27 @@ again: avail = device->total_bytes - device->bytes_used; cur = cur->next; - if (avail > max_avail) - max_avail = avail; if (avail >= min_free) { list_move_tail(&device->dev_list, &private_devs); index++; if (type & BTRFS_BLOCK_GROUP_DUP) index++; - } + } else if (avail > max_avail) + max_avail = avail; if (cur == dev_list) break; } if (index < num_stripes) { list_splice(&private_devs, dev_list); + if (index >= min_stripes) { + num_stripes = index; + if (type & (BTRFS_BLOCK_GROUP_RAID10)) { + num_stripes /= sub_stripes; + num_stripes *= sub_stripes; + } + looped = 1; + goto again; + } if (!looped && max_avail > 0) { looped = 1; calc_size = max_avail; @@ -766,7 +780,6 @@ again: } return -ENOSPC; } - key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.type = BTRFS_CHUNK_ITEM_KEY; ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, From 0afbaf8c8216a0de64b55c2d455de2b6c9dde5eb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Apr 2008 14:17:20 -0400 Subject: [PATCH 0497/2985] Btrfs: Set the btree inode i_size to OFFSET_MAX Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2de2b00afebc..9d648f2f41a5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -88,19 +88,35 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, goto out; } em->start = 0; - em->len = i_size_read(inode); + em->len = (u64)-1; em->block_start = 0; em->bdev = inode->i_sb->s_bdev; spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); if (ret == -EEXIST) { + u64 failed_start = em->start; + u64 failed_len = em->len; + + printk("failed to insert %Lu %Lu -> %Lu into tree\n", + em->start, em->len, em->block_start); free_extent_map(em); em = lookup_extent_mapping(em_tree, start, len); - if (em) + if (em) { + printk("after failing, found %Lu %Lu %Lu\n", + em->start, em->len, em->block_start); ret = 0; - else + } else { + em = lookup_extent_mapping(em_tree, failed_start, + failed_len); + if (em) { + printk("double failure lookup gives us " + "%Lu %Lu -> %Lu\n", em->start, + em->len, em->block_start); + free_extent_map(em); + } ret = -EIO; + } } else if (ret) { free_extent_map(em); em = NULL; @@ -1108,7 +1124,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; - fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; + + /* + * we set the i_size on the btree inode to the max possible int. + * the real end of the address space is determined by all of + * the devices in the system + */ + fs_info->btree_inode->i_size = OFFSET_MAX; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; @@ -1196,9 +1218,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root->stripesize = stripesize; sb_set_blocksize(sb, sectorsize); - i_size_write(fs_info->btree_inode, - btrfs_super_total_bytes(disk_super)); - if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { printk("btrfs: valid FS not found on %s\n", sb->s_id); From 9ad6b7bc2e00ba02f915cffd5b6bcd6564bb2c75 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Apr 2008 16:11:30 -0400 Subject: [PATCH 0498/2985] Force page->private removal in btrfs_invalidatepage btrfs_invalidatepage is not allowed to leave pages around on the lru. Any such pages will trigger an oops later on because the VM will see page->private and assume it is a buffer head. This also forces extra flushes of the async work queues before dropping all the pages on the btree inode during unmount. Left over items on the work queues are one possible cause of busy state ranges during truncate_inode_pages. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 12 ++++++++++++ fs/btrfs/inode.c | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9d648f2f41a5..b479cdfe3ee8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -533,6 +533,12 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) tree = &BTRFS_I(page->mapping->host)->io_tree; extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); + if (PagePrivate(page)) { + printk("2invalidate page cleaning up after releasepage\n"); + ClearPagePrivate(page); + set_page_private(page, 0); + page_cache_release(page); + } } #if 0 @@ -1484,6 +1490,8 @@ int close_ctree(struct btrfs_root *root) write_ctree_super(NULL, root); mutex_unlock(&fs_info->fs_mutex); + btrfs_transaction_flush_work(root); + if (fs_info->delalloc_bytes) { printk("btrfs: at unmount delalloc count %Lu\n", fs_info->delalloc_bytes); @@ -1514,7 +1522,11 @@ int close_ctree(struct btrfs_root *root) extent_io_tree_empty_lru(&fs_info->extent_ins); extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); + flush_workqueue(end_io_workqueue); + flush_workqueue(async_submit_workqueue); + truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + flush_workqueue(end_io_workqueue); destroy_workqueue(end_io_workqueue); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 40f8da884090..347cd85db128 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2524,6 +2524,12 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) tree = &BTRFS_I(page->mapping->host)->io_tree; extent_invalidatepage(tree, page, offset); btrfs_releasepage(page, GFP_NOFS); + if (PagePrivate(page)) { + printk("invalidate page cleaning up after releasepage\n"); + ClearPagePrivate(page); + set_page_private(page, 0); + page_cache_release(page); + } } /* From 4575c9cceeca2f51c50536850e15e1dc5187f3d9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Apr 2008 16:13:31 -0400 Subject: [PATCH 0499/2985] Btrfs: Scale the bdi ra_pages by the number of devices in the FS Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b479cdfe3ee8..46188ee16623 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -932,7 +932,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) bdi_init(bdi); #endif - bdi->ra_pages = default_backing_dev_info.ra_pages * 4; + bdi->ra_pages = default_backing_dev_info.ra_pages; bdi->state = 0; bdi->capabilities = default_backing_dev_info.capabilities; bdi->unplug_io_fn = btrfs_unplug_io_fn; @@ -1214,6 +1214,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, (unsigned long long)fs_devices->num_devices); goto fail_sb_buffer; } + fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); + nodesize = btrfs_super_nodesize(disk_super); leafsize = btrfs_super_leafsize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); From ad5bd91ecee2b687071da00094564fd075f934ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 21 Apr 2008 08:28:10 -0400 Subject: [PATCH 0500/2985] Btrfs: Add 1MB to the min_free in alloc_chunk This properly reflects the first 1MB we skip at the start of the device Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index fe5b00986d22..4fc92cf813d8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -746,6 +746,9 @@ again: else min_free = calc_size; + /* we add 1MB because we never use the first 1MB of the device */ + min_free += 1024 * 1024; + /* build a private list of devices we will allocate from */ while(index < num_stripes) { device = list_entry(cur, struct btrfs_device, dev_list); From 4ef64eae288a3644d1f8b748eb075426701e96d1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 21 Apr 2008 08:52:50 -0400 Subject: [PATCH 0501/2985] Btrfs: Remove debugging statements from the invalidatepage calls Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/inode.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 46188ee16623..c829612c797e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -534,7 +534,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); if (PagePrivate(page)) { - printk("2invalidate page cleaning up after releasepage\n"); + invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 347cd85db128..5b1514e58de4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2510,6 +2510,7 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) map = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(map, tree, page, gfp_flags); if (ret == 1) { + invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -2525,7 +2526,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) extent_invalidatepage(tree, page, offset); btrfs_releasepage(page, GFP_NOFS); if (PagePrivate(page)) { - printk("invalidate page cleaning up after releasepage\n"); + invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); From f2d8d74d7874f8f81222363cd6459a365796e35a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 21 Apr 2008 10:03:05 -0400 Subject: [PATCH 0502/2985] Btrfs: Make an unplug function that doesn't unplug every spindle Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 26 ++++++++------- fs/btrfs/inode.c | 6 +--- fs/btrfs/volumes.c | 79 +++++++++++++++++++++++++++++++++------------- fs/btrfs/volumes.h | 2 ++ 4 files changed, 75 insertions(+), 38 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c829612c797e..7f5aca35494d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -913,18 +913,22 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { - struct list_head *cur; - struct btrfs_device *device; - struct btrfs_fs_info *info; + struct inode *inode = page->mapping->host; + struct extent_map_tree *em_tree; + struct extent_map *em; + u64 offset = page_offset(page); - info = (struct btrfs_fs_info *)bdi->unplug_io_data; - list_for_each(cur, &info->fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); - bdi = blk_get_backing_dev_info(device->bdev); - if (bdi->unplug_io_fn) { - bdi->unplug_io_fn(bdi, page); - } - } + em_tree = &BTRFS_I(inode)->extent_tree; + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); + spin_unlock(&em_tree->lock); + if (!em) + return; + + offset = offset - em->start; + btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree, + em->block_start + offset, page); + free_extent_map(em); } static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5b1514e58de4..e875c7c8a647 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -313,13 +313,9 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, u64 logical = bio->bi_sector << 9; u64 length = 0; u64 map_length; - struct bio_vec *bvec; - int i; int ret; - bio_for_each_segment(bvec, bio, i) { - length += bvec->bv_len; - } + length = bio->bi_size; map_tree = &root->fs_info->mapping_tree; map_length = length; ret = btrfs_map_block(map_tree, READ, logical, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4fc92cf813d8..460240706505 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "ctree.h" #include "extent_map.h" @@ -930,9 +931,10 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) return ret; } -int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, - u64 logical, u64 *length, - struct btrfs_multi_bio **multi_ret, int mirror_num) +static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, + u64 logical, u64 *length, + struct btrfs_multi_bio **multi_ret, + int mirror_num, struct page *unplug_page) { struct extent_map *em; struct map_lookup *map; @@ -944,6 +946,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int stripes_required = 1; int stripe_index; int i; + int num_stripes; struct btrfs_multi_bio *multi = NULL; if (multi_ret && !(rw & (1 << BIO_RW))) { @@ -960,10 +963,14 @@ again: spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, *length); spin_unlock(&em_tree->lock); + + if (!em && unplug_page) + return 0; + if (!em) { printk("unable to find logical %Lu\n", logical); + BUG(); } - BUG_ON(!em); BUG_ON(em->start > logical || em->start + em->len < logical); map = (struct map_lookup *)em->bdev; @@ -1010,14 +1017,15 @@ again: } else { *length = em->len - offset; } - if (!multi_ret) + + if (!multi_ret && !unplug_page) goto out; - multi->num_stripes = 1; + num_stripes = 1; stripe_index = 0; if (map->type & BTRFS_BLOCK_GROUP_RAID1) { - if (rw & (1 << BIO_RW)) - multi->num_stripes = map->num_stripes; + if (unplug_page || (rw & (1 << BIO_RW))) + num_stripes = map->num_stripes; else if (mirror_num) { stripe_index = mirror_num - 1; } else { @@ -1037,7 +1045,7 @@ again: } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { if (rw & (1 << BIO_RW)) - multi->num_stripes = map->num_stripes; + num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { @@ -1047,8 +1055,8 @@ again: stripe_index = do_div(stripe_nr, factor); stripe_index *= map->sub_stripes; - if (rw & (1 << BIO_RW)) - multi->num_stripes = map->sub_stripes; + if (unplug_page || (rw & (1 << BIO_RW))) + num_stripes = map->sub_stripes; else if (mirror_num) stripe_index += mirror_num - 1; else @@ -1063,19 +1071,50 @@ again: } BUG_ON(stripe_index >= map->num_stripes); - for (i = 0; i < multi->num_stripes; i++) { - multi->stripes[i].physical = - map->stripes[stripe_index].physical + stripe_offset + - stripe_nr * map->stripe_len; - multi->stripes[i].dev = map->stripes[stripe_index].dev; + for (i = 0; i < num_stripes; i++) { + if (unplug_page) { + struct btrfs_device *device; + struct backing_dev_info *bdi; + + device = map->stripes[stripe_index].dev; + bdi = blk_get_backing_dev_info(device->bdev); + if (bdi->unplug_io_fn) { + bdi->unplug_io_fn(bdi, unplug_page); + } + } else { + multi->stripes[i].physical = + map->stripes[stripe_index].physical + + stripe_offset + stripe_nr * map->stripe_len; + multi->stripes[i].dev = map->stripes[stripe_index].dev; + } stripe_index++; } - *multi_ret = multi; + if (multi_ret) { + *multi_ret = multi; + multi->num_stripes = num_stripes; + } out: free_extent_map(em); return 0; } +int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, + u64 logical, u64 *length, + struct btrfs_multi_bio **multi_ret, int mirror_num) +{ + return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, + mirror_num, NULL); +} + +int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, + u64 logical, struct page *page) +{ + u64 length = PAGE_CACHE_SIZE; + return __btrfs_map_block(map_tree, READ, logical, &length, + NULL, 0, page); +} + + #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) static void end_bio_multi_stripe(struct bio *bio, int err) #else @@ -1122,16 +1161,12 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, u64 logical = bio->bi_sector << 9; u64 length = 0; u64 map_length; - struct bio_vec *bvec; struct btrfs_multi_bio *multi = NULL; - int i; int ret; int dev_nr = 0; int total_devs = 1; - bio_for_each_segment(bvec, bio, i) { - length += bvec->bv_len; - } + length = bio->bi_size; map_tree = &root->fs_info->mapping_tree; map_length = length; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index f9cae3072171..dfe7e8a9507c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -119,4 +119,6 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, struct btrfs_device *device); int btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); +int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, + u64 logical, struct page *page); #endif From 3c12ac7205d4bd679fefa722aa9b61385e4b4749 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 21 Apr 2008 12:01:38 -0400 Subject: [PATCH 0503/2985] Btrfs: Simplify device selection for mirrored reads Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 8 ++++---- fs/btrfs/volumes.c | 23 +++++++---------------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 593011e5d455..cc0d7f30c36b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -640,7 +640,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - path->reada = 0; + path->reada = 1; key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_bytes; @@ -660,7 +660,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_release_path(root->fs_info->extent_root, path); - path->reada = 0; + path->reada = 1; ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, path, bytenr, root_objectid, ref_generation, owner, owner_offset); @@ -692,7 +692,7 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); - path->reada = 0; + path->reada = 1; key.objectid = bytenr; key.offset = num_bytes; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -1328,7 +1328,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - path->reada = 0; + path->reada = 1; ret = lookup_extent_backref(trans, extent_root, path, bytenr, root_objectid, ref_generation, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 460240706505..107fc74c3ab8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1029,19 +1029,8 @@ again: else if (mirror_num) { stripe_index = mirror_num - 1; } else { - int i; - u64 least = (u64)-1; - struct btrfs_device *cur; - - for (i = 0; i < map->num_stripes; i++) { - cur = map->stripes[i].dev; - spin_lock(&cur->io_lock); - if (cur->total_ios < least) { - least = cur->total_ios; - stripe_index = i; - } - spin_unlock(&cur->io_lock); - } + u64 orig_stripe_nr = stripe_nr; + stripe_index = do_div(orig_stripe_nr, num_stripes); } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { if (rw & (1 << BIO_RW)) @@ -1050,7 +1039,6 @@ again: stripe_index = mirror_num - 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { int factor = map->num_stripes / map->sub_stripes; - int orig_stripe_nr = stripe_nr; stripe_index = do_div(stripe_nr, factor); stripe_index *= map->sub_stripes; @@ -1059,8 +1047,11 @@ again: num_stripes = map->sub_stripes; else if (mirror_num) stripe_index += mirror_num - 1; - else - stripe_index += orig_stripe_nr % map->sub_stripes; + else { + u64 orig_stripe_nr = stripe_nr; + stripe_index += do_div(orig_stripe_nr, + map->sub_stripes); + } } else { /* * after this do_div call, stripe_nr is the number of stripes From b30757178dad19a0388d958ff9eea66e674d39ed Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Apr 2008 09:22:07 -0400 Subject: [PATCH 0504/2985] Btrfs: Add a special device list for chunk allocations This allows other code that needs to walk every device in the FS to do so without locking against allocations. Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 15 ++++++++++----- fs/btrfs/volumes.h | 7 +++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 107fc74c3ab8..5619e50583e3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -111,6 +111,7 @@ static int device_list_add(const char *path, if (!fs_devices) return -ENOMEM; INIT_LIST_HEAD(&fs_devices->devices); + INIT_LIST_HEAD(&fs_devices->alloc_list); list_add(&fs_devices->list, &fs_uuids); memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->latest_devid = devid; @@ -139,6 +140,7 @@ static int device_list_add(const char *path, return -ENOMEM; } list_add(&device->dev_list, &fs_devices->devices); + list_add(&device->dev_alloc_list, &fs_devices->alloc_list); fs_devices->num_devices++; } @@ -660,7 +662,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_device *device = NULL; struct btrfs_chunk *chunk; struct list_head private_devs; - struct list_head *dev_list = &extent_root->fs_info->fs_devices->devices; + struct list_head *dev_list; struct list_head *cur; struct extent_map_tree *em_tree; struct map_lookup *map; @@ -682,6 +684,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int stripe_len = 64 * 1024; struct btrfs_key key; + dev_list = &extent_root->fs_info->fs_devices->alloc_list; if (list_empty(dev_list)) return -ENOSPC; @@ -752,12 +755,12 @@ again: /* build a private list of devices we will allocate from */ while(index < num_stripes) { - device = list_entry(cur, struct btrfs_device, dev_list); + device = list_entry(cur, struct btrfs_device, dev_alloc_list); avail = device->total_bytes - device->bytes_used; cur = cur->next; if (avail >= min_free) { - list_move_tail(&device->dev_list, &private_devs); + list_move_tail(&device->dev_alloc_list, &private_devs); index++; if (type & BTRFS_BLOCK_GROUP_DUP) index++; @@ -812,12 +815,12 @@ printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes); struct btrfs_stripe *stripe; BUG_ON(list_empty(&private_devs)); cur = private_devs.next; - device = list_entry(cur, struct btrfs_device, dev_list); + device = list_entry(cur, struct btrfs_device, dev_alloc_list); /* loop over this device again if we're doing a dup group */ if (!(type & BTRFS_BLOCK_GROUP_DUP) || (index == num_stripes - 1)) - list_move_tail(&device->dev_list, dev_list); + list_move_tail(&device->dev_alloc_list, dev_list); ret = btrfs_alloc_dev_extent(trans, device, info->chunk_root->root_key.objectid, @@ -1329,6 +1332,8 @@ static int read_one_dev(struct btrfs_root *root, return -ENOMEM; list_add(&device->dev_list, &root->fs_info->fs_devices->devices); + list_add(&device->dev_alloc_list, + &root->fs_info->fs_devices->alloc_list); device->barriers = 1; spin_lock_init(&device->io_lock); } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index dfe7e8a9507c..a2660d2b6b34 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -24,9 +24,12 @@ struct buffer_head; struct btrfs_device { struct list_head dev_list; + struct list_head dev_alloc_list; struct btrfs_root *dev_root; struct buffer_head *pending_io; + int barriers; + spinlock_t io_lock; struct block_device *bdev; @@ -70,7 +73,11 @@ struct btrfs_fs_devices { u64 num_devices; struct block_device *latest_bdev; struct block_device *lowest_bdev; + /* all of the devices in the FS */ struct list_head devices; + + /* devices not currently being allocated */ + struct list_head alloc_list; struct list_head list; }; From 38b669880df44cba24ab1b01715b87318d469217 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Apr 2008 09:22:11 -0400 Subject: [PATCH 0505/2985] Deal with page == NULL in the btrfs_unplug_io_fn Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7f5aca35494d..19c258d2c7e2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -911,12 +911,40 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) return ret; } +/* + * this unplugs every device on the box, and it is only used when page + * is null + */ +static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) +{ + struct list_head *cur; + struct btrfs_device *device; + struct btrfs_fs_info *info; + + info = (struct btrfs_fs_info *)bdi->unplug_io_data; + list_for_each(cur, &info->fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); + bdi = blk_get_backing_dev_info(device->bdev); + if (bdi->unplug_io_fn) { + bdi->unplug_io_fn(bdi, page); + } + } +} + void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode; struct extent_map_tree *em_tree; struct extent_map *em; - u64 offset = page_offset(page); + u64 offset; + + if (!page) { + __unplug_io_fn(bdi, page); + return; + } + + inode = page->mapping->host; + offset = page_offset(page); em_tree = &BTRFS_I(inode)->extent_tree; spin_lock(&em_tree->lock); From 409c6118d39cb2d8666bee3d61a1a9ae5bbd4f5d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Apr 2008 09:24:20 -0400 Subject: [PATCH 0506/2985] Btrfs: Set nodatasum on the inode when written by a nodatasum mount Signed-off-by: Chris Mason --- fs/btrfs/file.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3f5525f0834c..5d537f26dc83 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -863,6 +863,14 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; + /* + * if this is a nodatasum mount, force summing off for the inode + * all the time. That way a later mount with summing on won't + * get confused + */ + if (btrfs_test_opt(root, NODATASUM)) + btrfs_set_flag(inode, NODATASUM); + /* * there are lots of better ways to do this, but this code * makes sure the first and last page in the file range are From e1c4b7451e22f5b0a9fbccfa560ee7b80c35b8cd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Apr 2008 13:26:46 -0400 Subject: [PATCH 0507/2985] Fix btrfs_get_extent and get_block corner cases, and disable O_DIRECT reads The generic O_DIRECT code assumes all the bios have the same bdev, which isn't true for multi-device btrfs. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 1 + fs/btrfs/inode.c | 36 +++++++++++++++++++++++------------- fs/btrfs/volumes.c | 2 +- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 81f8b4fd069b..6e4bf029c6d4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1694,6 +1694,7 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, } if (bio) { + bio->bi_size = 0; bio->bi_bdev = bdev; bio->bi_sector = first_sector; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e875c7c8a647..abfe86df02d2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -494,6 +494,7 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, bio->bi_end_io = failed_bio->bi_end_io; bio->bi_sector = failrec->logical >> 9; bio->bi_bdev = failed_bio->bi_bdev; + bio->bi_size = 0; bio_add_page(bio, page, failrec->len, start - page_offset(page)); btrfs_submit_bio_hook(inode, READ, bio, failrec->last_mirror); return 0; @@ -2187,12 +2188,9 @@ again: spin_unlock(&em_tree->lock); if (em) { - if (em->start > start) { - printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n", - start, len, em->start, em->len); - WARN_ON(1); - } - if (em->block_start == EXTENT_MAP_INLINE && page) + if (em->start > start || em->start + em->len <= start) + free_extent_map(em); + else if (em->block_start == EXTENT_MAP_INLINE && page) free_extent_map(em); else goto out; @@ -2340,7 +2338,6 @@ insert: err = 0; spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - /* it is possible that someone inserted the extent into the tree * while we had the lock dropped. It is also possible that * an overlapping map exists in the tree @@ -2348,6 +2345,11 @@ insert: if (ret == -EEXIST) { struct extent_map *existing; existing = lookup_extent_mapping(em_tree, start, len); + if (existing && (existing->start > start || + existing->start + existing->len <= start)) { + free_extent_map(existing); + existing = NULL; + } if (!existing) { existing = lookup_extent_mapping(em_tree, em->start, em->len); @@ -2388,6 +2390,7 @@ out: return em; } +#if 0 /* waiting for O_DIRECT reads */ static int btrfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -2405,22 +2408,24 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, if (!em || IS_ERR(em)) goto out; - if (em->start > start || em->start + em->len <= start) + if (em->start > start || em->start + em->len <= start) { goto out; + } if (em->block_start == EXTENT_MAP_INLINE) { ret = -EINVAL; goto out; } - if (em->block_start == EXTENT_MAP_HOLE || - em->block_start == EXTENT_MAP_DELALLOC) { - goto out; - } - len = em->start + em->len - start; len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size))); + if (em->block_start == EXTENT_MAP_HOLE || + em->block_start == EXTENT_MAP_DELALLOC) { + bh_result->b_size = len; + goto out; + } + logical = start - em->start; logical = em->block_start + logical; @@ -2430,6 +2435,7 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, BUG_ON(ret); bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits; bh_result->b_size = min(map_length, len); + bh_result->b_bdev = multi->stripes[0].dev->bdev; set_buffer_mapped(bh_result); kfree(multi); @@ -2437,11 +2443,14 @@ out: free_extent_map(em); return ret; } +#endif static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { + return -EINVAL; +#if 0 struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -2450,6 +2459,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, btrfs_get_block, NULL); +#endif } static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5619e50583e3..bccb5566fd84 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1161,7 +1161,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, int total_devs = 1; length = bio->bi_size; - map_tree = &root->fs_info->mapping_tree; map_length = length; @@ -1192,6 +1191,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } bio->bi_sector = multi->stripes[dev_nr].physical >> 9; dev = multi->stripes[dev_nr].dev; + bio->bi_bdev = dev->bdev; spin_lock(&dev->io_lock); dev->total_ios++; From bcbfce8abd5f8d3f84eab60a9df1ec147f81c34f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Apr 2008 13:26:47 -0400 Subject: [PATCH 0508/2985] Btrfs: Fix the unplug_io_fn to grab a consistent copy of page->mapping Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 13 ++++++++++++- fs/btrfs/inode.c | 16 ++++++++++------ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 19c258d2c7e2..1281c393c7e6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -936,14 +936,25 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) struct inode *inode; struct extent_map_tree *em_tree; struct extent_map *em; + struct address_space *mapping; u64 offset; + /* the generic O_DIRECT read code does this */ if (!page) { __unplug_io_fn(bdi, page); return; } - inode = page->mapping->host; + /* + * page->mapping may change at any time. Get a consistent copy + * and use that for everything below + */ + smp_mb(); + mapping = page->mapping; + if (!mapping) + return; + + inode = mapping->host; offset = page_offset(page); em_tree = &BTRFS_I(inode)->extent_tree; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index abfe86df02d2..d58b4d5a1fef 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -80,6 +80,7 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy); u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy); u64 thresh; + unsigned long flags; int ret = 0; if (for_del) @@ -89,10 +90,10 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, do_div(thresh, 100); - spin_lock(&root->fs_info->delalloc_lock); + spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); if (used + root->fs_info->delalloc_bytes + num_required > thresh) ret = -ENOSPC; - spin_unlock(&root->fs_info->delalloc_lock); + spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); return ret; } @@ -275,12 +276,13 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { + unsigned long flags; if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; - spin_lock(&root->fs_info->delalloc_lock); + spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); BTRFS_I(inode)->delalloc_bytes += end - start + 1; root->fs_info->delalloc_bytes += end - start + 1; - spin_unlock(&root->fs_info->delalloc_lock); + spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); } return 0; } @@ -290,7 +292,9 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, { if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; - spin_lock(&root->fs_info->delalloc_lock); + unsigned long flags; + + spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); if (end - start + 1 > root->fs_info->delalloc_bytes) { printk("warning: delalloc account %Lu %Lu\n", end - start + 1, root->fs_info->delalloc_bytes); @@ -300,7 +304,7 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, root->fs_info->delalloc_bytes -= end - start + 1; BTRFS_I(inode)->delalloc_bytes -= end - start + 1; } - spin_unlock(&root->fs_info->delalloc_lock); + spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); } return 0; } From 1514794e4253cf4a32a2acc6de52f2527ca1bdce Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Apr 2008 09:22:51 -0400 Subject: [PATCH 0509/2985] Btrfs: Make sure nodes have enough room for a double split Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1c3d9d6fbdad..8ad8b5cc295a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1146,7 +1146,7 @@ again: slot -= 1; p->slots[level] = slot; if (ins_len > 0 && btrfs_header_nritems(b) >= - BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { + BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); if (sret) From c448acf0a057debc34507277add0fc09b1b37312 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Apr 2008 09:34:34 -0400 Subject: [PATCH 0510/2985] Btrfs: Fix split_node to require more empty slots in the node as well Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8ad8b5cc295a..ac999f0060b6 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1485,7 +1485,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root ret = push_nodes_for_insert(trans, root, path, level); c = path->nodes[level]; if (!ret && btrfs_header_nritems(c) < - BTRFS_NODEPTRS_PER_BLOCK(root) - 1) + BTRFS_NODEPTRS_PER_BLOCK(root) - 3) return 0; if (ret < 0) return ret; From 971a1f6648bfafd4239234f1ffb9c26e907c2744 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Apr 2008 10:54:32 -0400 Subject: [PATCH 0511/2985] Btrfs: Don't empty the middle buffer in push_nodes_for_insert Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ac999f0060b6..319f9f9bf8d4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -29,7 +29,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_path *path, int data_size, int extend); static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *dst, - struct extent_buffer *src); + struct extent_buffer *src, int empty); static int balance_node_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *dst_buf, @@ -789,7 +789,7 @@ static int balance_level(struct btrfs_trans_handle *trans, /* first, try to make some room in the middle buffer */ if (left) { orig_slot += btrfs_header_nritems(left); - wret = push_node_left(trans, root, left, mid); + wret = push_node_left(trans, root, left, mid, 0); if (wret < 0) ret = wret; if (btrfs_header_nritems(mid) < 2) @@ -800,7 +800,7 @@ static int balance_level(struct btrfs_trans_handle *trans, * then try to empty the right most buffer into the middle */ if (right) { - wret = push_node_left(trans, root, mid, right); + wret = push_node_left(trans, root, mid, right, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; if (btrfs_header_nritems(right) == 0) { @@ -941,7 +941,7 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, wret = 1; else { wret = push_node_left(trans, root, - left, mid); + left, mid, 0); } } if (wret < 0) @@ -1239,7 +1239,7 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, */ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *dst, - struct extent_buffer *src) + struct extent_buffer *src, int empty) { int push_items = 0; int src_nritems; @@ -1252,12 +1252,17 @@ static int push_node_left(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(src) != trans->transid); WARN_ON(btrfs_header_generation(dst) != trans->transid); + if (!empty && src_nritems <= 2) + return 1; + if (push_items <= 0) { return 1; } - if (src_nritems < push_items) - push_items = src_nritems; + if (empty) + push_items = min(src_nritems, push_items); + else + push_items = min(src_nritems - 2, push_items); copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(dst_nritems), From bce4eae9864e6b6ebde5d7f05576ff0a6c3724d0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Apr 2008 14:42:46 -0400 Subject: [PATCH 0512/2985] Btrfs: Fix balance_level to free the middle block if there is room in the left one balance level starts by trying to empty the middle block, and then pushes from the right to the middle. This might empty the right block and leave a small number of pointers in the middle. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 35 ++++++++++++++++++++++++++++------- fs/btrfs/extent-tree.c | 4 ++-- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 319f9f9bf8d4..29e5674b043d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -789,7 +789,7 @@ static int balance_level(struct btrfs_trans_handle *trans, /* first, try to make some room in the middle buffer */ if (left) { orig_slot += btrfs_header_nritems(left); - wret = push_node_left(trans, root, left, mid, 0); + wret = push_node_left(trans, root, left, mid, 1); if (wret < 0) ret = wret; if (btrfs_header_nritems(mid) < 2) @@ -844,6 +844,11 @@ static int balance_level(struct btrfs_trans_handle *trans, ret = wret; goto enospc; } + if (wret == 1) { + wret = push_node_left(trans, root, left, mid, 1); + if (wret < 0) + ret = wret; + } BUG_ON(wret == 1); } if (btrfs_header_nritems(mid) == 0) { @@ -1252,17 +1257,27 @@ static int push_node_left(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(src) != trans->transid); WARN_ON(btrfs_header_generation(dst) != trans->transid); - if (!empty && src_nritems <= 2) + if (!empty && src_nritems <= 8) return 1; if (push_items <= 0) { return 1; } - if (empty) + if (empty) { push_items = min(src_nritems, push_items); - else - push_items = min(src_nritems - 2, push_items); + if (push_items < src_nritems) { + /* leave at least 8 pointers in the node if + * we aren't going to empty it + */ + if (src_nritems - push_items < 8) { + if (push_items <= 8) + return 1; + push_items -= 8; + } + } + } else + push_items = min(src_nritems - 8, push_items); copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(dst_nritems), @@ -1308,13 +1323,19 @@ static int balance_node_right(struct btrfs_trans_handle *trans, src_nritems = btrfs_header_nritems(src); dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; - if (push_items <= 0) + if (push_items <= 0) { return 1; + } + + if (src_nritems < 4) { + return 1; + } max_push = src_nritems / 2 + 1; /* don't try to empty the node */ - if (max_push >= src_nritems) + if (max_push >= src_nritems) { return 1; + } if (max_push < push_items) push_items = max_push; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cc0d7f30c36b..c49592c5127a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -306,13 +306,13 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int bit; int ret; int full_search = 0; - int factor = 8; + int factor = 10; block_group_cache = &info->block_group_cache; total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); if (!owner) - factor = 8; + factor = 10; bit = block_group_state_bits(data); From 81d7ed29ff6bdec903c36c26b386e16c014993b2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 25 Apr 2008 08:51:48 -0400 Subject: [PATCH 0513/2985] Btrfs: Throttle file_write when data=ordered is flushing the inode Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 4 ++++ fs/btrfs/ordered-data.c | 13 +++++++++++++ fs/btrfs/ordered-data.h | 1 + fs/btrfs/transaction.c | 10 ++++++++-- 6 files changed, 28 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index fe6ef8e34166..5ba83894c8b7 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -31,6 +31,7 @@ struct btrfs_inode { struct extent_io_tree io_tree; struct extent_io_tree io_failure_tree; struct inode vfs_inode; + atomic_t ordered_writeback; u64 ordered_trans; /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5d537f26dc83..8effdf4f5d6f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -977,6 +977,7 @@ out_nolock: (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; + btrfs_ordered_throttle(root, inode); return num_written ? num_written : err; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d58b4d5a1fef..b31f52d4f2ca 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1419,6 +1419,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); return 0; } @@ -1728,6 +1729,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->root = root; @@ -1956,6 +1958,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->delalloc_bytes = 0; + atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -3292,6 +3295,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->delalloc_bytes = 0; + atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 3ee51e10c187..b474902c90e2 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -269,3 +269,16 @@ int btrfs_del_ordered_inode(struct inode *inode) return ret; } +int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode) +{ + struct btrfs_transaction *cur = root->fs_info->running_transaction; + while(cur == root->fs_info->running_transaction && + atomic_read(&BTRFS_I(inode)->ordered_writeback)) { +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) + congestion_wait(WRITE, HZ/20); +#else + blk_congestion_wait(WRITE, HZ/20); +#endif + } + return 0; +} diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index f25c6771ec64..29047e0abaab 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -39,4 +39,5 @@ int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, u64 *root_objectid, u64 *objectid, struct inode **inode); int btrfs_del_ordered_inode(struct inode *inode); +int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1ed179c020af..c85cb48d95ee 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -512,8 +512,11 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->fs_mutex); - if (S_ISREG(inode->i_mode)) + if (S_ISREG(inode->i_mode)) { + atomic_inc(&BTRFS_I(inode)->ordered_writeback); filemap_fdatawrite(inode->i_mapping); + atomic_dec(&BTRFS_I(inode)->ordered_writeback); + } iput(inode); mutex_lock(&root->fs_info->fs_mutex); @@ -530,8 +533,11 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->fs_mutex); - if (S_ISREG(inode->i_mode)) + if (S_ISREG(inode->i_mode)) { + atomic_inc(&BTRFS_I(inode)->ordered_writeback); filemap_write_and_wait(inode->i_mapping); + atomic_dec(&BTRFS_I(inode)->ordered_writeback); + } atomic_dec(&inode->i_count); iput(inode); From 004fb5750615bb3cf53e2aa50f6ef1ea57d97df9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 25 Apr 2008 09:00:55 -0400 Subject: [PATCH 0514/2985] Btrfs: write_extent_pages came in 2.6.23 Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6e4bf029c6d4..c1c980d1fbbf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2107,7 +2107,7 @@ done: return 0; } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,20) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) /* Taken directly from 2.6.23 for 2.6.18 back port */ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, void *data); From 84eed90fac1b927a2657ff3bb7a0f18b9cb688f7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 25 Apr 2008 09:04:37 -0400 Subject: [PATCH 0515/2985] Btrfs: Add failure handling for read_sys_array Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 +++++++++-- fs/btrfs/volumes.c | 16 +++++++++------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1281c393c7e6..71838264ca6b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1278,7 +1278,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_lock(&fs_info->fs_mutex); ret = btrfs_read_sys_array(tree_root); - BUG_ON(ret); + if (ret) { + printk("btrfs: failed to read the system array on %s\n", + sb->s_id); + goto fail_sys_array; + } blocksize = btrfs_level_size(tree_root, btrfs_super_chunk_root_level(disk_super)); @@ -1335,8 +1339,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, fail_extent_root: free_extent_buffer(extent_root->node); fail_tree_root: - mutex_unlock(&fs_info->fs_mutex); free_extent_buffer(tree_root->node); +fail_sys_array: + mutex_unlock(&fs_info->fs_mutex); fail_sb_buffer: free_extent_buffer(fs_info->sb_buffer); extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); @@ -1344,6 +1349,8 @@ fail_iput: iput(fs_info->btree_inode); fail: close_all_devices(fs_info); + btrfs_mapping_tree_free(&fs_info->mapping_tree); + kfree(extent_root); kfree(tree_root); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bccb5566fd84..c63a982e31d0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1365,14 +1365,14 @@ int btrfs_read_sys_array(struct btrfs_root *root) struct extent_buffer *sb = root->fs_info->sb_buffer; struct btrfs_disk_key *disk_key; struct btrfs_chunk *chunk; - struct btrfs_key key; + u8 *ptr; + unsigned long sb_ptr; + int ret = 0; u32 num_stripes; u32 array_size; u32 len = 0; - u8 *ptr; - unsigned long sb_ptr; u32 cur; - int ret; + struct btrfs_key key; array_size = btrfs_super_sys_array_size(super_copy); @@ -1397,17 +1397,19 @@ int btrfs_read_sys_array(struct btrfs_root *root) if (key.type == BTRFS_CHUNK_ITEM_KEY) { chunk = (struct btrfs_chunk *)sb_ptr; ret = read_one_chunk(root, &key, sb, chunk); - BUG_ON(ret); + if (ret) + break; num_stripes = btrfs_chunk_num_stripes(sb, chunk); len = btrfs_chunk_item_size(num_stripes); } else { - BUG(); + ret = -EIO; + break; } ptr += len; sb_ptr += len; cur += len; } - return 0; + return ret; } int btrfs_read_chunk_tree(struct btrfs_root *root) From 5e478dc9828ad33d7b08dcdf277e13f14a7c1be7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 25 Apr 2008 09:10:45 -0400 Subject: [PATCH 0516/2985] Btrfs: write_cache_pages came in 2.6.22 Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c1c980d1fbbf..2368536629e7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2107,7 +2107,7 @@ done: return 0; } -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) /* Taken directly from 2.6.23 for 2.6.18 back port */ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, void *data); From 8f18cf13396caae5a3d7ae91201cfb15181a9642 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 25 Apr 2008 16:53:30 -0400 Subject: [PATCH 0517/2985] Btrfs: Make the resizer work based on shrinking and growing devices Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 4 + fs/btrfs/extent-tree.c | 145 ++++++++---------- fs/btrfs/inode.c | 36 ++++- fs/btrfs/volumes.c | 324 +++++++++++++++++++++++++++++++++++++++-- fs/btrfs/volumes.h | 5 + 6 files changed, 407 insertions(+), 108 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f00c4be59ad6..ac7106ec5357 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -474,6 +474,7 @@ struct btrfs_block_group_cache { u64 pinned; u64 flags; int cached; + int ro; }; struct btrfs_device; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 71838264ca6b..a9ce491d279b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -818,6 +818,10 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, return fs_info->tree_root; if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID) return fs_info->extent_root; + if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID) + return fs_info->chunk_root; + if (location->objectid == BTRFS_DEV_TREE_OBJECTID) + return fs_info->dev_root; root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c49592c5127a..6540095544e8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -187,6 +187,7 @@ static int noinline find_search_start(struct btrfs_root *root, if (!cache) goto out; + total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); free_space_cache = &root->fs_info->free_space_cache; @@ -196,7 +197,7 @@ again: goto out; last = max(search_start, cache->key.objectid); - if (!block_group_bits(cache, data)) { + if (!block_group_bits(cache, data) || cache->ro) { goto new_group; } @@ -221,6 +222,8 @@ again: continue; } spin_unlock_irq(&free_space_cache->lock); + if (cache->ro) + goto new_group; if (start + num > cache->key.objectid + cache->key.offset) goto new_group; if (start + num > total_fs_bytes) @@ -319,7 +322,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (search_start && search_start < total_fs_bytes) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_block_group(info, search_start); - if (shint && block_group_bits(shint, data)) { + if (shint && block_group_bits(shint, data) && !shint->ro) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < div_factor(shint->key.offset, factor)) { @@ -327,7 +330,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } } } - if (hint && block_group_bits(hint, data) && + if (hint && !hint->ro && block_group_bits(hint, data) && hint->key.objectid < total_fs_bytes) { used = btrfs_block_group_used(&hint->item); if (used + hint->pinned < @@ -364,7 +367,7 @@ again: if (cache->key.objectid > total_fs_bytes) break; - if (block_group_bits(cache, data)) { + if (!cache->ro && block_group_bits(cache, data)) { if (full_search) free_check = cache->key.offset; else @@ -1020,6 +1023,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, if (found) { found->total_bytes += total_bytes; found->bytes_used += bytes_used; + found->full = 0; WARN_ON(found->total_bytes < found->bytes_used); *space_info = found; return 0; @@ -1700,7 +1704,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 super_used; u64 root_used; u64 search_start = 0; - u64 new_hint; u64 alloc_profile; u32 sizes[2]; struct btrfs_fs_info *info = root->fs_info; @@ -1724,7 +1727,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } again: - if (root->ref_cows) { + if (root != root->fs_info->extent_root) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, @@ -1738,10 +1741,6 @@ again: BUG_ON(ret); } - new_hint = max(hint_byte, root->fs_info->alloc_start); - if (new_hint < btrfs_super_total_bytes(&info->super_copy)) - hint_byte = new_hint; - WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, @@ -2473,15 +2472,16 @@ out: return ret; } -int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) +int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) { struct btrfs_trans_handle *trans; struct btrfs_root *tree_root = root->fs_info->tree_root; struct btrfs_path *path; u64 cur_byte; u64 total_found; + u64 shrink_last_byte; + struct btrfs_block_group_cache *shrink_block_group; struct btrfs_fs_info *info = root->fs_info; - struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; @@ -2489,17 +2489,29 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) int ret; int progress = 0; - btrfs_set_super_total_bytes(&info->super_copy, new_size); - clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, - GFP_NOFS); - block_group_cache = &info->block_group_cache; + shrink_block_group = btrfs_lookup_block_group(root->fs_info, + shrink_start); + BUG_ON(!shrink_block_group); + + shrink_last_byte = shrink_start + shrink_block_group->key.offset; + + shrink_block_group->space_info->total_bytes -= + shrink_block_group->key.offset; +printk("shrink_extent_tree %Lu -> %Lu type %Lu\n", shrink_start, shrink_last_byte, shrink_block_group->flags); path = btrfs_alloc_path(); root = root->fs_info->extent_root; path->reada = 2; again: + trans = btrfs_start_transaction(root, 1); + do_chunk_alloc(trans, root->fs_info->extent_root, + btrfs_block_group_used(&shrink_block_group->item) + + 2 * 1024 * 1024, shrink_block_group->flags); + btrfs_end_transaction(trans, root); + shrink_block_group->ro = 1; + total_found = 0; - key.objectid = new_size; + key.objectid = shrink_start; key.offset = 0; key.type = 0; cur_byte = key.objectid; @@ -2511,10 +2523,12 @@ again: ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); if (ret < 0) goto out; + if (ret == 0) { leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid + found_key.offset > new_size) { + if (found_key.objectid + found_key.offset > shrink_start && + found_key.objectid < shrink_last_byte) { cur_byte = found_key.objectid; key.objectid = cur_byte; } @@ -2543,6 +2557,9 @@ next: btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid >= shrink_last_byte) + break; + if (progress && need_resched()) { memcpy(&key, &found_key, sizeof(key)); mutex_unlock(&root->fs_info->fs_mutex); @@ -2583,68 +2600,31 @@ next: goto again; } + /* + * we've freed all the extents, now remove the block + * group item from the tree + */ trans = btrfs_start_transaction(root, 1); - key.objectid = new_size; - key.offset = 0; - key.type = 0; - while(1) { - u64 ptr; + memcpy(&key, &shrink_block_group->key, sizeof(key)); - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto out; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) + ret = -EIO; + if (ret < 0) + goto out; - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); -bg_next: - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - break; - if (ret == 1) { - ret = 0; - break; - } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + kfree(shrink_block_group); - /* - * btrfs_next_leaf doesn't cow buffers, we have to - * do the search again - */ - memcpy(&key, &found_key, sizeof(key)); - btrfs_release_path(root, path); - goto resched_check; - } + clear_extent_bits(&info->block_group_cache, found_key.objectid, + found_key.objectid + found_key.offset - 1, + (unsigned int)-1, GFP_NOFS); - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) { - printk("shrinker found key %Lu %u %Lu\n", - found_key.objectid, found_key.type, - found_key.offset); - path->slots[0]++; - goto bg_next; - } - ret = get_state_private(&info->block_group_cache, - found_key.objectid, &ptr); - if (!ret) - kfree((void *)(unsigned long)ptr); - - clear_extent_bits(&info->block_group_cache, found_key.objectid, - found_key.objectid + found_key.offset - 1, - (unsigned int)-1, GFP_NOFS); - - key.objectid = found_key.objectid + 1; - btrfs_del_item(trans, root, path); - btrfs_release_path(root, path); -resched_check: - if (need_resched()) { - mutex_unlock(&root->fs_info->fs_mutex); - cond_resched(); - mutex_lock(&root->fs_info->fs_mutex); - } - } - clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, + btrfs_del_item(trans, root, path); + clear_extent_dirty(&info->free_space_cache, + shrink_start, shrink_last_byte - 1, GFP_NOFS); btrfs_commit_transaction(trans, root); out: @@ -2652,13 +2632,6 @@ out: return ret; } -int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 new_size) -{ - btrfs_set_super_total_bytes(&root->fs_info->super_copy, new_size); - return 0; -} - int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key) { @@ -2726,7 +2699,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - cache = kmalloc(sizeof(*cache), GFP_NOFS); + cache = kzalloc(sizeof(*cache), GFP_NOFS); if (!cache) { ret = -ENOMEM; break; @@ -2736,8 +2709,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); memcpy(&cache->key, &found_key, sizeof(found_key)); - cache->cached = 0; - cache->pinned = 0; key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); @@ -2789,12 +2760,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, extent_root = root->fs_info->extent_root; block_group_cache = &root->fs_info->block_group_cache; - cache = kmalloc(sizeof(*cache), GFP_NOFS); + cache = kzalloc(sizeof(*cache), GFP_NOFS); BUG_ON(!cache); cache->key.objectid = chunk_offset; cache->key.offset = size; - cache->cached = 0; - cache->pinned = 0; btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); memset(&cache->item, 0, sizeof(cache->item)); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b31f52d4f2ca..4d12aa532c5b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include @@ -2887,9 +2888,12 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) { u64 new_size; u64 old_size; + u64 devid = 1; struct btrfs_ioctl_vol_args *vol_args; struct btrfs_trans_handle *trans; + struct btrfs_device *device = NULL; char *sizestr; + char *devstr = NULL; int ret = 0; int namelen; int mod = 0; @@ -2909,9 +2913,25 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) goto out; } + mutex_lock(&root->fs_info->fs_mutex); sizestr = vol_args->name; + devstr = strchr(sizestr, ':'); + if (devstr) { + char *end; + sizestr = devstr + 1; + *devstr = '\0'; + devstr = vol_args->name; + devid = simple_strtoull(devstr, &end, 10); +printk("resizing devid %Lu\n", devid); + } + device = btrfs_find_device(root, devid, NULL); + if (!device) { + printk("resizer unable to find device %Lu\n", devid); + ret = -EINVAL; + goto out_unlock; + } if (!strcmp(sizestr, "max")) - new_size = root->fs_info->sb->s_bdev->bd_inode->i_size; + new_size = device->bdev->bd_inode->i_size; else { if (sizestr[0] == '-') { mod = -1; @@ -2923,12 +2943,11 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) new_size = btrfs_parse_size(sizestr); if (new_size == 0) { ret = -EINVAL; - goto out; + goto out_unlock; } } - mutex_lock(&root->fs_info->fs_mutex); - old_size = btrfs_super_total_bytes(&root->fs_info->super_copy); + old_size = device->total_bytes; if (mod < 0) { if (new_size > old_size) { @@ -2944,7 +2963,7 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) ret = -EINVAL; goto out_unlock; } - if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) { + if (new_size > device->bdev->bd_inode->i_size) { ret = -EFBIG; goto out_unlock; } @@ -2952,13 +2971,14 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) do_div(new_size, root->sectorsize); new_size *= root->sectorsize; -printk("new size is %Lu\n", new_size); +printk("new size for %s is %llu\n", device->name, (unsigned long long)new_size); + if (new_size > old_size) { trans = btrfs_start_transaction(root, 1); - ret = btrfs_grow_extent_tree(trans, root, new_size); + ret = btrfs_grow_device(trans, device, new_size); btrfs_commit_transaction(trans, root); } else { - ret = btrfs_shrink_extent_tree(root, new_size); + ret = btrfs_shrink_device(device, new_size); } out_unlock: diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c63a982e31d0..a2c56de1548a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -77,7 +77,7 @@ static struct btrfs_device *__find_device(struct list_head *head, u64 devid, list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); if (dev->devid == devid && - !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE)) { + (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) { return dev; } } @@ -293,6 +293,10 @@ static int find_free_dev_extent(struct btrfs_trans_handle *trans, * so we make sure to start at an offset of at least 1MB */ search_start = max((u64)1024 * 1024, search_start); + + if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) + search_start = max(root->fs_info->alloc_start, search_start); + key.objectid = device->devid; key.offset = search_start; key.type = BTRFS_DEV_EXTENT_KEY; @@ -380,6 +384,33 @@ error: return ret; } +int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + u64 start) +{ + int ret; + struct btrfs_path *path; + struct btrfs_root *root = device->dev_root; + struct btrfs_key key; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = device->devid; + key.offset = start; + key.type = BTRFS_DEV_EXTENT_KEY; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + BUG_ON(ret); + + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + + btrfs_free_path(path); + return ret; +} + int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 chunk_tree, u64 chunk_objectid, @@ -560,6 +591,7 @@ out: btrfs_free_path(path); return ret; } + int btrfs_update_device(struct btrfs_trans_handle *trans, struct btrfs_device *device) { @@ -606,6 +638,254 @@ out: return ret; } +int btrfs_grow_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device, u64 new_size) +{ + struct btrfs_super_block *super_copy = + &device->dev_root->fs_info->super_copy; + u64 old_total = btrfs_super_total_bytes(super_copy); + u64 diff = new_size - device->total_bytes; + + btrfs_set_super_total_bytes(super_copy, old_total + diff); + return btrfs_update_device(trans, device); +} + +static int btrfs_free_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 chunk_tree, u64 chunk_objectid, + u64 chunk_offset) +{ + int ret; + struct btrfs_path *path; + struct btrfs_key key; + + root = root->fs_info->chunk_root; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = chunk_objectid; + key.offset = chunk_offset; + key.type = BTRFS_CHUNK_ITEM_KEY; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + BUG_ON(ret); + + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + + btrfs_free_path(path); + return 0; +} + +int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 + chunk_offset) +{ + struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_disk_key *disk_key; + struct btrfs_chunk *chunk; + u8 *ptr; + int ret = 0; + u32 num_stripes; + u32 array_size; + u32 len = 0; + u32 cur; + struct btrfs_key key; + + array_size = btrfs_super_sys_array_size(super_copy); + + ptr = super_copy->sys_chunk_array; + cur = 0; + + while (cur < array_size) { + disk_key = (struct btrfs_disk_key *)ptr; + btrfs_disk_key_to_cpu(&key, disk_key); + + len = sizeof(*disk_key); + + if (key.type == BTRFS_CHUNK_ITEM_KEY) { + chunk = (struct btrfs_chunk *)(ptr + len); + num_stripes = btrfs_stack_chunk_num_stripes(chunk); + len += btrfs_chunk_item_size(num_stripes); + } else { + ret = -EIO; + break; + } + if (key.objectid == chunk_objectid && + key.offset == chunk_offset) { + memmove(ptr, ptr + len, array_size - (cur + len)); + array_size -= len; + btrfs_set_super_sys_array_size(super_copy, array_size); + } else { + ptr += len; + cur += len; + } + } + return ret; +} + + +int btrfs_relocate_chunk(struct btrfs_root *root, + u64 chunk_tree, u64 chunk_objectid, + u64 chunk_offset) +{ + struct extent_map_tree *em_tree; + struct btrfs_root *extent_root; + struct btrfs_trans_handle *trans; + struct extent_map *em; + struct map_lookup *map; + int ret; + int i; + + root = root->fs_info->chunk_root; + extent_root = root->fs_info->extent_root; + em_tree = &root->fs_info->mapping_tree.map_tree; + + /* step one, relocate all the extents inside this chunk */ + ret = btrfs_shrink_extent_tree(extent_root, chunk_offset); + BUG_ON(ret); + + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + /* + * step two, delete the device extents and the + * chunk tree entries + */ + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, chunk_offset, 1); + spin_unlock(&em_tree->lock); + + BUG_ON(em->start > chunk_offset || em->start + em->len < chunk_offset); + map = (struct map_lookup *)em->bdev; + + for (i = 0; i < map->num_stripes; i++) { + ret = btrfs_free_dev_extent(trans, map->stripes[i].dev, + map->stripes[i].physical); + BUG_ON(ret); + } + ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, + chunk_offset); + + BUG_ON(ret); + + if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { + ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); + BUG_ON(ret); + goto out; + } + + + + spin_lock(&em_tree->lock); + remove_extent_mapping(em_tree, em); + kfree(map); + em->bdev = NULL; + + /* once for the tree */ + free_extent_map(em); + spin_unlock(&em_tree->lock); + +out: + /* once for us */ + free_extent_map(em); + + btrfs_end_transaction(trans, root); + return 0; +} + +/* + * shrinking a device means finding all of the device extents past + * the new size, and then following the back refs to the chunks. + * The chunk relocation code actually frees the device extent + */ +int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = device->dev_root; + struct btrfs_dev_extent *dev_extent = NULL; + struct btrfs_path *path; + u64 length; + u64 chunk_tree; + u64 chunk_objectid; + u64 chunk_offset; + int ret; + int slot; + struct extent_buffer *l; + struct btrfs_key key; + struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + u64 old_total = btrfs_super_total_bytes(super_copy); + u64 diff = device->total_bytes - new_size; + + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto done; + } + + path->reada = 2; + + device->total_bytes = new_size; + ret = btrfs_update_device(trans, device); + if (ret) { + btrfs_end_transaction(trans, root); + goto done; + } + WARN_ON(diff > old_total); + btrfs_set_super_total_bytes(super_copy, old_total - diff); + btrfs_end_transaction(trans, root); + + key.objectid = device->devid; + key.offset = (u64)-1; + key.type = BTRFS_DEV_EXTENT_KEY; + + while (1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto done; + + ret = btrfs_previous_item(root, path, 0, key.type); + if (ret < 0) + goto done; + if (ret) { + ret = 0; + goto done; + } + + l = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(l, &key, path->slots[0]); + + if (key.objectid != device->devid) + goto done; + + dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); + length = btrfs_dev_extent_length(l, dev_extent); + + if (key.offset + length <= new_size) + goto done; + + chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); + chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); + chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); + btrfs_release_path(root, path); + + ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, + chunk_offset); + if (ret) + goto done; + } + +done: + btrfs_free_path(path); + return ret; +} + int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, @@ -658,6 +938,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 dev_offset; struct btrfs_fs_info *info = extent_root->fs_info; struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; + struct btrfs_path *path; struct btrfs_stripe *stripes; struct btrfs_device *device = NULL; struct btrfs_chunk *chunk; @@ -724,6 +1005,10 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, min_stripe_size = 1 * 1024 * 1024; } + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + /* we don't want a chunk larger than 10% of the FS */ percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1); max_chunk_size = min(percent_max, max_chunk_size); @@ -759,11 +1044,19 @@ again: avail = device->total_bytes - device->bytes_used; cur = cur->next; + if (avail >= min_free) { - list_move_tail(&device->dev_alloc_list, &private_devs); - index++; - if (type & BTRFS_BLOCK_GROUP_DUP) + u64 ignored_start = 0; + ret = find_free_dev_extent(trans, device, path, + min_free, + &ignored_start); + if (ret == 0) { + list_move_tail(&device->dev_alloc_list, + &private_devs); index++; + if (type & BTRFS_BLOCK_GROUP_DUP) + index++; + } } else if (avail > max_avail) max_avail = avail; if (cur == dev_list) @@ -785,30 +1078,37 @@ again: calc_size = max_avail; goto again; } + btrfs_free_path(path); return -ENOSPC; } key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.type = BTRFS_CHUNK_ITEM_KEY; ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, &key.offset); - if (ret) + if (ret) { + btrfs_free_path(path); return ret; + } chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); - if (!chunk) + if (!chunk) { + btrfs_free_path(path); return -ENOMEM; + } map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); if (!map) { kfree(chunk); + btrfs_free_path(path); return -ENOMEM; } + btrfs_free_path(path); + path = NULL; stripes = &chunk->stripe; *num_bytes = chunk_bytes_by_type(type, calc_size, num_stripes, sub_stripes); - index = 0; printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes); while(index < num_stripes) { @@ -874,6 +1174,11 @@ printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.offset, cal em->len = *num_bytes; em->block_start = 0; + if (type & BTRFS_BLOCK_GROUP_SYSTEM) { + ret = btrfs_add_system_chunk(trans, chunk_root, &key, + chunk, btrfs_chunk_item_size(num_stripes)); + BUG_ON(ret); + } kfree(chunk); em_tree = &extent_root->fs_info->mapping_tree.map_tree; @@ -1376,11 +1681,6 @@ int btrfs_read_sys_array(struct btrfs_root *root) array_size = btrfs_super_sys_array_size(super_copy); - /* - * we do this loop twice, once for the device items and - * once for all of the chunks. This way there are device - * structs filled in for every chunk - */ ptr = super_copy->sys_chunk_array; sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); cur = 0; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index a2660d2b6b34..6fe8440b37a7 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -128,4 +128,9 @@ int btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, u64 logical, struct page *page); +int btrfs_grow_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device, u64 new_size); +struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, + u8 *uuid); +int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); #endif From 3bf3d9e9c256e1a249a47bb8ceff682e6430aeff Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 26 Apr 2008 11:03:32 -0400 Subject: [PATCH 0518/2985] Btrfs: Avoid recursive chunk allocations Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6540095544e8..a589912fdd51 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1727,7 +1727,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } again: - if (root != root->fs_info->extent_root) { + if (root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, From 8e7bf94fd5f44fa585e29fbe6a1bfabc04aea7cf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 28 Apr 2008 09:02:36 -0400 Subject: [PATCH 0519/2985] Btrfs: Do more optimal file RA during shrinking and defrag Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 16 +++++++++++++++- fs/btrfs/inode.c | 15 +++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a589912fdd51..fe4fe709c312 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2263,6 +2263,12 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +static unsigned long calc_ra(unsigned long start, unsigned long last, + unsigned long nr) +{ + return min(last, start + nr - 1); +} + static int noinline relocate_inode_pages(struct inode *inode, u64 start, u64 len) { @@ -2275,6 +2281,8 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, struct page *page; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct file_ra_state *ra; + unsigned long total_read = 0; + unsigned long ra_pages; ra = kzalloc(sizeof(*ra), GFP_NOFS); @@ -2282,11 +2290,17 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, i = start >> PAGE_CACHE_SHIFT; last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; + ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; + file_ra_state_init(ra, inode->i_mapping); - btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index); kfree(ra); for (; i <= last_index; i++) { + if (total_read % ra_pages == 0) { + btrfs_force_ra(inode->i_mapping, ra, NULL, i, + calc_ra(i, last_index, ra_pages)); + } + total_read++; page = grab_cache_page(inode->i_mapping, i); if (!page) goto out_unlock; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4d12aa532c5b..ab707c0930df 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2814,14 +2814,12 @@ unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index) { - pgoff_t req_size; + pgoff_t req_size = last_index - offset + 1; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - req_size = last_index - offset + 1; offset = page_cache_readahead(mapping, ra, file, offset, req_size); return offset; #else - req_size = min(last_index - offset + 1, (pgoff_t)128); page_cache_sync_readahead(mapping, ra, file, offset, req_size); return offset + req_size; #endif @@ -2833,7 +2831,8 @@ int btrfs_defrag_file(struct file *file) { struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct page *page; unsigned long last_index; - unsigned long ra_index = 0; + unsigned long ra_pages = root->fs_info->bdi.ra_pages; + unsigned long total_read = 0; u64 page_start; u64 page_end; unsigned long i; @@ -2848,11 +2847,11 @@ int btrfs_defrag_file(struct file *file) { mutex_lock(&inode->i_mutex); last_index = inode->i_size >> PAGE_CACHE_SHIFT; for (i = 0; i <= last_index; i++) { - if (i == ra_index) { - ra_index = btrfs_force_ra(inode->i_mapping, - &file->f_ra, - file, ra_index, last_index); + if (total_read % ra_pages == 0) { + btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, + min(last_index, i + ra_pages - 1)); } + total_read++; page = grab_cache_page(inode->i_mapping, i); if (!page) goto out_unlock; From 788f20eb5affef584e75ea84bb80a4c3352a2c0e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 28 Apr 2008 15:29:42 -0400 Subject: [PATCH 0520/2985] Btrfs: Add new ioctl to add devices Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/inode.c | 23 ++++++++++++++ fs/btrfs/ioctl.h | 6 ++++ fs/btrfs/super.c | 1 + fs/btrfs/volumes.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.h | 1 + 6 files changed, 108 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ac7106ec5357..b9257b37bb96 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -551,6 +551,8 @@ struct btrfs_fs_info { u64 data_alloc_profile; u64 metadata_alloc_profile; u64 system_alloc_profile; + + void *bdev_holder; }; /* diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ab707c0930df..1a74b5018699 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3070,6 +3070,27 @@ static int btrfs_ioctl_defrag(struct file *file) return 0; } +long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_vol_args *vol_args; + int ret; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + ret = btrfs_init_new_device(root, vol_args->name); + +out: + kfree(vol_args); + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3082,6 +3103,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_defrag(file); case BTRFS_IOC_RESIZE: return btrfs_ioctl_resize(root, (void __user *)arg); + case BTRFS_IOC_ADD_DEV: + return btrfs_ioctl_add_dev(root, (void __user *)arg); } return -ENOTTY; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 4551e82013c8..8ad35fc4ba56 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -36,4 +36,10 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 892864906880..7153dfaa3404 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -388,6 +388,7 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, goto error; } + btrfs_sb(s)->fs_info->bdev_holder = fs_type; s->s_flags |= MS_ACTIVE; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a2c56de1548a..b93c15aa17db 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "ctree.h" #include "extent_map.h" @@ -592,6 +593,80 @@ out: return ret; } +int btrfs_init_new_device(struct btrfs_root *root, char *device_path) +{ + struct btrfs_trans_handle *trans; + struct btrfs_device *device; + struct block_device *bdev; + struct list_head *cur; + struct list_head *devices; + u64 total_bytes; + int ret = 0; + + + bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder); + if (!bdev) { + return -EIO; + } + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + devices = &root->fs_info->fs_devices->devices; + list_for_each(cur, devices) { + device = list_entry(cur, struct btrfs_device, dev_list); + if (device->bdev == bdev) { + ret = -EEXIST; + goto out; + } + } + + device = kzalloc(sizeof(*device), GFP_NOFS); + if (!device) { + /* we can safely leave the fs_devices entry around */ + ret = -ENOMEM; + goto out_close_bdev; + } + + device->barriers = 1; + generate_random_uuid(device->uuid); + spin_lock_init(&device->io_lock); + device->name = kstrdup(device_path, GFP_NOFS); + if (!device->name) { + kfree(device); + goto out_close_bdev; + } + device->io_width = root->sectorsize; + device->io_align = root->sectorsize; + device->sector_size = root->sectorsize; + device->total_bytes = i_size_read(bdev->bd_inode); + device->dev_root = root->fs_info->dev_root; + device->bdev = bdev; + + ret = btrfs_add_device(trans, root, device); + if (ret) + goto out_close_bdev; + + total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); + btrfs_set_super_total_bytes(&root->fs_info->super_copy, + total_bytes + device->total_bytes); + + total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); + btrfs_set_super_num_devices(&root->fs_info->super_copy, + total_bytes + 1); + + list_add(&device->dev_list, &root->fs_info->fs_devices->devices); + list_add(&device->dev_alloc_list, + &root->fs_info->fs_devices->alloc_list); + root->fs_info->fs_devices->num_devices++; +out: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; + +out_close_bdev: + close_bdev_excl(bdev); + goto out; +} + int btrfs_update_device(struct btrfs_trans_handle *trans, struct btrfs_device *device) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6fe8440b37a7..6f173450378b 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -133,4 +133,5 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, u8 *uuid); int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); +int btrfs_init_new_device(struct btrfs_root *root, char *path); #endif From ec44a35cbeb26ab2da84cb280d778260f2312feb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 28 Apr 2008 15:29:52 -0400 Subject: [PATCH 0521/2985] Btrfs: Add balance ioctl to restripe the chunks Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 106 +++++++++++++++++++++++++++++++------ fs/btrfs/inode.c | 11 ++++ fs/btrfs/volumes.c | 115 +++++++++++++++++++++++++++++++++++++---- fs/btrfs/volumes.h | 1 + 5 files changed, 208 insertions(+), 27 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b9257b37bb96..73b92dd150ff 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1364,7 +1364,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, int data); + u64 search_end, struct btrfs_key *ins, u64 data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe4fe709c312..95aee5a29375 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -17,6 +17,7 @@ */ #include #include +#include #include "hash.h" #include "crc32c.h" #include "ctree.h" @@ -1058,6 +1059,26 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) } } +static u64 reduce_alloc_profile(u64 flags) +{ + if ((flags & BTRFS_BLOCK_GROUP_DUP) && + (flags & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10))) + flags &= ~BTRFS_BLOCK_GROUP_DUP; + + if ((flags & BTRFS_BLOCK_GROUP_RAID1) && + (flags & BTRFS_BLOCK_GROUP_RAID10)) + flags &= ~BTRFS_BLOCK_GROUP_RAID1; + + if ((flags & BTRFS_BLOCK_GROUP_RAID0) && + ((flags & BTRFS_BLOCK_GROUP_RAID1) | + (flags & BTRFS_BLOCK_GROUP_RAID10) | + (flags & BTRFS_BLOCK_GROUP_DUP))) + flags &= ~BTRFS_BLOCK_GROUP_RAID0; + return flags; +} + + static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags) @@ -1068,6 +1089,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 num_bytes; int ret; + flags = reduce_alloc_profile(flags); + space_info = __find_space_info(extent_root->fs_info, flags); if (!space_info) { ret = update_space_info(extent_root->fs_info, flags, @@ -1684,6 +1707,7 @@ enospc: error: return ret; } + /* * finds a free extent and does all the dirty work required for allocation * returns the key for the extent through ins, and a tree buffer for @@ -1697,7 +1721,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, int data) + u64 search_end, struct btrfs_key *ins, u64 data) { int ret; int pending_ret; @@ -1727,6 +1751,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } again: + data = reduce_alloc_profile(data); if (root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, @@ -1752,6 +1777,9 @@ again: num_bytes = max(num_bytes, min_alloc_size); goto again; } + if (ret) { + printk("allocation failed flags %Lu\n", data); + } BUG_ON(ret); if (ret) return ret; @@ -2274,8 +2302,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, { u64 page_start; u64 page_end; - u64 delalloc_start; - u64 existing_delalloc; unsigned long last_index; unsigned long i; struct page *page; @@ -2293,7 +2319,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; file_ra_state_init(ra, inode->i_mapping); - kfree(ra); for (; i <= last_index; i++) { if (total_read % ra_pages == 0) { @@ -2313,26 +2338,30 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, goto out_unlock; } } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(page); +#else + cancel_dirty_page(page, PAGE_CACHE_SIZE); +#endif + wait_on_page_writeback(page); + set_page_extent_mapped(page); page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; lock_extent(io_tree, page_start, page_end, GFP_NOFS); - delalloc_start = page_start; - existing_delalloc = count_range_bits(io_tree, - &delalloc_start, page_end, - PAGE_CACHE_SIZE, EXTENT_DELALLOC); - + set_page_dirty(page); set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); - set_page_dirty(page); unlock_page(page); page_cache_release(page); + balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); } out_unlock: + kfree(ra); mutex_unlock(&inode->i_mutex); return 0; } @@ -2397,8 +2426,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, goto out; } relocate_inode_pages(inode, ref_offset, extent_key->offset); - /* FIXME, data=ordered will help get rid of this */ - filemap_fdatawrite(inode->i_mapping); iput(inode); mutex_lock(&extent_root->fs_info->fs_mutex); } else { @@ -2486,6 +2513,47 @@ out: return ret; } +static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) +{ + u64 num_devices; + u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; + + num_devices = btrfs_super_num_devices(&root->fs_info->super_copy); + if (num_devices == 1) { + stripped |= BTRFS_BLOCK_GROUP_DUP; + stripped = flags & ~stripped; + + /* turn raid0 into single device chunks */ + if (flags & BTRFS_BLOCK_GROUP_RAID0) + return stripped; + + /* turn mirroring into duplication */ + if (flags & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10)) + return stripped | BTRFS_BLOCK_GROUP_DUP; + return flags; + } else { + /* they already had raid on here, just return */ + if ((flags & BTRFS_BLOCK_GROUP_DUP) && + (flags & BTRFS_BLOCK_GROUP_RAID1)) { + } + if (flags & stripped) + return flags; + + stripped |= BTRFS_BLOCK_GROUP_DUP; + stripped = flags & ~stripped; + + /* switch duplicated blocks with raid1 */ + if (flags & BTRFS_BLOCK_GROUP_DUP) + return stripped | BTRFS_BLOCK_GROUP_RAID1; + + /* turn single device chunks into raid0 */ + return stripped | BTRFS_BLOCK_GROUP_RAID0; + } + return flags; +} + int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) { struct btrfs_trans_handle *trans; @@ -2494,6 +2562,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) u64 cur_byte; u64 total_found; u64 shrink_last_byte; + u64 new_alloc_flags; struct btrfs_block_group_cache *shrink_block_group; struct btrfs_fs_info *info = root->fs_info; struct btrfs_key key; @@ -2511,17 +2580,20 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) shrink_block_group->space_info->total_bytes -= shrink_block_group->key.offset; -printk("shrink_extent_tree %Lu -> %Lu type %Lu\n", shrink_start, shrink_last_byte, shrink_block_group->flags); path = btrfs_alloc_path(); root = root->fs_info->extent_root; path->reada = 2; again: - trans = btrfs_start_transaction(root, 1); - do_chunk_alloc(trans, root->fs_info->extent_root, + if (btrfs_block_group_used(&shrink_block_group->item) > 0) { + trans = btrfs_start_transaction(root, 1); + new_alloc_flags = update_block_group_flags(root, + shrink_block_group->flags); + do_chunk_alloc(trans, root->fs_info->extent_root, btrfs_block_group_used(&shrink_block_group->item) + - 2 * 1024 * 1024, shrink_block_group->flags); - btrfs_end_transaction(trans, root); + 2 * 1024 * 1024, new_alloc_flags); + btrfs_end_transaction(trans, root); + } shrink_block_group->ro = 1; total_found = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1a74b5018699..994834474590 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2864,6 +2864,15 @@ int btrfs_defrag_file(struct file *file) { goto out_unlock; } } + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(page); +#else + cancel_dirty_page(page, PAGE_CACHE_SIZE); +#endif + wait_on_page_writeback(page); + set_page_extent_mapped(page); + page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; @@ -3105,6 +3114,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_resize(root, (void __user *)arg); case BTRFS_IOC_ADD_DEV: return btrfs_ioctl_add_dev(root, (void __user *)arg); + case BTRFS_IOC_BALANCE: + return btrfs_balance(root->fs_info->dev_root); } return -ENOTTY; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b93c15aa17db..6476ecbf132e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -869,6 +869,107 @@ out: return 0; } +static u64 div_factor(u64 num, int factor) +{ + if (factor == 10) + return num; + num *= factor; + do_div(num, 10); + return num; +} + + +int btrfs_balance(struct btrfs_root *dev_root) +{ + int ret; + struct list_head *cur; + struct list_head *devices = &dev_root->fs_info->fs_devices->devices; + struct btrfs_device *device; + u64 old_size; + u64 size_to_free; + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_chunk *chunk; + struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; + struct btrfs_trans_handle *trans; + struct btrfs_key found_key; + + + dev_root = dev_root->fs_info->dev_root; + + mutex_lock(&dev_root->fs_info->fs_mutex); + /* step one make some room on all the devices */ + list_for_each(cur, devices) { + device = list_entry(cur, struct btrfs_device, dev_list); + old_size = device->total_bytes; + size_to_free = div_factor(old_size, 1); + size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); + if (device->total_bytes - device->bytes_used > size_to_free) + continue; + + ret = btrfs_shrink_device(device, old_size - size_to_free); + BUG_ON(ret); + + trans = btrfs_start_transaction(dev_root, 1); + BUG_ON(!trans); + + ret = btrfs_grow_device(trans, device, old_size); + BUG_ON(ret); + + btrfs_end_transaction(trans, dev_root); + } + + /* step two, relocate all the chunks */ + path = btrfs_alloc_path(); + BUG_ON(!path); + + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.offset = (u64)-1; + key.type = BTRFS_CHUNK_ITEM_KEY; + + while(1) { + ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); + if (ret < 0) + goto error; + + /* + * this shouldn't happen, it means the last relocate + * failed + */ + if (ret == 0) + break; + + ret = btrfs_previous_item(chunk_root, path, 0, + BTRFS_CHUNK_ITEM_KEY); + if (ret) { + break; + } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + if (found_key.objectid != key.objectid) + break; + chunk = btrfs_item_ptr(path->nodes[0], + path->slots[0], + struct btrfs_chunk); + key.offset = found_key.offset; + /* chunk zero is special */ + if (key.offset == 0) + break; + + ret = btrfs_relocate_chunk(chunk_root, + chunk_root->root_key.objectid, + found_key.objectid, + found_key.offset); + BUG_ON(ret); + btrfs_release_path(chunk_root, path); + } + ret = 0; +error: + btrfs_free_path(path); + mutex_unlock(&dev_root->fs_info->fs_mutex); + return ret; +} + /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks. @@ -985,15 +1086,6 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, return 0; } -static u64 div_factor(u64 num, int factor) -{ - if (factor == 10) - return num; - num *= factor; - do_div(num, 10); - return num; -} - static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes, int sub_stripes) { @@ -1040,6 +1132,11 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int stripe_len = 64 * 1024; struct btrfs_key key; + if ((type & BTRFS_BLOCK_GROUP_RAID1) && + (type & BTRFS_BLOCK_GROUP_DUP)) { + WARN_ON(1); + type &= ~BTRFS_BLOCK_GROUP_DUP; + } dev_list = &extent_root->fs_info->fs_devices->alloc_list; if (list_empty(dev_list)) return -ENOSPC; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6f173450378b..3f9a17f2e41b 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -134,4 +134,5 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, u8 *uuid); int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); +int btrfs_balance(struct btrfs_root *dev_root); #endif From 4235298e4fc3c1a09f659cfe2fd285024eeb2241 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 28 Apr 2008 16:40:52 -0400 Subject: [PATCH 0522/2985] Btrfs: Drop some verbose printks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 18 +++++------------- fs/btrfs/extent_io.c | 13 +++++-------- fs/btrfs/volumes.c | 2 -- 3 files changed, 10 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a9ce491d279b..1c7d84aff864 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -214,25 +214,18 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, while (1) { ret = read_extent_buffer_pages(io_tree, eb, start, 1, btree_get_extent, mirror_num); - if (!ret) { - if (mirror_num) -printk("good read %Lu mirror %d total %d\n", eb->start, mirror_num, num_copies); + if (!ret) return ret; - } + num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, eb->start, eb->len); -printk("failed to read %Lu mirror %d total %d\n", eb->start, mirror_num, num_copies); - if (num_copies == 1) { -printk("reading %Lu failed only one copy\n", eb->start); + if (num_copies == 1) return ret; - } + mirror_num++; - if (mirror_num > num_copies) { -printk("bailing at mirror %d of %d\n", mirror_num, num_copies); + if (mirror_num > num_copies) return ret; - } } -printk("read extent buffer page last\n"); return -EIO; } @@ -322,7 +315,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { -printk("bad start on %Lu found %Lu\n", eb->start, found_start); ret = -EIO; goto err; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2368536629e7..dd403b426ff5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3003,17 +3003,18 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb) { int ret = 0; - int ret2; unsigned long num_pages; unsigned long i; struct page *page; int pg_uptodate = 1; if (eb->flags & EXTENT_UPTODATE) - ret = 1; + return 1; - ret2 = test_range_bit(tree, eb->start, eb->start + eb->len - 1, + ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, EXTENT_UPTODATE, 1); + if (ret) + return ret; num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { @@ -3023,11 +3024,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, break; } } - if ((ret || ret2) && !pg_uptodate) { -printk("uptodate error2 eb %Lu ret %d ret2 %d pg_uptodate %d\n", eb->start, ret, ret2, pg_uptodate); - WARN_ON(1); - } - return (ret || ret2); + return pg_uptodate; } EXPORT_SYMBOL(extent_buffer_uptodate); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6476ecbf132e..9a7241134560 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1282,7 +1282,6 @@ again: num_stripes, sub_stripes); index = 0; -printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes); while(index < num_stripes) { struct btrfs_stripe *stripe; BUG_ON(list_empty(&private_devs)); @@ -1299,7 +1298,6 @@ printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes); BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset, calc_size, &dev_offset); BUG_ON(ret); -printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.offset, calc_size, device->devid, type); device->bytes_used += calc_size; ret = btrfs_update_device(trans, device); BUG_ON(ret); From a236aed14ccb0661611d4416f6b573d892bdc60a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Apr 2008 09:38:00 -0400 Subject: [PATCH 0523/2985] Btrfs: Deal with failed writes in mirrored configurations Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 17 +++++++++++++++-- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/volumes.c | 17 ++++++++++++++--- fs/btrfs/volumes.h | 3 ++- 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1c7d84aff864..e35e70165b53 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1385,7 +1385,10 @@ int write_all_supers(struct btrfs_root *root) struct buffer_head *bh; int ret; int do_barriers; + int max_errors; + int total_errors = 0; + max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; do_barriers = !btrfs_test_opt(root, NOBARRIER); sb = root->fs_info->sb_buffer; @@ -1433,8 +1436,14 @@ int write_all_supers(struct btrfs_root *root) } else { ret = submit_bh(WRITE, bh); } - BUG_ON(ret); + if (ret) + total_errors++; } + if (total_errors > max_errors) { + printk("btrfs: %d errors while writing supers\n", total_errors); + BUG(); + } + total_errors = 0; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); @@ -1454,13 +1463,17 @@ int write_all_supers(struct btrfs_root *root) wait_on_buffer(bh); BUG_ON(!buffer_uptodate(bh)); } else { - BUG(); + total_errors++; } } dev->pending_io = NULL; brelse(bh); } + if (total_errors > max_errors) { + printk("btrfs: %d errors while writing supers\n", total_errors); + BUG(); + } return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 95aee5a29375..f94794a99329 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -315,8 +315,8 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, block_group_cache = &info->block_group_cache; total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - if (!owner) - factor = 10; + if (data & BTRFS_BLOCK_GROUP_METADATA) + factor = 9; bit = block_group_state_bits(data); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9a7241134560..57ab755aca76 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1425,6 +1425,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int stripe_index; int i; int num_stripes; + int max_errors = 0; struct btrfs_multi_bio *multi = NULL; if (multi_ret && !(rw & (1 << BIO_RW))) { @@ -1436,6 +1437,8 @@ again: GFP_NOFS); if (!multi) return -ENOMEM; + + atomic_set(&multi->error, 0); } spin_lock(&em_tree->lock); @@ -1462,8 +1465,10 @@ again: if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) { stripes_required = map->num_stripes; + max_errors = 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { stripes_required = map->sub_stripes; + max_errors = 1; } } if (multi_ret && rw == WRITE && @@ -1561,6 +1566,7 @@ again: if (multi_ret) { *multi_ret = multi; multi->num_stripes = num_stripes; + multi->max_errors = max_errors; } out: free_extent_map(em); @@ -1598,14 +1604,19 @@ static int end_bio_multi_stripe(struct bio *bio, return 1; #endif if (err) - multi->error = err; + atomic_inc(&multi->error); if (atomic_dec_and_test(&multi->stripes_pending)) { bio->bi_private = multi->private; bio->bi_end_io = multi->end_io; - if (!err && multi->error) - err = multi->error; + /* only send an error to the higher layers if it is + * beyond the tolerance of the multi-bio + */ + if (atomic_read(&multi->error) > multi->max_errors) + err = -EIO; + else + err = 0; kfree(multi); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 3f9a17f2e41b..a9663e92bb14 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -90,7 +90,8 @@ struct btrfs_multi_bio { atomic_t stripes_pending; bio_end_io_t *end_io; void *private; - int error; + atomic_t error; + int max_errors; int num_stripes; struct btrfs_bio_stripe stripes[]; }; From 2fff734fafa742236aecbdcdc8b1ff4d221cbaca Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Apr 2008 14:12:09 -0400 Subject: [PATCH 0524/2985] Btrfs: Tune stripe selection for raid1 and raid10 Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 57ab755aca76..6d8dd4438c76 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1509,17 +1509,17 @@ again: if (map->type & BTRFS_BLOCK_GROUP_RAID1) { if (unplug_page || (rw & (1 << BIO_RW))) num_stripes = map->num_stripes; - else if (mirror_num) { + else if (mirror_num) stripe_index = mirror_num - 1; - } else { - u64 orig_stripe_nr = stripe_nr; - stripe_index = do_div(orig_stripe_nr, num_stripes); - } + else + stripe_index = current->pid % map->num_stripes; + } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { if (rw & (1 << BIO_RW)) num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; + } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { int factor = map->num_stripes / map->sub_stripes; @@ -1530,11 +1530,8 @@ again: num_stripes = map->sub_stripes; else if (mirror_num) stripe_index += mirror_num - 1; - else { - u64 orig_stripe_nr = stripe_nr; - stripe_index += do_div(orig_stripe_nr, - map->sub_stripes); - } + else + stripe_index += current->pid % map->sub_stripes; } else { /* * after this do_div call, stripe_nr is the number of stripes From d6bfde8765668c8687de72f7a40f52acdf4f2f19 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 30 Apr 2008 13:59:35 -0400 Subject: [PATCH 0525/2985] Btrfs: Fixes for 2.6.18 enterprise kernels 2.6.18 seems to get caught in an infinite loop when cancel_rearming_delayed_workqueue is called more than once, so this switches to cancel_delayed_work, which is arguably more correct. Also, balance_dirty_pages can run into problems with 2.6.18 based kernels because it doesn't have the per-bdi dirty limits. This avoids calling balance_dirty_pages on the btree inode unless there is actually something to balance, which is a good optimization in general. Finally there's a compile fix for ordered-data.h Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 24 +++++++++++++++++++----- fs/btrfs/ordered-data.c | 1 + fs/btrfs/transaction.c | 8 ++++++-- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e35e70165b53..fabc31b334b6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1548,6 +1548,7 @@ int close_ctree(struct btrfs_root *root) btrfs_commit_transaction(trans, root); ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); + write_ctree_super(NULL, root); mutex_unlock(&fs_info->fs_mutex); @@ -1583,17 +1584,17 @@ int close_ctree(struct btrfs_root *root) extent_io_tree_empty_lru(&fs_info->extent_ins); extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); - flush_workqueue(end_io_workqueue); flush_workqueue(async_submit_workqueue); + flush_workqueue(end_io_workqueue); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); - flush_workqueue(end_io_workqueue); - destroy_workqueue(end_io_workqueue); - flush_workqueue(async_submit_workqueue); destroy_workqueue(async_submit_workqueue); + flush_workqueue(end_io_workqueue); + destroy_workqueue(end_io_workqueue); + iput(fs_info->btree_inode); #if 0 while(!list_empty(&fs_info->hashers)) { @@ -1663,8 +1664,21 @@ void btrfs_throttle(struct btrfs_root *root) void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { - balance_dirty_pages_ratelimited_nr( + struct extent_io_tree *tree; + u64 num_dirty; + u64 start = 0; + unsigned long thresh = 16 * 1024 * 1024; + tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; + + if (current_is_pdflush()) + return; + + num_dirty = count_range_bits(tree, &start, (u64)-1, + thresh, EXTENT_DIRTY); + if (num_dirty > thresh) { + balance_dirty_pages_ratelimited_nr( root->fs_info->btree_inode->i_mapping, 1); + } } void btrfs_set_buffer_defrag(struct extent_buffer *buf) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b474902c90e2..3814cb0246b4 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -18,6 +18,7 @@ #include #include +#include #include "ctree.h" #include "transaction.h" #include "btrfs_inode.h" diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c85cb48d95ee..9826942fa18a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -814,6 +814,9 @@ void btrfs_transaction_cleaner(struct work_struct *work) int ret; mutex_lock(&root->fs_info->fs_mutex); + if (root->fs_info->closing) + goto out; + mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; if (!cur) { @@ -838,12 +841,13 @@ out: void btrfs_transaction_queue_work(struct btrfs_root *root, int delay) { - queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay); + if (!root->fs_info->closing) + queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay); } void btrfs_transaction_flush_work(struct btrfs_root *root) { - cancel_rearming_delayed_workqueue(trans_wq, &root->fs_info->trans_work); + cancel_delayed_work(&root->fs_info->trans_work); flush_workqueue(trans_wq); } From f2eb0a241f0e5c135d93243b0236cb1f14c305e0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 2 May 2008 14:43:14 -0400 Subject: [PATCH 0526/2985] Btrfs: Clone file data ioctl Add a new ioctl to clone file data Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 +- fs/btrfs/file-item.c | 12 +-- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 171 ++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/ioctl.h | 1 + 5 files changed, 179 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 73b92dd150ff..3b6f8524a4ad 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1516,9 +1516,9 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root /* file-item.c */ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 objectid, u64 pos, u64 offset, + u64 objectid, u64 pos, u64 disk_offset, u64 disk_num_bytes, - u64 num_bytes); + u64 num_bytes, u64 offset); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 9259aece6ed1..f537eb43c2c6 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -28,10 +28,10 @@ sizeof(struct btrfs_item) * 2) / \ BTRFS_CRC32_SIZE) - 1)) int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, u64 pos, - u64 offset, u64 disk_num_bytes, - u64 num_bytes) + struct btrfs_root *root, + u64 objectid, u64 pos, + u64 disk_offset, u64 disk_num_bytes, + u64 num_bytes, u64 offset) { int ret = 0; struct btrfs_file_extent_item *item; @@ -53,9 +53,9 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_bytenr(leaf, item, offset); + btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset); btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); - btrfs_set_file_extent_offset(leaf, item, 0); + btrfs_set_file_extent_offset(leaf, item, offset); btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); btrfs_set_file_extent_generation(leaf, item, trans->transid); btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8effdf4f5d6f..a50507f3056d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -285,7 +285,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, err = btrfs_insert_file_extent(trans, root, inode->i_ino, last_pos_in_file, - 0, 0, hole_size); + 0, 0, hole_size, 0); btrfs_drop_extent_cache(inode, last_pos_in_file, last_pos_in_file + hole_size -1); btrfs_check_file(root, inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 994834474590..c6fae29c0b9e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -141,7 +142,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) cur_alloc_size = ins.offset; ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start, ins.objectid, ins.offset, - ins.offset); + ins.offset, 0); inode->i_blocks += ins.offset >> 9; btrfs_check_file(root, inode); if (num_bytes < cur_alloc_size) { @@ -1227,7 +1228,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) err = btrfs_insert_file_extent(trans, root, inode->i_ino, hole_start, 0, 0, - hole_size); + hole_size, 0); btrfs_drop_extent_cache(inode, hole_start, (u64)-1); btrfs_check_file(root, inode); @@ -3100,6 +3101,170 @@ out: return ret; } +void dup_item_to_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *leaf, + int slot, + struct btrfs_key *key, + u64 destino) +{ + struct btrfs_path *cpath = btrfs_alloc_path(); + int len = btrfs_item_size_nr(leaf, slot); + int dstoff; + struct btrfs_key ckey = *key; + int ret; + + ckey.objectid = destino; + ret = btrfs_insert_empty_item(trans, root, cpath, &ckey, len); + dstoff = btrfs_item_ptr_offset(cpath->nodes[0], cpath->slots[0]); + copy_extent_buffer(cpath->nodes[0], leaf, dstoff, + btrfs_item_ptr_offset(leaf, slot), + len); + btrfs_release_path(root, cpath); +} + +long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct file *src_file; + struct inode *src; + struct btrfs_trans_handle *trans; + int ret; + u64 pos; + struct btrfs_path *path; + struct btrfs_key key; + struct extent_buffer *leaf; + u32 nritems; + int nextret; + int slot; + + src_file = fget(src_fd); + if (!src_file) + return -EBADF; + src = src_file->f_dentry->d_inode; + + ret = -EXDEV; + if (src->i_sb != inode->i_sb) + goto out_fput; + + if (inode < src) { + mutex_lock(&inode->i_mutex); + mutex_lock(&src->i_mutex); + } else { + mutex_lock(&src->i_mutex); + mutex_lock(&inode->i_mutex); + } + + ret = -ENOTEMPTY; + if (inode->i_size) + goto out_unlock; + + /* do any pending delalloc/csum calc on src, one way or + another, and lock file content */ + while (1) { + filemap_write_and_wait(src->i_mapping); + lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); + if (BTRFS_I(src)->delalloc_bytes == 0) + break; + unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); + } + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 0); + path = btrfs_alloc_path(); + pos = 0; + while (1) { + ret = btrfs_lookup_file_extent(trans, root, path, src->i_ino, + pos, 0); + if (ret < 0) + goto out; + if (ret > 0) { + if (path->slots[0] == 0) { + ret = 0; + goto out; + } + path->slots[0]--; + } +next_slot: + leaf = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + nritems = btrfs_header_nritems(leaf); + + if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY || + key.objectid != src->i_ino) + goto out; + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { + struct btrfs_file_extent_item *extent; + int found_type; + pos = key.offset; + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(leaf, extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + u64 len = btrfs_file_extent_num_bytes(leaf, + extent); + u64 ds = btrfs_file_extent_disk_bytenr(leaf, + extent); + u64 dl = btrfs_file_extent_disk_num_bytes(leaf, + extent); + u64 off = btrfs_file_extent_offset(leaf, + extent); + btrfs_insert_file_extent(trans, root, + inode->i_ino, pos, + ds, dl, len, off); + /* ds == 0 means there's a hole */ + if (ds != 0) { + btrfs_inc_extent_ref(trans, root, + ds, dl, + root->root_key.objectid, + trans->transid, + inode->i_ino, pos); + } + pos = key.offset + len; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + dup_item_to_inode(trans, root, path, leaf, slot, + &key, inode->i_ino); + pos = key.offset + btrfs_item_size_nr(leaf, + slot); + } + } else if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) + dup_item_to_inode(trans, root, path, leaf, slot, &key, + inode->i_ino); + + if (slot >= nritems - 1) { + nextret = btrfs_next_leaf(root, path); + if (nextret) + goto out; + } else { + path->slots[0]++; + } + goto next_slot; + } + +out: + btrfs_free_path(path); + ret = 0; + + inode->i_blocks = src->i_blocks; + i_size_write(inode, src->i_size); + btrfs_update_inode(trans, root, inode); + + unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + +out_unlock: + mutex_unlock(&src->i_mutex); + mutex_unlock(&inode->i_mutex); +out_fput: + fput(src_file); + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3116,6 +3281,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_add_dev(root, (void __user *)arg); case BTRFS_IOC_BALANCE: return btrfs_balance(root->fs_info->dev_root); + case BTRFS_IOC_CLONE: + return btrfs_ioctl_clone(file, arg); } return -ENOTTY; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 8ad35fc4ba56..b0e73f51d636 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -36,6 +36,7 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) #define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ From 006a58a25fe8aaa7559be80dcff94d98aee84309 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 May 2008 14:43:15 -0400 Subject: [PATCH 0527/2985] Btrfs: Compile warning fixup in volume.c Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6d8dd4438c76..b38187573108 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -547,7 +547,7 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_key key; unsigned long ptr; - u64 free_devid; + u64 free_devid = 0; root = root->fs_info->chunk_root; From bb8885cc0aa6b1606143d92c70762b16026016a5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 May 2008 14:49:33 -0400 Subject: [PATCH 0528/2985] Btrfs: Fix do_sync_file_range ifdefs (2.6.22) Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a50507f3056d..c89c3ac4b23b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -961,7 +961,7 @@ out_nolock: if (err < 0) num_written = err; } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) do_sync_file_range(file, start_pos, start_pos + num_written - 1, SYNC_FILE_RANGE_WRITE | From 12fa8ec64f445aa932ba154053fe95432f30f2c6 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 2 May 2008 15:03:58 -0400 Subject: [PATCH 0529/2985] Btrfs: Add workaround for AppArmor changing remove_suid() In openSUSE 10.3, AppArmor modifies remove_suid to take a struct path rather than just a dentry. This patch tests that the kernel is openSUSE 10.3 or newer and adjusts the call accordingly. Debian/Ubuntu with AppArmor applied will also need a similar patch. Maintainers of btrfs under those distributions should build on this patch or, alternatively, alter their package descriptions to add -DREMOVE_SUID_PATH to the compiler command line. Signed-off-by: Jeff Mahoney - --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ b/compat.h 2008-02-06 16:46:13.000000000 -0500 @@ -0,0 +1,15 @@ +#ifndef _COMPAT_H_ +#define _COMPAT_H_ + + +/* + * Even if AppArmor isn't enabled, it still has different prototypes. + * Add more distro/version pairs here to declare which has AppArmor applied. + */ +#if defined(CONFIG_SUSE_KERNEL) +# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) +# define REMOVE_SUID_PATH 1 +# endif +#endif + +#endif /* _COMPAT_H_ */ - --- a/file.c 2008-02-06 11:37:39.000000000 -0500 +++ b/file.c 2008-02-06 16:46:23.000000000 -0500 @@ -37,6 +37,7 @@ #include "ordered-data.h" #include "ioctl.h" #include "print-tree.h" +#include "compat.h" static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, @@ -790,7 +791,11 @@ static ssize_t btrfs_file_write(struct f goto out_nolock; if (count == 0) goto out_nolock; +#ifdef REMOVE_SUID_PATH + err = remove_suid(&file->f_path); +#else err = remove_suid(fdentry(file)); +#endif if (err) goto out_nolock; file_update_time(file); Signed-off-by: Chris Mason --- fs/btrfs/compat.h | 25 +++++++++++++++++++++++++ fs/btrfs/file.c | 5 +++++ 2 files changed, 30 insertions(+) create mode 100644 fs/btrfs/compat.h diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h new file mode 100644 index 000000000000..d39a768e65ff --- /dev/null +++ b/fs/btrfs/compat.h @@ -0,0 +1,25 @@ +#ifndef _COMPAT_H_ +#define _COMPAT_H_ + + +/* + * Even if AppArmor isn't enabled, it still has different prototypes. + * Add more distro/version pairs here to declare which has AppArmor applied. + */ +#if defined(CONFIG_SUSE_KERNEL) +# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) +# define REMOVE_SUID_PATH 1 +# endif +#endif + +/* + * catch any other distros that have patched in apparmor. This isn't + * 100% reliable because it won't catch people that hand compile their + * own distro kernels without apparmor compiled in. But, it is better + * than nothing. + */ +#ifdef CONFIG_SECURITY_APPARMOR +# define REMOVE_SUID_PATH 1 +#endif + +#endif /* _COMPAT_H_ */ diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c89c3ac4b23b..df97d470812a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -37,6 +37,7 @@ #include "ordered-data.h" #include "ioctl.h" #include "print-tree.h" +#include "compat.h" static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, @@ -852,7 +853,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out_nolock; if (count == 0) goto out_nolock; +#ifdef REMOVE_SUID_PATH + err = remove_suid(&file->f_path); +#else err = remove_suid(fdentry(file)); +#endif if (err) goto out_nolock; file_update_time(file); From b9d86667c94e5fe4bf9f6aa500e7ff1138e717ff Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 May 2008 16:13:49 -0400 Subject: [PATCH 0530/2985] Btrfs: Silence bogus inode.c compiler warnings Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c6fae29c0b9e..5204599e4309 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2035,12 +2035,12 @@ fail: static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - struct inode *inode; + struct inode *inode = NULL; struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; int err = 0; int drop_on_err = 0; - u64 objectid; + u64 objectid = 0; unsigned long nr = 1; mutex_lock(&root->fs_info->fs_mutex); From 5d9cd9ecbf40b8bd5045a3c2f1feb35db6a12266 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 5 May 2008 06:26:21 -0400 Subject: [PATCH 0531/2985] Btrfs: Fix clone ioctl to not hold the path over inserts Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 79 ++++++++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5204599e4309..f7beb9b0d37a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3101,7 +3101,7 @@ out: return ret; } -void dup_item_to_inode(struct btrfs_trans_handle *trans, +int dup_item_to_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct extent_buffer *leaf, @@ -3109,19 +3109,22 @@ void dup_item_to_inode(struct btrfs_trans_handle *trans, struct btrfs_key *key, u64 destino) { - struct btrfs_path *cpath = btrfs_alloc_path(); + char *dup; int len = btrfs_item_size_nr(leaf, slot); - int dstoff; struct btrfs_key ckey = *key; - int ret; + int ret = 0; + + dup = kmalloc(len, GFP_NOFS); + if (!dup) + return -ENOMEM; + + read_extent_buffer(leaf, dup, btrfs_item_ptr_offset(leaf, slot), len); + btrfs_release_path(root, path); ckey.objectid = destino; - ret = btrfs_insert_empty_item(trans, root, cpath, &ckey, len); - dstoff = btrfs_item_ptr_offset(cpath->nodes[0], cpath->slots[0]); - copy_extent_buffer(cpath->nodes[0], leaf, dstoff, - btrfs_item_ptr_offset(leaf, slot), - len); - btrfs_release_path(root, cpath); + ret = btrfs_insert_item(trans, root, &ckey, dup, len); + kfree(dup); + return ret; } long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) @@ -3137,7 +3140,6 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) struct btrfs_key key; struct extent_buffer *leaf; u32 nritems; - int nextret; int slot; src_file = fget(src_fd); @@ -3174,20 +3176,32 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 0); path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + key.offset = 0; + key.type = BTRFS_EXTENT_DATA_KEY; + key.objectid = src->i_ino; pos = 0; + path->reada = 2; + while (1) { - ret = btrfs_lookup_file_extent(trans, root, path, src->i_ino, - pos, 0); + /* + * note the key will change type as we walk through the + * tree. + */ + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); if (ret < 0) goto out; - if (ret > 0) { - if (path->slots[0] == 0) { - ret = 0; + + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) goto out; - } - path->slots[0]--; + if (ret > 0) + break; } -next_slot: leaf = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &key, slot); @@ -3195,7 +3209,8 @@ next_slot: if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY || key.objectid != src->i_ino) - goto out; + break; + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { struct btrfs_file_extent_item *extent; int found_type; @@ -3225,28 +3240,28 @@ next_slot: } pos = key.offset + len; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - dup_item_to_inode(trans, root, path, leaf, slot, - &key, inode->i_ino); + ret = dup_item_to_inode(trans, root, path, + leaf, slot, &key, + inode->i_ino); + if (ret) + goto out; pos = key.offset + btrfs_item_size_nr(leaf, slot); } - } else if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) - dup_item_to_inode(trans, root, path, leaf, slot, &key, - inode->i_ino); + } else if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { + ret = dup_item_to_inode(trans, root, path, leaf, + slot, &key, inode->i_ino); - if (slot >= nritems - 1) { - nextret = btrfs_next_leaf(root, path); - if (nextret) + if (ret) goto out; - } else { - path->slots[0]++; } - goto next_slot; + key.offset++; + btrfs_release_path(root, path); } + ret = 0; out: btrfs_free_path(path); - ret = 0; inode->i_blocks = src->i_blocks; i_size_write(inode, src->i_size); From a061fc8da7b990faa41ca503e66faef3ecdeead0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 May 2008 11:43:44 -0400 Subject: [PATCH 0532/2985] Btrfs: Add support for online device removal This required a few structural changes to the code that manages bdev pointers: The VFS super block now gets an anon-bdev instead of a pointer to the lowest bdev. This allows us to avoid swapping the super block bdev pointer around at run time. The code to read in the super block no longer goes through the extent buffer interface. Things got ugly keeping the mapping constant. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +- fs/btrfs/disk-io.c | 97 ++++++++---------- fs/btrfs/extent-tree.c | 139 +++++++++++++++++++++----- fs/btrfs/inode.c | 27 ++++- fs/btrfs/super.c | 35 ++----- fs/btrfs/transaction.c | 5 +- fs/btrfs/volumes.c | 221 +++++++++++++++++++++++++++++++++++++++-- fs/btrfs/volumes.h | 3 + 8 files changed, 410 insertions(+), 120 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3b6f8524a4ad..33ab165591c5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -505,7 +505,7 @@ struct btrfs_fs_info { u64 alloc_start; struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; - struct extent_buffer *sb_buffer; + struct btrfs_super_block super_for_commit; struct block_device *__bdev; struct super_block *sb; struct inode *btree_inode; @@ -1208,6 +1208,7 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); /* struct btrfs_super_block */ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64); BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, generation, 64); BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fabc31b334b6..9d5424ad01a3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -78,9 +78,13 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); - spin_unlock(&em_tree->lock); - if (em) + if (em) { + em->bdev = + BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; + spin_unlock(&em_tree->lock); goto out; + } + spin_unlock(&em_tree->lock); em = alloc_extent_map(GFP_NOFS); if (!em) { @@ -90,7 +94,7 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, em->start = 0; em->len = (u64)-1; em->block_start = 0; - em->bdev = inode->i_sb->s_bdev; + em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); @@ -435,11 +439,6 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); BUG_ON(ret); - if (offset == BTRFS_SUPER_INFO_OFFSET) { - bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; - submit_bio(rw, bio); - return 0; - } return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); } @@ -587,8 +586,7 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) list = &fs_info->fs_devices->devices; list_for_each(next, list) { device = list_entry(next, struct btrfs_device, dev_list); - if (device->bdev && device->bdev != fs_info->sb->s_bdev) - close_bdev_excl(device->bdev); + close_bdev_excl(device->bdev); device->bdev = NULL; } return 0; @@ -1118,6 +1116,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 leafsize; u32 blocksize; u32 stripesize; + struct buffer_head *bh; struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -1153,7 +1152,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->new_trans_lock); init_completion(&fs_info->kobj_unregister); - sb_set_blocksize(sb, BTRFS_SUPER_INFO_SIZE); fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; @@ -1170,6 +1168,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; + sb->s_blocksize = 4096; + sb->s_blocksize_bits = blksize_bits(4096); + /* * we set the i_size on the btree inode to the max possible int. * the real end of the address space is determined by all of @@ -1229,19 +1230,16 @@ struct btrfs_root *open_ctree(struct super_block *sb, __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - fs_info->sb_buffer = read_tree_block(tree_root, - BTRFS_SUPER_INFO_OFFSET, - 4096); - if (!fs_info->sb_buffer) + bh = __bread(fs_devices->latest_bdev, + BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + if (!bh) goto fail_iput; - read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0, - sizeof(fs_info->super_copy)); + memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); + brelse(bh); - read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, - (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), - BTRFS_FSID_SIZE); + memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) @@ -1263,7 +1261,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; tree_root->stripesize = stripesize; - sb_set_blocksize(sb, sectorsize); + + sb->s_blocksize = sectorsize; + sb->s_blocksize_bits = blksize_bits(sectorsize); if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { @@ -1339,7 +1339,6 @@ fail_tree_root: fail_sys_array: mutex_unlock(&fs_info->fs_mutex); fail_sb_buffer: - free_extent_buffer(fs_info->sb_buffer); extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); fail_iput: iput(fs_info->btree_inode); @@ -1380,41 +1379,44 @@ int write_all_supers(struct btrfs_root *root) struct list_head *cur; struct list_head *head = &root->fs_info->fs_devices->devices; struct btrfs_device *dev; - struct extent_buffer *sb; + struct btrfs_super_block *sb; struct btrfs_dev_item *dev_item; struct buffer_head *bh; int ret; int do_barriers; int max_errors; int total_errors = 0; + u32 crc; + u64 flags; max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; do_barriers = !btrfs_test_opt(root, NOBARRIER); - sb = root->fs_info->sb_buffer; - dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block, - dev_item); + sb = &root->fs_info->super_for_commit; + dev_item = &sb->dev_item; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); - btrfs_set_device_type(sb, dev_item, dev->type); - btrfs_set_device_id(sb, dev_item, dev->devid); - btrfs_set_device_total_bytes(sb, dev_item, dev->total_bytes); - btrfs_set_device_bytes_used(sb, dev_item, dev->bytes_used); - btrfs_set_device_io_align(sb, dev_item, dev->io_align); - btrfs_set_device_io_width(sb, dev_item, dev->io_width); - btrfs_set_device_sector_size(sb, dev_item, dev->sector_size); - write_extent_buffer(sb, dev->uuid, - (unsigned long)btrfs_device_uuid(dev_item), - BTRFS_UUID_SIZE); + btrfs_set_stack_device_type(dev_item, dev->type); + btrfs_set_stack_device_id(dev_item, dev->devid); + btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); + btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); + btrfs_set_stack_device_io_align(dev_item, dev->io_align); + btrfs_set_stack_device_io_width(dev_item, dev->io_width); + btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); + memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); + flags = btrfs_super_flags(sb); + btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); - btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN); - csum_tree_block(root, sb, 0); - bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / - root->fs_info->sb->s_blocksize, + crc = ~(u32)0; + crc = btrfs_csum_data(root, (char *)sb + BTRFS_CSUM_SIZE, crc, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, sb->csum); + + bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / 4096, BTRFS_SUPER_INFO_SIZE); - read_extent_buffer(sb, bh->b_data, 0, BTRFS_SUPER_INFO_SIZE); + memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); dev->pending_io = bh; get_bh(bh); @@ -1483,15 +1485,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root int ret; ret = write_all_supers(root); -#if 0 - if (!btrfs_test_opt(root, NOBARRIER)) - blkdev_issue_flush(sb->s_bdev, NULL); - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super); - ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, - super->start, super->len); - if (!btrfs_test_opt(root, NOBARRIER)) - blkdev_issue_flush(sb->s_bdev, NULL); -#endif return ret; } @@ -1570,8 +1563,6 @@ int close_ctree(struct btrfs_root *root) if (root->fs_info->dev_root->node); free_extent_buffer(root->fs_info->dev_root->node); - free_extent_buffer(fs_info->sb_buffer); - btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); @@ -1652,7 +1643,7 @@ void btrfs_throttle(struct btrfs_root *root) { struct backing_dev_info *bdi; - bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + bdi = &root->fs_info->bdi; if (root->fs_info->throttles && bdi_write_congested(bdi)) { #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) congestion_wait(WRITE, HZ/20); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f94794a99329..c0e67bde8428 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -147,6 +147,8 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct u64 end; int ret; + bytenr = max_t(u64, bytenr, + BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); block_group_cache = &info->block_group_cache; ret = find_first_extent_bit(block_group_cache, bytenr, &start, &end, @@ -1059,16 +1061,25 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) } } -static u64 reduce_alloc_profile(u64 flags) +static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags) { + u64 num_devices = root->fs_info->fs_devices->num_devices; + + if (num_devices == 1) + flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); + if (num_devices < 4) + flags &= ~BTRFS_BLOCK_GROUP_RAID10; + if ((flags & BTRFS_BLOCK_GROUP_DUP) && (flags & (BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10))) + BTRFS_BLOCK_GROUP_RAID10))) { flags &= ~BTRFS_BLOCK_GROUP_DUP; + } if ((flags & BTRFS_BLOCK_GROUP_RAID1) && - (flags & BTRFS_BLOCK_GROUP_RAID10)) + (flags & BTRFS_BLOCK_GROUP_RAID10)) { flags &= ~BTRFS_BLOCK_GROUP_RAID1; + } if ((flags & BTRFS_BLOCK_GROUP_RAID0) && ((flags & BTRFS_BLOCK_GROUP_RAID1) | @@ -1078,7 +1089,6 @@ static u64 reduce_alloc_profile(u64 flags) return flags; } - static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags) @@ -1089,7 +1099,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 num_bytes; int ret; - flags = reduce_alloc_profile(flags); + flags = reduce_alloc_profile(extent_root, flags); space_info = __find_space_info(extent_root->fs_info, flags); if (!space_info) { @@ -1169,6 +1179,21 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } +static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) +{ + u64 start; + u64 end; + int ret; + ret = find_first_extent_bit(&root->fs_info->block_group_cache, + search_start, &start, &end, + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | + BLOCK_GROUP_SYSTEM); + if (ret) + return 0; + return start; +} + + static int update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin) { @@ -1185,16 +1210,25 @@ static int update_pinned_extents(struct btrfs_root *root, } while (num > 0) { cache = btrfs_lookup_block_group(fs_info, bytenr); - WARN_ON(!cache); - len = min(num, cache->key.offset - - (bytenr - cache->key.objectid)); + if (!cache) { + u64 first = first_logical_byte(root, bytenr); + WARN_ON(first < bytenr); + len = min(first - bytenr, num); + } else { + len = min(num, cache->key.offset - + (bytenr - cache->key.objectid)); + } if (pin) { - cache->pinned += len; - cache->space_info->bytes_pinned += len; + if (cache) { + cache->pinned += len; + cache->space_info->bytes_pinned += len; + } fs_info->total_pinned += len; } else { - cache->pinned -= len; - cache->space_info->bytes_pinned -= len; + if (cache) { + cache->pinned -= len; + cache->space_info->bytes_pinned -= len; + } fs_info->total_pinned -= len; } bytenr += len; @@ -1547,7 +1581,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, int data) { int ret; - u64 orig_search_start = search_start; + u64 orig_search_start; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; u64 total_needed = num_bytes; @@ -1577,6 +1611,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, } } + search_start = max(search_start, first_logical_byte(root, 0)); + orig_search_start = search_start; + if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); @@ -1751,7 +1788,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } again: - data = reduce_alloc_profile(data); + data = reduce_alloc_profile(root, data); if (root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, @@ -2309,6 +2346,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, struct file_ra_state *ra; unsigned long total_read = 0; unsigned long ra_pages; + struct btrfs_trans_handle *trans; ra = kzalloc(sizeof(*ra), GFP_NOFS); @@ -2326,9 +2364,13 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, calc_ra(i, last_index, ra_pages)); } total_read++; + if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size) + goto truncate_racing; + page = grab_cache_page(inode->i_mapping, i); - if (!page) + if (!page) { goto out_unlock; + } if (!PageUptodate(page)) { btrfs_readpage(NULL, page); lock_page(page); @@ -2350,20 +2392,33 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, lock_extent(io_tree, page_start, page_end, GFP_NOFS); - set_page_dirty(page); set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); + set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); page_cache_release(page); - balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); } + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + total_read); out_unlock: kfree(ra); + trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); + if (trans) { + btrfs_add_ordered_inode(inode); + btrfs_end_transaction(trans, BTRFS_I(inode)->root); + mark_inode_dirty(inode); + } mutex_unlock(&inode->i_mutex); return 0; + +truncate_racing: + vmtruncate(inode, inode->i_size); + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + total_read); + goto out_unlock; } /* @@ -2466,6 +2521,27 @@ out: return 0; } +static int noinline del_extent_zero(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key) +{ + int ret; + struct btrfs_trans_handle *trans; + + trans = btrfs_start_transaction(extent_root, 1); + ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); + if (ret > 0) { + ret = -EIO; + goto out; + } + if (ret < 0) + goto out; + ret = btrfs_del_item(trans, extent_root, path); +out: + btrfs_end_transaction(trans, extent_root); + return ret; +} + static int noinline relocate_one_extent(struct btrfs_root *extent_root, struct btrfs_path *path, struct btrfs_key *extent_key) @@ -2477,6 +2553,10 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, u32 item_size; int ret = 0; + if (extent_key->objectid == 0) { + ret = del_extent_zero(extent_root, path, extent_key); + goto out; + } key.objectid = extent_key->objectid; key.type = BTRFS_EXTENT_REF_KEY; key.offset = 0; @@ -2490,15 +2570,24 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, ret = 0; leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); - if (path->slots[0] == nritems) - goto out; + if (path->slots[0] == nritems) { + ret = btrfs_next_leaf(extent_root, path); + if (ret > 0) { + ret = 0; + goto out; + } + if (ret < 0) + goto out; + } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != extent_key->objectid) + if (found_key.objectid != extent_key->objectid) { break; + } - if (found_key.type != BTRFS_EXTENT_REF_KEY) + if (found_key.type != BTRFS_EXTENT_REF_KEY) { break; + } key.offset = found_key.offset + 1; item_size = btrfs_item_size_nr(leaf, path->slots[0]); @@ -2519,7 +2608,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; - num_devices = btrfs_super_num_devices(&root->fs_info->super_copy); + num_devices = root->fs_info->fs_devices->num_devices; if (num_devices == 1) { stripped |= BTRFS_BLOCK_GROUP_DUP; stripped = flags & ~stripped; @@ -2535,9 +2624,6 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) return flags; } else { /* they already had raid on here, just return */ - if ((flags & BTRFS_BLOCK_GROUP_DUP) && - (flags & BTRFS_BLOCK_GROUP_RAID1)) { - } if (flags & stripped) return flags; @@ -2570,7 +2656,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) struct extent_buffer *leaf; u32 nritems; int ret; - int progress = 0; + int progress; shrink_block_group = btrfs_lookup_block_group(root->fs_info, shrink_start); @@ -2597,6 +2683,7 @@ again: shrink_block_group->ro = 1; total_found = 0; + progress = 0; key.objectid = shrink_start; key.offset = 0; key.type = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f7beb9b0d37a..b437d3bdf95e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2194,6 +2194,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, again: spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); + if (em) + em->bdev = root->fs_info->fs_devices->latest_bdev; spin_unlock(&em_tree->lock); if (em) { @@ -2212,7 +2214,7 @@ again: em->start = EXTENT_MAP_HOLE; em->len = (u64)-1; - em->bdev = inode->i_sb->s_bdev; + em->bdev = root->fs_info->fs_devices->latest_bdev; ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); if (ret < 0) { @@ -3101,6 +3103,27 @@ out: return ret; } +long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_vol_args *vol_args; + int ret; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + ret = btrfs_rm_device(root, vol_args->name); + +out: + kfree(vol_args); + return ret; +} + int dup_item_to_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -3294,6 +3317,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_resize(root, (void __user *)arg); case BTRFS_IOC_ADD_DEV: return btrfs_ioctl_add_dev(root, (void __user *)arg); + case BTRFS_IOC_RM_DEV: + return btrfs_ioctl_rm_dev(root, (void __user *)arg); case BTRFS_IOC_BALANCE: return btrfs_balance(root->fs_info->dev_root); case BTRFS_IOC_CLONE: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7153dfaa3404..020e5a83e31f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -315,24 +315,12 @@ static void btrfs_write_super(struct super_block *sb) sb->s_dirt = 0; } -/* - * This is almost a copy of get_sb_bdev in fs/super.c. - * We need the local copy to allow direct mounting of - * subvolumes, but this could be easily integrated back - * into the generic version. --hch - */ - -/* start copy & paste */ -static int set_bdev_super(struct super_block *s, void *data) +static int btrfs_test_super(struct super_block *s, void *data) { - s->s_bdev = data; - s->s_dev = s->s_bdev->bd_dev; - return 0; -} + struct btrfs_fs_devices *test_fs_devices = data; + struct btrfs_root *root = btrfs_sb(s); -static int test_bdev_super(struct super_block *s, void *data) -{ - return (void *)s->s_bdev == data; + return root->fs_info->fs_devices == test_fs_devices; } int btrfs_get_sb_bdev(struct file_system_type *fs_type, @@ -354,14 +342,9 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, return error; bdev = fs_devices->lowest_bdev; - /* - * once the super is inserted into the list by sget, s_umount - * will protect the lockfs code from trying to start a snapshot - * while we are mounting - */ - down(&bdev->bd_mount_sem); - s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); - up(&bdev->bd_mount_sem); + btrfs_lock_volumes(); + s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); + btrfs_unlock_volumes(); if (IS_ERR(s)) goto error_s; @@ -373,13 +356,11 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, goto error_bdev; } - close_bdev_excl(bdev); } else { char b[BDEVNAME_SIZE]; s->s_flags = flags; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); - sb_set_blocksize(s, block_size(bdev)); error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 1 : 0); if (error) { @@ -458,7 +439,7 @@ static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", .get_sb = btrfs_get_sb, - .kill_sb = kill_block_super, + .kill_sb = kill_anon_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9826942fa18a..57746c11eae3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -738,9 +738,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, chunk_root->node->start); btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, btrfs_header_level(chunk_root->node)); - write_extent_buffer(root->fs_info->sb_buffer, - &root->fs_info->super_copy, 0, - sizeof(root->fs_info->super_copy)); + memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, + sizeof(root->fs_info->super_copy)); btrfs_copy_pinned(root, pinned_copy); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b38187573108..55da5f0c56e3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -45,6 +45,16 @@ struct map_lookup { static DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); +void btrfs_lock_volumes(void) +{ + mutex_lock(&uuid_mutex); +} + +void btrfs_unlock_volumes(void) +{ + mutex_unlock(&uuid_mutex); +} + int btrfs_cleanup_fs_uuids(void) { struct btrfs_fs_devices *fs_devices; @@ -193,12 +203,14 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, ret = PTR_ERR(bdev); goto fail; } + set_blocksize(bdev, 4096); if (device->devid == fs_devices->latest_devid) fs_devices->latest_bdev = bdev; if (device->devid == fs_devices->lowest_devid) { fs_devices->lowest_bdev = bdev; } device->bdev = bdev; + } mutex_unlock(&uuid_mutex); return 0; @@ -393,6 +405,9 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_root *root = device->dev_root; struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *leaf = NULL; + struct btrfs_dev_extent *extent = NULL; path = btrfs_alloc_path(); if (!path) @@ -403,8 +418,25 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, key.type = BTRFS_DEV_EXTENT_KEY; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + ret = btrfs_previous_item(root, path, key.objectid, + BTRFS_DEV_EXTENT_KEY); + BUG_ON(ret); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_extent); + BUG_ON(found_key.offset > start || found_key.offset + + btrfs_dev_extent_length(leaf, extent) < start); + ret = 0; + } else if (ret == 0) { + leaf = path->nodes[0]; + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_extent); + } BUG_ON(ret); + device->bytes_used -= btrfs_dev_extent_length(leaf, extent); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -593,6 +625,170 @@ out: return ret; } +static int btrfs_rm_dev_item(struct btrfs_root *root, + struct btrfs_device *device) +{ + int ret; + struct btrfs_path *path; + struct block_device *bdev = device->bdev; + struct btrfs_device *next_dev; + struct btrfs_key key; + u64 total_bytes; + struct btrfs_fs_devices *fs_devices; + struct btrfs_trans_handle *trans; + + root = root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 1); + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = device->devid; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; + + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + ret = btrfs_del_item(trans, root, path); + if (ret) + goto out; + + /* + * at this point, the device is zero sized. We want to + * remove it from the devices list and zero out the old super + */ + list_del_init(&device->dev_list); + list_del_init(&device->dev_alloc_list); + fs_devices = root->fs_info->fs_devices; + + next_dev = list_entry(fs_devices->devices.next, struct btrfs_device, + dev_list); + if (bdev == fs_devices->lowest_bdev) + fs_devices->lowest_bdev = next_dev->bdev; + if (bdev == root->fs_info->sb->s_bdev) + root->fs_info->sb->s_bdev = next_dev->bdev; + if (bdev == fs_devices->latest_bdev) + fs_devices->latest_bdev = next_dev->bdev; + + total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); + btrfs_set_super_total_bytes(&root->fs_info->super_copy, + total_bytes - device->total_bytes); + + total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); + btrfs_set_super_num_devices(&root->fs_info->super_copy, + total_bytes - 1); +out: + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return ret; +} + +int btrfs_rm_device(struct btrfs_root *root, char *device_path) +{ + struct btrfs_device *device; + struct block_device *bdev; + struct buffer_head *bh; + struct btrfs_super_block *disk_super; + u64 all_avail; + u64 devid; + int ret = 0; + + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&uuid_mutex); + + all_avail = root->fs_info->avail_data_alloc_bits | + root->fs_info->avail_system_alloc_bits | + root->fs_info->avail_metadata_alloc_bits; + + if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && + root->fs_info->fs_devices->num_devices <= 4) { + printk("btrfs: unable to go below four devices on raid10\n"); + ret = -EINVAL; + goto out; + } + + if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && + root->fs_info->fs_devices->num_devices <= 2) { + printk("btrfs: unable to go below two devices on raid1\n"); + ret = -EINVAL; + goto out; + } + + bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); + goto out; + } + + bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + if (!bh) { + ret = -EIO; + goto error_close; + } + disk_super = (struct btrfs_super_block *)bh->b_data; + if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, + sizeof(disk_super->magic))) { + ret = -ENOENT; + goto error_brelse; + } + if (memcmp(disk_super->fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) { + ret = -ENOENT; + goto error_brelse; + } + devid = le64_to_cpu(disk_super->dev_item.devid); + device = btrfs_find_device(root, devid, NULL); + if (!device) { + ret = -ENOENT; + goto error_brelse; + } + + root->fs_info->fs_devices->num_devices--; + + ret = btrfs_shrink_device(device, 0); + if (ret) + goto error_brelse; + + + ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); + if (ret) + goto error_brelse; + + /* make sure this device isn't detected as part of the FS anymore */ + memset(&disk_super->magic, 0, sizeof(disk_super->magic)); + set_buffer_dirty(bh); + sync_dirty_buffer(bh); + + brelse(bh); + + /* one close for the device struct or super_block */ + close_bdev_excl(device->bdev); + + /* one close for us */ + close_bdev_excl(device->bdev); + + kfree(device->name); + kfree(device); + ret = 0; + goto out; + +error_brelse: + brelse(bh); +error_close: + close_bdev_excl(bdev); +out: + mutex_unlock(&uuid_mutex); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + int btrfs_init_new_device(struct btrfs_root *root, char *device_path) { struct btrfs_trans_handle *trans; @@ -831,13 +1027,17 @@ int btrfs_relocate_chunk(struct btrfs_root *root, em = lookup_extent_mapping(em_tree, chunk_offset, 1); spin_unlock(&em_tree->lock); - BUG_ON(em->start > chunk_offset || em->start + em->len < chunk_offset); + BUG_ON(em->start > chunk_offset || + em->start + em->len < chunk_offset); map = (struct map_lookup *)em->bdev; for (i = 0; i < map->num_stripes; i++) { ret = btrfs_free_dev_extent(trans, map->stripes[i].dev, map->stripes[i].physical); BUG_ON(ret); + + ret = btrfs_update_device(trans, map->stripes[i].dev); + BUG_ON(ret); } ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, chunk_offset); @@ -847,11 +1047,8 @@ int btrfs_relocate_chunk(struct btrfs_root *root, if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); BUG_ON(ret); - goto out; } - - spin_lock(&em_tree->lock); remove_extent_mapping(em_tree, em); kfree(map); @@ -861,7 +1058,6 @@ int btrfs_relocate_chunk(struct btrfs_root *root, free_extent_map(em); spin_unlock(&em_tree->lock); -out: /* once for us */ free_extent_map(em); @@ -1449,7 +1645,7 @@ again: return 0; if (!em) { - printk("unable to find logical %Lu\n", logical); + printk("unable to find logical %Lu len %Lu\n", logical, *length); BUG(); } @@ -1712,6 +1908,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, logical = key->offset; length = btrfs_chunk_length(leaf, chunk); + spin_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); spin_unlock(&map_tree->map_tree.lock); @@ -1845,7 +2042,7 @@ int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf) int btrfs_read_sys_array(struct btrfs_root *root) { struct btrfs_super_block *super_copy = &root->fs_info->super_copy; - struct extent_buffer *sb = root->fs_info->sb_buffer; + struct extent_buffer *sb; struct btrfs_disk_key *disk_key; struct btrfs_chunk *chunk; u8 *ptr; @@ -1857,6 +2054,12 @@ int btrfs_read_sys_array(struct btrfs_root *root) u32 cur; struct btrfs_key key; + sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET, + BTRFS_SUPER_INFO_SIZE); + if (!sb) + return -ENOMEM; + btrfs_set_buffer_uptodate(sb); + write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); array_size = btrfs_super_sys_array_size(super_copy); ptr = super_copy->sys_chunk_array; @@ -1867,8 +2070,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) disk_key = (struct btrfs_disk_key *)ptr; btrfs_disk_key_to_cpu(&key, disk_key); - len = sizeof(*disk_key); - ptr += len; + len = sizeof(*disk_key); ptr += len; sb_ptr += len; cur += len; @@ -1887,6 +2089,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) sb_ptr += len; cur += len; } + free_extent_buffer(sb); return ret; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index a9663e92bb14..0f94a69e6eb6 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -125,6 +125,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); int btrfs_add_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device); +int btrfs_rm_device(struct btrfs_root *root, char *device_path); int btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, @@ -136,4 +137,6 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); int btrfs_balance(struct btrfs_root *dev_root); +void btrfs_unlock_volumes(void); +void btrfs_lock_volumes(void); #endif From bf4ef67924d87b0addb32f084e83a9283496350e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 8 May 2008 13:26:18 -0400 Subject: [PATCH 0533/2985] Btrfs: Properly find the root for snapshotted blocks during chunk relocation Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 + fs/btrfs/extent-tree.c | 120 +++++++++++++++++++++++++++++++++++++---- fs/btrfs/root-tree.c | 46 +++++++++++++++- 3 files changed, 156 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 33ab165591c5..f4b4677bec77 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1459,6 +1459,8 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *item); int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); +int btrfs_search_root(struct btrfs_root *root, u64 search_start, + u64 *found_objectid); int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_root *latest_root); /* dir-item.c */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c0e67bde8428..8e69b5acfeff 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2421,6 +2421,85 @@ truncate_racing: goto out_unlock; } +/* + * The back references tell us which tree holds a ref on a block, + * but it is possible for the tree root field in the reference to + * reflect the original root before a snapshot was made. In this + * case we should search through all the children of a given root + * to find potential holders of references on a block. + * + * Instead, we do something a little less fancy and just search + * all the roots for a given key/block combination. + */ +static int find_root_for_ref(struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key0, + int level, + int file_key, + struct btrfs_root **found_root, + u64 bytenr) +{ + struct btrfs_key root_location; + struct btrfs_root *cur_root = *found_root; + struct btrfs_file_extent_item *file_extent; + u64 root_search_start = BTRFS_FS_TREE_OBJECTID; + u64 found_bytenr; + int ret; + int i; + + root_location.offset = (u64)-1; + root_location.type = BTRFS_ROOT_ITEM_KEY; + path->lowest_level = level; + path->reada = 0; + while(1) { + ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0); + found_bytenr = 0; + if (ret == 0 && file_key) { + struct extent_buffer *leaf = path->nodes[0]; + file_extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, file_extent) == + BTRFS_FILE_EXTENT_REG) { + found_bytenr = + btrfs_file_extent_disk_bytenr(leaf, + file_extent); + } + } else if (ret == 0) { + if (path->nodes[level]) + found_bytenr = path->nodes[level]->start; + } + + for (i = level; i < BTRFS_MAX_LEVEL; i++) { + if (!path->nodes[i]) + break; + free_extent_buffer(path->nodes[i]); + path->nodes[i] = NULL; + } + btrfs_release_path(cur_root, path); + + if (found_bytenr == bytenr) { + *found_root = cur_root; + ret = 0; + goto out; + } + ret = btrfs_search_root(root->fs_info->tree_root, + root_search_start, &root_search_start); + if (ret) + break; + + root_location.objectid = root_search_start; + cur_root = btrfs_read_fs_root_no_name(root->fs_info, + &root_location); + if (!cur_root) { + ret = 1; + break; + } + } +out: + path->lowest_level = 0; + return ret; +} + /* * note, this releases the path */ @@ -2430,13 +2509,15 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, { struct inode *inode; struct btrfs_root *found_root; - struct btrfs_key *root_location; + struct btrfs_key root_location; + struct btrfs_key found_key; struct btrfs_extent_ref *ref; u64 ref_root; u64 ref_gen; u64 ref_objectid; u64 ref_offset; int ret; + int level; ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_ref); @@ -2446,20 +2527,30 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, ref_offset = btrfs_ref_offset(path->nodes[0], ref); btrfs_release_path(extent_root, path); - root_location = kmalloc(sizeof(*root_location), GFP_NOFS); - root_location->objectid = ref_root; + root_location.objectid = ref_root; if (ref_gen == 0) - root_location->offset = 0; + root_location.offset = 0; else - root_location->offset = (u64)-1; - root_location->type = BTRFS_ROOT_ITEM_KEY; + root_location.offset = (u64)-1; + root_location.type = BTRFS_ROOT_ITEM_KEY; found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, - root_location); + &root_location); BUG_ON(!found_root); - kfree(root_location); if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + found_key.objectid = ref_objectid; + found_key.type = BTRFS_EXTENT_DATA_KEY; + found_key.offset = ref_offset; + level = 0; + + ret = find_root_for_ref(extent_root, path, &found_key, + level, 1, &found_root, + extent_key->objectid); + + if (ret) + goto out; + mutex_unlock(&extent_root->fs_info->fs_mutex); inode = btrfs_iget_locked(extent_root->fs_info->sb, ref_objectid, found_root); @@ -2485,12 +2576,9 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, mutex_lock(&extent_root->fs_info->fs_mutex); } else { struct btrfs_trans_handle *trans; - struct btrfs_key found_key; struct extent_buffer *eb; - int level; int i; - trans = btrfs_start_transaction(found_root, 1); eb = read_tree_block(found_root, extent_key->objectid, extent_key->offset); level = btrfs_header_level(eb); @@ -2502,6 +2590,15 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, free_extent_buffer(eb); + ret = find_root_for_ref(extent_root, path, &found_key, + level, 0, &found_root, + extent_key->objectid); + + if (ret) + goto out; + + trans = btrfs_start_transaction(found_root, 1); + path->lowest_level = level; path->reada = 2; ret = btrfs_search_slot(trans, found_root, &found_key, path, @@ -2578,6 +2675,7 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, } if (ret < 0) goto out; + leaf = path->nodes[0]; } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 08f19ec88092..8bf21ba0a43b 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -21,6 +21,51 @@ #include "disk-io.h" #include "print-tree.h" +/* + * returns 0 on finding something, 1 if no more roots are there + * and < 0 on error + */ +int btrfs_search_root(struct btrfs_root *root, u64 search_start, + u64 *found_objectid) +{ + struct btrfs_path *path; + struct btrfs_key search_key; + int ret; + + root = root->fs_info->tree_root; + search_key.objectid = search_start; + search_key.type = (u8)-1; + search_key.offset = (u64)-1; + + path = btrfs_alloc_path(); + BUG_ON(!path); +again: + ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); + if (ret < 0) + goto out; + if (ret == 0) { + ret = 1; + goto out; + } + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret) + goto out; + } + btrfs_item_key_to_cpu(path->nodes[0], &search_key, path->slots[0]); + if (search_key.type != BTRFS_ROOT_ITEM_KEY) { + search_key.offset++; + btrfs_release_path(root, path); + goto again; + } + ret = 0; + *found_objectid = search_key.objectid; + +out: + btrfs_free_path(path); + return ret; +} + int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key) { @@ -55,7 +100,6 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, memcpy(key, &found_key, sizeof(found_key)); ret = 0; out: - btrfs_release_path(root, path); btrfs_free_path(path); return ret; } From a68d5933a0e409592860229b35230c8e87155472 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 8 May 2008 14:11:56 -0400 Subject: [PATCH 0534/2985] Btrfs: Update nodatacow mode to support cloned single files and resizing Before, nodatacow only checked to make sure multiple roots didn't have references on a single extent. This check makes sure that multiple inodes don't have references. nodatacow needed an extra check to see if the block group was currently readonly. This way cows forced by the chunk relocation code are honored. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 9 +++++++++ fs/btrfs/inode.c | 13 +++++++++---- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f4b4677bec77..6c2c2c4e4d2a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1325,7 +1325,7 @@ static inline struct dentry *fdentry(struct file *file) { /* extent-tree.c */ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, struct btrfs_path *count_path, - u64 first_extent); + u64 expected_owner, u64 first_extent); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8e69b5acfeff..30a5094fffa7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -721,12 +721,14 @@ out: u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, struct btrfs_path *count_path, + u64 expected_owner, u64 first_extent) { struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_path *path; u64 bytenr; u64 found_objectid; + u64 found_owner; u64 root_objectid = root->root_key.objectid; u32 total_count = 0; u32 cur_count; @@ -792,6 +794,13 @@ again: total_count = 2; goto out; } + if (level == -1) { + found_owner = btrfs_ref_objectid(l, ref_item); + if (found_owner != expected_owner) { + total_count = 2; + goto out; + } + } total_count = 1; path->slots[0]++; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b437d3bdf95e..1bf37d15b174 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -172,6 +172,7 @@ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) u64 loops = 0; u64 total_fs_bytes; struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_block_group_cache *block_group; struct extent_buffer *leaf; int found_type; struct btrfs_path *path; @@ -230,16 +231,20 @@ again: if (bytenr == 0) goto not_found; + if (btrfs_count_snapshots_in_path(root, path, inode->i_ino, + bytenr) != 1) { + goto not_found; + } + /* * we may be called by the resizer, make sure we're inside * the limits of the FS */ - if (bytenr + extent_num_bytes > total_fs_bytes) + block_group = btrfs_lookup_block_group(root->fs_info, + bytenr); + if (!block_group || block_group->ro) goto not_found; - if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) { - goto not_found; - } start = extent_end; } else { From c1c4d91c38e6e5b29543b02d4a4fb7f007e44f70 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 8 May 2008 15:05:58 -0400 Subject: [PATCH 0535/2985] Btrfs: Only open block devices once during mount -o subvol= btrfs_open_devices needed a check to see if the device was already open. Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 55da5f0c56e3..aa001b08ced0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -196,6 +196,9 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, mutex_lock(&uuid_mutex); list_for_each(cur, head) { device = list_entry(cur, struct btrfs_device, dev_list); + if (device->bdev) + continue; + bdev = open_bdev_excl(device->name, flags, holder); if (IS_ERR(bdev)) { From bbaf549e0c3d28399fc5abd68020d4025ae5c3a7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 8 May 2008 16:31:21 -0400 Subject: [PATCH 0536/2985] Btrfs: A number of nodatacow fixes Once part of a delalloc request fails the cow checks, just cow the entire range It is possible for the back references to all be from the same root, but still have snapshots against an extent. The checks are now more strict, forcing cow any time there are multiple refs against the data extent. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 20 ++++++++++++++++++++ fs/btrfs/inode.c | 8 +++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 30a5094fffa7..db996f0edf0b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -731,6 +731,7 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, u64 found_owner; u64 root_objectid = root->root_key.objectid; u32 total_count = 0; + u32 extent_refs; u32 cur_count; u32 nritems; int ret; @@ -767,6 +768,7 @@ again: } item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); + extent_refs = btrfs_extent_refs(l, item); while (1) { l = path->nodes[0]; nritems = btrfs_header_nritems(l); @@ -800,10 +802,28 @@ again: total_count = 2; goto out; } + /* + * nasty. we don't count a reference held by + * the running transaction. This allows nodatacow + * to avoid cow most of the time + */ + if (found_owner >= BTRFS_FIRST_FREE_OBJECTID && + btrfs_ref_generation(l, ref_item) == + root->fs_info->generation) { + extent_refs--; + } } total_count = 1; path->slots[0]++; } + /* + * if there is more than one reference against a data extent, + * we have to assume the other ref is another snapshot + */ + if (level == -1 && extent_refs > 1) { + total_count = 2; + goto out; + } if (cur_count == 0) { total_count = 0; goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1bf37d15b174..a492fd238c88 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -207,9 +207,8 @@ again: btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); if (found_key.objectid != inode->i_ino || - found_type != BTRFS_EXTENT_DATA_KEY) { + found_type != BTRFS_EXTENT_DATA_KEY) goto not_found; - } found_type = btrfs_file_extent_type(leaf, item); extent_start = found_key.offset; @@ -245,7 +244,6 @@ again: if (!block_group || block_group->ro) goto not_found; - start = extent_end; } else { goto not_found; @@ -260,8 +258,8 @@ loop: goto again; not_found: - cow_file_range(inode, start, cow_end); - start = cow_end + 1; + cow_file_range(inode, start, end); + start = end + 1; goto loop; } From 323da79c9f096ed4da04e5ea00f766f75b28aeaa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 9 May 2008 11:46:48 -0400 Subject: [PATCH 0537/2985] Btrfs: Chunk relocation fine tuning, and add a few printks to show progress Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 20 +++++++++++++++++--- fs/btrfs/volumes.c | 2 ++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index db996f0edf0b..df95fb660d49 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2493,7 +2493,7 @@ static int find_root_for_ref(struct btrfs_root *root, btrfs_file_extent_disk_bytenr(leaf, file_extent); } - } else if (ret == 0) { + } else if (!file_key) { if (path->nodes[level]) found_bytenr = path->nodes[level]->start; } @@ -2797,14 +2797,25 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) root = root->fs_info->extent_root; path->reada = 2; + printk("btrfs relocating block group %llu flags %llu\n", + (unsigned long long)shrink_start, + (unsigned long long)shrink_block_group->flags); + again: if (btrfs_block_group_used(&shrink_block_group->item) > 0) { + u64 calc; + trans = btrfs_start_transaction(root, 1); new_alloc_flags = update_block_group_flags(root, shrink_block_group->flags); + if (new_alloc_flags != shrink_block_group->flags) { + calc = + btrfs_block_group_used(&shrink_block_group->item); + } else { + calc = shrink_block_group->key.offset; + } do_chunk_alloc(trans, root->fs_info->extent_root, - btrfs_block_group_used(&shrink_block_group->item) + - 2 * 1024 * 1024, new_alloc_flags); + calc + 2 * 1024 * 1024, new_alloc_flags); btrfs_end_transaction(trans, root); } shrink_block_group->ro = 1; @@ -2888,6 +2899,9 @@ next: btrfs_release_path(root, path); if (total_found > 0) { + printk("btrfs relocate found %llu last extent was %llu\n", + (unsigned long long)total_found, + (unsigned long long)found_key.objectid); trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index aa001b08ced0..b5d7bd1915b4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1011,6 +1011,8 @@ int btrfs_relocate_chunk(struct btrfs_root *root, int ret; int i; + printk("btrfs relocating chunk %llu\n", + (unsigned long long)chunk_offset); root = root->fs_info->chunk_root; extent_root = root->fs_info->extent_root; em_tree = &root->fs_info->mapping_tree.map_tree; From 188de649c51f03df3f8a601aea0110a3870815c2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 9 May 2008 11:52:25 -0400 Subject: [PATCH 0538/2985] Btrfs: Don't do btree balance_dirty_pages on old kernels, it stalls forever Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9d5424ad01a3..c3083165b172 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1655,6 +1655,11 @@ void btrfs_throttle(struct btrfs_root *root) void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { + /* + * looks as though older kernels can get into trouble with + * this code, they end up stuck in balance_dirty_pages forever + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) struct extent_io_tree *tree; u64 num_dirty; u64 start = 0; @@ -1670,6 +1675,9 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) balance_dirty_pages_ratelimited_nr( root->fs_info->btree_inode->i_mapping, 1); } +#else + return; +#endif } void btrfs_set_buffer_defrag(struct extent_buffer *buf) From ca7a79ad8dbe24669a8ced01f9fc0126b872745d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 May 2008 12:59:19 -0400 Subject: [PATCH 0539/2985] Btrfs: Pass down the expected generation number when reading tree blocks Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 55 ++++++++++++++++-------------------------- fs/btrfs/disk-io.c | 30 ++++++++++------------- fs/btrfs/disk-io.h | 7 +++--- fs/btrfs/extent-tree.c | 15 +++++++----- fs/btrfs/print-tree.c | 3 ++- fs/btrfs/tree-defrag.c | 9 ++++--- 6 files changed, 55 insertions(+), 64 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 29e5674b043d..289d71d8653a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -302,6 +302,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct extent_buffer *cur; struct extent_buffer *tmp; u64 blocknr; + u64 gen; u64 search_start = *last_ret; u64 last_block = 0; u64 other; @@ -354,6 +355,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, progress_passed = 1; blocknr = btrfs_node_blockptr(parent, i); + gen = btrfs_node_ptr_generation(parent, i); if (last_block == 0) last_block = blocknr; @@ -387,15 +389,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, } if (!cur) { cur = read_tree_block(root, blocknr, - blocksize); + blocksize, gen); } else if (!uptodate) { - btrfs_read_buffer(cur); + btrfs_read_buffer(cur, gen); } } if (search_start == 0) search_start = last_block; - btrfs_verify_block_csum(root, cur); err = __btrfs_cow_block(trans, root, cur, parent, i, &tmp, search_start, min(16 * blocksize, @@ -696,12 +697,17 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, static struct extent_buffer *read_node_slot(struct btrfs_root *root, struct extent_buffer *parent, int slot) { + int level = btrfs_header_level(parent); if (slot < 0) return NULL; if (slot >= btrfs_header_nritems(parent)) return NULL; + + BUG_ON(level == 0); + return read_tree_block(root, btrfs_node_blockptr(parent, slot), - btrfs_level_size(root, btrfs_header_level(parent) - 1)); + btrfs_level_size(root, level - 1), + btrfs_node_ptr_generation(parent, slot)); } static int balance_level(struct btrfs_trans_handle *trans, @@ -1076,7 +1082,8 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, if ((search >= lowest_read && search <= highest_read) || (search < lowest_read && lowest_read - search <= 32768) || (search > highest_read && search - highest_read <= 32768)) { - readahead_tree_block(root, search, blocksize); + readahead_tree_block(root, search, blocksize, + btrfs_node_ptr_generation(node, nr)); nread += blocksize; } nscan++; @@ -1109,8 +1116,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow) { struct extent_buffer *b; - u64 bytenr; - u64 ptr_gen; int slot; int ret; int level; @@ -1174,20 +1179,12 @@ again: /* this is only true while dropping a snapshot */ if (level == lowest_level) break; - bytenr = btrfs_node_blockptr(b, slot); - ptr_gen = btrfs_node_ptr_generation(b, slot); + if (should_reada) reada_for_search(root, p, level, slot, key->objectid); - b = read_tree_block(root, bytenr, - btrfs_level_size(root, level - 1)); - if (ptr_gen != btrfs_header_generation(b)) { - printk("block %llu bad gen wanted %llu " - "found %llu\n", - (unsigned long long)b->start, - (unsigned long long)ptr_gen, - (unsigned long long)btrfs_header_generation(b)); - } + + b = read_node_slot(root, b, slot); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -1650,8 +1647,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; - right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1), - root->leafsize); + right = read_node_slot(root, upper, slot + 1); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(right); @@ -1826,8 +1822,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } - left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1], - slot - 1), root->leafsize); + left = read_node_slot(root, path->nodes[1], slot - 1); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(left); @@ -2742,7 +2737,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, */ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { - u64 bytenr; int slot; int level = 1; struct extent_buffer *c; @@ -2762,12 +2756,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) } slot--; - bytenr = btrfs_node_blockptr(c, slot); if (next) free_extent_buffer(next); - next = read_tree_block(root, bytenr, - btrfs_level_size(root, level - 1)); + next = read_node_slot(root, c, slot); break; } path->slots[level] = slot; @@ -2782,8 +2774,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) path->slots[level] = slot; if (!level) break; - next = read_tree_block(root, btrfs_node_blockptr(next, slot), - btrfs_level_size(root, level - 1)); + next = read_node_slot(root, next, slot); } return 0; } @@ -2797,7 +2788,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) { int slot; int level = 1; - u64 bytenr; struct extent_buffer *c; struct extent_buffer *next = NULL; @@ -2814,15 +2804,13 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) continue; } - bytenr = btrfs_node_blockptr(c, slot); if (next) free_extent_buffer(next); if (path->reada) reada_for_search(root, path, level, slot, 0); - next = read_tree_block(root, bytenr, - btrfs_level_size(root, level -1)); + next = read_node_slot(root, c, slot); break; } path->slots[level] = slot; @@ -2836,8 +2824,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) break; if (path->reada) reada_for_search(root, path, level, 0, 0); - next = read_tree_block(root, btrfs_node_blockptr(next, 0), - btrfs_level_size(root, level - 1)); + next = read_node_slot(root, next, 0); } return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c3083165b172..edee7a44f861 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -207,7 +207,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, static int btree_read_extent_buffer_pages(struct btrfs_root *root, struct extent_buffer *eb, - u64 start) + u64 start, u64 parent_transid) { struct extent_io_tree *io_tree; int ret; @@ -254,7 +254,8 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) WARN_ON(1); } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE); + ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, + btrfs_header_generation(eb)); BUG_ON(ret); btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); @@ -562,7 +563,8 @@ static struct address_space_operations btree_aops = { .sync_page = block_sync_page, }; -int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, + u64 parent_transid) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; @@ -592,12 +594,6 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) return 0; } -int btrfs_verify_block_csum(struct btrfs_root *root, - struct extent_buffer *buf) -{ - return btrfs_buffer_uptodate(buf); -} - struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -621,7 +617,7 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize) + u32 blocksize, u64 parent_transid) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; @@ -634,7 +630,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, if (!buf) return NULL; - ret = btree_read_extent_buffer_pages(root, buf, 0); + ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) { buf->flags |= EXTENT_UPTODATE; @@ -715,7 +711,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root, blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize); + blocksize, 0); BUG_ON(!root->node); return 0; } @@ -771,7 +767,7 @@ out: } blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize); + blocksize, 0); BUG_ON(!root->node); insert: root->ref_cows = 1; @@ -1288,7 +1284,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), - blocksize); + blocksize, 0); BUG_ON(!chunk_root->node); read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, @@ -1304,7 +1300,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), - blocksize); + blocksize, 0); if (!tree_root->node) goto fail_sb_buffer; @@ -1732,11 +1728,11 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) EXTENT_DEFRAG, GFP_NOFS); } -int btrfs_read_buffer(struct extent_buffer *buf) +int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; int ret; - ret = btree_read_extent_buffer_pages(root, buf, 0); + ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) { buf->flags |= EXTENT_UPTODATE; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 5d5f6979c41c..e29c895d5230 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -25,8 +25,9 @@ struct btrfs_device; struct btrfs_fs_devices; struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize); -int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); + u32 blocksize, u64 parent_transid); +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, + u64 parent_transid); struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); int clean_tree_block(struct btrfs_trans_handle *trans, @@ -65,7 +66,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf); int btrfs_buffer_defrag_done(struct extent_buffer *buf); int btrfs_clear_buffer_defrag(struct extent_buffer *buf); int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf); -int btrfs_read_buffer(struct extent_buffer *buf); +int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); void btrfs_throttle(struct btrfs_root *root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index df95fb660d49..db07dde4a870 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1338,7 +1338,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, &extent_item, sizeof(extent_item)); clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, GFP_NOFS); - eb = read_tree_block(extent_root, ins.objectid, ins.offset); + eb = read_tree_block(extent_root, ins.objectid, ins.offset, + trans->transid); level = btrfs_header_level(eb); if (level == 0) { btrfs_item_key(eb, &first, 0); @@ -2076,7 +2077,8 @@ static void noinline reada_walk_down(struct btrfs_root *root, } } mutex_unlock(&root->fs_info->fs_mutex); - ret = readahead_tree_block(root, bytenr, blocksize); + ret = readahead_tree_block(root, bytenr, blocksize, + btrfs_node_ptr_generation(node, i)); last = bytenr + blocksize; cond_resched(); mutex_lock(&root->fs_info->fs_mutex); @@ -2096,6 +2098,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, u64 root_owner; u64 root_gen; u64 bytenr; + u64 ptr_gen; struct extent_buffer *next; struct extent_buffer *cur; struct extent_buffer *parent; @@ -2132,6 +2135,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, break; } bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = btrfs_level_size(root, *level - 1); ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); @@ -2152,7 +2156,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, reada_walk_down(root, cur, path->slots[*level]); mutex_unlock(&root->fs_info->fs_mutex); - next = read_tree_block(root, bytenr, blocksize); + next = read_tree_block(root, bytenr, blocksize, + ptr_gen); mutex_lock(&root->fs_info->fs_mutex); /* we've dropped the lock, double check */ @@ -2173,8 +2178,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, BUG_ON(ret); continue; } - } else if (next) { - btrfs_verify_block_csum(root, next); } WARN_ON(*level <= 0); if (path->nodes[*level-1]) @@ -2609,7 +2612,7 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, int i; eb = read_tree_block(found_root, extent_key->objectid, - extent_key->offset); + extent_key->offset, 0); level = btrfs_header_level(eb); if (level == 0) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index e99f3249d05a..14d863720302 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -186,7 +186,8 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) for (i = 0; i < nr; i++) { struct extent_buffer *next = read_tree_block(root, btrfs_node_blockptr(c, i), - btrfs_level_size(root, level - 1)); + btrfs_level_size(root, level - 1), + btrfs_node_ptr_generation(c, i)); if (btrfs_is_leaf(next) && btrfs_header_level(c) != 1) BUG(); diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 256af1870eef..5085e9e693b9 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -28,6 +28,7 @@ static void reada_defrag(struct btrfs_root *root, int i; u32 nritems; u64 bytenr; + u64 gen; u32 blocksize; int ret; @@ -35,7 +36,8 @@ static void reada_defrag(struct btrfs_root *root, nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { bytenr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, bytenr, blocksize); + gen = btrfs_node_ptr_generation(node, i); + ret = readahead_tree_block(root, bytenr, blocksize, gen); if (ret) break; } @@ -101,10 +103,11 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, path->slots[*level]++; continue; } - btrfs_verify_block_csum(root, next); } else { next = read_tree_block(root, bytenr, - btrfs_level_size(root, *level - 1)); + btrfs_level_size(root, *level - 1), + btrfs_node_ptr_generation(cur, + path->slots[*level])); } ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); From 1259ab75c62462b8ffad90067b5e1f6312786a18 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 May 2008 13:39:03 -0400 Subject: [PATCH 0540/2985] Btrfs: Handle write errors on raid1 and raid10 When duplicate copies exist, writes are allowed to fail to one of those copies. This changeset includes a few changes that allow the FS to continue even when some IOs fail. It also adds verification of the parent generation number for btree blocks. This generation is stored in the pointer to a block, and it ensures that missed writes to are detected. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- fs/btrfs/disk-io.c | 56 +++++++++++++++++++++++++++++++---- fs/btrfs/disk-io.h | 2 +- fs/btrfs/extent-tree.c | 4 +-- fs/btrfs/extent_io.c | 49 ++++++++++++++++++++++++++----- fs/btrfs/extent_io.h | 7 ++++- fs/btrfs/inode.c | 66 ++++++++++++++++++++++++++---------------- fs/btrfs/tree-defrag.c | 7 +++-- fs/btrfs/volumes.c | 11 +++++-- 9 files changed, 155 insertions(+), 49 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 289d71d8653a..02e571e6ee62 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -379,7 +379,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, cur = btrfs_find_tree_block(root, blocknr, blocksize); if (cur) - uptodate = btrfs_buffer_uptodate(cur); + uptodate = btrfs_buffer_uptodate(cur, gen); else uptodate = 0; if (!cur || !uptodate) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index edee7a44f861..574b1245964e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -205,6 +205,33 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, return 0; } +static int verify_parent_transid(struct extent_io_tree *io_tree, + struct extent_buffer *eb, u64 parent_transid) +{ + int ret; + + if (!parent_transid || btrfs_header_generation(eb) == parent_transid) + return 0; + + lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); + if (extent_buffer_uptodate(io_tree, eb) && + btrfs_header_generation(eb) == parent_transid) { + ret = 0; + goto out; + } + printk("parent transid verify failed on %llu wanted %llu found %llu\n", + (unsigned long long)eb->start, + (unsigned long long)parent_transid, + (unsigned long long)btrfs_header_generation(eb)); + ret = 1; +out: + clear_extent_buffer_uptodate(io_tree, eb); + unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + return ret; + +} + static int btree_read_extent_buffer_pages(struct btrfs_root *root, struct extent_buffer *eb, u64 start, u64 parent_transid) @@ -218,7 +245,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, while (1) { ret = read_extent_buffer_pages(io_tree, eb, start, 1, btree_get_extent, mirror_num); - if (!ret) + if (!ret && + !verify_parent_transid(io_tree, eb, parent_transid)) return ret; num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, @@ -330,6 +358,13 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, ret = -EIO; goto err; } + if (memcmp_extent_buffer(eb, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(eb), + BTRFS_FSID_SIZE)) { + printk("bad fsid on block %Lu\n", eb->start); + ret = -EIO; + goto err; + } found_level = btrfs_header_level(eb); ret = csum_tree_block(root, eb, 1); @@ -1363,7 +1398,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) "I/O error on %s\n", bdevname(bh->b_bdev, b)); } - set_buffer_write_io_error(bh); + /* note, we dont' set_buffer_write_io_error because we have + * our own ways of dealing with the IO errors + */ clear_buffer_uptodate(bh); } unlock_buffer(bh); @@ -1459,7 +1496,8 @@ int write_all_supers(struct btrfs_root *root) ret = submit_bh(WRITE, bh); BUG_ON(ret); wait_on_buffer(bh); - BUG_ON(!buffer_uptodate(bh)); + if (!buffer_uptodate(bh)) + total_errors++; } else { total_errors++; } @@ -1607,10 +1645,18 @@ int close_ctree(struct btrfs_root *root) return 0; } -int btrfs_buffer_uptodate(struct extent_buffer *buf) +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) { + int ret; struct inode *btree_inode = buf->first_page->mapping->host; - return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); + + ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); + if (!ret) + return ret; + + ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf, + parent_transid); + return !ret; } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index e29c895d5230..30d1ed293c25 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -56,7 +56,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct extent_buffer *buf); -int btrfs_buffer_uptodate(struct extent_buffer *buf); +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); int btrfs_set_buffer_uptodate(struct extent_buffer *buf); int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index db07dde4a870..605018c6045c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1366,7 +1366,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, if (!pending) { buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { - if (btrfs_buffer_uptodate(buf)) { + if (btrfs_buffer_uptodate(buf, 0)) { u64 transid = root->fs_info->running_transaction->transid; u64 header_transid = @@ -2151,7 +2151,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, continue; } next = btrfs_find_tree_block(root, bytenr, blocksize); - if (!next || !btrfs_buffer_uptodate(next)) { + if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); reada_walk_down(root, cur, path->slots[*level]); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index dd403b426ff5..2a3624adc0cf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1366,7 +1366,7 @@ static int end_bio_extent_writepage(struct bio *bio, unsigned int bytes_done, int err) #endif { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + int uptodate = err == 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct extent_state *state = bio->bi_private; struct extent_io_tree *tree = state->tree; @@ -1375,6 +1375,7 @@ static int end_bio_extent_writepage(struct bio *bio, u64 end; u64 cur; int whole_page; + int ret; unsigned long flags; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) @@ -1395,17 +1396,30 @@ static int end_bio_extent_writepage(struct bio *bio, if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); + if (tree->ops && tree->ops->writepage_end_io_hook) { + ret = tree->ops->writepage_end_io_hook(page, start, + end, state); + if (ret) + uptodate = 0; + } + + if (!uptodate && tree->ops && + tree->ops->writepage_io_failed_hook) { + ret = tree->ops->writepage_io_failed_hook(bio, page, + start, end, state); + if (ret == 0) { + state = NULL; + uptodate = (err == 0); + continue; + } + } + if (!uptodate) { clear_extent_uptodate(tree, start, end, GFP_ATOMIC); ClearPageUptodate(page); SetPageError(page); } - if (tree->ops && tree->ops->writepage_end_io_hook) { - tree->ops->writepage_end_io_hook(page, start, end, - state); - } - /* * bios can get merged in funny ways, and so we need to * be careful with the state variable. We know the @@ -2073,9 +2087,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, } else { ret = 0; } - if (ret) + if (ret) { SetPageError(page); - else { + } else { unsigned long max_nr = end_index + 1; set_range_writeback(tree, cur, cur + iosize - 1); if (!PageWriteback(page)) { @@ -2948,6 +2962,25 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, } EXPORT_SYMBOL(set_extent_buffer_dirty); +int clear_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + struct page *page; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + eb->flags &= ~EXTENT_UPTODATE; + + clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + ClearPageUptodate(page); + } + return 0; +} + int set_extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index e48346147514..f1960dafaa19 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -36,9 +36,12 @@ struct extent_io_ops { int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, u64 start, u64 end, struct extent_state *state); + int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, + u64 start, u64 end, + struct extent_state *state); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); - void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, + int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits); @@ -212,6 +215,8 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, struct extent_buffer *eb); int set_extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb); +int clear_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); int extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb); int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a492fd238c88..08760ff9bab7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -430,9 +430,9 @@ struct io_failure_record { int last_mirror; }; -int btrfs_readpage_io_failed_hook(struct bio *failed_bio, - struct page *page, u64 start, u64 end, - struct extent_state *state) +int btrfs_io_failed_hook(struct bio *failed_bio, + struct page *page, u64 start, u64 end, + struct extent_state *state) { struct io_failure_record *failrec = NULL; u64 private; @@ -443,6 +443,7 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, struct bio *bio; int num_copies; int ret; + int rw; u64 logical; ret = get_state_private(failure_tree, start, &private); @@ -505,7 +506,41 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, bio->bi_bdev = failed_bio->bi_bdev; bio->bi_size = 0; bio_add_page(bio, page, failrec->len, start - page_offset(page)); - btrfs_submit_bio_hook(inode, READ, bio, failrec->last_mirror); + if (failed_bio->bi_rw & (1 << BIO_RW)) + rw = WRITE; + else + rw = READ; + + BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, + failrec->last_mirror); + return 0; +} + +int btrfs_clean_io_failures(struct inode *inode, u64 start) +{ + u64 private; + u64 private_failure; + struct io_failure_record *failure; + int ret; + + private = 0; + if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, + (u64)-1, 1, EXTENT_DIRTY)) { + ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, + start, &private_failure); + if (ret == 0) { + failure = (struct io_failure_record *)(unsigned long) + private_failure; + set_state_private(&BTRFS_I(inode)->io_failure_tree, + failure->start, 0); + clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, + failure->start, + failure->start + failure->len - 1, + EXTENT_DIRTY | EXTENT_LOCKED, + GFP_NOFS); + kfree(failure); + } + } return 0; } @@ -547,26 +582,7 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, /* if the io failure tree for this inode is non-empty, * check to see if we've recovered from a failed IO */ - private = 0; - if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, - (u64)-1, 1, EXTENT_DIRTY)) { - u64 private_failure; - struct io_failure_record *failure; - ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, - start, &private_failure); - if (ret == 0) { - failure = (struct io_failure_record *)(unsigned long) - private_failure; - set_state_private(&BTRFS_I(inode)->io_failure_tree, - failure->start, 0); - clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, - failure->start, - failure->start + failure->len - 1, - EXTENT_DIRTY | EXTENT_LOCKED, - GFP_NOFS); - kfree(failure); - } - } + btrfs_clean_io_failures(inode, start); return 0; zeroit: @@ -3657,7 +3673,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { .merge_bio_hook = btrfs_merge_bio_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, - .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, + .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, }; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 5085e9e693b9..c02e2bf2f028 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -51,6 +51,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, struct extent_buffer *next; struct extent_buffer *cur; u64 bytenr; + u64 ptr_gen; int ret = 0; int is_extent = 0; @@ -93,11 +94,12 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; } bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); if (cache_only) { next = btrfs_find_tree_block(root, bytenr, btrfs_level_size(root, *level - 1)); - if (!next || !btrfs_buffer_uptodate(next) || + if (!next || !btrfs_buffer_uptodate(next, ptr_gen) || !btrfs_buffer_defrag(next)) { free_extent_buffer(next); path->slots[*level]++; @@ -106,8 +108,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, } else { next = read_tree_block(root, bytenr, btrfs_level_size(root, *level - 1), - btrfs_node_ptr_generation(cur, - path->slots[*level])); + ptr_gen); } ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b5d7bd1915b4..5fc7fb481474 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1807,14 +1807,19 @@ static int end_bio_multi_stripe(struct bio *bio, if (atomic_dec_and_test(&multi->stripes_pending)) { bio->bi_private = multi->private; bio->bi_end_io = multi->end_io; - /* only send an error to the higher layers if it is * beyond the tolerance of the multi-bio */ - if (atomic_read(&multi->error) > multi->max_errors) + if (atomic_read(&multi->error) > multi->max_errors) { err = -EIO; - else + } else if (err) { + /* + * this bio is actually up to date, we didn't + * go over the max number of errors + */ + set_bit(BIO_UPTODATE, &bio->bi_flags); err = 0; + } kfree(multi); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) From dfe25020689bb2d318782d2c9c7141203583fc70 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 May 2008 13:46:40 -0400 Subject: [PATCH 0541/2985] Btrfs: Add mount -o degraded to allow mounts to continue with missing devices Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 + fs/btrfs/disk-io.c | 49 ++++---- fs/btrfs/disk-io.h | 3 +- fs/btrfs/super.c | 25 ++-- fs/btrfs/volumes.c | 279 ++++++++++++++++++++++++++++++++------------- fs/btrfs/volumes.h | 3 + 6 files changed, 253 insertions(+), 109 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6c2c2c4e4d2a..a9377fb16c37 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -657,6 +657,7 @@ struct btrfs_root { #define BTRFS_MOUNT_NODATACOW (1 << 1) #define BTRFS_MOUNT_NOBARRIER (1 << 2) #define BTRFS_MOUNT_SSD (1 << 3) +#define BTRFS_MOUNT_DEGRADED (1 << 4) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -1606,4 +1607,6 @@ int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); /* super.c */ u64 btrfs_parse_size(char *str); +int btrfs_parse_options(char *options, struct btrfs_root *root, + char **subvol_name); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 574b1245964e..38b0d9ecda6a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -614,21 +614,6 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, return ret; } -static int close_all_devices(struct btrfs_fs_info *fs_info) -{ - struct list_head *list; - struct list_head *next; - struct btrfs_device *device; - - list = &fs_info->fs_devices->devices; - list_for_each(next, list) { - device = list_entry(next, struct btrfs_device, dev_list); - close_bdev_excl(device->bdev); - device->bdev = NULL; - } - return 0; -} - struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -927,6 +912,8 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) list_for_each(cur, &info->fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); + if (!device->bdev) + continue; bdi = blk_get_backing_dev_info(device->bdev); if (bdi && bdi_congested(bdi, bdi_bits)) { ret = 1; @@ -1140,7 +1127,8 @@ static void btrfs_async_submit_work(struct work_struct *work) } struct btrfs_root *open_ctree(struct super_block *sb, - struct btrfs_fs_devices *fs_devices) + struct btrfs_fs_devices *fs_devices, + char *options) { u32 sectorsize; u32 nodesize; @@ -1276,12 +1264,19 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; - if (btrfs_super_num_devices(disk_super) != fs_devices->num_devices) { + btrfs_parse_options(options, tree_root, NULL); + + if (btrfs_super_num_devices(disk_super) > fs_devices->num_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", (unsigned long long)btrfs_super_num_devices(disk_super), (unsigned long long)fs_devices->num_devices); - goto fail_sb_buffer; + if (btrfs_test_opt(tree_root, DEGRADED)) + printk("continuing in degraded mode\n"); + else { + goto fail_sb_buffer; + } } + fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); nodesize = btrfs_super_nodesize(disk_super); @@ -1329,6 +1324,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_read_chunk_tree(chunk_root); BUG_ON(ret); + btrfs_close_extra_devices(fs_devices); + blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); @@ -1374,7 +1371,7 @@ fail_sb_buffer: fail_iput: iput(fs_info->btree_inode); fail: - close_all_devices(fs_info); + btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); kfree(extent_root); @@ -1429,6 +1426,13 @@ int write_all_supers(struct btrfs_root *root) dev_item = &sb->dev_item; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); + if (!dev->bdev) { + total_errors++; + continue; + } + if (!dev->in_fs_metadata) + continue; + btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); @@ -1482,6 +1486,11 @@ int write_all_supers(struct btrfs_root *root) list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); + if (!dev->bdev) + continue; + if (!dev->in_fs_metadata) + continue; + BUG_ON(!dev->pending_io); bh = dev->pending_io; wait_on_buffer(bh); @@ -1631,7 +1640,7 @@ int close_ctree(struct btrfs_root *root) kfree(hasher); } #endif - close_all_devices(fs_info); + btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 30d1ed293c25..2bc64fefe6ea 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -33,7 +33,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); struct btrfs_root *open_ctree(struct super_block *sb, - struct btrfs_fs_devices *fs_devices); + struct btrfs_fs_devices *fs_devices, + char *options); int close_ctree(struct btrfs_root *root); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 020e5a83e31f..273a5b511f50 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -65,11 +65,13 @@ static void btrfs_put_super (struct super_block * sb) } enum { - Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, - Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_err, + Opt_degraded, Opt_subvol, Opt_nodatasum, Opt_nodatacow, + Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, + Opt_ssd, Opt_err, }; static match_table_t tokens = { + {Opt_degraded, "degraded"}, {Opt_subvol, "subvol=%s"}, {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, @@ -106,9 +108,8 @@ u64 btrfs_parse_size(char *str) return res; } -static int parse_options (char * options, - struct btrfs_root *root, - char **subvol_name) +int btrfs_parse_options(char *options, struct btrfs_root *root, + char **subvol_name) { char * p; struct btrfs_fs_info *info = NULL; @@ -135,6 +136,12 @@ static int parse_options (char * options, token = match_token(p, tokens, args); switch (token) { + case Opt_degraded: + if (info) { + printk("btrfs: allowing degraded mounts\n"); + btrfs_set_opt(info->mount_opt, DEGRADED); + } + break; case Opt_subvol: if (subvol_name) { *subvol_name = match_strdup(&args[0]); @@ -234,7 +241,7 @@ static int btrfs_fill_super(struct super_block * sb, sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; - tree_root = open_ctree(sb, fs_devices); + tree_root = open_ctree(sb, fs_devices, (char *)data); if (IS_ERR(tree_root)) { printk("btrfs: open_ctree failed\n"); @@ -267,8 +274,6 @@ static int btrfs_fill_super(struct super_block * sb, goto fail_close; } - parse_options((char *)data, tree_root, NULL); - /* this does the super kobj at the same time */ err = btrfs_sysfs_add_super(tree_root->fs_info); if (err) @@ -341,7 +346,7 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, if (error) return error; - bdev = fs_devices->lowest_bdev; + bdev = fs_devices->latest_bdev; btrfs_lock_volumes(); s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); btrfs_unlock_volumes(); @@ -411,7 +416,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int ret; char *subvol_name = NULL; - parse_options((char *)data, NULL, &subvol_name); + btrfs_parse_options((char *)data, NULL, &subvol_name); ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, mnt, subvol_name ? subvol_name : "default"); if (subvol_name) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5fc7fb481474..43f74d17bcea 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -73,6 +73,7 @@ int btrfs_cleanup_fs_uuids(void) close_bdev_excl(dev->bdev); } list_del(&dev->dev_list); + kfree(dev->name); kfree(dev); } } @@ -127,7 +128,6 @@ static int device_list_add(const char *path, memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; - fs_devices->lowest_devid = (u64)-1; fs_devices->num_devices = 0; device = NULL; } else { @@ -159,13 +159,35 @@ static int device_list_add(const char *path, fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; } - if (fs_devices->lowest_devid > devid) { - fs_devices->lowest_devid = devid; - } *fs_devices_ret = fs_devices; return 0; } +int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) +{ + struct list_head *head = &fs_devices->devices; + struct list_head *cur; + struct btrfs_device *device; + + mutex_lock(&uuid_mutex); +again: + list_for_each(cur, head) { + device = list_entry(cur, struct btrfs_device, dev_list); + if (!device->in_fs_metadata) { +printk("getting rid of extra dev %s\n", device->name); + if (device->bdev) + close_bdev_excl(device->bdev); + list_del(&device->dev_list); + list_del(&device->dev_alloc_list); + fs_devices->num_devices--; + kfree(device->name); + kfree(device); + goto again; + } + } + mutex_unlock(&uuid_mutex); + return 0; +} int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { struct list_head *head = &fs_devices->devices; @@ -179,6 +201,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) close_bdev_excl(device->bdev); } device->bdev = NULL; + device->in_fs_metadata = 0; } mutex_unlock(&uuid_mutex); return 0; @@ -199,6 +222,9 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, if (device->bdev) continue; + if (!device->name) + continue; + bdev = open_bdev_excl(device->name, flags, holder); if (IS_ERR(bdev)) { @@ -209,10 +235,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, set_blocksize(bdev, 4096); if (device->devid == fs_devices->latest_devid) fs_devices->latest_bdev = bdev; - if (device->devid == fs_devices->lowest_devid) { - fs_devices->lowest_bdev = bdev; - } device->bdev = bdev; + device->in_fs_metadata = 0; } mutex_unlock(&uuid_mutex); @@ -439,7 +463,8 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, } BUG_ON(ret); - device->bytes_used -= btrfs_dev_extent_length(leaf, extent); + if (device->bytes_used > 0) + device->bytes_used -= btrfs_dev_extent_length(leaf, extent); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -460,6 +485,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_key key; + WARN_ON(!device->in_fs_metadata); path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -674,8 +700,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, next_dev = list_entry(fs_devices->devices.next, struct btrfs_device, dev_list); - if (bdev == fs_devices->lowest_bdev) - fs_devices->lowest_bdev = next_dev->bdev; if (bdev == root->fs_info->sb->s_bdev) root->fs_info->sb->s_bdev = next_dev->bdev; if (bdev == fs_devices->latest_bdev) @@ -698,7 +722,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) { struct btrfs_device *device; struct block_device *bdev; - struct buffer_head *bh; + struct buffer_head *bh = NULL; struct btrfs_super_block *disk_super; u64 all_avail; u64 devid; @@ -712,47 +736,73 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) root->fs_info->avail_metadata_alloc_bits; if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && - root->fs_info->fs_devices->num_devices <= 4) { + btrfs_super_num_devices(&root->fs_info->super_copy) <= 4) { printk("btrfs: unable to go below four devices on raid10\n"); ret = -EINVAL; goto out; } if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && - root->fs_info->fs_devices->num_devices <= 2) { + btrfs_super_num_devices(&root->fs_info->super_copy) <= 2) { printk("btrfs: unable to go below two devices on raid1\n"); ret = -EINVAL; goto out; } - bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder); - if (IS_ERR(bdev)) { - ret = PTR_ERR(bdev); - goto out; - } + if (strcmp(device_path, "missing") == 0) { + struct list_head *cur; + struct list_head *devices; + struct btrfs_device *tmp; - bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); - if (!bh) { - ret = -EIO; - goto error_close; - } - disk_super = (struct btrfs_super_block *)bh->b_data; - if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, - sizeof(disk_super->magic))) { - ret = -ENOENT; - goto error_brelse; - } - if (memcmp(disk_super->fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) { - ret = -ENOENT; - goto error_brelse; - } - devid = le64_to_cpu(disk_super->dev_item.devid); - device = btrfs_find_device(root, devid, NULL); - if (!device) { - ret = -ENOENT; - goto error_brelse; - } + device = NULL; + devices = &root->fs_info->fs_devices->devices; + list_for_each(cur, devices) { + tmp = list_entry(cur, struct btrfs_device, dev_list); + if (tmp->in_fs_metadata && !tmp->bdev) { + device = tmp; + break; + } + } + bdev = NULL; + bh = NULL; + disk_super = NULL; + if (!device) { + printk("btrfs: no missing devices found to remove\n"); + goto out; + } + } else { + bdev = open_bdev_excl(device_path, 0, + root->fs_info->bdev_holder); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); + goto out; + } + + bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + if (!bh) { + ret = -EIO; + goto error_close; + } + disk_super = (struct btrfs_super_block *)bh->b_data; + if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, + sizeof(disk_super->magic))) { + ret = -ENOENT; + goto error_brelse; + } + if (memcmp(disk_super->fsid, root->fs_info->fsid, + BTRFS_FSID_SIZE)) { + ret = -ENOENT; + goto error_brelse; + } + devid = le64_to_cpu(disk_super->dev_item.devid); + device = btrfs_find_device(root, devid, NULL); + if (!device) { + ret = -ENOENT; + goto error_brelse; + } + + } root->fs_info->fs_devices->num_devices--; ret = btrfs_shrink_device(device, 0); @@ -764,19 +814,25 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (ret) goto error_brelse; - /* make sure this device isn't detected as part of the FS anymore */ - memset(&disk_super->magic, 0, sizeof(disk_super->magic)); - set_buffer_dirty(bh); - sync_dirty_buffer(bh); + if (bh) { + /* make sure this device isn't detected as part of + * the FS anymore + */ + memset(&disk_super->magic, 0, sizeof(disk_super->magic)); + set_buffer_dirty(bh); + sync_dirty_buffer(bh); - brelse(bh); - - /* one close for the device struct or super_block */ - close_bdev_excl(device->bdev); - - /* one close for us */ - close_bdev_excl(device->bdev); + brelse(bh); + } + if (device->bdev) { + /* one close for the device struct or super_block */ + close_bdev_excl(device->bdev); + } + if (bdev) { + /* one close for us */ + close_bdev_excl(bdev); + } kfree(device->name); kfree(device); ret = 0; @@ -785,7 +841,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) error_brelse: brelse(bh); error_close: - close_bdev_excl(bdev); + if (bdev) + close_bdev_excl(bdev); out: mutex_unlock(&uuid_mutex); mutex_unlock(&root->fs_info->fs_mutex); @@ -839,6 +896,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->total_bytes = i_size_read(bdev->bd_inode); device->dev_root = root->fs_info->dev_root; device->bdev = bdev; + device->in_fs_metadata = 1; ret = btrfs_add_device(trans, root, device); if (ret) @@ -1041,8 +1099,10 @@ int btrfs_relocate_chunk(struct btrfs_root *root, map->stripes[i].physical); BUG_ON(ret); - ret = btrfs_update_device(trans, map->stripes[i].dev); - BUG_ON(ret); + if (map->stripes[i].dev) { + ret = btrfs_update_device(trans, map->stripes[i].dev); + BUG_ON(ret); + } } ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, chunk_offset); @@ -1415,10 +1475,13 @@ again: while(index < num_stripes) { device = list_entry(cur, struct btrfs_device, dev_alloc_list); - avail = device->total_bytes - device->bytes_used; + if (device->total_bytes > device->bytes_used) + avail = device->total_bytes - device->bytes_used; + else + avail = 0; cur = cur->next; - if (avail >= min_free) { + if (device->in_fs_metadata && avail >= min_free) { u64 ignored_start = 0; ret = find_free_dev_extent(trans, device, path, min_free, @@ -1430,7 +1493,7 @@ again: if (type & BTRFS_BLOCK_GROUP_DUP) index++; } - } else if (avail > max_avail) + } else if (device->in_fs_metadata && avail > max_avail) max_avail = avail; if (cur == dev_list) break; @@ -1610,6 +1673,22 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) return ret; } +static int find_live_mirror(struct map_lookup *map, int first, int num, + int optimal) +{ + int i; + if (map->stripes[optimal].dev->bdev) + return optimal; + for (i = first; i < first + num; i++) { + if (map->stripes[i].dev->bdev) + return i; + } + /* we couldn't find one that doesn't fail. Just return something + * and the io error handling code will clean up eventually + */ + return optimal; +} + static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, struct btrfs_multi_bio **multi_ret, @@ -1712,8 +1791,11 @@ again: num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; - else - stripe_index = current->pid % map->num_stripes; + else { + stripe_index = find_live_mirror(map, 0, + map->num_stripes, + current->pid % map->num_stripes); + } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { if (rw & (1 << BIO_RW)) @@ -1731,8 +1813,11 @@ again: num_stripes = map->sub_stripes; else if (mirror_num) stripe_index += mirror_num - 1; - else - stripe_index += current->pid % map->sub_stripes; + else { + stripe_index = find_live_mirror(map, stripe_index, + map->sub_stripes, stripe_index + + current->pid % map->sub_stripes); + } } else { /* * after this do_div call, stripe_nr is the number of stripes @@ -1749,9 +1834,11 @@ again: struct backing_dev_info *bdi; device = map->stripes[stripe_index].dev; - bdi = blk_get_backing_dev_info(device->bdev); - if (bdi->unplug_io_fn) { - bdi->unplug_io_fn(bdi, unplug_page); + if (device->bdev) { + bdi = blk_get_backing_dev_info(device->bdev); + if (bdi->unplug_io_fn) { + bdi->unplug_io_fn(bdi, unplug_page); + } } } else { multi->stripes[i].physical = @@ -1880,12 +1967,21 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } bio->bi_sector = multi->stripes[dev_nr].physical >> 9; dev = multi->stripes[dev_nr].dev; - - bio->bi_bdev = dev->bdev; - spin_lock(&dev->io_lock); - dev->total_ios++; - spin_unlock(&dev->io_lock); - submit_bio(rw, bio); + if (dev && dev->bdev) { + bio->bi_bdev = dev->bdev; + spin_lock(&dev->io_lock); + dev->total_ios++; + spin_unlock(&dev->io_lock); + submit_bio(rw, bio); + } else { + bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; + bio->bi_sector = logical >> 9; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + bio_endio(bio, bio->bi_size, -EIO); +#else + bio_endio(bio, -EIO); +#endif + } dev_nr++; } if (total_devs == 1) @@ -1901,6 +1997,27 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, return __find_device(head, devid, uuid); } +static struct btrfs_device *add_missing_dev(struct btrfs_root *root, + u64 devid, u8 *dev_uuid) +{ + struct btrfs_device *device; + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; + + device = kzalloc(sizeof(*device), GFP_NOFS); + list_add(&device->dev_list, + &fs_devices->devices); + list_add(&device->dev_alloc_list, + &fs_devices->alloc_list); + device->barriers = 1; + device->dev_root = root->fs_info->dev_root; + device->devid = devid; + fs_devices->num_devices++; + spin_lock_init(&device->io_lock); + memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); + return device; +} + + static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) @@ -1965,11 +2082,22 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, btrfs_stripe_dev_uuid_nr(chunk, i), BTRFS_UUID_SIZE); map->stripes[i].dev = btrfs_find_device(root, devid, uuid); - if (!map->stripes[i].dev) { + + if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { kfree(map); free_extent_map(em); return -EIO; } + if (!map->stripes[i].dev) { + map->stripes[i].dev = + add_missing_dev(root, devid, uuid); + if (!map->stripes[i].dev) { + kfree(map); + free_extent_map(em); + return -EIO; + } + } + map->stripes[i].dev->in_fs_metadata = 1; } spin_lock(&map_tree->map_tree.lock); @@ -2016,20 +2144,15 @@ static int read_one_dev(struct btrfs_root *root, BTRFS_UUID_SIZE); device = btrfs_find_device(root, devid, dev_uuid); if (!device) { - printk("warning devid %Lu not found already\n", devid); - device = kzalloc(sizeof(*device), GFP_NOFS); + printk("warning devid %Lu missing\n", devid); + device = add_missing_dev(root, devid, dev_uuid); if (!device) return -ENOMEM; - list_add(&device->dev_list, - &root->fs_info->fs_devices->devices); - list_add(&device->dev_alloc_list, - &root->fs_info->fs_devices->alloc_list); - device->barriers = 1; - spin_lock_init(&device->io_lock); } fill_device_from_item(leaf, dev_item, device); device->dev_root = root->fs_info->dev_root; + device->in_fs_metadata = 1; ret = 0; #if 0 ret = btrfs_open_device(device); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 0f94a69e6eb6..454fe8103329 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -27,8 +27,10 @@ struct btrfs_device { struct list_head dev_alloc_list; struct btrfs_root *dev_root; struct buffer_head *pending_io; + u64 generation; int barriers; + int in_fs_metadata; spinlock_t io_lock; @@ -122,6 +124,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int btrfs_scan_one_device(const char *path, int flags, void *holder, struct btrfs_fs_devices **fs_devices_ret); int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); +int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices); int btrfs_add_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device); From a0af469b58944f6e8c5c8ecbebb42997baf0cb9e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 May 2008 16:03:06 -0400 Subject: [PATCH 0542/2985] Fix btrfs_open_devices to deal with changes since the scan ioctls Devices can change after the scan ioctls are done, and btrfs_open_devices needs to be able to verify them as they are opened and used by the FS. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 +-- fs/btrfs/volumes.c | 70 ++++++++++++++++++++++++++++++++++++++-------- fs/btrfs/volumes.h | 4 +-- 3 files changed, 63 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 38b0d9ecda6a..264f297260f8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1266,10 +1266,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_parse_options(options, tree_root, NULL); - if (btrfs_super_num_devices(disk_super) > fs_devices->num_devices) { + if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", (unsigned long long)btrfs_super_num_devices(disk_super), - (unsigned long long)fs_devices->num_devices); + (unsigned long long)fs_devices->open_devices); if (btrfs_test_opt(tree_root, DEGRADED)) printk("continuing in degraded mode\n"); else { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 43f74d17bcea..501d23d3ebfd 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -71,6 +71,7 @@ int btrfs_cleanup_fs_uuids(void) dev_list); if (dev->bdev) { close_bdev_excl(dev->bdev); + fs_devices->open_devices--; } list_del(&dev->dev_list); kfree(dev->name); @@ -174,9 +175,10 @@ again: list_for_each(cur, head) { device = list_entry(cur, struct btrfs_device, dev_list); if (!device->in_fs_metadata) { -printk("getting rid of extra dev %s\n", device->name); - if (device->bdev) + if (device->bdev) { close_bdev_excl(device->bdev); + fs_devices->open_devices--; + } list_del(&device->dev_list); list_del(&device->dev_alloc_list); fs_devices->num_devices--; @@ -188,6 +190,7 @@ printk("getting rid of extra dev %s\n", device->name); mutex_unlock(&uuid_mutex); return 0; } + int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { struct list_head *head = &fs_devices->devices; @@ -199,10 +202,12 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev) { close_bdev_excl(device->bdev); + fs_devices->open_devices--; } device->bdev = NULL; device->in_fs_metadata = 0; } + fs_devices->mounted = 0; mutex_unlock(&uuid_mutex); return 0; } @@ -214,9 +219,19 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, struct list_head *head = &fs_devices->devices; struct list_head *cur; struct btrfs_device *device; - int ret; + struct block_device *latest_bdev = NULL; + struct buffer_head *bh; + struct btrfs_super_block *disk_super; + u64 latest_devid = 0; + u64 latest_transid = 0; + u64 transid; + u64 devid; + int ret = 0; mutex_lock(&uuid_mutex); + if (fs_devices->mounted) + goto out; + list_for_each(cur, head) { device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev) @@ -229,21 +244,52 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, if (IS_ERR(bdev)) { printk("open %s failed\n", device->name); - ret = PTR_ERR(bdev); - goto fail; + goto error; } set_blocksize(bdev, 4096); - if (device->devid == fs_devices->latest_devid) - fs_devices->latest_bdev = bdev; + + bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + if (!bh) + goto error_close; + + disk_super = (struct btrfs_super_block *)bh->b_data; + if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, + sizeof(disk_super->magic))) + goto error_brelse; + + devid = le64_to_cpu(disk_super->dev_item.devid); + if (devid != device->devid) + goto error_brelse; + + transid = btrfs_super_generation(disk_super); + if (transid > latest_transid) { + latest_devid = devid; + latest_transid = transid; + latest_bdev = bdev; + } + device->bdev = bdev; device->in_fs_metadata = 0; + fs_devices->open_devices++; + continue; +error_brelse: + brelse(bh); +error_close: + close_bdev_excl(bdev); +error: + continue; } + if (fs_devices->open_devices == 0) { + ret = -EIO; + goto out; + } + fs_devices->mounted = 1; + fs_devices->latest_bdev = latest_bdev; + fs_devices->latest_devid = latest_devid; + fs_devices->latest_trans = latest_transid; +out: mutex_unlock(&uuid_mutex); - return 0; -fail: - mutex_unlock(&uuid_mutex); - btrfs_close_devices(fs_devices); return ret; } @@ -828,6 +874,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (device->bdev) { /* one close for the device struct or super_block */ close_bdev_excl(device->bdev); + root->fs_info->fs_devices->open_devices--; } if (bdev) { /* one close for us */ @@ -914,6 +961,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) list_add(&device->dev_alloc_list, &root->fs_info->fs_devices->alloc_list); root->fs_info->fs_devices->num_devices++; + root->fs_info->fs_devices->open_devices++; out: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 454fe8103329..4df6b1608f91 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -71,16 +71,16 @@ struct btrfs_fs_devices { /* the device with this id has the most recent coyp of the super */ u64 latest_devid; u64 latest_trans; - u64 lowest_devid; u64 num_devices; + u64 open_devices; struct block_device *latest_bdev; - struct block_device *lowest_bdev; /* all of the devices in the FS */ struct list_head devices; /* devices not currently being allocated */ struct list_head alloc_list; struct list_head list; + int mounted; }; struct btrfs_bio_stripe { From 211c17f51f46dc6c308c742098273dd46b5ca59c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 15 May 2008 09:13:45 -0400 Subject: [PATCH 0543/2985] Fix corners in writepage and btrfs_truncate_page The extent_io writepage calls needed an extra check for discarding pages that started on th last byte in the file. btrfs_truncate_page needed checks to make sure the page was still part of the file after reading it, and most importantly, needed to wait for all IO to the page to finish before freeing the corresponding extents on disk. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 15 +++++++++------ fs/btrfs/file.c | 8 -------- fs/btrfs/inode.c | 9 ++++++++- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2a3624adc0cf..17c508a941eb 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1988,9 +1988,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 nr_delalloc; u64 delalloc_end; + WARN_ON(!PageLocked(page)); - if (page->index > end_index) { - clear_extent_dirty(tree, start, page_end, GFP_NOFS); + page_offset = i_size & (PAGE_CACHE_SIZE - 1); + if (page->index > end_index || + (page->index == end_index && !page_offset)) { + page->mapping->a_ops->invalidatepage(page, 0); unlock_page(page); return 0; } @@ -1998,13 +2001,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (page->index == end_index) { char *userpage; - size_t offset = i_size & (PAGE_CACHE_SIZE - 1); - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); + memset(userpage + page_offset, 0, + PAGE_CACHE_SIZE - page_offset); kunmap_atomic(userpage, KM_USER0); + flush_dcache_page(page); } + page_offset = 0; set_page_extent_mapped(page); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index df97d470812a..d968900c6973 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -305,7 +305,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, (inline_size & (root->sectorsize -1)) == 0 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; - u64 existing_delalloc = 0; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; @@ -315,13 +314,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, last_end = (u64)(pages[num_pages -1]->index) << PAGE_CACHE_SHIFT; last_end += PAGE_CACHE_SIZE - 1; - if (start_pos < isize) { - u64 delalloc_start = start_pos; - existing_delalloc = count_range_bits(io_tree, - &delalloc_start, - end_of_last_block, (u64)-1, - EXTENT_DELALLOC); - } set_extent_delalloc(io_tree, start_pos, end_of_last_block, GFP_NOFS); btrfs_add_ordered_inode(inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 08760ff9bab7..40b4a8ec17fe 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1180,19 +1180,26 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) goto out; ret = -ENOMEM; +again: page = grab_cache_page(mapping, index); if (!page) goto out; if (!PageUptodate(page)) { ret = btrfs_readpage(NULL, page); lock_page(page); + if (page->mapping != mapping) { + unlock_page(page); + page_cache_release(page); + goto again; + } if (!PageUptodate(page)) { ret = -EIO; goto out; } } - page_start = (u64)page->index << PAGE_CACHE_SHIFT; + page_start = (u64)page->index << PAGE_CACHE_SHIFT; + wait_on_page_writeback(page); ret = btrfs_cow_one_page(inode, page, offset); unlock_page(page); From cb03c743c6486e4efb8be6bdf7bf23177826d156 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 15 May 2008 16:15:45 -0400 Subject: [PATCH 0544/2985] Btrfs: Change the congestion functions to meter the number of async submits as well The async submit workqueue was absorbing too many requests, leading to long stalls where the async submitters were stalling. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a9377fb16c37..293464c0c6ba 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -522,6 +522,7 @@ struct btrfs_fs_info { struct work_struct async_submit_work; spinlock_t end_io_work_lock; spinlock_t async_submit_work_lock; + atomic_t nr_async_submits; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) struct work_struct trans_work; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 264f297260f8..373374340e9b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -453,6 +453,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, spin_lock(&fs_info->async_submit_work_lock); list_add_tail(&async->list, &fs_info->async_submit_work_list); + atomic_inc(&fs_info->nr_async_submits); spin_unlock(&fs_info->async_submit_work_lock); queue_work(async_submit_workqueue, &fs_info->async_submit_work); @@ -906,10 +907,16 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) { struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; int ret = 0; + int limit = 256 * info->fs_devices->open_devices; struct list_head *cur; struct btrfs_device *device; struct backing_dev_info *bdi; + if ((bdi_bits & (1 << BDI_write_congested)) && + atomic_read(&info->nr_async_submits) > limit) { + return 1; + } + list_for_each(cur, &info->fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); if (!device->bdev) @@ -1117,6 +1124,7 @@ static void btrfs_async_submit_work(struct work_struct *work) } next = fs_info->async_submit_work_list.next; list_del(next); + atomic_dec(&fs_info->nr_async_submits); spin_unlock(&fs_info->async_submit_work_lock); async = list_entry(next, struct async_submit_bio, list); @@ -1179,6 +1187,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); + atomic_set(&fs_info->nr_async_submits, 0); fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; From 1c8cfcc1590d378c82ec75888f71e87fd26551e3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 16 May 2008 13:06:51 -0400 Subject: [PATCH 0545/2985] Btrfs: Enable btree balancing on old kernels again Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 373374340e9b..d8a256d3c83a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1719,7 +1719,6 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) * looks as though older kernels can get into trouble with * this code, they end up stuck in balance_dirty_pages forever */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) struct extent_io_tree *tree; u64 num_dirty; u64 start = 0; @@ -1735,9 +1734,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) balance_dirty_pages_ratelimited_nr( root->fs_info->btree_inode->i_mapping, 1); } -#else return; -#endif } void btrfs_set_buffer_defrag(struct extent_buffer *buf) From 6af5ac3c1aede574acde5fc477f1a1c876fb568e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 16 May 2008 13:14:57 -0400 Subject: [PATCH 0546/2985] Btrfs: Handle transid == 0 while opening devices Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 501d23d3ebfd..3fbf0834e884 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -262,7 +262,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, goto error_brelse; transid = btrfs_super_generation(disk_super); - if (transid > latest_transid) { + if (!latest_transid || transid > latest_transid) { latest_devid = devid; latest_transid = transid; latest_bdev = bdev; From 515dc3228c568f9df8b634f82b3471e20205ac16 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 16 May 2008 13:30:15 -0400 Subject: [PATCH 0547/2985] Btrfs: Use kzalloc on the fs_devices allocation Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3fbf0834e884..333e5269577a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -120,7 +120,7 @@ static int device_list_add(const char *path, fs_devices = find_fsid(disk_super->fsid); if (!fs_devices) { - fs_devices = kmalloc(sizeof(*fs_devices), GFP_NOFS); + fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); if (!fs_devices) return -ENOMEM; INIT_LIST_HEAD(&fs_devices->devices); @@ -129,7 +129,6 @@ static int device_list_add(const char *path, memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; - fs_devices->num_devices = 0; device = NULL; } else { device = __find_device(&fs_devices->devices, devid, From 0ef3e66b6700eb8f052daa8b89443ff872fbbdfc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 24 May 2008 14:04:53 -0400 Subject: [PATCH 0548/2985] Btrfs: Allocator fix variety pack * Force chunk allocation when find_free_extent has to do a full scan * Record the max key at the start of defrag so it doesn't run forever * Block groups might not be contiguous, make a forward search for the next block group in extent-tree.c * Get rid of extra checks for total fs size * Fix relocate_one_reference to avoid relocating the same file data block twice when referenced by an older transaction * Use the open device count when allocating chunks so that we don't try to allocate from devices that don't exist Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- fs/btrfs/ctree.h | 2 + fs/btrfs/extent-tree.c | 281 +++++++++++++++++++++++++++-------------- fs/btrfs/tree-defrag.c | 17 +++ fs/btrfs/volumes.c | 12 +- 5 files changed, 213 insertions(+), 101 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 02e571e6ee62..1d404bde3cfa 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -363,7 +363,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, other = btrfs_node_blockptr(parent, i - 1); close = close_blocks(blocknr, other, blocksize); } - if (close && i < end_slot - 2) { + if (!close && i < end_slot - 2) { other = btrfs_node_blockptr(parent, i + 1); close = close_blocks(blocknr, other, blocksize); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 293464c0c6ba..7b73a9c3d868 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -464,6 +464,7 @@ struct btrfs_space_info { u64 bytes_used; u64 bytes_pinned; int full; + int force_alloc; struct list_head list; }; @@ -589,6 +590,7 @@ struct btrfs_root { int ref_cows; int track_dirty; struct btrfs_key defrag_progress; + struct btrfs_key defrag_max; int defrag_running; int defrag_level; char *name; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 605018c6045c..41a63462d3eb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -136,6 +136,35 @@ err: return 0; } +struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct + btrfs_fs_info *info, + u64 bytenr) +{ + struct extent_io_tree *block_group_cache; + struct btrfs_block_group_cache *block_group = NULL; + u64 ptr; + u64 start; + u64 end; + int ret; + + bytenr = max_t(u64, bytenr, + BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); + block_group_cache = &info->block_group_cache; + ret = find_first_extent_bit(block_group_cache, + bytenr, &start, &end, + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | + BLOCK_GROUP_SYSTEM); + if (ret) { + return NULL; + } + ret = get_state_private(block_group_cache, start, &ptr); + if (ret) + return NULL; + + block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; + return block_group; +} + struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr) @@ -175,7 +204,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) static int noinline find_search_start(struct btrfs_root *root, struct btrfs_block_group_cache **cache_ret, - u64 *start_ret, int num, int data) + u64 *start_ret, u64 num, int data) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; @@ -188,22 +217,22 @@ static int noinline find_search_start(struct btrfs_root *root, u64 search_start = *start_ret; int wrapped = 0; - if (!cache) - goto out; - total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); free_space_cache = &root->fs_info->free_space_cache; -again: - ret = cache_block_group(root, cache); - if (ret) + if (!cache) goto out; - last = max(search_start, cache->key.objectid); - if (!block_group_bits(cache, data) || cache->ro) { - goto new_group; +again: + ret = cache_block_group(root, cache); + if (ret) { + goto out; } + last = max(search_start, cache->key.objectid); + if (!block_group_bits(cache, data) || cache->ro) + goto new_group; + spin_lock_irq(&free_space_cache->lock); state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY); while(1) { @@ -217,20 +246,17 @@ again: start = max(last, state->start); last = state->end + 1; if (last - start < num) { - if (last == cache->key.objectid + cache->key.offset) - cache_miss = start; do { state = extent_state_next(state); } while(state && !(state->state & EXTENT_DIRTY)); continue; } spin_unlock_irq(&free_space_cache->lock); - if (cache->ro) + if (cache->ro) { goto new_group; + } if (start + num > cache->key.objectid + cache->key.offset) goto new_group; - if (start + num > total_fs_bytes) - goto new_group; if (!block_group_bits(cache, data)) { printk("block group bits don't match %Lu %d\n", cache->flags, data); } @@ -248,7 +274,7 @@ out: new_group: last = cache->key.objectid + cache->key.offset; wrapped: - cache = btrfs_lookup_block_group(root->fs_info, last); + cache = btrfs_lookup_first_block_group(root->fs_info, last); if (!cache || cache->key.objectid >= total_fs_bytes) { no_cache: if (!wrapped) { @@ -261,13 +287,13 @@ no_cache: if (cache_miss && !cache->cached) { cache_block_group(root, cache); last = cache_miss; - cache = btrfs_lookup_block_group(root->fs_info, last); + cache = btrfs_lookup_first_block_group(root->fs_info, last); } + cache_miss = 0; cache = btrfs_find_block_group(root, cache, last, data, 0); if (!cache) goto no_cache; *cache_ret = cache; - cache_miss = 0; goto again; } @@ -303,28 +329,26 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_fs_info *info = root->fs_info; u64 used; u64 last = 0; - u64 hint_last; u64 start; u64 end; u64 free_check; u64 ptr; - u64 total_fs_bytes; int bit; int ret; int full_search = 0; int factor = 10; + int wrapped = 0; block_group_cache = &info->block_group_cache; - total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); if (data & BTRFS_BLOCK_GROUP_METADATA) factor = 9; bit = block_group_state_bits(data); - if (search_start && search_start < total_fs_bytes) { + if (search_start) { struct btrfs_block_group_cache *shint; - shint = btrfs_lookup_block_group(info, search_start); + shint = btrfs_lookup_first_block_group(info, search_start); if (shint && block_group_bits(shint, data) && !shint->ro) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < @@ -333,24 +357,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } } } - if (hint && !hint->ro && block_group_bits(hint, data) && - hint->key.objectid < total_fs_bytes) { + if (hint && !hint->ro && block_group_bits(hint, data)) { used = btrfs_block_group_used(&hint->item); if (used + hint->pinned < div_factor(hint->key.offset, factor)) { return hint; } last = hint->key.objectid + hint->key.offset; - hint_last = last; } else { if (hint) - hint_last = max(hint->key.objectid, search_start); + last = max(hint->key.objectid, search_start); else - hint_last = search_start; - - if (hint_last >= total_fs_bytes) - hint_last = search_start; - last = hint_last; + last = search_start; } again: while(1) { @@ -360,23 +378,17 @@ again: break; ret = get_state_private(block_group_cache, start, &ptr); - if (ret) - break; + if (ret) { + last = end + 1; + continue; + } cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); - if (cache->key.objectid > total_fs_bytes) - break; - if (!cache->ro && block_group_bits(cache, data)) { - if (full_search) - free_check = cache->key.offset; - else - free_check = div_factor(cache->key.offset, - factor); - + free_check = div_factor(cache->key.offset, factor); if (used + cache->pinned < free_check) { found_group = cache; goto found; @@ -384,9 +396,15 @@ again: } cond_resched(); } - if (!full_search) { + if (!wrapped) { + last = search_start; + wrapped = 1; + goto again; + } + if (!full_search && factor < 10) { last = search_start; full_search = 1; + factor = 10; goto again; } found: @@ -1070,6 +1088,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_used = bytes_used; found->bytes_pinned = 0; found->full = 0; + found->force_alloc = 0; *space_info = found; return 0; } @@ -1120,7 +1139,7 @@ static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags) static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, - u64 flags) + u64 flags, int force) { struct btrfs_space_info *space_info; u64 thresh; @@ -1138,11 +1157,16 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, } BUG_ON(!space_info); + if (space_info->force_alloc) { + force = 1; + space_info->force_alloc = 0; + } if (space_info->full) return 0; thresh = div_factor(space_info->total_bytes, 6); - if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < + if (!force && + (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < thresh) return 0; @@ -1152,7 +1176,6 @@ printk("space info full %Lu\n", flags); space_info->full = 1; return 0; } - BUG_ON(ret); ret = btrfs_make_block_group(trans, extent_root, 0, flags, @@ -1619,11 +1642,16 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; + int chunk_alloc_done = 0; int empty_cluster = 2 * 1024 * 1024; + int allowed_chunk_alloc = 0; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + if (orig_root->ref_cows || empty_size) + allowed_chunk_alloc = 1; + if (data & BTRFS_BLOCK_GROUP_METADATA) { last_ptr = &root->fs_info->last_alloc; empty_cluster = 256 * 1024; @@ -1648,7 +1676,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, search_end = btrfs_super_total_bytes(&info->super_copy); if (hint_byte) { - block_group = btrfs_lookup_block_group(info, hint_byte); + block_group = btrfs_lookup_first_block_group(info, hint_byte); if (!block_group) hint_byte = search_start; block_group = btrfs_find_block_group(root, block_group, @@ -1666,17 +1694,28 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, check_failed: if (!block_group) { - block_group = btrfs_lookup_block_group(info, search_start); + block_group = btrfs_lookup_first_block_group(info, + search_start); if (!block_group) - block_group = btrfs_lookup_block_group(info, + block_group = btrfs_lookup_first_block_group(info, orig_search_start); } + if (full_scan && !chunk_alloc_done) { + if (allowed_chunk_alloc) { + do_chunk_alloc(trans, root, + num_bytes + 2 * 1024 * 1024, data, 1); + allowed_chunk_alloc = 0; + } else if (block_group && block_group_bits(block_group, data)) { + block_group->space_info->force_alloc = 1; + } + chunk_alloc_done = 1; + } ret = find_search_start(root, &block_group, &search_start, total_needed, data); if (ret == -ENOSPC && last_ptr && *last_ptr) { *last_ptr = 0; - block_group = btrfs_lookup_block_group(info, - orig_search_start); + block_group = btrfs_lookup_first_block_group(info, + orig_search_start); search_start = orig_search_start; ret = find_search_start(root, &block_group, &search_start, total_needed, data); @@ -1692,7 +1731,7 @@ check_failed: empty_size += empty_cluster; total_needed += empty_size; } - block_group = btrfs_lookup_block_group(info, + block_group = btrfs_lookup_first_block_group(info, orig_search_start); search_start = orig_search_start; ret = find_search_start(root, &block_group, @@ -1765,7 +1804,7 @@ enospc: } else wrapped = 1; } - block_group = btrfs_lookup_block_group(info, search_start); + block_group = btrfs_lookup_first_block_group(info, search_start); cond_resched(); block_group = btrfs_find_block_group(root, block_group, search_start, data, 0); @@ -1819,17 +1858,21 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } again: data = reduce_alloc_profile(root, data); - if (root->ref_cows) { + /* + * the only place that sets empty_size is btrfs_realloc_node, which + * is not called recursively on allocations + */ + if (empty_size || root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, - 2 * 1024 * 1024, - BTRFS_BLOCK_GROUP_METADATA | - (info->metadata_alloc_profile & - info->avail_metadata_alloc_bits)); + 2 * 1024 * 1024, + BTRFS_BLOCK_GROUP_METADATA | + (info->metadata_alloc_profile & + info->avail_metadata_alloc_bits), 0); BUG_ON(ret); } ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data); + num_bytes + 2 * 1024 * 1024, data, 0); BUG_ON(ret); } @@ -1842,6 +1885,8 @@ again: if (ret == -ENOSPC && num_bytes > min_alloc_size) { num_bytes = num_bytes >> 1; num_bytes = max(num_bytes, min_alloc_size); + do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes, data, 1); goto again; } if (ret) { @@ -2537,7 +2582,11 @@ out: */ static int noinline relocate_one_reference(struct btrfs_root *extent_root, struct btrfs_path *path, - struct btrfs_key *extent_key) + struct btrfs_key *extent_key, + u64 *last_file_objectid, + u64 *last_file_offset, + u64 *last_file_root, + u64 last_extent) { struct inode *inode; struct btrfs_root *found_root; @@ -2576,6 +2625,12 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, found_key.offset = ref_offset; level = 0; + if (last_extent == extent_key->objectid && + *last_file_objectid == ref_objectid && + *last_file_offset == ref_offset && + *last_file_root == ref_root) + goto out; + ret = find_root_for_ref(extent_root, path, &found_key, level, 1, &found_root, extent_key->objectid); @@ -2583,6 +2638,12 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, if (ret) goto out; + if (last_extent == extent_key->objectid && + *last_file_objectid == ref_objectid && + *last_file_offset == ref_offset && + *last_file_root == ref_root) + goto out; + mutex_unlock(&extent_root->fs_info->fs_mutex); inode = btrfs_iget_locked(extent_root->fs_info->sb, ref_objectid, found_root); @@ -2603,6 +2664,10 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, mutex_lock(&extent_root->fs_info->fs_mutex); goto out; } + *last_file_objectid = inode->i_ino; + *last_file_root = found_root->root_key.objectid; + *last_file_offset = ref_offset; + relocate_inode_pages(inode, ref_offset, extent_key->offset); iput(inode); mutex_lock(&extent_root->fs_info->fs_mutex); @@ -2643,6 +2708,8 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, path->nodes[i] = NULL; } btrfs_release_path(found_root, path); + if (found_root == found_root->fs_info->extent_root) + btrfs_extent_post_op(trans, found_root); btrfs_end_transaction(trans, found_root); } @@ -2678,6 +2745,10 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; + u64 last_file_objectid = 0; + u64 last_file_root = 0; + u64 last_file_offset = (u64)-1; + u64 last_extent = 0; u32 nritems; u32 item_size; int ret = 0; @@ -2722,9 +2793,13 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, key.offset = found_key.offset + 1; item_size = btrfs_item_size_nr(leaf, path->slots[0]); - ret = relocate_one_reference(extent_root, path, extent_key); + ret = relocate_one_reference(extent_root, path, extent_key, + &last_file_objectid, + &last_file_offset, + &last_file_root, last_extent); if (ret) goto out; + last_extent = extent_key->objectid; } ret = 0; out: @@ -2770,6 +2845,32 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) return flags; } +int __alloc_chunk_for_shrink(struct btrfs_root *root, + struct btrfs_block_group_cache *shrink_block_group, + int force) +{ + struct btrfs_trans_handle *trans; + u64 new_alloc_flags; + u64 calc; + + if (btrfs_block_group_used(&shrink_block_group->item) > 0) { + + trans = btrfs_start_transaction(root, 1); + new_alloc_flags = update_block_group_flags(root, + shrink_block_group->flags); + if (new_alloc_flags != shrink_block_group->flags) { + calc = + btrfs_block_group_used(&shrink_block_group->item); + } else { + calc = shrink_block_group->key.offset; + } + do_chunk_alloc(trans, root->fs_info->extent_root, + calc + 2 * 1024 * 1024, new_alloc_flags, force); + btrfs_end_transaction(trans, root); + } + return 0; +} + int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) { struct btrfs_trans_handle *trans; @@ -2778,7 +2879,6 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) u64 cur_byte; u64 total_found; u64 shrink_last_byte; - u64 new_alloc_flags; struct btrfs_block_group_cache *shrink_block_group; struct btrfs_fs_info *info = root->fs_info; struct btrfs_key key; @@ -2792,7 +2892,8 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) shrink_start); BUG_ON(!shrink_block_group); - shrink_last_byte = shrink_start + shrink_block_group->key.offset; + shrink_last_byte = shrink_block_group->key.objectid + + shrink_block_group->key.offset; shrink_block_group->space_info->total_bytes -= shrink_block_group->key.offset; @@ -2804,23 +2905,10 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) (unsigned long long)shrink_start, (unsigned long long)shrink_block_group->flags); -again: - if (btrfs_block_group_used(&shrink_block_group->item) > 0) { - u64 calc; + __alloc_chunk_for_shrink(root, shrink_block_group, 1); + +again: - trans = btrfs_start_transaction(root, 1); - new_alloc_flags = update_block_group_flags(root, - shrink_block_group->flags); - if (new_alloc_flags != shrink_block_group->flags) { - calc = - btrfs_block_group_used(&shrink_block_group->item); - } else { - calc = shrink_block_group->key.offset; - } - do_chunk_alloc(trans, root->fs_info->extent_root, - calc + 2 * 1024 * 1024, new_alloc_flags); - btrfs_end_transaction(trans, root); - } shrink_block_group->ro = 1; total_found = 0; @@ -2888,6 +2976,8 @@ next: if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || found_key.objectid + found_key.offset <= cur_byte) { + memcpy(&key, &found_key, sizeof(key)); + key.offset++; path->slots[0]++; goto next; } @@ -2897,6 +2987,7 @@ next: key.objectid = cur_byte; btrfs_release_path(root, path); ret = relocate_one_extent(root, path, &found_key); + __alloc_chunk_for_shrink(root, shrink_block_group, 0); } btrfs_release_path(root, path); @@ -2930,20 +3021,27 @@ next: if (ret < 0) goto out; - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - kfree(shrink_block_group); - - clear_extent_bits(&info->block_group_cache, found_key.objectid, - found_key.objectid + found_key.offset - 1, + clear_extent_bits(&info->block_group_cache, key.objectid, + key.objectid + key.offset - 1, (unsigned int)-1, GFP_NOFS); + + clear_extent_bits(&info->free_space_cache, + key.objectid, key.objectid + key.offset - 1, + (unsigned int)-1, GFP_NOFS); + + memset(shrink_block_group, 0, sizeof(*shrink_block_group)); + kfree(shrink_block_group); + btrfs_del_item(trans, root, path); - clear_extent_dirty(&info->free_space_cache, - shrink_start, shrink_last_byte - 1, - GFP_NOFS); btrfs_commit_transaction(trans, root); + + /* the code to unpin extents might set a few bits in the free + * space cache for this range again + */ + clear_extent_bits(&info->free_space_cache, + key.objectid, key.objectid + key.offset - 1, + (unsigned int)-1, GFP_NOFS); out: btrfs_free_path(path); return ret; @@ -3081,9 +3179,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, BUG_ON(!cache); cache->key.objectid = chunk_offset; cache->key.offset = size; - btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); - memset(&cache->item, 0, sizeof(cache->item)); + btrfs_set_block_group_used(&cache->item, bytes_used); btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); cache->flags = type; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index c02e2bf2f028..155961c7b4d5 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -198,6 +198,13 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; } if (root->defrag_progress.objectid == 0) { + u32 nritems; + + nritems = btrfs_header_nritems(root->node); + root->defrag_max.objectid = 0; + /* from above we know this is not a leaf */ + btrfs_node_key_to_cpu(root->node, &root->defrag_max, + nritems - 1); extent_buffer_get(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); BUG_ON(ret); @@ -254,6 +261,16 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, out: if (path) btrfs_free_path(path); + if (ret == -EAGAIN) { + if (root->defrag_max.objectid > root->defrag_progress.objectid) + goto done; + if (root->defrag_max.type > root->defrag_progress.type) + goto done; + if (root->defrag_max.offset > root->defrag_progress.offset) + goto done; + ret = 0; + } +done: if (ret != -EAGAIN) { memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 333e5269577a..722eb4550154 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -750,10 +750,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, if (bdev == fs_devices->latest_bdev) fs_devices->latest_bdev = next_dev->bdev; - total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - btrfs_set_super_total_bytes(&root->fs_info->super_copy, - total_bytes - device->total_bytes); - total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); btrfs_set_super_num_devices(&root->fs_info->super_copy, total_bytes - 1); @@ -849,6 +845,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } root->fs_info->fs_devices->num_devices--; + root->fs_info->fs_devices->open_devices--; ret = btrfs_shrink_device(device, 0); if (ret) @@ -873,7 +870,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (device->bdev) { /* one close for the device struct or super_block */ close_bdev_excl(device->bdev); - root->fs_info->fs_devices->open_devices--; } if (bdev) { /* one close for us */ @@ -1450,7 +1446,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, return -ENOSPC; if (type & (BTRFS_BLOCK_GROUP_RAID0)) { - num_stripes = btrfs_super_num_devices(&info->super_copy); + num_stripes = extent_root->fs_info->fs_devices->open_devices; min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_DUP)) { @@ -1459,13 +1455,13 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { num_stripes = min_t(u64, 2, - btrfs_super_num_devices(&info->super_copy)); + extent_root->fs_info->fs_devices->open_devices); if (num_stripes < 2) return -ENOSPC; min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { - num_stripes = btrfs_super_num_devices(&info->super_copy); + num_stripes = extent_root->fs_info->fs_devices->open_devices; if (num_stripes < 4) return -ENOSPC; num_stripes &= ~(u32)1; From da496f2acf61153e0d4d42ded6a53f3367939db0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 May 2008 10:52:17 -0400 Subject: [PATCH 0549/2985] Btrfs: Always use the async submission queue for checksummed writes This avoids IO stalls and poorly ordered IO from inline writers mixing in with the async submission queue Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d8a256d3c83a..016c0c0226ed 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -434,13 +434,6 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, { struct async_submit_bio *async; - /* - * inline writerback should stay inline, only hop to the async - * queue if we're pdflush - */ - if (!current_is_pdflush()) - return submit_bio_hook(inode, rw, bio, mirror_num); - async = kmalloc(sizeof(*async), GFP_NOFS); if (!async) return -ENOMEM; From e1b81e6761bd8419146d4bbe0aadd8b2d348a01c Mon Sep 17 00:00:00 2001 From: Mingming Date: Tue, 27 May 2008 10:55:43 -0400 Subject: [PATCH 0550/2985] btrfs delete ordered inode handling fix Use btrfs_release_file instead of a put_inode call Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 +++++++ fs/btrfs/inode.c | 24 +----------------------- fs/btrfs/ordered-data.c | 21 ++++++++++++++------- fs/btrfs/ordered-data.h | 2 +- fs/btrfs/super.c | 1 - 5 files changed, 23 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d968900c6973..c4fa4664a456 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -978,6 +978,12 @@ out_nolock: return num_written ? num_written : err; } +static int btrfs_release_file (struct inode * inode, struct file * filp) +{ + btrfs_del_ordered_inode(inode); + return 0; +} + static int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) { @@ -1044,6 +1050,7 @@ struct file_operations btrfs_file_operations = { .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, + .release = btrfs_release_file, .fsync = btrfs_sync_file, .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 40b4a8ec17fe..1569fb864515 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -857,15 +857,11 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) nr = trans->blocks_used; if (inode->i_nlink == 0) { - int found; /* if the inode isn't linked anywhere, * we don't need to worry about * data=ordered */ - found = btrfs_del_ordered_inode(inode); - if (found == 1) { - atomic_dec(&inode->i_count); - } + btrfs_del_ordered_inode(inode); } btrfs_end_transaction(trans, root); @@ -1271,24 +1267,6 @@ fail: return err; } -void btrfs_put_inode(struct inode *inode) -{ - int ret; - - if (!BTRFS_I(inode)->ordered_trans) { - return; - } - - if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || - mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)) - return; - - ret = btrfs_del_ordered_inode(inode); - if (ret == 1) { - atomic_dec(&inode->i_count); - } -} - void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 3814cb0246b4..8dd8180183ff 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -231,7 +231,7 @@ int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, return 1; } -static int __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, +static void __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, struct inode *inode, u64 root_objectid, u64 objectid) { @@ -243,31 +243,38 @@ static int __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, node = __tree_search(&tree->tree, root_objectid, objectid, &prev); if (!node) { write_unlock(&tree->lock); - return 0; + return; } rb_erase(node, &tree->tree); BTRFS_I(inode)->ordered_trans = 0; write_unlock(&tree->lock); + atomic_dec(&inode->i_count); entry = rb_entry(node, struct tree_entry, rb_node); kfree(entry); - return 1; + return; } -int btrfs_del_ordered_inode(struct inode *inode) +void btrfs_del_ordered_inode(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 root_objectid = root->root_key.objectid; - int ret = 0; + + if (!BTRFS_I(inode)->ordered_trans) { + return; + } + + if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || + mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)) + return; spin_lock(&root->fs_info->new_trans_lock); if (root->fs_info->running_transaction) { struct btrfs_ordered_inode_tree *tree; tree = &root->fs_info->running_transaction->ordered_inode_tree; - ret = __btrfs_del_ordered_inode(tree, inode, root_objectid, + __btrfs_del_ordered_inode(tree, inode, root_objectid, inode->i_ino); } spin_unlock(&root->fs_info->new_trans_lock); - return ret; } int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode) diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 29047e0abaab..c515c4b39996 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -38,6 +38,6 @@ int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, u64 *root_objectid, u64 *objectid, struct inode **inode); -int btrfs_del_ordered_inode(struct inode *inode); +void btrfs_del_ordered_inode(struct inode *inode); int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode); #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 273a5b511f50..05029656e42e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -487,7 +487,6 @@ static void btrfs_unlockfs(struct super_block *sb) static struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, - .put_inode = btrfs_put_inode, .put_super = btrfs_put_super, .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, From 48ec2cf8738b0db53dc8361026cb7a328f7ad386 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 9 Jun 2008 09:35:50 -0400 Subject: [PATCH 0551/2985] Btrfs: Fix race in running_transaction checks When a new transaction was started, the code would incorrectly set the pointer in fs_info before all the data structures were setup. fsync heavy workloads hit races on the setup of the ordered inode spinlock Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 57746c11eae3..1cb084efd6ed 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -56,7 +56,6 @@ static noinline int join_transaction(struct btrfs_root *root) total_trans++; BUG_ON(!cur_trans); root->fs_info->generation++; - root->fs_info->running_transaction = cur_trans; root->fs_info->last_alloc = 0; root->fs_info->last_data_alloc = 0; cur_trans->num_writers = 1; @@ -74,6 +73,9 @@ static noinline int join_transaction(struct btrfs_root *root) extent_io_tree_init(&cur_trans->dirty_pages, root->fs_info->btree_inode->i_mapping, GFP_NOFS); + spin_lock(&root->fs_info->new_trans_lock); + root->fs_info->running_transaction = cur_trans; + spin_unlock(&root->fs_info->new_trans_lock); } else { cur_trans->num_writers++; cur_trans->num_joined++; From 3b96362cc8d314c935c335d5c3c42eb93c23166b Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Mon, 9 Jun 2008 21:57:42 -0400 Subject: [PATCH 0552/2985] Btrfs: Invalidate dcache entry after creating snapshot and We need to invalidate an existing dcache entry after creating a new snapshot or subvolume, because a negative dache entry will stop us from accessing the new snapshot or subvolume. --- ctree.h | 23 +++++++++++++++++++++++ inode.c | 4 ++++ transaction.c | 4 ++++ 3 files changed, 31 insertions(+) Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +++ fs/btrfs/inode.c | 29 +++++++++++++++++++++++++++++ fs/btrfs/transaction.c | 8 +++++++- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7b73a9c3d868..ad4eacca7f59 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1544,6 +1544,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, + int namelen); + int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1569fb864515..31aa4ba06fce 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2642,6 +2642,31 @@ static void btrfs_truncate(struct inode *inode) btrfs_throttle(root); } +/* + * Invalidate a single dcache entry at the root of the filesystem. + * Needed after creation of snapshot or subvolume. + */ +void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, + int namelen) +{ + struct dentry *alias, *entry; + struct qstr qstr; + + alias = d_find_alias(root->fs_info->sb->s_root->d_inode); + if (alias) { + qstr.name = name; + qstr.len = namelen; + /* change me if btrfs ever gets a d_hash operation */ + qstr.hash = full_name_hash(qstr.name, qstr.len); + entry = d_lookup(alias, &qstr); + dput(alias); + if (entry) { + d_invalidate(entry); + dput(entry); + } + } +} + static int noinline create_subvol(struct btrfs_root *root, char *name, int namelen) { @@ -2761,6 +2786,10 @@ static int noinline create_subvol(struct btrfs_root *root, char *name, ret = btrfs_update_inode(trans, new_root, inode); if (ret) goto fail; + + /* Invalidate existing dcache entry for new subvolume. */ + btrfs_invalidate_dcache_root(root, name, namelen); + fail: nr = trans->blocks_used; err = btrfs_commit_transaction(trans, new_root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1cb084efd6ed..f04684f7fea3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -560,6 +560,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root = pending->root; struct extent_buffer *tmp; int ret; + int namelen; u64 objectid; new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); @@ -595,8 +596,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, * insert the directory item */ key.offset = (u64)-1; + namelen = strlen(pending->name); ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - pending->name, strlen(pending->name), + pending->name, namelen, root->fs_info->sb->s_root->d_inode->i_ino, &key, BTRFS_FT_DIR); @@ -606,6 +608,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, pending->name, strlen(pending->name), objectid, root->fs_info->sb->s_root->d_inode->i_ino); + + /* Invalidate existing dcache entry for new snapshot. */ + btrfs_invalidate_dcache_root(root, pending->name, namelen); + fail: kfree(new_root_item); return ret; From f819d837eea9829ed6c356fb88cdd0a170cbd947 Mon Sep 17 00:00:00 2001 From: Linda Knippers Date: Mon, 9 Jun 2008 22:17:11 -0400 Subject: [PATCH 0553/2985] btrfsctl -A error code fixup Send the error back to userland if the ioctl fails Signed-off-by: Chris Mason --- fs/btrfs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 05029656e42e..77f44494e229 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -453,7 +453,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, { struct btrfs_ioctl_vol_args *vol; struct btrfs_fs_devices *fs_devices; - int ret; + int ret = 0; int len; vol = kmalloc(sizeof(*vol), GFP_KERNEL); @@ -470,7 +470,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, } out: kfree(vol); - return 0; + return ret; } static void btrfs_write_super_lockfs(struct super_block *sb) From 51ebc0d3d5cd1a2728068499a8fc1bdb0a581ab5 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 9 Jun 2008 22:19:40 -0400 Subject: [PATCH 0554/2985] Btrfs: bdi_init and bdi_destroy come with 2.6.23 Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 016c0c0226ed..3805e7eab82d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -984,7 +984,7 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_init(bdi); #endif bdi->ra_pages = default_backing_dev_info.ra_pages; @@ -1378,7 +1378,7 @@ fail: kfree(extent_root); kfree(tree_root); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); #endif kfree(fs_info); @@ -1645,7 +1645,7 @@ int close_ctree(struct btrfs_root *root) btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); #endif From eba12c7bfcb4855fc757357e5e5b0b9a474499ba Mon Sep 17 00:00:00 2001 From: Yan Date: Mon, 9 Jun 2008 22:21:46 -0400 Subject: [PATCH 0555/2985] Btrfs: Dislable acl xattr handlers The acl code is not yet complete, and the xattr handlers are causing problems for cp -p on some distros. Signed-off-by: Chris Mason --- fs/btrfs/xattr.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 0a4950400f85..f63488dc2f16 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -29,22 +29,22 @@ static struct xattr_handler *btrfs_xattr_handler_map[] = { [BTRFS_XATTR_INDEX_USER] = &btrfs_xattr_user_handler, #ifdef CONFIG_FS_POSIX_ACL - [BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &btrfs_xattr_acl_access_handler, - [BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &btrfs_xattr_acl_default_handler, +// [BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &btrfs_xattr_acl_access_handler, +// [BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &btrfs_xattr_acl_default_handler, #endif [BTRFS_XATTR_INDEX_TRUSTED] = &btrfs_xattr_trusted_handler, [BTRFS_XATTR_INDEX_SECURITY] = &btrfs_xattr_security_handler, - [BTRFS_XATTR_INDEX_SYSTEM] = &btrfs_xattr_system_handler, +// [BTRFS_XATTR_INDEX_SYSTEM] = &btrfs_xattr_system_handler, }; struct xattr_handler *btrfs_xattr_handlers[] = { &btrfs_xattr_user_handler, #ifdef CONFIG_FS_POSIX_ACL - &btrfs_xattr_acl_access_handler, - &btrfs_xattr_acl_default_handler, +// &btrfs_xattr_acl_access_handler, +// &btrfs_xattr_acl_default_handler, #endif &btrfs_xattr_trusted_handler, &btrfs_xattr_security_handler, - &btrfs_xattr_system_handler, +// &btrfs_xattr_system_handler, NULL, }; From 6bf13c0cc833bf5ba013d6aa60379484bf48c4e6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 10 Jun 2008 10:07:39 -0400 Subject: [PATCH 0556/2985] Btrfs: transaction ioctls These ioctls let a user application hold a transaction open while it performs a series of operations. A final ioctl does a sync on the fs (closing the current transaction). This is the main requirement for Ceph's OSD to be able to keep the data it's storing in a btrfs volume consistent, and AFAICS it works just fine. The application would do something like fd = ::open("some/file", O_RDONLY); ::ioctl(fd, BTRFS_IOC_TRANS_START); /* do a bunch of stuff */ ::ioctl(fd, BTRFS_IOC_TRANS_END); or just ::close(fd); And to ensure it commits to disk, ::ioctl(fd, BTRFS_IOC_SYNC); When a transaction is held open, the trans_handle is attached to the struct file (via private_data) so that it will get cleaned up if the process dies unexpectedly. A held transaction is also ended on fsync() to avoid a deadlock. A misbehaving application could also deliberately hold a transaction open, effectively locking up the FS, so it may make sense to restrict something like this to root or something. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 +++ fs/btrfs/file.c | 7 +++++- fs/btrfs/inode.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ioctl.h | 9 +++++++ fs/btrfs/super.c | 2 +- 5 files changed, 83 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ad4eacca7f59..1dcf4fb5b688 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1575,6 +1575,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_init_cachep(void); void btrfs_destroy_cachep(void); +long btrfs_ioctl_trans_end(struct file *file); long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); @@ -1595,6 +1596,8 @@ extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 inline_limit, u64 *hint_block); +int btrfs_release_file(struct inode *inode, struct file *file); + /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only); @@ -1615,4 +1618,5 @@ int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, u64 btrfs_parse_size(char *str); int btrfs_parse_options(char *options, struct btrfs_root *root, char **subvol_name); +int btrfs_sync_fs(struct super_block *sb, int wait); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c4fa4664a456..73c6d085bd90 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -978,9 +978,11 @@ out_nolock: return num_written ? num_written : err; } -static int btrfs_release_file (struct inode * inode, struct file * filp) +int btrfs_release_file(struct inode * inode, struct file * filp) { btrfs_del_ordered_inode(inode); + if (filp->private_data) + btrfs_ioctl_trans_end(filp); return 0; } @@ -1011,6 +1013,9 @@ static int btrfs_sync_file(struct file *file, /* * ok we haven't committed the transaction yet, lets do a commit */ + if (file->private_data) + btrfs_ioctl_trans_end(file); + trans = btrfs_start_transaction(root, 1); if (!trans) { ret = -ENOMEM; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 31aa4ba06fce..0f14697becef 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3336,6 +3336,61 @@ out_fput: return ret; } +/* + * there are many ways the trans_start and trans_end ioctls can lead + * to deadlocks. They should only be used by applications that + * basically own the machine, and have a very in depth understanding + * of all the possible deadlocks and enospc problems. + */ +long btrfs_ioctl_trans_start(struct file *file) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + int ret = 0; + + mutex_lock(&root->fs_info->fs_mutex); + if (file->private_data) { + ret = -EINPROGRESS; + goto out; + } + trans = btrfs_start_transaction(root, 0); + if (trans) + file->private_data = trans; + else + ret = -ENOMEM; + /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ +out: + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +/* + * there are many ways the trans_start and trans_end ioctls can lead + * to deadlocks. They should only be used by applications that + * basically own the machine, and have a very in depth understanding + * of all the possible deadlocks and enospc problems. + */ +long btrfs_ioctl_trans_end(struct file *file) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + int ret = 0; + + mutex_lock(&root->fs_info->fs_mutex); + trans = file->private_data; + if (!trans) { + ret = -EINVAL; + goto out; + } + btrfs_end_transaction(trans, root); + file->private_data = 0; +out: + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3356,6 +3411,13 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_balance(root->fs_info->dev_root); case BTRFS_IOC_CLONE: return btrfs_ioctl_clone(file, arg); + case BTRFS_IOC_TRANS_START: + return btrfs_ioctl_trans_start(file); + case BTRFS_IOC_TRANS_END: + return btrfs_ioctl_trans_end(file); + case BTRFS_IOC_SYNC: + btrfs_sync_fs(file->f_dentry->d_sb, 1); + return 0; } return -ENOTTY; @@ -3679,6 +3741,7 @@ static struct file_operations btrfs_dir_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_ioctl, #endif + .release = btrfs_release_file, }; static struct extent_io_ops btrfs_extent_io_ops = { diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index b0e73f51d636..85ed35a775b1 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -36,6 +36,14 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ struct btrfs_ioctl_vol_args) +/* trans start and trans end are dangerous, and only for + * use by applications that know how to avoid the + * resulting deadlocks + */ +#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) +#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) +#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) + #define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) #define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ struct btrfs_ioctl_vol_args) @@ -43,4 +51,5 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ struct btrfs_ioctl_vol_args) + #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 77f44494e229..39bb86945ed0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -293,7 +293,7 @@ fail_close: return err; } -static int btrfs_sync_fs(struct super_block *sb, int wait) +int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; struct btrfs_root *root; From 95c9eb178b96474e9d896f20f982818c5f2ceaa0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2008 10:20:57 -0400 Subject: [PATCH 0557/2985] btrfs: tiny makefile cleanup use normal kbuild syntax to build acl.o conditinally and remove comment out lines. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 5ac6f2946c21..e5fc3cfea0ab 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -8,13 +8,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o -ifeq ($(CONFIG_FS_POSIX_ACL),y) -btrfs-y += acl.o -endif -#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ -# root-tree.o dir-item.o hash.o file-item.o inode-item.o \ -# inode-map.o \ - +btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o else # Normal Makefile From 306929f364b993581c91596230807fa1c022268a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2008 10:21:04 -0400 Subject: [PATCH 0558/2985] btrfs: fix strange indentation in lookup_extent_mapping Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 268ad8facf6e..ba46f7911d99 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -231,7 +231,13 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, { struct extent_map *em; struct rb_node *rb_node; - struct rb_node *prev = NULL; struct rb_node *next = NULL; u64 end = range_end(start, len); em = tree->last; if (em && end > em->start && start < extent_map_end(em)) goto found; + struct rb_node *prev = NULL; + struct rb_node *next = NULL; + u64 end = range_end(start, len); + + em = tree->last; + if (em && end > em->start && start < extent_map_end(em)) + goto found; rb_node = __tree_search(&tree->map, start, &prev, &next); if (!rb_node && prev) { From edf24abe51493ccda384644d487fe2f796ac21c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2008 10:40:29 -0400 Subject: [PATCH 0559/2985] btrfs: sanity mount option parsing and early mount code Also adds lots of comments to describe what's going on here. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +- fs/btrfs/disk-io.c | 5 +- fs/btrfs/super.c | 237 +++++++++++++++++++++++++-------------------- 3 files changed, 139 insertions(+), 106 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1dcf4fb5b688..49cbc62b42f9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1616,7 +1616,6 @@ int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); /* super.c */ u64 btrfs_parse_size(char *str); -int btrfs_parse_options(char *options, struct btrfs_root *root, - char **subvol_name); +int btrfs_parse_options(struct btrfs_root *root, char *options); int btrfs_sync_fs(struct super_block *sb, int wait); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3805e7eab82d..b9a53646ceb2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1266,8 +1266,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; - btrfs_parse_options(options, tree_root, NULL); + err = btrfs_parse_options(tree_root, options); + if (err) + goto fail_sb_buffer; + err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", (unsigned long long)btrfs_super_num_devices(disk_super), diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 39bb86945ed0..288300fa5848 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -108,15 +108,18 @@ u64 btrfs_parse_size(char *str) return res; } -int btrfs_parse_options(char *options, struct btrfs_root *root, - char **subvol_name) +/* + * Regular mount options parser. Everything that is needed only when + * reading in a new superblock is parsed here. + */ +int btrfs_parse_options(struct btrfs_root *root, char *options) { - char * p; - struct btrfs_fs_info *info = NULL; + struct btrfs_fs_info *info = root->fs_info; substring_t args[MAX_OPT_ARGS]; + char *p, *num; if (!options) - return 1; + return 0; /* * strsep changes the string, duplicate it because parse_options @@ -126,10 +129,8 @@ int btrfs_parse_options(char *options, struct btrfs_root *root, if (!options) return -ENOMEM; - if (root) - info = root->fs_info; - while ((p = strsep (&options, ",")) != NULL) { + while ((p = strsep(&options, ",")) != NULL) { int token; if (!*p) continue; @@ -137,83 +138,64 @@ int btrfs_parse_options(char *options, struct btrfs_root *root, token = match_token(p, tokens, args); switch (token) { case Opt_degraded: - if (info) { - printk("btrfs: allowing degraded mounts\n"); - btrfs_set_opt(info->mount_opt, DEGRADED); - } + printk(KERN_INFO "btrfs: allowing degraded mounts\n"); + btrfs_set_opt(info->mount_opt, DEGRADED); break; case Opt_subvol: - if (subvol_name) { - *subvol_name = match_strdup(&args[0]); - } + /* + * This one is parsed by btrfs_parse_early_options + * and can be happily ignored here. + */ break; case Opt_nodatasum: - if (info) { - printk("btrfs: setting nodatacsum\n"); - btrfs_set_opt(info->mount_opt, NODATASUM); - } + printk(KERN_INFO "btrfs: setting nodatacsum\n"); + btrfs_set_opt(info->mount_opt, NODATASUM); break; case Opt_nodatacow: - if (info) { - printk("btrfs: setting nodatacow\n"); - btrfs_set_opt(info->mount_opt, NODATACOW); - btrfs_set_opt(info->mount_opt, NODATASUM); - } + printk(KERN_INFO "btrfs: setting nodatacow\n"); + btrfs_set_opt(info->mount_opt, NODATACOW); + btrfs_set_opt(info->mount_opt, NODATASUM); break; case Opt_ssd: - if (info) { - printk("btrfs: use ssd allocation scheme\n"); - btrfs_set_opt(info->mount_opt, SSD); - } + printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); + btrfs_set_opt(info->mount_opt, SSD); break; case Opt_nobarrier: - if (info) { - printk("btrfs: turning off barriers\n"); - btrfs_set_opt(info->mount_opt, NOBARRIER); - } + printk(KERN_INFO "btrfs: turning off barriers\n"); + btrfs_set_opt(info->mount_opt, NOBARRIER); break; case Opt_max_extent: - if (info) { - char *num = match_strdup(&args[0]); - if (num) { - info->max_extent = - btrfs_parse_size(num); - kfree(num); + num = match_strdup(&args[0]); + if (num) { + info->max_extent = btrfs_parse_size(num); + kfree(num); - info->max_extent = max_t(u64, - info->max_extent, - root->sectorsize); - printk("btrfs: max_extent at %Lu\n", - info->max_extent); - } + info->max_extent = max_t(u64, + info->max_extent, root->sectorsize); + printk(KERN_INFO "btrfs: max_extent at %llu\n", + info->max_extent); } break; case Opt_max_inline: - if (info) { - char *num = match_strdup(&args[0]); - if (num) { - info->max_inline = - btrfs_parse_size(num); - kfree(num); + num = match_strdup(&args[0]); + if (num) { + info->max_inline = btrfs_parse_size(num); + kfree(num); - info->max_inline = max_t(u64, - info->max_inline, - root->sectorsize); - printk("btrfs: max_inline at %Lu\n", - info->max_inline); - } + info->max_inline = max_t(u64, + info->max_inline, root->sectorsize); + printk(KERN_INFO "btrfs: max_inline at %llu\n", + info->max_inline); } break; case Opt_alloc_start: - if (info) { - char *num = match_strdup(&args[0]); - if (num) { - info->alloc_start = - btrfs_parse_size(num); - kfree(num); - printk("btrfs: allocations start at " - "%Lu\n", info->alloc_start); - } + num = match_strdup(&args[0]); + if (num) { + info->alloc_start = btrfs_parse_size(num); + kfree(num); + printk(KERN_INFO + "btrfs: allocations start at %llu\n", + info->alloc_start); } break; default: @@ -221,7 +203,61 @@ int btrfs_parse_options(char *options, struct btrfs_root *root, } } kfree(options); - return 1; + return 0; +} + +/* + * Parse mount options that are required early in the mount process. + * + * All other options will be parsed on much later in the mount process and + * only when we need to allocate a new super block. + */ +static int btrfs_parse_early_options(const char *options, + char **subvol_name) +{ + substring_t args[MAX_OPT_ARGS]; + char *opts, *p; + int error = 0; + + if (!options) + goto out; + + /* + * strsep changes the string, duplicate it because parse_options + * gets called twice + */ + opts = kstrdup(options, GFP_KERNEL); + if (!opts) + return -ENOMEM; + + while ((p = strsep(&opts, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_subvol: + *subvol_name = match_strdup(&args[0]); + break; + default: + break; + } + } + + kfree(opts); + out: + /* + * If no subvolume name is specified we use the default one. Allocate + * a copy of the string "default" here so that code later in the + * mount path doesn't care if it's the default volume or another one. + */ + if (!*subvol_name) { + *subvol_name = kstrdup("default", GFP_KERNEL); + if (!*subvol_name) + return -ENOMEM; + } + return error; } static int btrfs_fill_super(struct super_block * sb, @@ -328,23 +364,33 @@ static int btrfs_test_super(struct super_block *s, void *data) return root->fs_info->fs_devices == test_fs_devices; } -int btrfs_get_sb_bdev(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt, const char *subvol) +/* + * Find a superblock for the given device / mount point. + * + * Note: This is based on get_sb_bdev from fs/super.c with a few additions + * for multiple device setup. Make sure to keep it in sync. + */ +static int btrfs_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) { + char *subvol_name = NULL; struct block_device *bdev = NULL; struct super_block *s; struct dentry *root; struct btrfs_fs_devices *fs_devices = NULL; int error = 0; + error = btrfs_parse_early_options(data, &subvol_name); + if (error) + goto error; + error = btrfs_scan_one_device(dev_name, flags, fs_type, &fs_devices); if (error) - return error; + goto error_free_subvol_name; error = btrfs_open_devices(fs_devices, flags, fs_type); if (error) - return error; + goto error_free_subvol_name; bdev = fs_devices->latest_bdev; btrfs_lock_volumes(); @@ -378,51 +424,36 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, s->s_flags |= MS_ACTIVE; } - if (subvol) { - root = lookup_one_len(subvol, s->s_root, strlen(subvol)); - if (IS_ERR(root)) { - up_write(&s->s_umount); - deactivate_super(s); - error = PTR_ERR(root); - goto error; - } - if (!root->d_inode) { - dput(root); - up_write(&s->s_umount); - deactivate_super(s); - error = -ENXIO; - goto error; - } - } else { - root = dget(s->s_root); + root = lookup_one_len(subvol_name, s->s_root, strlen(subvol_name)); + if (IS_ERR(root)) { + up_write(&s->s_umount); + deactivate_super(s); + error = PTR_ERR(root); + goto error; + } + if (!root->d_inode) { + dput(root); + up_write(&s->s_umount); + deactivate_super(s); + error = -ENXIO; + goto error; } mnt->mnt_sb = s; mnt->mnt_root = root; + + kfree(subvol_name); return 0; error_s: error = PTR_ERR(s); error_bdev: btrfs_close_devices(fs_devices); +error_free_subvol_name: + kfree(subvol_name); error: return error; } -/* end copy & paste */ - -static int btrfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) -{ - int ret; - char *subvol_name = NULL; - - btrfs_parse_options((char *)data, NULL, &subvol_name); - ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, mnt, - subvol_name ? subvol_name : "default"); - if (subvol_name) - kfree(subvol_name); - return ret; -} static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { From 43e570b08a6c6b1d75f218566a6240542a386fd9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2008 10:40:46 -0400 Subject: [PATCH 0560/2985] btrfs: allow scanning multiple devices during mount Allows to specify one or multiple device=/dev/foo options during mount so that ioctls on the control device can be avoided. Especially useful when trying to mount a multi-device setup as root. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/super.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 288300fa5848..346932e546ba 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -65,7 +65,7 @@ static void btrfs_put_super (struct super_block * sb) } enum { - Opt_degraded, Opt_subvol, Opt_nodatasum, Opt_nodatacow, + Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_err, }; @@ -73,6 +73,7 @@ enum { static match_table_t tokens = { {Opt_degraded, "degraded"}, {Opt_subvol, "subvol=%s"}, + {Opt_device, "device=%s"}, {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, {Opt_nobarrier, "nobarrier"}, @@ -142,8 +143,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, DEGRADED); break; case Opt_subvol: + case Opt_device: /* - * This one is parsed by btrfs_parse_early_options + * These are parsed by btrfs_parse_early_options * and can be happily ignored here. */ break; @@ -212,8 +214,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) * All other options will be parsed on much later in the mount process and * only when we need to allocate a new super block. */ -static int btrfs_parse_early_options(const char *options, - char **subvol_name) +static int btrfs_parse_early_options(const char *options, int flags, + void *holder, char **subvol_name, + struct btrfs_fs_devices **fs_devices) { substring_t args[MAX_OPT_ARGS]; char *opts, *p; @@ -240,11 +243,18 @@ static int btrfs_parse_early_options(const char *options, case Opt_subvol: *subvol_name = match_strdup(&args[0]); break; + case Opt_device: + error = btrfs_scan_one_device(match_strdup(&args[0]), + flags, holder, fs_devices); + if (error) + goto out_free_opts; + break; default: break; } } + out_free_opts: kfree(opts); out: /* @@ -380,7 +390,8 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, struct btrfs_fs_devices *fs_devices = NULL; int error = 0; - error = btrfs_parse_early_options(data, &subvol_name); + error = btrfs_parse_early_options(data, flags, fs_type, + &subvol_name, &fs_devices); if (error) goto error; From 8b7128429235d9bd72cfd5ed20c77c4f3118f744 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Jun 2008 16:50:36 -0400 Subject: [PATCH 0561/2985] Btrfs: Add async worker threads for pre and post IO checksumming Btrfs has been using workqueues to spread the checksumming load across other CPUs in the system. But, workqueues only schedule work on the same CPU that queued the work, giving them a limited benefit for systems with higher CPU counts. This code adds a generic facility to schedule work with pools of kthreads, and changes the bio submission code to queue bios up. The queueing is important to make sure large numbers of procs on the system don't turn streaming workloads into random workloads by sending IO down concurrently. The end result of all of this is much higher performance (and CPU usage) when doing checksumming on large machines. Two worker pools are created, one for writes and one for endio processing. The two could deadlock if we tried to service both from a single pool. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/async-thread.c | 288 ++++++++++++++++++++++++++++++++++++++++ fs/btrfs/async-thread.h | 78 +++++++++++ fs/btrfs/ctree.h | 14 +- fs/btrfs/disk-io.c | 198 +++++++++++---------------- fs/btrfs/inode.c | 4 +- fs/btrfs/volumes.c | 162 +++++++++++++++++++++- fs/btrfs/volumes.h | 10 +- 8 files changed, 625 insertions(+), 131 deletions(-) create mode 100644 fs/btrfs/async-thread.c create mode 100644 fs/btrfs/async-thread.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index e5fc3cfea0ab..9dcfc2fe3332 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,7 +6,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ - extent_io.o volumes.o + extent_io.o volumes.o async-thread.o btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o else diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c new file mode 100644 index 000000000000..2911b67bd6f7 --- /dev/null +++ b/fs/btrfs/async-thread.c @@ -0,0 +1,288 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include +#include +#include +#include "async-thread.h" + +/* + * container for the kthread task pointer and the list of pending work + * One of these is allocated per thread. + */ +struct btrfs_worker_thread { + /* list of struct btrfs_work that are waiting for service */ + struct list_head pending; + + /* list of worker threads from struct btrfs_workers */ + struct list_head worker_list; + + /* kthread */ + struct task_struct *task; + + /* number of things on the pending list */ + atomic_t num_pending; + + /* protects the pending list. */ + spinlock_t lock; + + /* set to non-zero when this thread is already awake and kicking */ + int working; +}; + +/* + * main loop for servicing work items + */ +static int worker_loop(void *arg) +{ + struct btrfs_worker_thread *worker = arg; + struct list_head *cur; + struct btrfs_work *work; + do { + spin_lock_irq(&worker->lock); + while(!list_empty(&worker->pending)) { + cur = worker->pending.next; + work = list_entry(cur, struct btrfs_work, list); + list_del(&work->list); + clear_bit(0, &work->flags); + + work->worker = worker; + spin_unlock_irq(&worker->lock); + + work->func(work); + + atomic_dec(&worker->num_pending); + spin_lock_irq(&worker->lock); + } + worker->working = 0; + if (freezing(current)) { + refrigerator(); + } else { + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_irq(&worker->lock); + schedule(); + __set_current_state(TASK_RUNNING); + } + } while (!kthread_should_stop()); + return 0; +} + +/* + * this will wait for all the worker threads to shutdown + */ +int btrfs_stop_workers(struct btrfs_workers *workers) +{ + struct list_head *cur; + struct btrfs_worker_thread *worker; + + while(!list_empty(&workers->worker_list)) { + cur = workers->worker_list.next; + worker = list_entry(cur, struct btrfs_worker_thread, + worker_list); + kthread_stop(worker->task); + list_del(&worker->worker_list); + kfree(worker); + } + return 0; +} + +/* + * simple init on struct btrfs_workers + */ +void btrfs_init_workers(struct btrfs_workers *workers, int max) +{ + workers->num_workers = 0; + INIT_LIST_HEAD(&workers->worker_list); + workers->last = NULL; + spin_lock_init(&workers->lock); + workers->max_workers = max; +} + +/* + * starts new worker threads. This does not enforce the max worker + * count in case you need to temporarily go past it. + */ +int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) +{ + struct btrfs_worker_thread *worker; + int ret = 0; + int i; + + for (i = 0; i < num_workers; i++) { + worker = kzalloc(sizeof(*worker), GFP_NOFS); + if (!worker) { + ret = -ENOMEM; + goto fail; + } + + INIT_LIST_HEAD(&worker->pending); + INIT_LIST_HEAD(&worker->worker_list); + spin_lock_init(&worker->lock); + atomic_set(&worker->num_pending, 0); + worker->task = kthread_run(worker_loop, worker, "btrfs"); + if (IS_ERR(worker->task)) { + ret = PTR_ERR(worker->task); + goto fail; + } + + spin_lock_irq(&workers->lock); + list_add_tail(&worker->worker_list, &workers->worker_list); + workers->last = worker; + workers->num_workers++; + spin_unlock_irq(&workers->lock); + } + return 0; +fail: + btrfs_stop_workers(workers); + return ret; +} + +/* + * run through the list and find a worker thread that doesn't have a lot + * to do right now. This can return null if we aren't yet at the thread + * count limit and all of the threads are busy. + */ +static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) +{ + struct btrfs_worker_thread *worker; + struct list_head *next; + struct list_head *start; + int enforce_min = workers->num_workers < workers->max_workers; + + /* start with the last thread if it isn't busy */ + worker = workers->last; + if (atomic_read(&worker->num_pending) < 64) + goto done; + + next = worker->worker_list.next; + start = &worker->worker_list; + + /* + * check all the workers for someone that is bored. FIXME, do + * something smart here + */ + while(next != start) { + if (next == &workers->worker_list) { + next = workers->worker_list.next; + continue; + } + worker = list_entry(next, struct btrfs_worker_thread, + worker_list); + if (atomic_read(&worker->num_pending) < 64 || !enforce_min) + goto done; + next = next->next; + } + /* + * nobody was bored, if we're already at the max thread count, + * use the last thread + */ + if (!enforce_min || atomic_read(&workers->last->num_pending) < 64) { + return workers->last; + } + return NULL; +done: + workers->last = worker; + return worker; +} + +static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) +{ + struct btrfs_worker_thread *worker; + unsigned long flags; + +again: + spin_lock_irqsave(&workers->lock, flags); + worker = next_worker(workers); + spin_unlock_irqrestore(&workers->lock, flags); + + if (!worker) { + spin_lock_irqsave(&workers->lock, flags); + if (workers->num_workers >= workers->max_workers) { + /* + * we have failed to find any workers, just + * return the force one + */ + worker = list_entry(workers->worker_list.next, + struct btrfs_worker_thread, worker_list); + spin_unlock_irqrestore(&workers->lock, flags); + } else { + spin_unlock_irqrestore(&workers->lock, flags); + /* we're below the limit, start another worker */ + btrfs_start_workers(workers, 1); + goto again; + } + } + return worker; +} + +/* + * btrfs_requeue_work just puts the work item back on the tail of the list + * it was taken from. It is intended for use with long running work functions + * that make some progress and want to give the cpu up for others. + */ +int btrfs_requeue_work(struct btrfs_work *work) +{ + struct btrfs_worker_thread *worker = work->worker; + unsigned long flags; + + if (test_and_set_bit(0, &work->flags)) + goto out; + + spin_lock_irqsave(&worker->lock, flags); + atomic_inc(&worker->num_pending); + list_add_tail(&work->list, &worker->pending); + spin_unlock_irqrestore(&worker->lock, flags); +out: + return 0; +} + +/* + * places a struct btrfs_work into the pending queue of one of the kthreads + */ +int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) +{ + struct btrfs_worker_thread *worker; + unsigned long flags; + int wake = 0; + + /* don't requeue something already on a list */ + if (test_and_set_bit(0, &work->flags)) + goto out; + + worker = find_worker(workers); + + spin_lock_irqsave(&worker->lock, flags); + atomic_inc(&worker->num_pending); + list_add_tail(&work->list, &worker->pending); + + /* + * avoid calling into wake_up_process if this thread has already + * been kicked + */ + if (!worker->working) + wake = 1; + worker->working = 1; + + spin_unlock_irqrestore(&worker->lock, flags); + + if (wake) + wake_up_process(worker->task); +out: + return 0; +} diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h new file mode 100644 index 000000000000..52fc9da0f9e7 --- /dev/null +++ b/fs/btrfs/async-thread.h @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_ASYNC_THREAD_ +#define __BTRFS_ASYNC_THREAD_ + +struct btrfs_worker_thread; + +/* + * This is similar to a workqueue, but it is meant to spread the operations + * across all available cpus instead of just the CPU that was used to + * queue the work. There is also some batching introduced to try and + * cut down on context switches. + * + * By default threads are added on demand up to 2 * the number of cpus. + * Changing struct btrfs_workers->max_workers is one way to prevent + * demand creation of kthreads. + * + * the basic model of these worker threads is to embed a btrfs_work + * structure in your own data struct, and use container_of in a + * work function to get back to your data struct. + */ +struct btrfs_work { + /* + * only func should be set to the function you want called + * your work struct is passed as the only arg + */ + void (*func)(struct btrfs_work *work); + + /* + * flags should be set to zero. It is used to make sure the + * struct is only inserted once into the list. + */ + unsigned long flags; + + /* don't touch these */ + struct btrfs_worker_thread *worker; + struct list_head list; +}; + +struct btrfs_workers { + /* current number of running workers */ + int num_workers; + + /* max number of workers allowed. changed by btrfs_start_workers */ + int max_workers; + + /* list with all the work threads */ + struct list_head worker_list; + + /* the last worker thread to have something queued */ + struct btrfs_worker_thread *last; + + /* lock for finding the next worker thread to queue on */ + spinlock_t lock; +}; + +int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); +int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); +int btrfs_stop_workers(struct btrfs_workers *workers); +void btrfs_init_workers(struct btrfs_workers *workers, int max); +int btrfs_requeue_work(struct btrfs_work *work); +#endif diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 49cbc62b42f9..6c91a510c965 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -30,6 +30,7 @@ #include "bit-radix.h" #include "extent_io.h" #include "extent_map.h" +#include "async-thread.h" struct btrfs_trans_handle; struct btrfs_transaction; @@ -518,13 +519,20 @@ struct btrfs_fs_info { struct list_head hashers; struct list_head dead_roots; struct list_head end_io_work_list; - struct list_head async_submit_work_list; struct work_struct end_io_work; - struct work_struct async_submit_work; spinlock_t end_io_work_lock; - spinlock_t async_submit_work_lock; atomic_t nr_async_submits; + /* + * there is a pool of worker threads for checksumming during writes + * and a pool for checksumming after reads. This is because readers + * can run with FS locks held, and the writers may be waiting for + * those locks. We don't want ordering in the pending list to cause + * deadlocks, and so the two are serviced separately. + */ + struct btrfs_workers workers; + struct btrfs_workers endio_workers; + #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) struct work_struct trans_work; #else diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b9a53646ceb2..98ff4fbcb386 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -31,6 +31,7 @@ #include "btrfs_inode.h" #include "volumes.h" #include "print-tree.h" +#include "async-thread.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -46,8 +47,7 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) #endif static struct extent_io_ops btree_extent_io_ops; -static struct workqueue_struct *end_io_workqueue; -static struct workqueue_struct *async_submit_workqueue; +static void end_workqueue_fn(struct btrfs_work *work); struct end_io_wq { struct bio *bio; @@ -57,6 +57,7 @@ struct end_io_wq { int error; int metadata; struct list_head list; + struct btrfs_work work; }; struct async_submit_bio { @@ -66,6 +67,7 @@ struct async_submit_bio { extent_submit_bio_hook_t *submit_bio_hook; int rw; int mirror_num; + struct btrfs_work work; }; struct extent_map *btree_get_extent(struct inode *inode, struct page *page, @@ -389,7 +391,6 @@ static int end_workqueue_bio(struct bio *bio, { struct end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; - unsigned long flags; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) if (bio->bi_size) @@ -397,11 +398,10 @@ static int end_workqueue_bio(struct bio *bio, #endif fs_info = end_io_wq->info; - spin_lock_irqsave(&fs_info->end_io_work_lock, flags); end_io_wq->error = err; - list_add_tail(&end_io_wq->list, &fs_info->end_io_work_list); - spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags); - queue_work(end_io_workqueue, &fs_info->end_io_work); + end_io_wq->work.func = end_workqueue_fn; + end_io_wq->work.flags = 0; + btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; @@ -428,6 +428,19 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } +static void run_one_async_submit(struct btrfs_work *work) +{ + struct btrfs_fs_info *fs_info; + struct async_submit_bio *async; + + async = container_of(work, struct async_submit_bio, work); + fs_info = BTRFS_I(async->inode)->root->fs_info; + atomic_dec(&fs_info->nr_async_submits); + async->submit_bio_hook(async->inode, async->rw, async->bio, + async->mirror_num); + kfree(async); +} + int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, int rw, struct bio *bio, int mirror_num, extent_submit_bio_hook_t *submit_bio_hook) @@ -443,13 +456,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->bio = bio; async->mirror_num = mirror_num; async->submit_bio_hook = submit_bio_hook; - - spin_lock(&fs_info->async_submit_work_lock); - list_add_tail(&async->list, &fs_info->async_submit_work_list); + async->work.func = run_one_async_submit; + async->work.flags = 0; atomic_inc(&fs_info->nr_async_submits); - spin_unlock(&fs_info->async_submit_work_lock); - - queue_work(async_submit_workqueue, &fs_info->async_submit_work); + btrfs_queue_worker(&fs_info->workers, &async->work); return 0; } @@ -462,19 +472,32 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, offset = bio->bi_sector << 9; + /* + * when we're called for a write, we're already in the async + * submission context. Just jump ingo btrfs_map_bio + */ if (rw & (1 << BIO_RW)) { - return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, + mirror_num, 0); } + /* + * called for a read, do the setup so that checksum validation + * can happen in the async kernel threads + */ ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); BUG_ON(ret); - return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); } static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { + /* + * kthread helpers are used to submit writes so that checksumming + * can happen in parallel across all CPUs + */ if (!(rw & (1 << BIO_RW))) { return __btree_submit_bio_hook(inode, rw, bio, mirror_num); } @@ -1036,95 +1059,40 @@ static int bio_ready_for_csum(struct bio *bio) return ret; } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -static void btrfs_end_io_csum(void *p) -#else -static void btrfs_end_io_csum(struct work_struct *work) -#endif +/* + * called by the kthread helper functions to finally call the bio end_io + * functions. This is where read checksum verification actually happens + */ +static void end_workqueue_fn(struct btrfs_work *work) { -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - struct btrfs_fs_info *fs_info = p; -#else - struct btrfs_fs_info *fs_info = container_of(work, - struct btrfs_fs_info, - end_io_work); -#endif - unsigned long flags; - struct end_io_wq *end_io_wq; struct bio *bio; - struct list_head *next; + struct end_io_wq *end_io_wq; + struct btrfs_fs_info *fs_info; int error; - int was_empty; - while(1) { - spin_lock_irqsave(&fs_info->end_io_work_lock, flags); - if (list_empty(&fs_info->end_io_work_list)) { - spin_unlock_irqrestore(&fs_info->end_io_work_lock, - flags); - return; - } - next = fs_info->end_io_work_list.next; - list_del(next); - spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags); + end_io_wq = container_of(work, struct end_io_wq, work); + bio = end_io_wq->bio; + fs_info = end_io_wq->info; - end_io_wq = list_entry(next, struct end_io_wq, list); - - bio = end_io_wq->bio; - if (end_io_wq->metadata && !bio_ready_for_csum(bio)) { - spin_lock_irqsave(&fs_info->end_io_work_lock, flags); - was_empty = list_empty(&fs_info->end_io_work_list); - list_add_tail(&end_io_wq->list, - &fs_info->end_io_work_list); - spin_unlock_irqrestore(&fs_info->end_io_work_lock, - flags); - if (was_empty) - return; - continue; - } - error = end_io_wq->error; - bio->bi_private = end_io_wq->private; - bio->bi_end_io = end_io_wq->end_io; - kfree(end_io_wq); + /* metadata bios are special because the whole tree block must + * be checksummed at once. This makes sure the entire block is in + * ram and up to date before trying to verify things. For + * blocksize <= pagesize, it is basically a noop + */ + if (end_io_wq->metadata && !bio_ready_for_csum(bio)) { + btrfs_queue_worker(&fs_info->endio_workers, + &end_io_wq->work); + return; + } + error = end_io_wq->error; + bio->bi_private = end_io_wq->private; + bio->bi_end_io = end_io_wq->end_io; + kfree(end_io_wq); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - bio_endio(bio, bio->bi_size, error); + bio_endio(bio, bio->bi_size, error); #else - bio_endio(bio, error); + bio_endio(bio, error); #endif - } -} - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -static void btrfs_async_submit_work(void *p) -#else -static void btrfs_async_submit_work(struct work_struct *work) -#endif -{ -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - struct btrfs_fs_info *fs_info = p; -#else - struct btrfs_fs_info *fs_info = container_of(work, - struct btrfs_fs_info, - async_submit_work); -#endif - struct async_submit_bio *async; - struct list_head *next; - - while(1) { - spin_lock(&fs_info->async_submit_work_lock); - if (list_empty(&fs_info->async_submit_work_list)) { - spin_unlock(&fs_info->async_submit_work_lock); - return; - } - next = fs_info->async_submit_work_list.next; - list_del(next); - atomic_dec(&fs_info->nr_async_submits); - spin_unlock(&fs_info->async_submit_work_lock); - - async = list_entry(next, struct async_submit_bio, list); - async->submit_bio_hook(async->inode, async->rw, async->bio, - async->mirror_num); - kfree(async); - } } struct btrfs_root *open_ctree(struct super_block *sb, @@ -1155,19 +1123,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, err = -ENOMEM; goto fail; } - end_io_workqueue = create_workqueue("btrfs-end-io"); - BUG_ON(!end_io_workqueue); - async_submit_workqueue = create_workqueue("btrfs-async-submit"); - INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); - INIT_LIST_HEAD(&fs_info->end_io_work_list); - INIT_LIST_HEAD(&fs_info->async_submit_work_list); spin_lock_init(&fs_info->hash_lock); - spin_lock_init(&fs_info->end_io_work_lock); - spin_lock_init(&fs_info->async_submit_work_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); @@ -1222,13 +1182,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->do_barriers = 1; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum, fs_info); - INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work, - fs_info); INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); #else - INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum); - INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work); INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); #endif BTRFS_I(fs_info->btree_inode)->root = tree_root; @@ -1240,6 +1195,19 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); + /* we need to start all the end_io workers up front because the + * queue work function gets called at interrupt time. The endio + * workers don't normally start IO, so some number of them <= the + * number of cpus is fine. They handle checksumming after a read. + * + * The other worker threads do start IO, so the max is larger than + * the number of CPUs. FIXME, tune this for huge machines + */ + btrfs_init_workers(&fs_info->workers, num_online_cpus() * 2); + btrfs_init_workers(&fs_info->endio_workers, num_online_cpus()); + btrfs_start_workers(&fs_info->workers, 1); + btrfs_start_workers(&fs_info->endio_workers, num_online_cpus()); + #if 0 ret = add_hasher(fs_info, "crc32c"); if (ret) { @@ -1375,6 +1343,8 @@ fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); fail_iput: iput(fs_info->btree_inode); + btrfs_stop_workers(&fs_info->workers); + btrfs_stop_workers(&fs_info->endio_workers); fail: btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); @@ -1623,16 +1593,10 @@ int close_ctree(struct btrfs_root *root) extent_io_tree_empty_lru(&fs_info->extent_ins); extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); - flush_workqueue(async_submit_workqueue); - flush_workqueue(end_io_workqueue); - truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); - flush_workqueue(async_submit_workqueue); - destroy_workqueue(async_submit_workqueue); - - flush_workqueue(end_io_workqueue); - destroy_workqueue(end_io_workqueue); + btrfs_stop_workers(&fs_info->workers); + btrfs_stop_workers(&fs_info->endio_workers); iput(fs_info->btree_inode); #if 0 diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0f14697becef..7daef8d37006 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -359,7 +359,7 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, kfree(sums); - return btrfs_map_bio(root, rw, bio, mirror_num); + return btrfs_map_bio(root, rw, bio, mirror_num, 1); } int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, @@ -383,7 +383,7 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, inode, rw, bio, mirror_num, __btrfs_submit_bio_hook); mapit: - return btrfs_map_bio(root, rw, bio, mirror_num); + return btrfs_map_bio(root, rw, bio, mirror_num, 0); } int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 722eb4550154..c57458ce6339 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -27,6 +27,7 @@ #include "transaction.h" #include "print-tree.h" #include "volumes.h" +#include "async-thread.h" struct map_lookup { u64 type; @@ -110,6 +111,101 @@ static struct btrfs_fs_devices *find_fsid(u8 *fsid) return NULL; } +/* + * we try to collect pending bios for a device so we don't get a large + * number of procs sending bios down to the same device. This greatly + * improves the schedulers ability to collect and merge the bios. + * + * But, it also turns into a long list of bios to process and that is sure + * to eventually make the worker thread block. The solution here is to + * make some progress and then put this work struct back at the end of + * the list if the block device is congested. This way, multiple devices + * can make progress from a single worker thread. + */ +int run_scheduled_bios(struct btrfs_device *device) +{ + struct bio *pending; + struct backing_dev_info *bdi; + struct bio *tail; + struct bio *cur; + int again = 0; + unsigned long num_run = 0; + + bdi = device->bdev->bd_inode->i_mapping->backing_dev_info; +loop: + spin_lock(&device->io_lock); + + /* take all the bios off the list at once and process them + * later on (without the lock held). But, remember the + * tail and other pointers so the bios can be properly reinserted + * into the list if we hit congestion + */ + pending = device->pending_bios; + tail = device->pending_bio_tail; + WARN_ON(pending && !tail); + device->pending_bios = NULL; + device->pending_bio_tail = NULL; + + /* + * if pending was null this time around, no bios need processing + * at all and we can stop. Otherwise it'll loop back up again + * and do an additional check so no bios are missed. + * + * device->running_pending is used to synchronize with the + * schedule_bio code. + */ + if (pending) { + again = 1; + device->running_pending = 1; + } else { + again = 0; + device->running_pending = 0; + } + spin_unlock(&device->io_lock); + + while(pending) { + cur = pending; + pending = pending->bi_next; + cur->bi_next = NULL; + atomic_dec(&device->dev_root->fs_info->nr_async_submits); + submit_bio(cur->bi_rw, cur); + num_run++; + + /* + * we made progress, there is more work to do and the bdi + * is now congested. Back off and let other work structs + * run instead + */ + if (pending && num_run && bdi_write_congested(bdi)) { + struct bio *old_head; + + spin_lock(&device->io_lock); + old_head = device->pending_bios; + device->pending_bios = pending; + if (device->pending_bio_tail) + tail->bi_next = old_head; + else + device->pending_bio_tail = tail; + + spin_unlock(&device->io_lock); + btrfs_requeue_work(&device->work); + goto done; + } + } + if (again) + goto loop; +done: + return 0; +} + +void pending_bios_fn(struct btrfs_work *work) +{ + struct btrfs_device *device; + + device = container_of(work, struct btrfs_device, work); + run_scheduled_bios(device); +} + static int device_list_add(const char *path, struct btrfs_super_block *disk_super, u64 devid, struct btrfs_fs_devices **fs_devices_ret) @@ -141,6 +237,7 @@ static int device_list_add(const char *path, return -ENOMEM; } device->devid = devid; + device->work.func = pending_bios_fn; memcpy(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); device->barriers = 1; @@ -925,6 +1022,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } device->barriers = 1; + device->work.func = pending_bios_fn; generate_random_uuid(device->uuid); spin_lock_init(&device->io_lock); device->name = kstrdup(device_path, GFP_NOFS); @@ -1965,8 +2063,61 @@ static int end_bio_multi_stripe(struct bio *bio, #endif } +struct async_sched { + struct bio *bio; + int rw; + struct btrfs_fs_info *info; + struct btrfs_work work; +}; + +/* + * see run_scheduled_bios for a description of why bios are collected for + * async submit. + * + * This will add one bio to the pending list for a device and make sure + * the work struct is scheduled. + */ +int schedule_bio(struct btrfs_root *root, struct btrfs_device *device, + int rw, struct bio *bio) +{ + int should_queue = 1; + + /* don't bother with additional async steps for reads, right now */ + if (!(rw & (1 << BIO_RW))) { + submit_bio(rw, bio); + return 0; + } + + /* + * nr_async_sumbits allows us to reliably return congestion to the + * higher layers. Otherwise, the async bio makes it appear we have + * made progress against dirty pages when we've really just put it + * on a queue for later + */ + atomic_inc(&root->fs_info->nr_async_submits); + bio->bi_next = NULL; + bio->bi_rw |= rw; + + spin_lock(&device->io_lock); + + if (device->pending_bio_tail) + device->pending_bio_tail->bi_next = bio; + + device->pending_bio_tail = bio; + if (!device->pending_bios) + device->pending_bios = bio; + if (device->running_pending) + should_queue = 0; + + spin_unlock(&device->io_lock); + + if (should_queue) + btrfs_queue_worker(&root->fs_info->workers, &device->work); + return 0; +} + int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, - int mirror_num) + int mirror_num, int async_submit) { struct btrfs_mapping_tree *map_tree; struct btrfs_device *dev; @@ -2012,10 +2163,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, dev = multi->stripes[dev_nr].dev; if (dev && dev->bdev) { bio->bi_bdev = dev->bdev; - spin_lock(&dev->io_lock); - dev->total_ios++; - spin_unlock(&dev->io_lock); - submit_bio(rw, bio); + if (async_submit) + schedule_bio(root, dev, rw, bio); + else + submit_bio(rw, bio); } else { bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; bio->bi_sector = logical >> 9; @@ -2054,6 +2205,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, device->barriers = 1; device->dev_root = root->fs_info->dev_root; device->devid = devid; + device->work.func = pending_bios_fn; fs_devices->num_devices++; spin_lock_init(&device->io_lock); memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 4df6b1608f91..48a44f7a9385 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -20,6 +20,7 @@ #define __BTRFS_VOLUMES_ #include +#include "async-thread.h" struct buffer_head; struct btrfs_device { @@ -27,6 +28,9 @@ struct btrfs_device { struct list_head dev_alloc_list; struct btrfs_root *dev_root; struct buffer_head *pending_io; + struct bio *pending_bios; + struct bio *pending_bio_tail; + int running_pending; u64 generation; int barriers; @@ -36,8 +40,6 @@ struct btrfs_device { struct block_device *bdev; - u64 total_ios; - char *name; /* the internal btrfs device id */ @@ -63,6 +65,8 @@ struct btrfs_device { /* physical drive uuid (or lvm uuid) */ u8 uuid[BTRFS_UUID_SIZE]; + + struct btrfs_work work; }; struct btrfs_fs_devices { @@ -117,7 +121,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, void btrfs_mapping_init(struct btrfs_mapping_tree *tree); void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, - int mirror_num); + int mirror_num, int async_submit); int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int flags, void *holder); From 15ada040d7cd68d7853938a92b116292cc16a2f3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Jun 2008 16:51:38 -0400 Subject: [PATCH 0562/2985] Btrfs: Fix mount -o max_inline=0 max_inline=0 used to force the max_inline size to one sector instead. Now it properly disables inline data items, while still being able to read any that happen to exist on disk. Signed-off-by: Chris Mason --- fs/btrfs/super.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 346932e546ba..f3274befd46a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -184,8 +184,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) info->max_inline = btrfs_parse_size(num); kfree(num); - info->max_inline = max_t(u64, - info->max_inline, root->sectorsize); + if (info->max_inline) { + info->max_inline = max_t(u64, + info->max_inline, + root->sectorsize); + } printk(KERN_INFO "btrfs: max_inline at %llu\n", info->max_inline); } From d05e5a4dad985acc938e62d9752c813647dedd9d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Jun 2008 17:09:48 -0400 Subject: [PATCH 0563/2985] Btrfs: Add backport for the kthread work on kernels older than 2.6.20 Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 2911b67bd6f7..8b9e2cf50606 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -16,10 +16,17 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include -#include + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) +# include +#else +# include +#endif + #include "async-thread.h" /* From 35d8ba66294ff2a53c17337a1aa1ff6739492f41 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Jun 2008 20:21:24 -0400 Subject: [PATCH 0564/2985] Btrfs: Worker thread optimizations This changes the worker thread pool to maintain a list of idle threads, avoiding a complex search for a good thread to wake up. Threads have two states: idle - we try to reuse the last thread used in hopes of improving the batching ratios busy - each time a new work item is added to a busy task, the task is rotated to the end of the line. Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 100 +++++++++++++++++++++++++++------------- fs/btrfs/async-thread.h | 7 +-- 2 files changed, 73 insertions(+), 34 deletions(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 8b9e2cf50606..8d4cc4679d51 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -34,6 +34,9 @@ * One of these is allocated per thread. */ struct btrfs_worker_thread { + /* pool we belong to */ + struct btrfs_workers *workers; + /* list of struct btrfs_work that are waiting for service */ struct list_head pending; @@ -51,8 +54,44 @@ struct btrfs_worker_thread { /* set to non-zero when this thread is already awake and kicking */ int working; + + /* are we currently idle */ + int idle; }; +/* + * helper function to move a thread onto the idle list after it + * has finished some requests. + */ +static void check_idle_worker(struct btrfs_worker_thread *worker) +{ + if (!worker->idle && atomic_read(&worker->num_pending) < + worker->workers->idle_thresh / 2) { + unsigned long flags; + spin_lock_irqsave(&worker->workers->lock, flags); + worker->idle = 1; + list_move(&worker->worker_list, &worker->workers->idle_list); + spin_unlock_irqrestore(&worker->workers->lock, flags); + } +} + +/* + * helper function to move a thread off the idle list after new + * pending work is added. + */ +static void check_busy_worker(struct btrfs_worker_thread *worker) +{ + if (worker->idle && atomic_read(&worker->num_pending) >= + worker->workers->idle_thresh) { + unsigned long flags; + spin_lock_irqsave(&worker->workers->lock, flags); + worker->idle = 0; + list_move_tail(&worker->worker_list, + &worker->workers->worker_list); + spin_unlock_irqrestore(&worker->workers->lock, flags); + } +} + /* * main loop for servicing work items */ @@ -76,6 +115,7 @@ static int worker_loop(void *arg) atomic_dec(&worker->num_pending); spin_lock_irq(&worker->lock); + check_idle_worker(worker); } worker->working = 0; if (freezing(current)) { @@ -98,6 +138,7 @@ int btrfs_stop_workers(struct btrfs_workers *workers) struct list_head *cur; struct btrfs_worker_thread *worker; + list_splice_init(&workers->idle_list, &workers->worker_list); while(!list_empty(&workers->worker_list)) { cur = workers->worker_list.next; worker = list_entry(cur, struct btrfs_worker_thread, @@ -116,9 +157,10 @@ void btrfs_init_workers(struct btrfs_workers *workers, int max) { workers->num_workers = 0; INIT_LIST_HEAD(&workers->worker_list); - workers->last = NULL; + INIT_LIST_HEAD(&workers->idle_list); spin_lock_init(&workers->lock); workers->max_workers = max; + workers->idle_thresh = 64; } /* @@ -143,14 +185,14 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) spin_lock_init(&worker->lock); atomic_set(&worker->num_pending, 0); worker->task = kthread_run(worker_loop, worker, "btrfs"); + worker->workers = workers; if (IS_ERR(worker->task)) { ret = PTR_ERR(worker->task); goto fail; } spin_lock_irq(&workers->lock); - list_add_tail(&worker->worker_list, &workers->worker_list); - workers->last = worker; + list_add_tail(&worker->worker_list, &workers->idle_list); workers->num_workers++; spin_unlock_irq(&workers->lock); } @@ -169,42 +211,30 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) { struct btrfs_worker_thread *worker; struct list_head *next; - struct list_head *start; int enforce_min = workers->num_workers < workers->max_workers; - /* start with the last thread if it isn't busy */ - worker = workers->last; - if (atomic_read(&worker->num_pending) < 64) - goto done; - - next = worker->worker_list.next; - start = &worker->worker_list; - /* - * check all the workers for someone that is bored. FIXME, do - * something smart here + * if we find an idle thread, don't move it to the end of the + * idle list. This improves the chance that the next submission + * will reuse the same thread, and maybe catch it while it is still + * working */ - while(next != start) { - if (next == &workers->worker_list) { - next = workers->worker_list.next; - continue; - } + if (!list_empty(&workers->idle_list)) { + next = workers->idle_list.next; worker = list_entry(next, struct btrfs_worker_thread, worker_list); - if (atomic_read(&worker->num_pending) < 64 || !enforce_min) - goto done; - next = next->next; + return worker; } + if (enforce_min || list_empty(&workers->worker_list)) + return NULL; + /* - * nobody was bored, if we're already at the max thread count, - * use the last thread + * if we pick a busy task, move the task to the end of the list. + * hopefully this will keep things somewhat evenly balanced */ - if (!enforce_min || atomic_read(&workers->last->num_pending) < 64) { - return workers->last; - } - return NULL; -done: - workers->last = worker; + next = workers->worker_list.next; + worker = list_entry(next, struct btrfs_worker_thread, worker_list); + list_move_tail(next, &workers->worker_list); return worker; } @@ -221,11 +251,17 @@ again: if (!worker) { spin_lock_irqsave(&workers->lock, flags); if (workers->num_workers >= workers->max_workers) { + struct list_head *fallback = NULL; /* * we have failed to find any workers, just * return the force one */ - worker = list_entry(workers->worker_list.next, + if (!list_empty(&workers->worker_list)) + fallback = workers->worker_list.next; + if (!list_empty(&workers->idle_list)) + fallback = workers->idle_list.next; + BUG_ON(!fallback); + worker = list_entry(fallback, struct btrfs_worker_thread, worker_list); spin_unlock_irqrestore(&workers->lock, flags); } else { @@ -254,6 +290,7 @@ int btrfs_requeue_work(struct btrfs_work *work) spin_lock_irqsave(&worker->lock, flags); atomic_inc(&worker->num_pending); list_add_tail(&work->list, &worker->pending); + check_busy_worker(worker); spin_unlock_irqrestore(&worker->lock, flags); out: return 0; @@ -276,6 +313,7 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) spin_lock_irqsave(&worker->lock, flags); atomic_inc(&worker->num_pending); + check_busy_worker(worker); list_add_tail(&work->list, &worker->pending); /* diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 52fc9da0f9e7..3436ff897597 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -60,11 +60,12 @@ struct btrfs_workers { /* max number of workers allowed. changed by btrfs_start_workers */ int max_workers; + /* once a worker has this many requests or fewer, it is idle */ + int idle_thresh; + /* list with all the work threads */ struct list_head worker_list; - - /* the last worker thread to have something queued */ - struct btrfs_worker_thread *last; + struct list_head idle_list; /* lock for finding the next worker thread to queue on */ spinlock_t lock; From 4543df7ecc8ae4928c1e51d6e7dc188d650abee4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Jun 2008 21:47:56 -0400 Subject: [PATCH 0565/2985] Btrfs: Add a mount option to control worker thread pool size mount -o thread_pool_size changes the default, which is min(num_cpus + 2, 8). Larger thread pools would make more sense on very large disk arrays. This mount option controls the max size of each thread pool. There are multiple thread pools, so the total worker count will be larger than the mount option. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 30 +++++++++++++++--------------- fs/btrfs/super.c | 13 ++++++++++++- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6c91a510c965..7ae4666103c0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -532,6 +532,7 @@ struct btrfs_fs_info { */ struct btrfs_workers workers; struct btrfs_workers endio_workers; + int thread_pool_size; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) struct work_struct trans_work; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 98ff4fbcb386..c6a710a668cb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1117,6 +1117,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); int ret; int err = -EINVAL; + struct btrfs_super_block *disk_super; if (!extent_root || !tree_root || !fs_info) { @@ -1148,6 +1149,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; + fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); sb->s_blocksize = 4096; sb->s_blocksize_bits = blksize_bits(4096); @@ -1195,19 +1197,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - /* we need to start all the end_io workers up front because the - * queue work function gets called at interrupt time. The endio - * workers don't normally start IO, so some number of them <= the - * number of cpus is fine. They handle checksumming after a read. - * - * The other worker threads do start IO, so the max is larger than - * the number of CPUs. FIXME, tune this for huge machines - */ - btrfs_init_workers(&fs_info->workers, num_online_cpus() * 2); - btrfs_init_workers(&fs_info->endio_workers, num_online_cpus()); - btrfs_start_workers(&fs_info->workers, 1); - btrfs_start_workers(&fs_info->endio_workers, num_online_cpus()); - #if 0 ret = add_hasher(fs_info, "crc32c"); if (ret) { @@ -1238,6 +1227,17 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (err) goto fail_sb_buffer; + /* + * we need to start all the end_io workers up front because the + * queue work function gets called at interrupt time, and so it + * cannot dynamically grow. + */ + btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + btrfs_start_workers(&fs_info->workers, 1); + btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + + err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", @@ -1341,10 +1341,10 @@ fail_sys_array: mutex_unlock(&fs_info->fs_mutex); fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); -fail_iput: - iput(fs_info->btree_inode); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); +fail_iput: + iput(fs_info->btree_inode); fail: btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f3274befd46a..196d0e280b19 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -67,7 +67,7 @@ static void btrfs_put_super (struct super_block * sb) enum { Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, - Opt_ssd, Opt_err, + Opt_ssd, Opt_thread_pool, Opt_err, }; static match_table_t tokens = { @@ -80,6 +80,7 @@ static match_table_t tokens = { {Opt_max_extent, "max_extent=%s"}, {Opt_max_inline, "max_inline=%s"}, {Opt_alloc_start, "alloc_start=%s"}, + {Opt_thread_pool, "thread_pool=%d"}, {Opt_ssd, "ssd"}, {Opt_err, NULL} }; @@ -118,6 +119,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) struct btrfs_fs_info *info = root->fs_info; substring_t args[MAX_OPT_ARGS]; char *p, *num; + int intarg; if (!options) return 0; @@ -166,6 +168,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) printk(KERN_INFO "btrfs: turning off barriers\n"); btrfs_set_opt(info->mount_opt, NOBARRIER); break; + case Opt_thread_pool: + intarg = 0; + match_int(&args[0], &intarg); + if (intarg) { + info->thread_pool_size = intarg; + printk(KERN_INFO "btrfs: thread pool %d\n", + info->thread_pool_size); + } + break; case Opt_max_extent: num = match_strdup(&args[0]); if (num) { From 9d2423c5c3fbb0f110ac0b6cdc5a8e4d64729483 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Jun 2008 21:52:17 -0400 Subject: [PATCH 0566/2985] Btrfs: kerneldoc comments for extent_map.c Add kerneldoc comments for all exported functions. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 61 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index ba46f7911d99..f5a04eb9a2ac 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -31,6 +31,14 @@ void extent_map_exit(void) kmem_cache_destroy(extent_map_cache); } +/** + * extent_map_tree_init - initialize extent map tree + * @tree: tree to initialize + * @mask: flags for memory allocations during tree operations + * + * Initialize the extent tree @tree. Should be called for each new inode + * or other user of the extent_map interface. + */ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) { tree->map.rb_node = NULL; @@ -39,6 +47,14 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) } EXPORT_SYMBOL(extent_map_tree_init); +/** + * alloc_extent_map - allocate new extent map structure + * @mask: memory allocation flags + * + * Allocate a new extent_map structure. The new structure is + * returned with a reference count of one and needs to be + * freed using free_extent_map() + */ struct extent_map *alloc_extent_map(gfp_t mask) { struct extent_map *em; @@ -52,6 +68,13 @@ struct extent_map *alloc_extent_map(gfp_t mask) } EXPORT_SYMBOL(alloc_extent_map); +/** + * free_extent_map - drop reference count of an extent_map + * @em: extent map beeing releasead + * + * Drops the reference out on @em by one and free the structure + * if the reference count hits zero. + */ void free_extent_map(struct extent_map *em) { if (!em) @@ -166,10 +189,15 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) return 0; } -/* - * add_extent_mapping tries a simple forward/backward merge with existing - * mappings. The extent_map struct passed in will be inserted into - * the tree directly (no copies made, just a reference taken). +/** + * add_extent_mapping - add new extent map to the extent tree + * @tree: tree to insert new map in + * @em: map to insert + * + * Insert @em into @tree or perform a simple forward/backward merge with + * existing mappings. The extent_map struct passed in will be inserted + * into the tree directly, with an additional reference taken, or a + * reference dropped if the merge attempt was sucessfull. */ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) @@ -220,11 +248,16 @@ static u64 range_end(u64 start, u64 len) return start + len; } -/* - * lookup_extent_mapping returns the first extent_map struct in the - * tree that intersects the [start, len] range. There may - * be additional objects in the tree that intersect, so check the object - * returned carefully to make sure you don't need additional lookups. +/** + * lookup_extent_mapping - lookup extent_map + * @tree: tree to lookup in + * @start: byte offset to start the search + * @len: length of the lookup range + * + * Find and return the first extent_map struct in @tree that intersects the + * [start, len] range. There may be additional objects in the tree that + * intersect, so check the object returned carefully to make sure that no + * additional lookups are needed. */ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 start, u64 len) @@ -273,9 +306,13 @@ out: } EXPORT_SYMBOL(lookup_extent_mapping); -/* - * removes an extent_map struct from the tree. No reference counts are - * dropped, and no checks are done to see if the range is in use +/** + * remove_extent_mapping - removes an extent_map from the extent tree + * @tree: extent tree to remove from + * @em: extent map beeing removed + * + * Removes @em from @tree. No reference counts are dropped, and no checks + * are done to see if the range is in use */ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { From f46b5a66b3316ef2f8febfe4c56e2d555e2c3979 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Jun 2008 21:53:53 -0400 Subject: [PATCH 0567/2985] Btrfs: split out ioctl.c Split the ioctl handling out of inode.c into a file of it's own. Also fix up checkpatch.pl warnings for the moved code. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.h | 9 +- fs/btrfs/inode.c | 733 +------------------------------------------ fs/btrfs/ioctl.c | 781 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 796 insertions(+), 729 deletions(-) create mode 100644 fs/btrfs/ioctl.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9dcfc2fe3332..7ed6b39e42d2 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,7 +6,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ - extent_io.o volumes.o async-thread.o + extent_io.o volumes.o async-thread.o ioctl.o btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o else diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7ae4666103c0..181c81d29897 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1553,6 +1553,10 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +int btrfs_create_subvol_root(struct btrfs_root *new_root, + struct btrfs_trans_handle *trans, u64 new_dirid, + struct btrfs_block_group_cache *block_group); + void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, int namelen); @@ -1585,7 +1589,6 @@ void btrfs_destroy_inode(struct inode *inode); int btrfs_init_cachep(void); void btrfs_destroy_cachep(void); long btrfs_ioctl_trans_end(struct file *file); -long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, @@ -1598,6 +1601,10 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); + +/* ioctl.c */ +long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); + /* file.c */ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end); int btrfs_check_file(struct btrfs_root *root, struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7daef8d37006..0c79346fd2c9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2667,114 +2667,17 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, } } -static int noinline create_subvol(struct btrfs_root *root, char *name, - int namelen) +int btrfs_create_subvol_root(struct btrfs_root *new_root, + struct btrfs_trans_handle *trans, u64 new_dirid, + struct btrfs_block_group_cache *block_group) { - struct btrfs_trans_handle *trans; - struct btrfs_key key; - struct btrfs_root_item root_item; - struct btrfs_inode_item *inode_item; - struct extent_buffer *leaf; - struct btrfs_root *new_root = root; struct inode *inode; - struct inode *dir; int ret; - int err; - u64 objectid; - u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; - unsigned long nr = 1; - - mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_check_free_space(root, 1, 0); - if (ret) - goto fail_commit; - - trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); - - ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, - 0, &objectid); - if (ret) - goto fail; - - leaf = __btrfs_alloc_free_block(trans, root, root->leafsize, - objectid, trans->transid, 0, 0, - 0, 0); - if (IS_ERR(leaf)) - return PTR_ERR(leaf); - - btrfs_set_header_nritems(leaf, 0); - btrfs_set_header_level(leaf, 0); - btrfs_set_header_bytenr(leaf, leaf->start); - btrfs_set_header_generation(leaf, trans->transid); - btrfs_set_header_owner(leaf, objectid); - - write_extent_buffer(leaf, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(leaf), - BTRFS_FSID_SIZE); - btrfs_mark_buffer_dirty(leaf); - - inode_item = &root_item.inode; - memset(inode_item, 0, sizeof(*inode_item)); - inode_item->generation = cpu_to_le64(1); - inode_item->size = cpu_to_le64(3); - inode_item->nlink = cpu_to_le32(1); - inode_item->nblocks = cpu_to_le64(1); - inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - - btrfs_set_root_bytenr(&root_item, leaf->start); - btrfs_set_root_level(&root_item, 0); - btrfs_set_root_refs(&root_item, 1); - btrfs_set_root_used(&root_item, 0); - - memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); - root_item.drop_level = 0; - - free_extent_buffer(leaf); - leaf = NULL; - - btrfs_set_root_dirid(&root_item, new_dirid); - - key.objectid = objectid; - key.offset = 1; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, - &root_item); - if (ret) - goto fail; - - /* - * insert the directory item - */ - key.offset = (u64)-1; - dir = root->fs_info->sb->s_root->d_inode; - ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, dir->i_ino, &key, - BTRFS_FT_DIR); - if (ret) - goto fail; - - ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, - name, namelen, objectid, - root->fs_info->sb->s_root->d_inode->i_ino); - if (ret) - goto fail; - - ret = btrfs_commit_transaction(trans, root); - if (ret) - goto fail_commit; - - new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen); - BUG_ON(!new_root); - - trans = btrfs_start_transaction(new_root, 1); - BUG_ON(!trans); inode = btrfs_new_inode(trans, new_root, "..", 2, new_dirid, - new_dirid, - BTRFS_I(dir)->block_group, S_IFDIR | 0700); + new_dirid, block_group, S_IFDIR | 0700); if (IS_ERR(inode)) - goto fail; + return PTR_ERR(inode); inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; new_root->inode = inode; @@ -2783,67 +2686,8 @@ static int noinline create_subvol(struct btrfs_root *root, char *name, new_dirid); inode->i_nlink = 1; inode->i_size = 0; - ret = btrfs_update_inode(trans, new_root, inode); - if (ret) - goto fail; - /* Invalidate existing dcache entry for new subvolume. */ - btrfs_invalidate_dcache_root(root, name, namelen); - -fail: - nr = trans->blocks_used; - err = btrfs_commit_transaction(trans, new_root); - if (err && !ret) - ret = err; -fail_commit: - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); - return ret; -} - -static int create_snapshot(struct btrfs_root *root, char *name, int namelen) -{ - struct btrfs_pending_snapshot *pending_snapshot; - struct btrfs_trans_handle *trans; - int ret; - int err; - unsigned long nr = 0; - - if (!root->ref_cows) - return -EINVAL; - - mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_check_free_space(root, 1, 0); - if (ret) - goto fail_unlock; - - pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS); - if (!pending_snapshot) { - ret = -ENOMEM; - goto fail_unlock; - } - pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); - if (!pending_snapshot->name) { - ret = -ENOMEM; - kfree(pending_snapshot); - goto fail_unlock; - } - memcpy(pending_snapshot->name, name, namelen); - pending_snapshot->name[namelen] = '\0'; - trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); - pending_snapshot->root = root; - list_add(&pending_snapshot->list, - &trans->transaction->pending_snapshots); - ret = btrfs_update_inode(trans, root, root->inode); - err = btrfs_commit_transaction(trans, root); - -fail_unlock: - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); - return ret; + return btrfs_update_inode(trans, new_root, inode); } unsigned long btrfs_force_ra(struct address_space *mapping, @@ -2861,571 +2705,6 @@ unsigned long btrfs_force_ra(struct address_space *mapping, #endif } -int btrfs_defrag_file(struct file *file) { - struct inode *inode = fdentry(file)->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct page *page; - unsigned long last_index; - unsigned long ra_pages = root->fs_info->bdi.ra_pages; - unsigned long total_read = 0; - u64 page_start; - u64 page_end; - unsigned long i; - int ret; - - mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_check_free_space(root, inode->i_size, 0); - mutex_unlock(&root->fs_info->fs_mutex); - if (ret) - return -ENOSPC; - - mutex_lock(&inode->i_mutex); - last_index = inode->i_size >> PAGE_CACHE_SHIFT; - for (i = 0; i <= last_index; i++) { - if (total_read % ra_pages == 0) { - btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, - min(last_index, i + ra_pages - 1)); - } - total_read++; - page = grab_cache_page(inode->i_mapping, i); - if (!page) - goto out_unlock; - if (!PageUptodate(page)) { - btrfs_readpage(NULL, page); - lock_page(page); - if (!PageUptodate(page)) { - unlock_page(page); - page_cache_release(page); - goto out_unlock; - } - } - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(page); -#else - cancel_dirty_page(page, PAGE_CACHE_SIZE); -#endif - wait_on_page_writeback(page); - set_page_extent_mapped(page); - - page_start = (u64)page->index << PAGE_CACHE_SHIFT; - page_end = page_start + PAGE_CACHE_SIZE - 1; - - lock_extent(io_tree, page_start, page_end, GFP_NOFS); - set_extent_delalloc(io_tree, page_start, - page_end, GFP_NOFS); - - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); - balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); - } - -out_unlock: - mutex_unlock(&inode->i_mutex); - return 0; -} - -static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) -{ - u64 new_size; - u64 old_size; - u64 devid = 1; - struct btrfs_ioctl_vol_args *vol_args; - struct btrfs_trans_handle *trans; - struct btrfs_device *device = NULL; - char *sizestr; - char *devstr = NULL; - int ret = 0; - int namelen; - int mod = 0; - - vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); - - if (!vol_args) - return -ENOMEM; - - if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { - ret = -EFAULT; - goto out; - } - namelen = strlen(vol_args->name); - if (namelen > BTRFS_VOL_NAME_MAX) { - ret = -EINVAL; - goto out; - } - - mutex_lock(&root->fs_info->fs_mutex); - sizestr = vol_args->name; - devstr = strchr(sizestr, ':'); - if (devstr) { - char *end; - sizestr = devstr + 1; - *devstr = '\0'; - devstr = vol_args->name; - devid = simple_strtoull(devstr, &end, 10); -printk("resizing devid %Lu\n", devid); - } - device = btrfs_find_device(root, devid, NULL); - if (!device) { - printk("resizer unable to find device %Lu\n", devid); - ret = -EINVAL; - goto out_unlock; - } - if (!strcmp(sizestr, "max")) - new_size = device->bdev->bd_inode->i_size; - else { - if (sizestr[0] == '-') { - mod = -1; - sizestr++; - } else if (sizestr[0] == '+') { - mod = 1; - sizestr++; - } - new_size = btrfs_parse_size(sizestr); - if (new_size == 0) { - ret = -EINVAL; - goto out_unlock; - } - } - - old_size = device->total_bytes; - - if (mod < 0) { - if (new_size > old_size) { - ret = -EINVAL; - goto out_unlock; - } - new_size = old_size - new_size; - } else if (mod > 0) { - new_size = old_size + new_size; - } - - if (new_size < 256 * 1024 * 1024) { - ret = -EINVAL; - goto out_unlock; - } - if (new_size > device->bdev->bd_inode->i_size) { - ret = -EFBIG; - goto out_unlock; - } - - do_div(new_size, root->sectorsize); - new_size *= root->sectorsize; - -printk("new size for %s is %llu\n", device->name, (unsigned long long)new_size); - - if (new_size > old_size) { - trans = btrfs_start_transaction(root, 1); - ret = btrfs_grow_device(trans, device, new_size); - btrfs_commit_transaction(trans, root); - } else { - ret = btrfs_shrink_device(device, new_size); - } - -out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); -out: - kfree(vol_args); - return ret; -} - -static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root, - void __user *arg) -{ - struct btrfs_ioctl_vol_args *vol_args; - struct btrfs_dir_item *di; - struct btrfs_path *path; - u64 root_dirid; - int namelen; - int ret; - - vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); - - if (!vol_args) - return -ENOMEM; - - if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { - ret = -EFAULT; - goto out; - } - - namelen = strlen(vol_args->name); - if (namelen > BTRFS_VOL_NAME_MAX) { - ret = -EINVAL; - goto out; - } - if (strchr(vol_args->name, '/')) { - ret = -EINVAL; - goto out; - } - - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - - root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, - mutex_lock(&root->fs_info->fs_mutex); - di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, - path, root_dirid, - vol_args->name, namelen, 0); - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_free_path(path); - - if (di && !IS_ERR(di)) { - ret = -EEXIST; - goto out; - } - - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out; - } - - if (root == root->fs_info->tree_root) - ret = create_subvol(root, vol_args->name, namelen); - else - ret = create_snapshot(root, vol_args->name, namelen); -out: - kfree(vol_args); - return ret; -} - -static int btrfs_ioctl_defrag(struct file *file) -{ - struct inode *inode = fdentry(file)->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - - switch (inode->i_mode & S_IFMT) { - case S_IFDIR: - mutex_lock(&root->fs_info->fs_mutex); - btrfs_defrag_root(root, 0); - btrfs_defrag_root(root->fs_info->extent_root, 0); - mutex_unlock(&root->fs_info->fs_mutex); - break; - case S_IFREG: - btrfs_defrag_file(file); - break; - } - - return 0; -} - -long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) -{ - struct btrfs_ioctl_vol_args *vol_args; - int ret; - - vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); - - if (!vol_args) - return -ENOMEM; - - if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { - ret = -EFAULT; - goto out; - } - ret = btrfs_init_new_device(root, vol_args->name); - -out: - kfree(vol_args); - return ret; -} - -long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) -{ - struct btrfs_ioctl_vol_args *vol_args; - int ret; - - vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); - - if (!vol_args) - return -ENOMEM; - - if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { - ret = -EFAULT; - goto out; - } - ret = btrfs_rm_device(root, vol_args->name); - -out: - kfree(vol_args); - return ret; -} - -int dup_item_to_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct extent_buffer *leaf, - int slot, - struct btrfs_key *key, - u64 destino) -{ - char *dup; - int len = btrfs_item_size_nr(leaf, slot); - struct btrfs_key ckey = *key; - int ret = 0; - - dup = kmalloc(len, GFP_NOFS); - if (!dup) - return -ENOMEM; - - read_extent_buffer(leaf, dup, btrfs_item_ptr_offset(leaf, slot), len); - btrfs_release_path(root, path); - - ckey.objectid = destino; - ret = btrfs_insert_item(trans, root, &ckey, dup, len); - kfree(dup); - return ret; -} - -long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) -{ - struct inode *inode = fdentry(file)->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct file *src_file; - struct inode *src; - struct btrfs_trans_handle *trans; - int ret; - u64 pos; - struct btrfs_path *path; - struct btrfs_key key; - struct extent_buffer *leaf; - u32 nritems; - int slot; - - src_file = fget(src_fd); - if (!src_file) - return -EBADF; - src = src_file->f_dentry->d_inode; - - ret = -EXDEV; - if (src->i_sb != inode->i_sb) - goto out_fput; - - if (inode < src) { - mutex_lock(&inode->i_mutex); - mutex_lock(&src->i_mutex); - } else { - mutex_lock(&src->i_mutex); - mutex_lock(&inode->i_mutex); - } - - ret = -ENOTEMPTY; - if (inode->i_size) - goto out_unlock; - - /* do any pending delalloc/csum calc on src, one way or - another, and lock file content */ - while (1) { - filemap_write_and_wait(src->i_mapping); - lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); - if (BTRFS_I(src)->delalloc_bytes == 0) - break; - unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); - } - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 0); - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - key.offset = 0; - key.type = BTRFS_EXTENT_DATA_KEY; - key.objectid = src->i_ino; - pos = 0; - path->reada = 2; - - while (1) { - /* - * note the key will change type as we walk through the - * tree. - */ - ret = btrfs_search_slot(trans, root, &key, path, 0, 0); - if (ret < 0) - goto out; - - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - goto out; - if (ret > 0) - break; - } - leaf = path->nodes[0]; - slot = path->slots[0]; - btrfs_item_key_to_cpu(leaf, &key, slot); - nritems = btrfs_header_nritems(leaf); - - if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY || - key.objectid != src->i_ino) - break; - - if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { - struct btrfs_file_extent_item *extent; - int found_type; - pos = key.offset; - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(leaf, extent); - if (found_type == BTRFS_FILE_EXTENT_REG) { - u64 len = btrfs_file_extent_num_bytes(leaf, - extent); - u64 ds = btrfs_file_extent_disk_bytenr(leaf, - extent); - u64 dl = btrfs_file_extent_disk_num_bytes(leaf, - extent); - u64 off = btrfs_file_extent_offset(leaf, - extent); - btrfs_insert_file_extent(trans, root, - inode->i_ino, pos, - ds, dl, len, off); - /* ds == 0 means there's a hole */ - if (ds != 0) { - btrfs_inc_extent_ref(trans, root, - ds, dl, - root->root_key.objectid, - trans->transid, - inode->i_ino, pos); - } - pos = key.offset + len; - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - ret = dup_item_to_inode(trans, root, path, - leaf, slot, &key, - inode->i_ino); - if (ret) - goto out; - pos = key.offset + btrfs_item_size_nr(leaf, - slot); - } - } else if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { - ret = dup_item_to_inode(trans, root, path, leaf, - slot, &key, inode->i_ino); - - if (ret) - goto out; - } - key.offset++; - btrfs_release_path(root, path); - } - - ret = 0; -out: - btrfs_free_path(path); - - inode->i_blocks = src->i_blocks; - i_size_write(inode, src->i_size); - btrfs_update_inode(trans, root, inode); - - unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); - - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - -out_unlock: - mutex_unlock(&src->i_mutex); - mutex_unlock(&inode->i_mutex); -out_fput: - fput(src_file); - return ret; -} - -/* - * there are many ways the trans_start and trans_end ioctls can lead - * to deadlocks. They should only be used by applications that - * basically own the machine, and have a very in depth understanding - * of all the possible deadlocks and enospc problems. - */ -long btrfs_ioctl_trans_start(struct file *file) -{ - struct inode *inode = fdentry(file)->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; - int ret = 0; - - mutex_lock(&root->fs_info->fs_mutex); - if (file->private_data) { - ret = -EINPROGRESS; - goto out; - } - trans = btrfs_start_transaction(root, 0); - if (trans) - file->private_data = trans; - else - ret = -ENOMEM; - /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ -out: - mutex_unlock(&root->fs_info->fs_mutex); - return ret; -} - -/* - * there are many ways the trans_start and trans_end ioctls can lead - * to deadlocks. They should only be used by applications that - * basically own the machine, and have a very in depth understanding - * of all the possible deadlocks and enospc problems. - */ -long btrfs_ioctl_trans_end(struct file *file) -{ - struct inode *inode = fdentry(file)->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; - int ret = 0; - - mutex_lock(&root->fs_info->fs_mutex); - trans = file->private_data; - if (!trans) { - ret = -EINVAL; - goto out; - } - btrfs_end_transaction(trans, root); - file->private_data = 0; -out: - mutex_unlock(&root->fs_info->fs_mutex); - return ret; -} - -long btrfs_ioctl(struct file *file, unsigned int - cmd, unsigned long arg) -{ - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; - - switch (cmd) { - case BTRFS_IOC_SNAP_CREATE: - return btrfs_ioctl_snap_create(root, (void __user *)arg); - case BTRFS_IOC_DEFRAG: - return btrfs_ioctl_defrag(file); - case BTRFS_IOC_RESIZE: - return btrfs_ioctl_resize(root, (void __user *)arg); - case BTRFS_IOC_ADD_DEV: - return btrfs_ioctl_add_dev(root, (void __user *)arg); - case BTRFS_IOC_RM_DEV: - return btrfs_ioctl_rm_dev(root, (void __user *)arg); - case BTRFS_IOC_BALANCE: - return btrfs_balance(root->fs_info->dev_root); - case BTRFS_IOC_CLONE: - return btrfs_ioctl_clone(file, arg); - case BTRFS_IOC_TRANS_START: - return btrfs_ioctl_trans_start(file); - case BTRFS_IOC_TRANS_END: - return btrfs_ioctl_trans_end(file); - case BTRFS_IOC_SYNC: - btrfs_sync_fs(file->f_dentry->d_sb, 1); - return 0; - } - - return -ENOTTY; -} - -/* - * Called inside transaction, so use GFP_NOFS - */ struct inode *btrfs_alloc_inode(struct super_block *sb) { struct btrfs_inode *ei; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c new file mode 100644 index 000000000000..da8de6cfdb5a --- /dev/null +++ b/fs/btrfs/ioctl.c @@ -0,0 +1,781 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "btrfs_inode.h" +#include "ioctl.h" +#include "print-tree.h" +#include "volumes.h" + + + +static noinline int create_subvol(struct btrfs_root *root, char *name, + int namelen) +{ + struct btrfs_trans_handle *trans; + struct btrfs_key key; + struct btrfs_root_item root_item; + struct btrfs_inode_item *inode_item; + struct extent_buffer *leaf; + struct btrfs_root *new_root = root; + struct inode *dir; + int ret; + int err; + u64 objectid; + u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; + unsigned long nr = 1; + + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, 1, 0); + if (ret) + goto fail_commit; + + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, + 0, &objectid); + if (ret) + goto fail; + + leaf = __btrfs_alloc_free_block(trans, root, root->leafsize, + objectid, trans->transid, 0, 0, + 0, 0); + if (IS_ERR(leaf)) + return PTR_ERR(leaf); + + btrfs_set_header_nritems(leaf, 0); + btrfs_set_header_level(leaf, 0); + btrfs_set_header_bytenr(leaf, leaf->start); + btrfs_set_header_generation(leaf, trans->transid); + btrfs_set_header_owner(leaf, objectid); + + write_extent_buffer(leaf, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(leaf), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(leaf); + + inode_item = &root_item.inode; + memset(inode_item, 0, sizeof(*inode_item)); + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nblocks = cpu_to_le64(1); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + + btrfs_set_root_bytenr(&root_item, leaf->start); + btrfs_set_root_level(&root_item, 0); + btrfs_set_root_refs(&root_item, 1); + btrfs_set_root_used(&root_item, 0); + + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); + root_item.drop_level = 0; + + free_extent_buffer(leaf); + leaf = NULL; + + btrfs_set_root_dirid(&root_item, new_dirid); + + key.objectid = objectid; + key.offset = 1; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, + &root_item); + if (ret) + goto fail; + + /* + * insert the directory item + */ + key.offset = (u64)-1; + dir = root->fs_info->sb->s_root->d_inode; + ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, + name, namelen, dir->i_ino, &key, + BTRFS_FT_DIR); + if (ret) + goto fail; + + ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, + name, namelen, objectid, + root->fs_info->sb->s_root->d_inode->i_ino); + if (ret) + goto fail; + + ret = btrfs_commit_transaction(trans, root); + if (ret) + goto fail_commit; + + new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen); + BUG_ON(!new_root); + + trans = btrfs_start_transaction(new_root, 1); + BUG_ON(!trans); + + ret = btrfs_create_subvol_root(new_root, trans, new_dirid, + BTRFS_I(dir)->block_group); + if (ret) + goto fail; + + /* Invalidate existing dcache entry for new subvolume. */ + btrfs_invalidate_dcache_root(root, name, namelen); + +fail: + nr = trans->blocks_used; + err = btrfs_commit_transaction(trans, new_root); + if (err && !ret) + ret = err; +fail_commit: + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); + return ret; +} + +static int create_snapshot(struct btrfs_root *root, char *name, int namelen) +{ + struct btrfs_pending_snapshot *pending_snapshot; + struct btrfs_trans_handle *trans; + int ret; + int err; + unsigned long nr = 0; + + if (!root->ref_cows) + return -EINVAL; + + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, 1, 0); + if (ret) + goto fail_unlock; + + pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS); + if (!pending_snapshot) { + ret = -ENOMEM; + goto fail_unlock; + } + pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); + if (!pending_snapshot->name) { + ret = -ENOMEM; + kfree(pending_snapshot); + goto fail_unlock; + } + memcpy(pending_snapshot->name, name, namelen); + pending_snapshot->name[namelen] = '\0'; + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + pending_snapshot->root = root; + list_add(&pending_snapshot->list, + &trans->transaction->pending_snapshots); + ret = btrfs_update_inode(trans, root, root->inode); + err = btrfs_commit_transaction(trans, root); + +fail_unlock: + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root, nr); + btrfs_throttle(root); + return ret; +} + +int btrfs_defrag_file(struct file *file) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct page *page; + unsigned long last_index; + unsigned long ra_pages = root->fs_info->bdi.ra_pages; + unsigned long total_read = 0; + u64 page_start; + u64 page_end; + unsigned long i; + int ret; + + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, inode->i_size, 0); + mutex_unlock(&root->fs_info->fs_mutex); + if (ret) + return -ENOSPC; + + mutex_lock(&inode->i_mutex); + last_index = inode->i_size >> PAGE_CACHE_SHIFT; + for (i = 0; i <= last_index; i++) { + if (total_read % ra_pages == 0) { + btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, + min(last_index, i + ra_pages - 1)); + } + total_read++; + page = grab_cache_page(inode->i_mapping, i); + if (!page) + goto out_unlock; + if (!PageUptodate(page)) { + btrfs_readpage(NULL, page); + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + goto out_unlock; + } + } + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(page); +#else + cancel_dirty_page(page, PAGE_CACHE_SIZE); +#endif + wait_on_page_writeback(page); + set_page_extent_mapped(page); + + page_start = (u64)page->index << PAGE_CACHE_SHIFT; + page_end = page_start + PAGE_CACHE_SIZE - 1; + + lock_extent(io_tree, page_start, page_end, GFP_NOFS); + set_extent_delalloc(io_tree, page_start, + page_end, GFP_NOFS); + + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); + } + +out_unlock: + mutex_unlock(&inode->i_mutex); + return 0; +} + +/* + * Called inside transaction, so use GFP_NOFS + */ + +static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) +{ + u64 new_size; + u64 old_size; + u64 devid = 1; + struct btrfs_ioctl_vol_args *vol_args; + struct btrfs_trans_handle *trans; + struct btrfs_device *device = NULL; + char *sizestr; + char *devstr = NULL; + int ret = 0; + int namelen; + int mod = 0; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + namelen = strlen(vol_args->name); + if (namelen > BTRFS_VOL_NAME_MAX) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&root->fs_info->fs_mutex); + sizestr = vol_args->name; + devstr = strchr(sizestr, ':'); + if (devstr) { + char *end; + sizestr = devstr + 1; + *devstr = '\0'; + devstr = vol_args->name; + devid = simple_strtoull(devstr, &end, 10); + printk(KERN_INFO "resizing devid %llu\n", devid); + } + device = btrfs_find_device(root, devid, NULL); + if (!device) { + printk(KERN_INFO "resizer unable to find device %llu\n", devid); + ret = -EINVAL; + goto out_unlock; + } + if (!strcmp(sizestr, "max")) + new_size = device->bdev->bd_inode->i_size; + else { + if (sizestr[0] == '-') { + mod = -1; + sizestr++; + } else if (sizestr[0] == '+') { + mod = 1; + sizestr++; + } + new_size = btrfs_parse_size(sizestr); + if (new_size == 0) { + ret = -EINVAL; + goto out_unlock; + } + } + + old_size = device->total_bytes; + + if (mod < 0) { + if (new_size > old_size) { + ret = -EINVAL; + goto out_unlock; + } + new_size = old_size - new_size; + } else if (mod > 0) { + new_size = old_size + new_size; + } + + if (new_size < 256 * 1024 * 1024) { + ret = -EINVAL; + goto out_unlock; + } + if (new_size > device->bdev->bd_inode->i_size) { + ret = -EFBIG; + goto out_unlock; + } + + do_div(new_size, root->sectorsize); + new_size *= root->sectorsize; + + printk(KERN_INFO "new size for %s is %llu\n", + device->name, (unsigned long long)new_size); + + if (new_size > old_size) { + trans = btrfs_start_transaction(root, 1); + ret = btrfs_grow_device(trans, device, new_size); + btrfs_commit_transaction(trans, root); + } else { + ret = btrfs_shrink_device(device, new_size); + } + +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); +out: + kfree(vol_args); + return ret; +} + +static noinline int btrfs_ioctl_snap_create(struct btrfs_root *root, + void __user *arg) +{ + struct btrfs_ioctl_vol_args *vol_args; + struct btrfs_dir_item *di; + struct btrfs_path *path; + u64 root_dirid; + int namelen; + int ret; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + + namelen = strlen(vol_args->name); + if (namelen > BTRFS_VOL_NAME_MAX) { + ret = -EINVAL; + goto out; + } + if (strchr(vol_args->name, '/')) { + ret = -EINVAL; + goto out; + } + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, + mutex_lock(&root->fs_info->fs_mutex); + di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, + path, root_dirid, + vol_args->name, namelen, 0); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_free_path(path); + + if (di && !IS_ERR(di)) { + ret = -EEXIST; + goto out; + } + + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; + } + + if (root == root->fs_info->tree_root) + ret = create_subvol(root, vol_args->name, namelen); + else + ret = create_snapshot(root, vol_args->name, namelen); +out: + kfree(vol_args); + return ret; +} + +static int btrfs_ioctl_defrag(struct file *file) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + + switch (inode->i_mode & S_IFMT) { + case S_IFDIR: + mutex_lock(&root->fs_info->fs_mutex); + btrfs_defrag_root(root, 0); + btrfs_defrag_root(root->fs_info->extent_root, 0); + mutex_unlock(&root->fs_info->fs_mutex); + break; + case S_IFREG: + btrfs_defrag_file(file); + break; + } + + return 0; +} + +long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_vol_args *vol_args; + int ret; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + ret = btrfs_init_new_device(root, vol_args->name); + +out: + kfree(vol_args); + return ret; +} + +long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_vol_args *vol_args; + int ret; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + ret = btrfs_rm_device(root, vol_args->name); + +out: + kfree(vol_args); + return ret; +} + +int dup_item_to_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *leaf, + int slot, + struct btrfs_key *key, + u64 destino) +{ + char *dup; + int len = btrfs_item_size_nr(leaf, slot); + struct btrfs_key ckey = *key; + int ret = 0; + + dup = kmalloc(len, GFP_NOFS); + if (!dup) + return -ENOMEM; + + read_extent_buffer(leaf, dup, btrfs_item_ptr_offset(leaf, slot), len); + btrfs_release_path(root, path); + + ckey.objectid = destino; + ret = btrfs_insert_item(trans, root, &ckey, dup, len); + kfree(dup); + return ret; +} + +long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct file *src_file; + struct inode *src; + struct btrfs_trans_handle *trans; + int ret; + u64 pos; + struct btrfs_path *path; + struct btrfs_key key; + struct extent_buffer *leaf; + u32 nritems; + int slot; + + src_file = fget(src_fd); + if (!src_file) + return -EBADF; + src = src_file->f_dentry->d_inode; + + ret = -EXDEV; + if (src->i_sb != inode->i_sb) + goto out_fput; + + if (inode < src) { + mutex_lock(&inode->i_mutex); + mutex_lock(&src->i_mutex); + } else { + mutex_lock(&src->i_mutex); + mutex_lock(&inode->i_mutex); + } + + ret = -ENOTEMPTY; + if (inode->i_size) + goto out_unlock; + + /* do any pending delalloc/csum calc on src, one way or + another, and lock file content */ + while (1) { + filemap_write_and_wait(src->i_mapping); + lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); + if (BTRFS_I(src)->delalloc_bytes == 0) + break; + unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); + } + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 0); + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + key.offset = 0; + key.type = BTRFS_EXTENT_DATA_KEY; + key.objectid = src->i_ino; + pos = 0; + path->reada = 2; + + while (1) { + /* + * note the key will change type as we walk through the + * tree. + */ + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret > 0) + break; + } + leaf = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + nritems = btrfs_header_nritems(leaf); + + if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY || + key.objectid != src->i_ino) + break; + + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { + struct btrfs_file_extent_item *extent; + int found_type; + pos = key.offset; + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(leaf, extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + u64 len = btrfs_file_extent_num_bytes(leaf, + extent); + u64 ds = btrfs_file_extent_disk_bytenr(leaf, + extent); + u64 dl = btrfs_file_extent_disk_num_bytes(leaf, + extent); + u64 off = btrfs_file_extent_offset(leaf, + extent); + btrfs_insert_file_extent(trans, root, + inode->i_ino, pos, + ds, dl, len, off); + /* ds == 0 means there's a hole */ + if (ds != 0) { + btrfs_inc_extent_ref(trans, root, + ds, dl, + root->root_key.objectid, + trans->transid, + inode->i_ino, pos); + } + pos = key.offset + len; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + ret = dup_item_to_inode(trans, root, path, + leaf, slot, &key, + inode->i_ino); + if (ret) + goto out; + pos = key.offset + btrfs_item_size_nr(leaf, + slot); + } + } else if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { + ret = dup_item_to_inode(trans, root, path, leaf, + slot, &key, inode->i_ino); + + if (ret) + goto out; + } + key.offset++; + btrfs_release_path(root, path); + } + + ret = 0; +out: + btrfs_free_path(path); + + inode->i_blocks = src->i_blocks; + i_size_write(inode, src->i_size); + btrfs_update_inode(trans, root, inode); + + unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + +out_unlock: + mutex_unlock(&src->i_mutex); + mutex_unlock(&inode->i_mutex); +out_fput: + fput(src_file); + return ret; +} + +/* + * there are many ways the trans_start and trans_end ioctls can lead + * to deadlocks. They should only be used by applications that + * basically own the machine, and have a very in depth understanding + * of all the possible deadlocks and enospc problems. + */ +long btrfs_ioctl_trans_start(struct file *file) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + int ret = 0; + + mutex_lock(&root->fs_info->fs_mutex); + if (file->private_data) { + ret = -EINPROGRESS; + goto out; + } + trans = btrfs_start_transaction(root, 0); + if (trans) + file->private_data = trans; + else + ret = -ENOMEM; + /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ +out: + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +/* + * there are many ways the trans_start and trans_end ioctls can lead + * to deadlocks. They should only be used by applications that + * basically own the machine, and have a very in depth understanding + * of all the possible deadlocks and enospc problems. + */ +long btrfs_ioctl_trans_end(struct file *file) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + int ret = 0; + + mutex_lock(&root->fs_info->fs_mutex); + trans = file->private_data; + if (!trans) { + ret = -EINVAL; + goto out; + } + btrfs_end_transaction(trans, root); + file->private_data = 0; +out: + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +long btrfs_ioctl(struct file *file, unsigned int + cmd, unsigned long arg) +{ + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + + switch (cmd) { + case BTRFS_IOC_SNAP_CREATE: + return btrfs_ioctl_snap_create(root, (void __user *)arg); + case BTRFS_IOC_DEFRAG: + return btrfs_ioctl_defrag(file); + case BTRFS_IOC_RESIZE: + return btrfs_ioctl_resize(root, (void __user *)arg); + case BTRFS_IOC_ADD_DEV: + return btrfs_ioctl_add_dev(root, (void __user *)arg); + case BTRFS_IOC_RM_DEV: + return btrfs_ioctl_rm_dev(root, (void __user *)arg); + case BTRFS_IOC_BALANCE: + return btrfs_balance(root->fs_info->dev_root); + case BTRFS_IOC_CLONE: + return btrfs_ioctl_clone(file, arg); + case BTRFS_IOC_TRANS_START: + return btrfs_ioctl_trans_start(file); + case BTRFS_IOC_TRANS_END: + return btrfs_ioctl_trans_end(file); + case BTRFS_IOC_SYNC: + btrfs_sync_fs(file->f_dentry->d_sb, 1); + return 0; + } + + return -ENOTTY; +} From df5b5520b21d8a2554ede65c09b288833c77144d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Jun 2008 21:53:58 -0400 Subject: [PATCH 0568/2985] BTRFS_IOC_TRANS_START should be privilegued As mentioned in the comment next to it btrfs_ioctl_trans_start can do bad damage to filesystems and thus should be limited to privilegued users. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index da8de6cfdb5a..6fb455802759 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -706,6 +706,9 @@ long btrfs_ioctl_trans_start(struct file *file) struct btrfs_trans_handle *trans; int ret = 0; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + mutex_lock(&root->fs_info->fs_mutex); if (file->private_data) { ret = -EINPROGRESS; From 1cc127b5d1b71453091859301de4a7dd6ee96fa8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Jun 2008 14:46:17 -0400 Subject: [PATCH 0569/2985] Btrfs: Add a thread pool just for submit_bio If a bio submission is after a lock holder waiting for the bio on the work queue, it is possible to deadlock. Move the bios into their own pool. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++++ fs/btrfs/disk-io.c | 4 ++++ fs/btrfs/volumes.c | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 181c81d29897..dcea9d706d9b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -529,9 +529,13 @@ struct btrfs_fs_info { * can run with FS locks held, and the writers may be waiting for * those locks. We don't want ordering in the pending list to cause * deadlocks, and so the two are serviced separately. + * + * A third pool does submit_bio to avoid deadlocking with the other + * two */ struct btrfs_workers workers; struct btrfs_workers endio_workers; + struct btrfs_workers submit_workers; int thread_pool_size; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c6a710a668cb..e5c758e306d5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1233,8 +1233,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, * cannot dynamically grow. */ btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_start_workers(&fs_info->workers, 1); + btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); @@ -1343,6 +1345,7 @@ fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->submit_workers); fail_iput: iput(fs_info->btree_inode); fail: @@ -1597,6 +1600,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->submit_workers); iput(fs_info->btree_inode); #if 0 diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c57458ce6339..ba3968571024 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2112,7 +2112,8 @@ int schedule_bio(struct btrfs_root *root, struct btrfs_device *device, spin_unlock(&device->io_lock); if (should_queue) - btrfs_queue_worker(&root->fs_info->workers, &device->work); + btrfs_queue_worker(&root->fs_info->submit_workers, + &device->work); return 0; } From 925baeddc5b0764a53f2214a1253251bab0e0324 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: [PATCH 0570/2985] Btrfs: Start btree concurrency work. The allocation trees and the chunk trees are serialized via their own dedicated mutexes. This means allocation location is still not very fine grained. The main FS btree is protected by locks on each block in the btree. Locks are taken top / down, and as processing finishes on a given level of the tree, the lock is released after locking the lower level. The end result of a search is now a path where only the lowest level is locked. Releasing or freeing the path drops any locks held. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 386 ++++++++++++++++++++++++++++++----------- fs/btrfs/ctree.h | 23 ++- fs/btrfs/disk-io.c | 13 +- fs/btrfs/extent-tree.c | 252 +++++++++++++++++---------- fs/btrfs/extent_io.c | 8 - fs/btrfs/inode.c | 8 +- fs/btrfs/ioctl.c | 8 +- fs/btrfs/locking.c | 50 ++++++ fs/btrfs/locking.h | 26 +++ fs/btrfs/transaction.c | 15 +- fs/btrfs/tree-defrag.c | 2 + 12 files changed, 579 insertions(+), 214 deletions(-) create mode 100644 fs/btrfs/locking.c create mode 100644 fs/btrfs/locking.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 7ed6b39e42d2..0e2dcc718de2 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,7 +6,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ - extent_io.o volumes.o async-thread.o ioctl.o + extent_io.o volumes.o async-thread.o ioctl.o locking.o btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o else diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1d404bde3cfa..75625c68fdd8 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -21,6 +21,7 @@ #include "disk-io.h" #include "transaction.h" #include "print-tree.h" +#include "locking.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); @@ -64,12 +65,47 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) int i; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) - break; + continue; + if (p->locks[i]) { + btrfs_tree_unlock(p->nodes[i]); + p->locks[i] = 0; + } free_extent_buffer(p->nodes[i]); } memset(p, 0, sizeof(*p)); } +struct extent_buffer *btrfs_root_node(struct btrfs_root *root) +{ + struct extent_buffer *eb; + spin_lock(&root->node_lock); + eb = root->node; + extent_buffer_get(eb); + spin_unlock(&root->node_lock); + return eb; +} + +struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) +{ + struct extent_buffer *eb; + + while(1) { + eb = btrfs_root_node(root); + btrfs_tree_lock(eb); + + spin_lock(&root->node_lock); + if (eb == root->node) { + spin_unlock(&root->node_lock); + break; + } + spin_unlock(&root->node_lock); + + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + } + return eb; +} + static void add_root_to_dirty_list(struct btrfs_root *root) { if (root->track_dirty && list_empty(&root->dirty_list)) { @@ -111,7 +147,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, } else { first_key.objectid = 0; } - cow = __btrfs_alloc_free_block(trans, new_root, buf->len, + cow = btrfs_alloc_free_block(trans, new_root, buf->len, new_root_objectid, trans->transid, first_key.objectid, level, buf->start, 0); @@ -151,8 +187,14 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, int ret = 0; int different_trans = 0; int level; + int unlock_orig = 0; struct btrfs_key first_key; + if (*cow_ret == buf) + unlock_orig = 1; + + WARN_ON(!btrfs_tree_locked(buf)); + if (root->ref_cows) { root_gen = trans->transid; } else { @@ -172,7 +214,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, } else { first_key.objectid = 0; } - cow = __btrfs_alloc_free_block(trans, root, buf->len, + cow = btrfs_alloc_free_block(trans, root, buf->len, root->root_key.objectid, root_gen, first_key.objectid, level, search_start, empty_size); @@ -196,9 +238,14 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, } if (buf == root->node) { + WARN_ON(parent && parent != buf); root_gen = btrfs_header_generation(buf); + + spin_lock(&root->node_lock); root->node = cow; extent_buffer_get(cow); + spin_unlock(&root->node_lock); + if (buf != root->commit_root) { btrfs_free_extent(trans, root, buf->start, buf->len, root->root_key.objectid, @@ -219,6 +266,8 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_header_owner(parent), root_gen, 0, 0, 1); } + if (unlock_orig) + btrfs_tree_unlock(buf); free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); *cow_ret = cow; @@ -316,6 +365,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, int progress_passed = 0; struct btrfs_disk_key disk_key; + /* FIXME this code needs locking */ + return 0; + parent_level = btrfs_header_level(parent); if (cache_only && parent_level != 1) return 0; @@ -729,6 +781,7 @@ static int balance_level(struct btrfs_trans_handle *trans, return 0; mid = path->nodes[level]; + WARN_ON(!path->locks[level]); WARN_ON(btrfs_header_generation(mid) != trans->transid); orig_ptr = btrfs_node_blockptr(mid, orig_slot); @@ -749,14 +802,21 @@ static int balance_level(struct btrfs_trans_handle *trans, /* promote the child to a root */ child = read_node_slot(root, mid, 0); + btrfs_tree_lock(child); BUG_ON(!child); ret = btrfs_cow_block(trans, root, child, mid, 0, &child); BUG_ON(ret); + spin_lock(&root->node_lock); root->node = child; + spin_unlock(&root->node_lock); + add_root_to_dirty_list(root); + btrfs_tree_unlock(child); + path->locks[level] = 0; path->nodes[level] = NULL; clean_tree_block(trans, root, mid); + btrfs_tree_unlock(mid); /* once for the path */ free_extent_buffer(mid); ret = btrfs_free_extent(trans, root, mid->start, mid->len, @@ -775,6 +835,7 @@ static int balance_level(struct btrfs_trans_handle *trans, left = read_node_slot(root, parent, pslot - 1); if (left) { + btrfs_tree_lock(left); wret = btrfs_cow_block(trans, root, left, parent, pslot - 1, &left); if (wret) { @@ -784,6 +845,7 @@ static int balance_level(struct btrfs_trans_handle *trans, } right = read_node_slot(root, parent, pslot + 1); if (right) { + btrfs_tree_lock(right); wret = btrfs_cow_block(trans, root, right, parent, pslot + 1, &right); if (wret) { @@ -815,6 +877,7 @@ static int balance_level(struct btrfs_trans_handle *trans, u32 blocksize = right->len; clean_tree_block(trans, root, right); + btrfs_tree_unlock(right); free_extent_buffer(right); right = NULL; wret = del_ptr(trans, root, path, level + 1, pslot + @@ -862,7 +925,9 @@ static int balance_level(struct btrfs_trans_handle *trans, u64 root_gen = btrfs_header_generation(parent); u64 bytenr = mid->start; u32 blocksize = mid->len; + clean_tree_block(trans, root, mid); + btrfs_tree_unlock(mid); free_extent_buffer(mid); mid = NULL; wret = del_ptr(trans, root, path, level + 1, pslot); @@ -885,11 +950,14 @@ static int balance_level(struct btrfs_trans_handle *trans, if (left) { if (btrfs_header_nritems(left) > orig_slot) { extent_buffer_get(left); + /* left was locked after cow */ path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; - if (mid) + if (mid) { + btrfs_tree_unlock(mid); free_extent_buffer(mid); + } } else { orig_slot -= btrfs_header_nritems(left); path->slots[level] = orig_slot; @@ -901,10 +969,15 @@ static int balance_level(struct btrfs_trans_handle *trans, btrfs_node_blockptr(path->nodes[level], path->slots[level])) BUG(); enospc: - if (right) + if (right) { + btrfs_tree_unlock(right); free_extent_buffer(right); - if (left) + } + if (left) { + if (path->nodes[level] != left) + btrfs_tree_unlock(left); free_extent_buffer(left); + } return ret; } @@ -942,6 +1015,8 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, /* first, try to make some room in the middle buffer */ if (left) { u32 left_nr; + + btrfs_tree_lock(left); left_nr = btrfs_header_nritems(left); if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; @@ -967,24 +1042,28 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; + btrfs_tree_unlock(mid); free_extent_buffer(mid); } else { orig_slot -= btrfs_header_nritems(left); path->slots[level] = orig_slot; + btrfs_tree_unlock(left); free_extent_buffer(left); } return 0; } + btrfs_tree_unlock(left); free_extent_buffer(left); } - right= read_node_slot(root, parent, pslot + 1); + right = read_node_slot(root, parent, pslot + 1); /* * then try to empty the right most buffer into the middle */ if (right) { u32 right_nr; + btrfs_tree_lock(right); right_nr = btrfs_header_nritems(right); if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; @@ -1013,12 +1092,15 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, path->slots[level + 1] += 1; path->slots[level] = orig_slot - btrfs_header_nritems(mid); + btrfs_tree_unlock(mid); free_extent_buffer(mid); } else { + btrfs_tree_unlock(right); free_extent_buffer(right); } return 0; } + btrfs_tree_unlock(right); free_extent_buffer(right); } return 1; @@ -1050,6 +1132,8 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, return; node = path->nodes[level]; + WARN_ON(!path->skip_locking && !btrfs_tree_locked(node)); + search = btrfs_node_blockptr(node, slot); blocksize = btrfs_level_size(root, level - 1); eb = btrfs_find_tree_block(root, search, blocksize); @@ -1098,6 +1182,39 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, highest_read = search; } } + +static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) +{ + int i; + int skip_level = level; + struct extent_buffer *t; + + for (i = level; i < BTRFS_MAX_LEVEL; i++) { + if (!path->nodes[i]) + break; + if (!path->locks[i]) + break; + if (path->slots[i] == 0) { + skip_level = i + 1; + continue; + } + if (path->keep_locks) { + u32 nritems; + t = path->nodes[i]; + nritems = btrfs_header_nritems(t); + if (path->slots[i] >= nritems - 1) { + skip_level = i + 1; + continue; + } + } + t = path->nodes[i]; + if (i >= lowest_unlock && i > skip_level && path->locks[i]) { + btrfs_tree_unlock(t); + path->locks[i] = 0; + } + } +} + /* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -1120,15 +1237,27 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int ret; int level; int should_reada = p->reada; + int lowest_unlock = 1; u8 lowest_level = 0; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len); WARN_ON(p->nodes[0] != NULL); - WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); + // WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); + WARN_ON(root == root->fs_info->extent_root && + !mutex_is_locked(&root->fs_info->alloc_mutex)); + WARN_ON(root == root->fs_info->chunk_root && + !mutex_is_locked(&root->fs_info->chunk_mutex)); + WARN_ON(root == root->fs_info->dev_root && + !mutex_is_locked(&root->fs_info->chunk_mutex)); + if (ins_len < 0) + lowest_unlock = 2; again: - b = root->node; - extent_buffer_get(b); + if (!p->skip_locking) + b = btrfs_lock_root_node(root); + else + b = btrfs_root_node(root); + while (b) { level = btrfs_header_level(b); if (cow) { @@ -1147,9 +1276,12 @@ again: WARN_ON(1); level = btrfs_header_level(b); p->nodes[level] = b; + if (!p->skip_locking) + p->locks[level] = 1; ret = check_block(root, p, level); if (ret) return -1; + ret = bin_search(b, key, level, &slot); if (level != 0) { if (ret && slot > 0) @@ -1177,14 +1309,19 @@ again: BUG_ON(btrfs_header_nritems(b) == 1); } /* this is only true while dropping a snapshot */ - if (level == lowest_level) + if (level == lowest_level) { + unlock_up(p, level, lowest_unlock); break; + } if (should_reada) reada_for_search(root, p, level, slot, key->objectid); b = read_node_slot(root, b, slot); + if (!p->skip_locking) + btrfs_tree_lock(b); + unlock_up(p, level, lowest_unlock); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -1195,6 +1332,7 @@ again: if (sret) return sret; } + unlock_up(p, level, lowest_unlock); return ret; } } @@ -1225,6 +1363,13 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, break; t = path->nodes[i]; btrfs_set_node_key(t, key, tslot); + if (!btrfs_tree_locked(path->nodes[i])) { + int ii; +printk("fixup without lock on level %d\n", btrfs_header_level(path->nodes[i])); + for (ii = 0; ii < BTRFS_MAX_LEVEL; ii++) { +printk("level %d slot %d\n", ii, path->slots[ii]); + } + } btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; @@ -1370,6 +1515,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, u64 lower_gen; struct extent_buffer *lower; struct extent_buffer *c; + struct extent_buffer *old; struct btrfs_disk_key lower_key; BUG_ON(path->nodes[level]); @@ -1386,12 +1532,13 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, else btrfs_node_key(lower, &lower_key, 0); - c = __btrfs_alloc_free_block(trans, root, root->nodesize, + c = btrfs_alloc_free_block(trans, root, root->nodesize, root->root_key.objectid, root_gen, lower_key.objectid, level, root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); + memset_extent_buffer(c, 0, 0, root->nodesize); btrfs_set_header_nritems(c, 1); btrfs_set_header_level(c, level); @@ -1416,23 +1563,31 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(c); - /* the super has an extra ref to root->node */ - free_extent_buffer(root->node); + spin_lock(&root->node_lock); + old = root->node; root->node = c; + spin_unlock(&root->node_lock); + + /* the super has an extra ref to root->node */ + free_extent_buffer(old); + add_root_to_dirty_list(root); extent_buffer_get(c); path->nodes[level] = c; + path->locks[level] = 1; path->slots[level] = 0; if (root->ref_cows && lower_gen != trans->transid) { struct btrfs_path *back_path = btrfs_alloc_path(); int ret; + mutex_lock(&root->fs_info->alloc_mutex); ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, path, lower->start, root->root_key.objectid, trans->transid, 0, 0); BUG_ON(ret); + mutex_unlock(&root->fs_info->alloc_mutex); btrfs_free_path(back_path); } return 0; @@ -1521,7 +1676,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root root_gen = 0; btrfs_node_key(c, &disk_key, 0); - split = __btrfs_alloc_free_block(trans, root, root->nodesize, + split = btrfs_alloc_free_block(trans, root, root->nodesize, root->root_key.objectid, root_gen, btrfs_disk_key_objectid(&disk_key), @@ -1564,10 +1719,12 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root if (path->slots[level] >= mid) { path->slots[level] -= mid; + btrfs_tree_unlock(c); free_extent_buffer(c); path->nodes[level] = split; path->slots[level + 1] += 1; } else { + btrfs_tree_unlock(split); free_extent_buffer(split); } return ret; @@ -1648,30 +1805,24 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; right = read_node_slot(root, upper, slot + 1); + btrfs_tree_lock(right); free_space = btrfs_leaf_free_space(root, right); - if (free_space < data_size + sizeof(struct btrfs_item)) { - free_extent_buffer(right); - return 1; - } + if (free_space < data_size + sizeof(struct btrfs_item)) + goto out_unlock; /* cow and double check */ ret = btrfs_cow_block(trans, root, right, upper, slot + 1, &right); - if (ret) { - free_extent_buffer(right); - return 1; - } + if (ret) + goto out_unlock; + free_space = btrfs_leaf_free_space(root, right); - if (free_space < data_size + sizeof(struct btrfs_item)) { - free_extent_buffer(right); - return 1; - } + if (free_space < data_size + sizeof(struct btrfs_item)) + goto out_unlock; left_nritems = btrfs_header_nritems(left); - if (left_nritems == 0) { - free_extent_buffer(right); - return 1; - } + if (left_nritems == 0) + goto out_unlock; if (empty) nr = 0; @@ -1707,10 +1858,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root left->map_token = NULL; } - if (push_items == 0) { - free_extent_buffer(right); - return 1; - } + if (push_items == 0) + goto out_unlock; if (!empty && push_items == left_nritems) WARN_ON(1); @@ -1778,14 +1927,24 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { path->slots[0] -= left_nritems; + if (btrfs_header_nritems(path->nodes[0]) == 0) + clean_tree_block(trans, root, path->nodes[0]); + btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[1] += 1; } else { + btrfs_tree_unlock(right); free_extent_buffer(right); } return 0; + +out_unlock: + btrfs_tree_unlock(right); + free_extent_buffer(right); + return 1; } + /* * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise @@ -1823,10 +1982,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root } left = read_node_slot(root, path->nodes[1], slot - 1); + btrfs_tree_lock(left); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - free_extent_buffer(left); - return 1; + ret = 1; + goto out; } /* cow and double check */ @@ -1834,14 +1994,14 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root path->nodes[1], slot - 1, &left); if (ret) { /* we hit -ENOSPC, but it isn't fatal here */ - free_extent_buffer(left); - return 1; + ret = 1; + goto out; } free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - free_extent_buffer(left); - return 1; + ret = 1; + goto out; } if (empty) @@ -1876,8 +2036,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root } if (push_items == 0) { - free_extent_buffer(left); - return 1; + ret = 1; + goto out; } if (!empty && push_items == btrfs_header_nritems(right)) WARN_ON(1); @@ -1975,15 +2135,23 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { path->slots[0] += old_left_nritems; + if (btrfs_header_nritems(path->nodes[0]) == 0) + clean_tree_block(trans, root, path->nodes[0]); + btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = left; path->slots[1] -= 1; } else { + btrfs_tree_unlock(left); free_extent_buffer(left); path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); return ret; +out: + btrfs_tree_unlock(left); + free_extent_buffer(left); + return ret; } /* @@ -2052,7 +2220,7 @@ again: btrfs_item_key(l, &disk_key, 0); - right = __btrfs_alloc_free_block(trans, root, root->leafsize, + right = btrfs_alloc_free_block(trans, root, root->leafsize, root->root_key.objectid, root_gen, disk_key.objectid, 0, l->start, 0); @@ -2085,6 +2253,8 @@ again: path->slots[1] + 1, 1); if (wret) ret = wret; + + btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[0] = 0; @@ -2111,6 +2281,7 @@ again: path->slots[1], 1); if (wret) ret = wret; + btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[0] = 0; @@ -2184,12 +2355,15 @@ again: BUG_ON(path->slots[0] != slot); if (mid <= slot) { + btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[0] -= mid; path->slots[1] += 1; - } else + } else { + btrfs_tree_unlock(right); free_extent_buffer(right); + } BUG_ON(path->slots[0] < 0); @@ -2418,10 +2592,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, total_data += data_size[i]; } - /* create a root if there isn't one */ - if (!root->node) - BUG(); - total_size = total_data + (nr - 1) * sizeof(struct btrfs_item); ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); if (ret == 0) { @@ -2516,7 +2686,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - out: return ret; } @@ -2655,7 +2824,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_set_header_level(leaf, 0); } else { u64 root_gen = btrfs_header_generation(path->nodes[1]); - clean_tree_block(trans, root, leaf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; @@ -2706,8 +2874,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, root_gen = btrfs_header_generation( path->nodes[1]); - clean_tree_block(trans, root, leaf); - wret = del_ptr(trans, root, path, 1, slot); if (wret) ret = wret; @@ -2720,7 +2886,13 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } else { - btrfs_mark_buffer_dirty(leaf); + /* if we're still in the path, make sure + * we're dirty. Otherwise, one of the + * push_leaf functions must have already + * dirtied this buffer + */ + if (path->nodes[0] == leaf) + btrfs_mark_buffer_dirty(leaf); free_extent_buffer(leaf); } } else { @@ -2731,56 +2903,40 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, } /* - * walk up the tree as far as required to find the previous leaf. + * search the tree again to find a leaf with lesser keys * returns 0 if it found something or 1 if there are no lesser leaves. * returns < 0 on io errors. */ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { - int slot; - int level = 1; - struct extent_buffer *c; - struct extent_buffer *next = NULL; + struct btrfs_key key; + struct btrfs_disk_key found_key; + int ret; - while(level < BTRFS_MAX_LEVEL) { - if (!path->nodes[level]) - return 1; + btrfs_item_key_to_cpu(path->nodes[0], &key, 0); - slot = path->slots[level]; - c = path->nodes[level]; - if (slot == 0) { - level++; - if (level == BTRFS_MAX_LEVEL) - return 1; - continue; - } - slot--; + if (key.offset > 0) + key.offset--; + else if (key.type > 0) + key.type--; + else if (key.objectid > 0) + key.objectid--; + else + return 1; - if (next) - free_extent_buffer(next); - - next = read_node_slot(root, c, slot); - break; - } - path->slots[level] = slot; - while(1) { - level--; - c = path->nodes[level]; - free_extent_buffer(c); - slot = btrfs_header_nritems(next); - if (slot != 0) - slot--; - path->nodes[level] = next; - path->slots[level] = slot; - if (!level) - break; - next = read_node_slot(root, next, slot); - } - return 0; + btrfs_release_path(root, path); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + btrfs_item_key(path->nodes[0], &found_key, 0); + ret = comp_keys(&found_key, &key); + if (ret < 0) + return 0; + return 1; } /* - * walk up the tree as far as required to find the next leaf. + * search the tree again to find a leaf with greater keys * returns 0 if it found something or 1 if there are no greater leaves. * returns < 0 on io errors. */ @@ -2790,6 +2946,28 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int level = 1; struct extent_buffer *c; struct extent_buffer *next = NULL; + struct btrfs_key key; + u32 nritems; + int ret; + + nritems = btrfs_header_nritems(path->nodes[0]); + if (nritems == 0) { + return 1; + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); + + path->keep_locks = 1; + btrfs_release_path(root, path); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + path->keep_locks = 0; + + if (ret < 0) + return ret; + + if (path->slots[0] < nritems - 1) { + goto done; + } while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) @@ -2799,33 +2977,45 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) c = path->nodes[level]; if (slot >= btrfs_header_nritems(c)) { level++; - if (level == BTRFS_MAX_LEVEL) + if (level == BTRFS_MAX_LEVEL) { return 1; + } continue; } - if (next) + if (next) { + btrfs_tree_unlock(next); free_extent_buffer(next); + } - if (path->reada) + if (level == 1 && path->locks[1] && path->reada) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, c, slot); + if (!path->skip_locking) + btrfs_tree_lock(next); break; } path->slots[level] = slot; while(1) { level--; c = path->nodes[level]; + if (path->locks[level]) + btrfs_tree_unlock(c); free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; + path->locks[level] = 1; if (!level) break; - if (path->reada) - reada_for_search(root, path, level, 0, 0); + if (level == 1 && path->locks[1] && path->reada) + reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); + if (!path->skip_locking) + btrfs_tree_lock(next); } +done: + unlock_up(path, 0, 1); return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index dcea9d706d9b..50891b39f366 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -330,8 +330,13 @@ struct btrfs_node { struct btrfs_path { struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; + /* if there is real range locking, this locks field will change */ + int locks[BTRFS_MAX_LEVEL]; int reada; + /* keep some upper locks as we walk down */ + int keep_locks; int lowest_level; + int skip_locking; }; /* @@ -515,6 +520,8 @@ struct btrfs_fs_info { spinlock_t hash_lock; struct mutex trans_mutex; struct mutex fs_mutex; + struct mutex alloc_mutex; + struct mutex chunk_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; @@ -576,6 +583,10 @@ struct btrfs_fs_info { */ struct btrfs_root { struct extent_buffer *node; + + /* the node lock is held while changing the node pointer */ + spinlock_t node_lock; + struct extent_buffer *commit_root; struct btrfs_root_item root_item; struct btrfs_key root_key; @@ -1353,13 +1364,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, int data, int owner); -int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 owner_objectid); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u32 size, - u64 root_objectid, - u64 hint, u64 empty_size); -struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize, u64 root_objectid, @@ -1368,8 +1373,6 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int level, u64 hint, u64 empty_size); -int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 new_size); int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -1409,6 +1412,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, int type); + +struct extent_buffer *btrfs_root_node(struct btrfs_root *root); +struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); + int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e5c758e306d5..fe40bdd984ff 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -32,6 +32,7 @@ #include "volumes.h" #include "print-tree.h" #include "async-thread.h" +#include "locking.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -681,9 +682,11 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, { struct inode *btree_inode = root->fs_info->btree_inode; if (btrfs_header_generation(buf) == - root->fs_info->running_transaction->transid) + root->fs_info->running_transaction->transid) { + WARN_ON(!btrfs_tree_locked(buf)); clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); + } return 0; } @@ -720,6 +723,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->in_sysfs = 0; INIT_LIST_HEAD(&root->dirty_list); + spin_lock_init(&root->node_lock); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -1196,6 +1200,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); + mutex_init(&fs_info->alloc_mutex); + mutex_init(&fs_info->chunk_mutex); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1274,7 +1280,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_lock(&fs_info->fs_mutex); + mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_sys_array(tree_root); + mutex_unlock(&fs_info->chunk_mutex); if (ret) { printk("btrfs: failed to read the system array on %s\n", sb->s_id); @@ -1296,7 +1304,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), BTRFS_UUID_SIZE); + mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_chunk_tree(chunk_root); + mutex_unlock(&fs_info->chunk_mutex); BUG_ON(ret); btrfs_close_extra_devices(fs_devices); @@ -1654,6 +1664,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; + WARN_ON(!btrfs_tree_locked(buf)); if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", (unsigned long long)buf->start, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 41a63462d3eb..7e40c516fe62 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -25,6 +25,7 @@ #include "print-tree.h" #include "transaction.h" #include "volumes.h" +#include "locking.h" #define BLOCK_GROUP_DATA EXTENT_WRITEBACK #define BLOCK_GROUP_METADATA EXTENT_UPTODATE @@ -36,7 +37,28 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static struct btrfs_block_group_cache * +__btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache *hint, + u64 search_start, int data, int owner); +void maybe_lock_mutex(struct btrfs_root *root) +{ + if (root != root->fs_info->extent_root && + root != root->fs_info->chunk_root && + root != root->fs_info->dev_root) { + mutex_lock(&root->fs_info->alloc_mutex); + } +} + +void maybe_unlock_mutex(struct btrfs_root *root) +{ + if (root != root->fs_info->extent_root && + root != root->fs_info->chunk_root && + root != root->fs_info->dev_root) { + mutex_unlock(&root->fs_info->alloc_mutex); + } +} static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) @@ -66,6 +88,7 @@ static int cache_block_group(struct btrfs_root *root, return -ENOMEM; path->reada = 2; + path->skip_locking = 1; first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; key.offset = 0; @@ -290,7 +313,7 @@ no_cache: cache = btrfs_lookup_first_block_group(root->fs_info, last); } cache_miss = 0; - cache = btrfs_find_block_group(root, cache, last, data, 0); + cache = __btrfs_find_block_group(root, cache, last, data, 0); if (!cache) goto no_cache; *cache_ret = cache; @@ -318,10 +341,10 @@ static int block_group_state_bits(u64 flags) return bits; } -struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, - struct btrfs_block_group_cache - *hint, u64 search_start, - int data, int owner) +static struct btrfs_block_group_cache * +__btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache *hint, + u64 search_start, int data, int owner) { struct btrfs_block_group_cache *cache; struct extent_io_tree *block_group_cache; @@ -411,6 +434,18 @@ found: return found_group; } +struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache + *hint, u64 search_start, + int data, int owner) +{ + + struct btrfs_block_group_cache *ret; + mutex_lock(&root->fs_info->alloc_mutex); + ret = __btrfs_find_block_group(root, hint, search_start, data, owner); + mutex_unlock(&root->fs_info->alloc_mutex); + return ret; +} static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset) { @@ -646,7 +681,7 @@ out: return ret; } -int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, +static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 root_objectid, u64 ref_generation, @@ -696,6 +731,22 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return 0; } +int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset) +{ + int ret; + + mutex_lock(&root->fs_info->alloc_mutex); + ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, + root_objectid, ref_generation, + owner, owner_offset); + mutex_unlock(&root->fs_info->alloc_mutex); + return ret; +} + int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -760,6 +811,10 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, struct btrfs_extent_ref *ref_item; int level = -1; + /* FIXME, needs locking */ + BUG(); + + mutex_lock(&root->fs_info->alloc_mutex); path = btrfs_alloc_path(); again: if (level == -1) @@ -854,33 +909,9 @@ again: out: btrfs_free_path(path); + mutex_unlock(&root->fs_info->alloc_mutex); return total_count; } -int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 owner_objectid) -{ - u64 generation; - u64 key_objectid; - u64 level; - u32 nritems; - struct btrfs_disk_key disk_key; - - level = btrfs_header_level(root->node); - generation = trans->transid; - nritems = btrfs_header_nritems(root->node); - if (nritems > 0) { - if (level == 0) - btrfs_item_key(root->node, &disk_key, 0); - else - btrfs_node_key(root->node, &disk_key, 0); - key_objectid = btrfs_disk_key_objectid(&disk_key); - } else { - key_objectid = 0; - } - return btrfs_inc_extent_ref(trans, root, root->node->start, - root->node->len, owner_objectid, - generation, level, key_objectid); -} int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) @@ -897,6 +928,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (!root->ref_cows) return 0; + mutex_lock(&root->fs_info->alloc_mutex); level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); for (i = 0; i < nritems; i++) { @@ -913,7 +945,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); if (disk_bytenr == 0) continue; - ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, + ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(buf, fi), root->root_key.objectid, trans->transid, key.objectid, key.offset); @@ -924,7 +956,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } else { bytenr = btrfs_node_blockptr(buf, i); btrfs_node_key_to_cpu(buf, &key, i); - ret = btrfs_inc_extent_ref(trans, root, bytenr, + ret = __btrfs_inc_extent_ref(trans, root, bytenr, btrfs_level_size(root, level - 1), root->root_key.objectid, trans->transid, @@ -935,6 +967,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } } } + mutex_unlock(&root->fs_info->alloc_mutex); return 0; fail: WARN_ON(1); @@ -965,6 +998,7 @@ fail: } } #endif + mutex_unlock(&root->fs_info->alloc_mutex); return ret; } @@ -1019,6 +1053,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; + mutex_lock(&root->fs_info->alloc_mutex); while(1) { ret = find_first_extent_bit(block_group_cache, last, &start, &end, BLOCK_GROUP_DIRTY); @@ -1045,6 +1080,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, BLOCK_GROUP_DIRTY, GFP_NOFS); } btrfs_free_path(path); + mutex_unlock(&root->fs_info->alloc_mutex); return werr; } @@ -1162,26 +1198,28 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, space_info->force_alloc = 0; } if (space_info->full) - return 0; + goto out; thresh = div_factor(space_info->total_bytes, 6); if (!force && (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < thresh) - return 0; + goto out; + mutex_lock(&extent_root->fs_info->chunk_mutex); ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags); if (ret == -ENOSPC) { printk("space info full %Lu\n", flags); space_info->full = 1; - return 0; + goto out; } BUG_ON(ret); ret = btrfs_make_block_group(trans, extent_root, 0, flags, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); - + mutex_unlock(&extent_root->fs_info->chunk_mutex); +out: return 0; } @@ -1318,6 +1356,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct extent_io_tree *free_space_cache; free_space_cache = &root->fs_info->free_space_cache; + mutex_lock(&root->fs_info->alloc_mutex); while(1) { ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY); @@ -1327,6 +1366,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, clear_extent_dirty(unpin, start, end, GFP_NOFS); set_extent_dirty(free_space_cache, start, end, GFP_NOFS); } + mutex_unlock(&root->fs_info->alloc_mutex); return 0; } @@ -1363,18 +1403,24 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, GFP_NOFS); eb = read_tree_block(extent_root, ins.objectid, ins.offset, trans->transid); + btrfs_tree_lock(eb); level = btrfs_header_level(eb); if (level == 0) { btrfs_item_key(eb, &first, 0); } else { btrfs_node_key(eb, &first, 0); } + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + /* + * the first key is just a hint, so the race we've created + * against reading it is fine + */ err = btrfs_insert_extent_backref(trans, extent_root, path, start, extent_root->root_key.objectid, 0, level, btrfs_disk_key_objectid(&first)); BUG_ON(err); - free_extent_buffer(eb); } btrfs_free_path(path); return 0; @@ -1384,12 +1430,14 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, int pending) { int err = 0; - struct extent_buffer *buf; if (!pending) { +#if 0 + struct extent_buffer *buf; buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { - if (btrfs_buffer_uptodate(buf, 0)) { + if (!btrfs_try_tree_lock(buf) && + btrfs_buffer_uptodate(buf, 0)) { u64 transid = root->fs_info->running_transaction->transid; u64 header_transid = @@ -1398,12 +1446,15 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { clean_tree_block(NULL, root, buf); + btrfs_tree_unlock(buf); free_extent_buffer(buf); return 1; } + btrfs_tree_unlock(buf); } free_extent_buffer(buf); } +#endif update_pinned_extents(root, bytenr, num_bytes, 1); } else { set_extent_bits(&root->fs_info->pending_del, @@ -1586,10 +1637,11 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct /* * remove an extent from the root, returns 0 on success */ -int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, - u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, int pin) +static int __btrfs_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u64 root_objectid, + u64 ref_generation, u64 owner_objectid, + u64 owner_offset, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; int pending_ret; @@ -1610,6 +1662,22 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root return ret ? ret : pending_ret; } +int btrfs_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u64 root_objectid, + u64 ref_generation, u64 owner_objectid, + u64 owner_offset, int pin) +{ + int ret; + + maybe_lock_mutex(root); + ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, + root_objectid, ref_generation, + owner_objectid, owner_offset, pin); + maybe_unlock_mutex(root); + return ret; +} + static u64 stripe_align(struct btrfs_root *root, u64 val) { u64 mask = ((u64)root->stripesize - 1); @@ -1679,12 +1747,12 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, block_group = btrfs_lookup_first_block_group(info, hint_byte); if (!block_group) hint_byte = search_start; - block_group = btrfs_find_block_group(root, block_group, + block_group = __btrfs_find_block_group(root, block_group, hint_byte, data, 1); if (last_ptr && *last_ptr == 0 && block_group) hint_byte = block_group->key.objectid; } else { - block_group = btrfs_find_block_group(root, + block_group = __btrfs_find_block_group(root, trans->block_group, search_start, data, 1); } @@ -1806,7 +1874,7 @@ enospc: } block_group = btrfs_lookup_first_block_group(info, search_start); cond_resched(); - block_group = btrfs_find_block_group(root, block_group, + block_group = __btrfs_find_block_group(root, block_group, search_start, data, 0); goto check_failed; @@ -1843,6 +1911,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key keys[2]; + maybe_lock_mutex(root); + if (data) { alloc_profile = info->avail_data_alloc_bits & info->data_alloc_profile; @@ -1892,9 +1962,10 @@ again: if (ret) { printk("allocation failed flags %Lu\n", data); } - BUG_ON(ret); - if (ret) - return ret; + if (ret) { + BUG(); + goto out; + } /* block accounting for super block */ super_used = btrfs_super_bytes_used(&info->super_copy); @@ -1953,11 +2024,11 @@ again: finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); - if (ret) { - return ret; - } + if (ret) + goto out; if (pending_ret) { - return pending_ret; + ret = pending_ret; + goto out; } update_block: @@ -1967,36 +2038,15 @@ update_block: ins->objectid, ins->offset); BUG(); } - return 0; +out: + maybe_unlock_mutex(root); + return ret; } - /* * helper function to allocate a block for a given tree * returns the tree buffer or NULL. */ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u32 blocksize, - u64 root_objectid, u64 hint, - u64 empty_size) -{ - u64 ref_generation; - - if (root->ref_cows) - ref_generation = trans->transid; - else - ref_generation = 0; - - - return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid, - ref_generation, 0, 0, hint, empty_size); -} - -/* - * helper function to allocate a block for a given tree - * returns the tree buffer or NULL. - */ -struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize, u64 root_objectid, @@ -2026,6 +2076,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); } btrfs_set_header_generation(buf, trans->transid); + btrfs_tree_lock(buf); clean_tree_block(trans, root, buf); btrfs_set_buffer_uptodate(buf); @@ -2076,7 +2127,7 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); if (disk_bytenr == 0) continue; - ret = btrfs_free_extent(trans, root, disk_bytenr, + ret = __btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), leaf_owner, leaf_generation, key.objectid, key.offset, 0); @@ -2151,6 +2202,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, int ret; u32 refs; + mutex_lock(&root->fs_info->alloc_mutex); + WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = lookup_extent_ref(trans, root, @@ -2182,6 +2235,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = btrfs_level_size(root, *level - 1); + ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) { @@ -2189,7 +2243,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); path->slots[*level]++; - ret = btrfs_free_extent(trans, root, bytenr, + ret = __btrfs_free_extent(trans, root, bytenr, blocksize, root_owner, root_gen, 0, 0, 1); BUG_ON(ret); @@ -2201,9 +2255,11 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, reada_walk_down(root, cur, path->slots[*level]); mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); next = read_tree_block(root, bytenr, blocksize, ptr_gen); mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); /* we've dropped the lock, double check */ ret = lookup_extent_ref(trans, root, bytenr, @@ -2216,7 +2272,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, path->slots[*level]++; free_extent_buffer(next); - ret = btrfs_free_extent(trans, root, bytenr, + ret = __btrfs_free_extent(trans, root, bytenr, blocksize, root_owner, root_gen, 0, 0, 1); @@ -2244,13 +2300,14 @@ out: } root_gen = btrfs_header_generation(parent); - ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, + ret = __btrfs_free_extent(trans, root, path->nodes[*level]->start, path->nodes[*level]->len, root_owner, root_gen, 0, 0, 1); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; BUG_ON(ret); + mutex_unlock(&root->fs_info->alloc_mutex); return 0; } @@ -2350,6 +2407,12 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_node_key(node, &found_key, path->slots[level]); WARN_ON(memcmp(&found_key, &root_item->drop_progress, sizeof(found_key))); + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { + if (path->nodes[i] && path->locks[i]) { + path->locks[i] = 0; + btrfs_tree_unlock(path->nodes[i]); + } + } } while(1) { wret = walk_down_tree(trans, root, path, &level); @@ -2383,6 +2446,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) u64 end; u64 ptr; int ret; + + mutex_lock(&info->alloc_mutex); while(1) { ret = find_first_extent_bit(&info->block_group_cache, 0, &start, &end, (unsigned int)-1); @@ -2402,6 +2467,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) clear_extent_dirty(&info->free_space_cache, start, end, GFP_NOFS); } + mutex_unlock(&info->alloc_mutex); return 0; } @@ -2678,6 +2744,7 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, eb = read_tree_block(found_root, extent_key->objectid, extent_key->offset, 0); + btrfs_tree_lock(eb); level = btrfs_header_level(eb); if (level == 0) @@ -2685,6 +2752,7 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, else btrfs_node_key_to_cpu(eb, &found_key, 0); + btrfs_tree_unlock(eb); free_extent_buffer(eb); ret = find_root_for_ref(extent_root, path, &found_key, @@ -2888,6 +2956,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) int ret; int progress; + mutex_lock(&root->fs_info->alloc_mutex); shrink_block_group = btrfs_lookup_block_group(root->fs_info, shrink_start); BUG_ON(!shrink_block_group); @@ -3044,20 +3113,22 @@ next: (unsigned int)-1, GFP_NOFS); out: btrfs_free_path(path); + mutex_unlock(&root->fs_info->alloc_mutex); return ret; } int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key) { - int ret; + int ret = 0; struct btrfs_key found_key; struct extent_buffer *leaf; int slot; ret = btrfs_search_slot(NULL, root, key, path, 0, 0); if (ret < 0) - return ret; + goto out; + while(1) { slot = path->slots[0]; leaf = path->nodes[0]; @@ -3066,18 +3137,20 @@ int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, if (ret == 0) continue; if (ret < 0) - goto error; + goto out; break; } btrfs_item_key_to_cpu(leaf, &found_key, slot); if (found_key.objectid >= key->objectid && - found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) - return 0; + found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { + ret = 0; + goto out; + } path->slots[0]++; } ret = -ENOENT; -error: +out: return ret; } @@ -3103,6 +3176,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) if (!path) return -ENOMEM; + mutex_lock(&root->fs_info->alloc_mutex); while(1) { ret = find_first_block_group(root, path, &key); if (ret > 0) { @@ -3158,6 +3232,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) ret = 0; error: btrfs_free_path(path); + mutex_unlock(&root->fs_info->alloc_mutex); return ret; } @@ -3205,5 +3280,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ret = del_pending_extents(trans, extent_root); BUG_ON(ret); set_avail_alloc_bits(extent_root->fs_info, type); + return 0; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 17c508a941eb..bd15cdcaba95 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2889,7 +2889,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); - lock_page(page); if (i == 0) set_page_extent_head(page, eb->len); else @@ -2907,7 +2906,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, end = start + PAGE_CACHE_SIZE - 1; if (test_range_bit(tree, start, end, EXTENT_DIRTY, 0)) { - unlock_page(page); continue; } } @@ -2919,7 +2917,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, PAGECACHE_TAG_DIRTY); } read_unlock_irq(&page->mapping->tree_lock); - unlock_page(page); } return 0; } @@ -2948,17 +2945,12 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, * on us if the page isn't already dirty. */ if (i == 0) { - lock_page(page); set_page_extent_head(page, eb->len); } else if (PagePrivate(page) && page->private != EXTENT_PAGE_PRIVATE) { - lock_page(page); set_page_extent_mapped(page); - unlock_page(page); } __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); - if (i == 0) - unlock_page(page); } return set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0c79346fd2c9..61bd8953a683 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -115,6 +115,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); btrfs_set_trans_block_group(trans, inode); + mutex_unlock(&root->fs_info->fs_mutex); num_bytes = (end - start + blocksize) & ~(blocksize - 1); num_bytes = max(blocksize, num_bytes); @@ -159,6 +160,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) btrfs_add_ordered_inode(inode); btrfs_update_inode(trans, root, inode); out: + mutex_lock(&root->fs_info->fs_mutex); btrfs_end_transaction(trans, root); return ret; } @@ -349,10 +351,12 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + mutex_unlock(&root->fs_info->fs_mutex); btrfs_set_trans_block_group(trans, inode); btrfs_csum_file_blocks(trans, root, inode, bio, sums); + mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -807,6 +811,7 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, goto err; } ret = btrfs_delete_one_dir_name(trans, root, path, di); + btrfs_release_path(root, path); dentry->d_inode->i_ctime = dir->i_ctime; ret = btrfs_del_inode_ref(trans, root, name, name_len, @@ -881,8 +886,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; unsigned long nr = 0; - if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) + if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) { return -ENOTEMPTY; + } mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 1); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6fb455802759..3fbf74e93dba 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -43,6 +43,7 @@ #include "ioctl.h" #include "print-tree.h" #include "volumes.h" +#include "locking.h" @@ -75,9 +76,9 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, if (ret) goto fail; - leaf = __btrfs_alloc_free_block(trans, root, root->leafsize, - objectid, trans->transid, 0, 0, - 0, 0); + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + objectid, trans->transid, 0, 0, + 0, 0); if (IS_ERR(leaf)) return PTR_ERR(leaf); @@ -108,6 +109,7 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); root_item.drop_level = 0; + btrfs_tree_unlock(leaf); free_extent_buffer(leaf); leaf = NULL; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c new file mode 100644 index 000000000000..80813a307b4b --- /dev/null +++ b/fs/btrfs/locking.c @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#include +#include +#include +#include +#include +#include +#include "ctree.h" +#include "extent_io.h" +#include "locking.h" + +int btrfs_tree_lock(struct extent_buffer *eb) +{ + lock_page(eb->first_page); + return 0; +} + +int btrfs_try_tree_lock(struct extent_buffer *eb) +{ + return TestSetPageLocked(eb->first_page); +} + +int btrfs_tree_unlock(struct extent_buffer *eb) +{ + WARN_ON(!PageLocked(eb->first_page)); + unlock_page(eb->first_page); + return 0; +} + +int btrfs_tree_locked(struct extent_buffer *eb) +{ + return PageLocked(eb->first_page); +} + diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h new file mode 100644 index 000000000000..2dab96d8280e --- /dev/null +++ b/fs/btrfs/locking.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_LOCKING_ +#define __BTRFS_LOCKING_ + +int btrfs_tree_lock(struct extent_buffer *eb); +int btrfs_tree_unlock(struct extent_buffer *eb); +int btrfs_tree_locked(struct extent_buffer *eb); +int btrfs_try_tree_lock(struct extent_buffer *eb); +#endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f04684f7fea3..1ed433a71493 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -23,6 +23,7 @@ #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "locking.h" static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; @@ -96,8 +97,7 @@ static noinline int record_root_in_trans(struct btrfs_root *root) radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_DEFRAG_TAG); - root->commit_root = root->node; - extent_buffer_get(root->node); + root->commit_root = btrfs_root_node(root); } else { WARN_ON(1); } @@ -559,6 +559,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root = pending->root; struct extent_buffer *tmp; + struct extent_buffer *old; int ret; int namelen; u64 objectid; @@ -578,16 +579,18 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, key.offset = 1; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - extent_buffer_get(root->node); - btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - free_extent_buffer(tmp); + old = btrfs_lock_root_node(root); + btrfs_cow_block(trans, root, old, NULL, 0, &old); - btrfs_copy_root(trans, root, root->node, &tmp, objectid); + btrfs_copy_root(trans, root, old, &tmp, objectid); + btrfs_tree_unlock(old); + free_extent_buffer(old); btrfs_set_root_bytenr(new_root_item, tmp->start); btrfs_set_root_level(new_root_item, btrfs_header_level(tmp)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, new_root_item); + btrfs_tree_unlock(tmp); free_extent_buffer(tmp); if (ret) goto fail; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 155961c7b4d5..fab851d85383 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -181,6 +181,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (root->fs_info->extent_root == root) is_extent = 1; + goto out; + if (root->ref_cows == 0 && !is_extent) goto out; From a213501153fd66e2359e091b1612841305ba6551 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: [PATCH 0571/2985] Btrfs: Replace the big fs_mutex with a collection of other locks Extent alloctions are still protected by a large alloc_mutex. Objectid allocations are covered by a objectid mutex Other btree operations are protected by a lock on individual btree nodes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 40 ++++++++++++++++++---- fs/btrfs/ctree.h | 5 +-- fs/btrfs/disk-io.c | 15 ++++----- fs/btrfs/extent-tree.c | 18 ++++------ fs/btrfs/file.c | 7 +--- fs/btrfs/inode-map.c | 8 +++++ fs/btrfs/inode.c | 76 ++++-------------------------------------- fs/btrfs/ioctl.c | 24 ++++--------- fs/btrfs/super.c | 2 -- fs/btrfs/transaction.c | 42 +++++++++-------------- fs/btrfs/volumes.c | 19 +++++++---- fs/btrfs/xattr.c | 10 +----- 12 files changed, 101 insertions(+), 165 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 75625c68fdd8..dff4da082d06 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -63,6 +63,9 @@ void btrfs_free_path(struct btrfs_path *p) void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; + int skip = p->skip_locking; + int keep = p->keep_locks; + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) continue; @@ -73,6 +76,8 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) free_extent_buffer(p->nodes[i]); } memset(p, 0, sizeof(*p)); + p->skip_locking = skip; + p->keep_locks = keep; } struct extent_buffer *btrfs_root_node(struct btrfs_root *root) @@ -1202,13 +1207,19 @@ static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) u32 nritems; t = path->nodes[i]; nritems = btrfs_header_nritems(t); - if (path->slots[i] >= nritems - 1) { + if (nritems < 2 || path->slots[i] >= nritems - 2) { +if (path->keep_locks) { +//printk("path %p skip level now %d\n", path, skip_level); +} skip_level = i + 1; continue; } } t = path->nodes[i]; if (i >= lowest_unlock && i > skip_level && path->locks[i]) { +if (path->keep_locks) { +//printk("path %p unlocking level %d slot %d nritems %d skip_level %d\n", path, i, path->slots[i], btrfs_header_nritems(t), skip_level); +} btrfs_tree_unlock(t); path->locks[i] = 0; } @@ -1243,7 +1254,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len); WARN_ON(p->nodes[0] != NULL); - // WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); WARN_ON(root == root->fs_info->extent_root && !mutex_is_locked(&root->fs_info->alloc_mutex)); WARN_ON(root == root->fs_info->chunk_root && @@ -1321,7 +1331,7 @@ again: b = read_node_slot(root, b, slot); if (!p->skip_locking) btrfs_tree_lock(b); - unlock_up(p, level, lowest_unlock); + unlock_up(p, level + 1, lowest_unlock); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -1804,6 +1814,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; + WARN_ON(!btrfs_tree_locked(path->nodes[1])); + right = read_node_slot(root, upper, slot + 1); btrfs_tree_lock(right); free_space = btrfs_leaf_free_space(root, right); @@ -1981,6 +1993,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } + WARN_ON(!btrfs_tree_locked(path->nodes[1])); + left = read_node_slot(root, path->nodes[1], slot - 1); btrfs_tree_lock(left); free_space = btrfs_leaf_free_space(root, left); @@ -2957,15 +2971,16 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); - path->keep_locks = 1; btrfs_release_path(root, path); + path->keep_locks = 1; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); path->keep_locks = 0; if (ret < 0) return ret; - if (path->slots[0] < nritems - 1) { + nritems = btrfs_header_nritems(path->nodes[0]); + if (nritems > 0 && path->slots[0] < nritems - 1) { goto done; } @@ -2992,8 +3007,17 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, c, slot); - if (!path->skip_locking) + if (!path->skip_locking) { + if (!btrfs_tree_locked(c)) { + int i; + WARN_ON(1); +printk("path %p no lock on level %d\n", path, level); +for (i = 0; i < BTRFS_MAX_LEVEL; i++) { +printk("path %p level %d slot %d nritems %d\n", path, i, path->slots[i], btrfs_header_nritems(path->nodes[i])); +} + } btrfs_tree_lock(next); + } break; } path->slots[level] = slot; @@ -3011,8 +3035,10 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (level == 1 && path->locks[1] && path->reada) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); - if (!path->skip_locking) + if (!path->skip_locking) { + WARN_ON(!btrfs_tree_locked(path->nodes[level])); btrfs_tree_lock(next); + } } done: unlock_up(path, 0, 1); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 50891b39f366..692b8ea42de1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -519,9 +519,9 @@ struct btrfs_fs_info { struct backing_dev_info bdi; spinlock_t hash_lock; struct mutex trans_mutex; - struct mutex fs_mutex; struct mutex alloc_mutex; struct mutex chunk_mutex; + struct mutex drop_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; @@ -554,7 +554,7 @@ struct btrfs_fs_info { struct completion kobj_unregister; int do_barriers; int closing; - unsigned long throttles; + atomic_t throttles; u64 total_pinned; struct list_head dirty_cowonly_roots; @@ -594,6 +594,7 @@ struct btrfs_root { struct inode *inode; struct kobject root_kobj; struct completion kobj_unregister; + struct mutex objectid_mutex; u64 objectid; u64 last_trans; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fe40bdd984ff..f638803549e0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -724,6 +724,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, INIT_LIST_HEAD(&root->dirty_list); spin_lock_init(&root->node_lock); + mutex_init(&root->objectid_mutex); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -1146,6 +1147,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); + atomic_set(&fs_info->throttles, 0); fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; @@ -1199,7 +1201,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); mutex_init(&fs_info->trans_mutex); - mutex_init(&fs_info->fs_mutex); + mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->alloc_mutex); mutex_init(&fs_info->chunk_mutex); @@ -1278,8 +1280,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_sb_buffer; } - mutex_lock(&fs_info->fs_mutex); - mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_sys_array(tree_root); mutex_unlock(&fs_info->chunk_mutex); @@ -1342,7 +1342,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; - mutex_unlock(&fs_info->fs_mutex); return tree_root; fail_extent_root: @@ -1350,7 +1349,6 @@ fail_extent_root: fail_tree_root: free_extent_buffer(tree_root->node); fail_sys_array: - mutex_unlock(&fs_info->fs_mutex); fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->workers); @@ -1562,8 +1560,9 @@ int close_ctree(struct btrfs_root *root) struct btrfs_fs_info *fs_info = root->fs_info; fs_info->closing = 1; + smp_mb(); + btrfs_transaction_flush_work(root); - mutex_lock(&fs_info->fs_mutex); btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -1574,7 +1573,6 @@ int close_ctree(struct btrfs_root *root) BUG_ON(ret); write_ctree_super(NULL, root); - mutex_unlock(&fs_info->fs_mutex); btrfs_transaction_flush_work(root); @@ -1679,7 +1677,8 @@ void btrfs_throttle(struct btrfs_root *root) struct backing_dev_info *bdi; bdi = &root->fs_info->bdi; - if (root->fs_info->throttles && bdi_write_congested(bdi)) { + if (atomic_read(&root->fs_info->throttles) && + bdi_write_congested(bdi)) { #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) congestion_wait(WRITE, HZ/20); #else diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7e40c516fe62..890b9e9d8e27 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1577,9 +1577,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root } /* block accounting for super block */ + spin_lock_irq(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); btrfs_set_super_bytes_used(&info->super_copy, super_used - num_bytes); + spin_unlock_irq(&info->delalloc_lock); /* block accounting for root item */ root_used = btrfs_root_used(&root->root_item); @@ -1968,8 +1970,10 @@ again: } /* block accounting for super block */ + spin_lock_irq(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); + spin_unlock_irq(&info->delalloc_lock); /* block accounting for root item */ root_used = btrfs_root_used(&root->root_item); @@ -2172,12 +2176,12 @@ static void noinline reada_walk_down(struct btrfs_root *root, continue; } } - mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); ret = readahead_tree_block(root, bytenr, blocksize, btrfs_node_ptr_generation(node, i)); last = bytenr + blocksize; cond_resched(); - mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); if (ret) break; } @@ -2254,11 +2258,9 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, free_extent_buffer(next); reada_walk_down(root, cur, path->slots[*level]); - mutex_unlock(&root->fs_info->fs_mutex); mutex_unlock(&root->fs_info->alloc_mutex); next = read_tree_block(root, bytenr, blocksize, ptr_gen); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->alloc_mutex); /* we've dropped the lock, double check */ @@ -2381,6 +2383,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root int orig_level; struct btrfs_root_item *root_item = &root->root_item; + WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); path = btrfs_alloc_path(); BUG_ON(!path); @@ -2710,7 +2713,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, *last_file_root == ref_root) goto out; - mutex_unlock(&extent_root->fs_info->fs_mutex); inode = btrfs_iget_locked(extent_root->fs_info->sb, ref_objectid, found_root); if (inode->i_state & I_NEW) { @@ -2727,7 +2729,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, * the latest version of the tree root */ if (is_bad_inode(inode)) { - mutex_lock(&extent_root->fs_info->fs_mutex); goto out; } *last_file_objectid = inode->i_ino; @@ -2736,7 +2737,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, relocate_inode_pages(inode, ref_offset, extent_key->offset); iput(inode); - mutex_lock(&extent_root->fs_info->fs_mutex); } else { struct btrfs_trans_handle *trans; struct extent_buffer *eb; @@ -3033,9 +3033,7 @@ next: if (progress && need_resched()) { memcpy(&key, &found_key, sizeof(key)); - mutex_unlock(&root->fs_info->fs_mutex); cond_resched(); - mutex_lock(&root->fs_info->fs_mutex); btrfs_release_path(root, path); btrfs_search_slot(NULL, root, &key, path, 0, 0); progress = 0; @@ -3068,9 +3066,7 @@ next: trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_clean_old_snapshots(tree_root); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 73c6d085bd90..18bbe108a0e6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -252,7 +252,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { err = -ENOMEM; @@ -341,7 +340,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, failed: err = btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; } @@ -905,9 +903,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(pages)); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, write_bytes, 0); - mutex_unlock(&root->fs_info->fs_mutex); if (ret) goto out; @@ -998,9 +994,9 @@ static int btrfs_sync_file(struct file *file, * check the transaction that last modified this inode * and see if its already been committed */ - mutex_lock(&root->fs_info->fs_mutex); if (!BTRFS_I(inode)->last_trans) goto out; + mutex_lock(&root->fs_info->trans_mutex); if (BTRFS_I(inode)->last_trans <= root->fs_info->last_trans_committed) { @@ -1023,7 +1019,6 @@ static int btrfs_sync_file(struct file *file, } ret = btrfs_commit_transaction(trans, root); out: - mutex_unlock(&root->fs_info->fs_mutex); return ret > 0 ? EIO : ret; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index a0925eabdaa2..298346ae1481 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -69,6 +69,12 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_key search_key; u64 search_start = dirid; + mutex_lock(&root->objectid_mutex); + if (root->last_inode_alloc) { + *objectid = ++root->last_inode_alloc; + mutex_unlock(&root->objectid_mutex); + return 0; + } path = btrfs_alloc_path(); BUG_ON(!path); search_start = root->last_inode_alloc; @@ -124,9 +130,11 @@ found: btrfs_release_path(root, path); btrfs_free_path(path); BUG_ON(*objectid < search_start); + mutex_unlock(&root->objectid_mutex); return 0; error: btrfs_release_path(root, path); btrfs_free_path(path); + mutex_unlock(&root->objectid_mutex); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 61bd8953a683..b2251e27ac84 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -79,12 +79,15 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int for_del) { - u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy); - u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy); + u64 total; + u64 used; u64 thresh; unsigned long flags; int ret = 0; + spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); + total = btrfs_super_total_bytes(&root->fs_info->super_copy); + used = btrfs_super_bytes_used(&root->fs_info->super_copy); if (for_del) thresh = total * 90; else @@ -92,7 +95,6 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, do_div(thresh, 100); - spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); if (used + root->fs_info->delalloc_bytes + num_required > thresh) ret = -ENOSPC; spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); @@ -115,7 +117,6 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); btrfs_set_trans_block_group(trans, inode); - mutex_unlock(&root->fs_info->fs_mutex); num_bytes = (end - start + blocksize) & ~(blocksize - 1); num_bytes = max(blocksize, num_bytes); @@ -160,7 +161,6 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) btrfs_add_ordered_inode(inode); btrfs_update_inode(trans, root, inode); out: - mutex_lock(&root->fs_info->fs_mutex); btrfs_end_transaction(trans, root); return ret; } @@ -269,14 +269,13 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; - mutex_lock(&root->fs_info->fs_mutex); + if (btrfs_test_opt(root, NODATACOW) || btrfs_test_flag(inode, NODATACOW)) ret = run_delalloc_nocow(inode, start, end); else ret = cow_file_range(inode, start, end); - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -349,17 +348,13 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, ret = btrfs_csum_one_bio(root, bio, &sums); BUG_ON(ret); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_set_trans_block_group(trans, inode); btrfs_csum_file_blocks(trans, root, inode, bio, sums); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); kfree(sums); @@ -404,7 +399,6 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) btrfs_test_flag(inode, NODATASUM)) return 0; - mutex_lock(&root->fs_info->fs_mutex); path = btrfs_alloc_path(); item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); if (IS_ERR(item)) { @@ -422,7 +416,6 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) out: if (path) btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -616,7 +609,6 @@ void btrfs_read_locked_inode(struct inode *inode) path = btrfs_alloc_path(); BUG_ON(!path); - mutex_lock(&root->fs_info->fs_mutex); memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); @@ -662,8 +654,6 @@ void btrfs_read_locked_inode(struct inode *inode) btrfs_free_path(path); inode_item = NULL; - mutex_unlock(&root->fs_info->fs_mutex); - switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; @@ -691,9 +681,7 @@ void btrfs_read_locked_inode(struct inode *inode) return; make_bad: - btrfs_release_path(root, path); btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); make_bad_inode(inode); } @@ -758,7 +746,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_last_trans(trans, inode); ret = 0; failed: - btrfs_release_path(root, path); btrfs_free_path(path); return ret; } @@ -849,7 +836,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) unsigned long nr = 0; root = BTRFS_I(dir)->root; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 1); if (ret) @@ -871,7 +857,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); return ret; @@ -890,7 +875,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) return -ENOTEMPTY; } - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 1); if (ret) goto fail; @@ -907,7 +891,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) nr = trans->blocks_used; ret = btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); @@ -1129,7 +1112,6 @@ error: ret = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); } - btrfs_release_path(root, path); btrfs_free_path(path); inode->i_sb->s_dirt = 1; return ret; @@ -1234,9 +1216,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_size <= hole_start) goto out; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); - mutex_unlock(&root->fs_info->fs_mutex); if (err) goto fail; @@ -1245,7 +1225,6 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); hole_size = block_end - hole_start; - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); err = btrfs_drop_extents(trans, root, inode, @@ -1262,7 +1241,6 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_check_file(root, inode); } btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); if (err) return err; @@ -1286,7 +1264,6 @@ void btrfs_delete_inode(struct inode *inode) } inode->i_size = 0; - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -1298,7 +1275,6 @@ void btrfs_delete_inode(struct inode *inode) clear_inode(inode); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); return; @@ -1306,7 +1282,6 @@ void btrfs_delete_inode(struct inode *inode) no_delete_lock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); no_delete: @@ -1402,7 +1377,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, path = btrfs_alloc_path(); BUG_ON(!path); - mutex_lock(&root->fs_info->fs_mutex); *sub_root = btrfs_read_fs_root(root->fs_info, location, dentry->d_name.name, @@ -1416,7 +1390,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, location->offset = 0; btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); return 0; } @@ -1482,9 +1455,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &location); - mutex_unlock(&root->fs_info->fs_mutex); if (ret < 0) return ERR_PTR(ret); @@ -1559,7 +1530,6 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) filp->f_pos = 1; } - mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; path = btrfs_alloc_path(); path->reada = 2; @@ -1668,9 +1638,7 @@ read_dir_items: nopos: ret = 0; err: - btrfs_release_path(root, path); btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -1681,11 +1649,9 @@ int btrfs_write_inode(struct inode *inode, int wait) int ret = 0; if (wait) { - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); } return ret; } @@ -1701,12 +1667,10 @@ void btrfs_dirty_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); } static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, @@ -1874,7 +1838,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; @@ -1912,8 +1875,6 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); - if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -1934,7 +1895,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, unsigned long nr = 0; u64 objectid; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; @@ -1980,8 +1940,6 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); - if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -2009,7 +1967,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, #else inc_nlink(inode); #endif - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; @@ -2032,8 +1989,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, nr = trans->blocks_used; btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); - if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -2053,7 +2008,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) u64 objectid = 0; unsigned long nr = 1; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto out_unlock; @@ -2106,7 +2060,6 @@ out_fail: btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); if (drop_on_err) iput(inode); btrfs_btree_balance_dirty(root, nr); @@ -2199,7 +2152,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, path = btrfs_alloc_path(); BUG_ON(!path); - mutex_lock(&root->fs_info->fs_mutex); again: spin_lock(&em_tree->lock); @@ -2402,7 +2354,6 @@ out: if (!err) err = ret; } - mutex_unlock(&root->fs_info->fs_mutex); if (err) { free_extent_map(em); WARN_ON(1); @@ -2584,9 +2535,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) int ret; u64 page_start; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); - mutex_unlock(&root->fs_info->fs_mutex); if (ret) goto out; @@ -2631,7 +2580,6 @@ static void btrfs_truncate(struct inode *inode) btrfs_truncate_page(inode->i_mapping, inode->i_size); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -2643,7 +2591,6 @@ static void btrfs_truncate(struct inode *inode) ret = btrfs_end_transaction(trans, root); BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); } @@ -2827,7 +2774,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode *new_inode = new_dentry->d_inode; struct inode *old_inode = old_dentry->d_inode; struct timespec ctime = CURRENT_TIME; - struct btrfs_path *path; int ret; if (S_ISDIR(old_inode->i_mode) && new_inode && @@ -2835,7 +2781,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, return -ENOTEMPTY; } - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 0); if (ret) goto out_unlock; @@ -2843,11 +2788,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, new_dir); - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out_fail; - } old_dentry->d_inode->i_nlink++; old_dir->i_ctime = old_dir->i_mtime = ctime; @@ -2869,10 +2809,8 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, goto out_fail; out_fail: - btrfs_free_path(path); btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -2898,7 +2836,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto out_fail; @@ -2979,7 +2916,6 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); out_fail: - mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { inode_dec_link_count(inode); iput(inode); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3fbf74e93dba..6002eb64daf9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -63,7 +63,6 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; unsigned long nr = 1; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 0); if (ret) goto fail_commit; @@ -164,7 +163,6 @@ fail: if (err && !ret) ret = err; fail_commit: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); return ret; @@ -181,7 +179,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (!root->ref_cows) return -EINVAL; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 0); if (ret) goto fail_unlock; @@ -208,7 +205,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) err = btrfs_commit_transaction(trans, root); fail_unlock: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); return ret; @@ -228,9 +224,7 @@ int btrfs_defrag_file(struct file *file) unsigned long i; int ret; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, inode->i_size, 0); - mutex_unlock(&root->fs_info->fs_mutex); if (ret) return -ENOSPC; @@ -315,7 +309,8 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) goto out; } - mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); sizestr = vol_args->name; devstr = strchr(sizestr, ':'); if (devstr) { @@ -385,7 +380,8 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) } out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); out: kfree(vol_args); return ret; @@ -428,11 +424,9 @@ static noinline int btrfs_ioctl_snap_create(struct btrfs_root *root, } root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, - mutex_lock(&root->fs_info->fs_mutex); di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, root_dirid, vol_args->name, namelen, 0); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_free_path(path); if (di && !IS_ERR(di)) { @@ -445,10 +439,12 @@ static noinline int btrfs_ioctl_snap_create(struct btrfs_root *root, goto out; } + mutex_lock(&root->fs_info->drop_mutex); if (root == root->fs_info->tree_root) ret = create_subvol(root, vol_args->name, namelen); else ret = create_snapshot(root, vol_args->name, namelen); + mutex_unlock(&root->fs_info->drop_mutex); out: kfree(vol_args); return ret; @@ -461,10 +457,8 @@ static int btrfs_ioctl_defrag(struct file *file) switch (inode->i_mode & S_IFMT) { case S_IFDIR: - mutex_lock(&root->fs_info->fs_mutex); btrfs_defrag_root(root, 0); btrfs_defrag_root(root->fs_info->extent_root, 0); - mutex_unlock(&root->fs_info->fs_mutex); break; case S_IFREG: btrfs_defrag_file(file); @@ -588,7 +582,6 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); } - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 0); path = btrfs_alloc_path(); if (!path) { @@ -685,7 +678,6 @@ out: unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); out_unlock: mutex_unlock(&src->i_mutex); @@ -711,7 +703,6 @@ long btrfs_ioctl_trans_start(struct file *file) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - mutex_lock(&root->fs_info->fs_mutex); if (file->private_data) { ret = -EINPROGRESS; goto out; @@ -723,7 +714,6 @@ long btrfs_ioctl_trans_start(struct file *file) ret = -ENOMEM; /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ out: - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -740,7 +730,6 @@ long btrfs_ioctl_trans_end(struct file *file) struct btrfs_trans_handle *trans; int ret = 0; - mutex_lock(&root->fs_info->fs_mutex); trans = file->private_data; if (!trans) { ret = -EINVAL; @@ -749,7 +738,6 @@ long btrfs_ioctl_trans_end(struct file *file) btrfs_end_transaction(trans, root); file->private_data = 0; out: - mutex_unlock(&root->fs_info->fs_mutex); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 196d0e280b19..b61ded7a20c9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -366,12 +366,10 @@ int btrfs_sync_fs(struct super_block *sb, int wait) return 0; } btrfs_clean_old_snapshots(root); - mutex_lock(&root->fs_info->fs_mutex); btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; - mutex_unlock(&root->fs_info->fs_mutex); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1ed433a71493..5a1ee0665ae8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -370,6 +370,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) struct btrfs_trans_handle *trans; unsigned long nr; + smp_mb(); if (root->defrag_running) return 0; trans = btrfs_start_transaction(root, 1); @@ -378,16 +379,15 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) ret = btrfs_defrag_leaves(trans, root, cacheonly); nr = trans->blocks_used; btrfs_end_transaction(trans, root); - mutex_unlock(&info->fs_mutex); btrfs_btree_balance_dirty(info->tree_root, nr); cond_resched(); - mutex_lock(&info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (ret != -EAGAIN) break; } root->defrag_running = 0; + smp_mb(); radix_tree_tag_clear(&info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_DEFRAG_TAG); @@ -435,14 +435,14 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, while(!list_empty(list)) { struct btrfs_root *root; - mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); num_bytes = btrfs_root_used(&dirty->root->root_item); root = dirty->latest_root; - root->fs_info->throttles++; + atomic_inc(&root->fs_info->throttles); + mutex_lock(&root->fs_info->drop_mutex); while(1) { trans = btrfs_start_transaction(tree_root, 1); ret = btrfs_drop_snapshot(trans, dirty->root); @@ -459,14 +459,16 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, nr = trans->blocks_used; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); - mutex_unlock(&tree_root->fs_info->fs_mutex); + + mutex_unlock(&root->fs_info->drop_mutex); btrfs_btree_balance_dirty(tree_root, nr); cond_resched(); - mutex_lock(&tree_root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->drop_mutex); } BUG_ON(ret); - root->fs_info->throttles--; + atomic_dec(&root->fs_info->throttles); + mutex_lock(&root->fs_info->alloc_mutex); num_bytes -= btrfs_root_used(&dirty->root->root_item); bytes_used = btrfs_root_used(&root->root_item); if (num_bytes) { @@ -474,11 +476,15 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, btrfs_set_root_used(&root->root_item, bytes_used - num_bytes); } + mutex_unlock(&root->fs_info->alloc_mutex); + ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); if (ret) { BUG(); break; } + mutex_unlock(&root->fs_info->drop_mutex); + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); @@ -486,7 +492,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); - mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root, nr); cond_resched(); @@ -503,7 +508,7 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, u64 objectid = 0; int ret; - root->fs_info->throttles++; + atomic_inc(&root->fs_info->throttles); while(1) { ret = btrfs_find_first_ordered_inode( &cur_trans->ordered_inode_tree, @@ -512,7 +517,6 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, break; mutex_unlock(&root->fs_info->trans_mutex); - mutex_unlock(&root->fs_info->fs_mutex); if (S_ISREG(inode->i_mode)) { atomic_inc(&BTRFS_I(inode)->ordered_writeback); @@ -521,7 +525,6 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, } iput(inode); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); } while(1) { @@ -533,7 +536,6 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, if (!ret) break; mutex_unlock(&root->fs_info->trans_mutex); - mutex_unlock(&root->fs_info->fs_mutex); if (S_ISREG(inode->i_mode)) { atomic_inc(&BTRFS_I(inode)->ordered_writeback); @@ -543,10 +545,9 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, atomic_dec(&inode->i_count); iput(inode); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); } - root->fs_info->throttles--; + atomic_dec(&root->fs_info->throttles); return 0; } @@ -661,7 +662,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); ret = wait_for_commit(root, cur_trans); BUG_ON(ret); @@ -669,7 +669,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); - mutex_lock(&root->fs_info->fs_mutex); return 0; } @@ -687,12 +686,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction, list); if (!prev_trans->commit_done) { prev_trans->use_count++; - mutex_unlock(&root->fs_info->fs_mutex); mutex_unlock(&root->fs_info->trans_mutex); wait_for_commit(root, prev_trans); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); put_transaction(prev_trans); } @@ -709,12 +706,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, else timeout = 1; - mutex_unlock(&root->fs_info->fs_mutex); mutex_unlock(&root->fs_info->trans_mutex); schedule_timeout(timeout); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); ret = btrfs_write_ordered_inodes(trans, root); @@ -755,12 +750,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_copy_pinned(root, pinned_copy); mutex_unlock(&root->fs_info->trans_mutex); - mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root); - mutex_lock(&root->fs_info->fs_mutex); btrfs_finish_extent_commit(trans, root, pinned_copy); mutex_lock(&root->fs_info->trans_mutex); @@ -781,9 +774,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->fs_info->closing) { - mutex_unlock(&root->fs_info->fs_mutex); drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); - mutex_lock(&root->fs_info->fs_mutex); } return ret; } @@ -823,7 +814,7 @@ void btrfs_transaction_cleaner(struct work_struct *work) unsigned long delay = HZ * 30; int ret; - mutex_lock(&root->fs_info->fs_mutex); + smp_mb(); if (root->fs_info->closing) goto out; @@ -844,7 +835,6 @@ void btrfs_transaction_cleaner(struct work_struct *work) trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); out: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_clean_old_snapshots(root); btrfs_transaction_queue_work(root, delay); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ba3968571024..869864ddcc29 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -866,7 +866,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) u64 devid; int ret = 0; - mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); mutex_lock(&uuid_mutex); all_avail = root->fs_info->avail_data_alloc_bits | @@ -984,7 +985,8 @@ error_close: close_bdev_excl(bdev); out: mutex_unlock(&uuid_mutex); - mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); return ret; } @@ -1003,7 +1005,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (!bdev) { return -EIO; } - mutex_lock(&root->fs_info->fs_mutex); + + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); + trans = btrfs_start_transaction(root, 1); devices = &root->fs_info->fs_devices->devices; list_for_each(cur, devices) { @@ -1057,7 +1062,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->open_devices++; out: btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); + return ret; out_close_bdev: @@ -1297,9 +1304,10 @@ int btrfs_balance(struct btrfs_root *dev_root) struct btrfs_key found_key; + BUG(); /* FIXME, needs locking */ + dev_root = dev_root->fs_info->dev_root; - mutex_lock(&dev_root->fs_info->fs_mutex); /* step one make some room on all the devices */ list_for_each(cur, devices) { device = list_entry(cur, struct btrfs_device, dev_list); @@ -1368,7 +1376,6 @@ int btrfs_balance(struct btrfs_root *dev_root) ret = 0; error: btrfs_free_path(path); - mutex_unlock(&dev_root->fs_info->fs_mutex); return ret; } diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index f63488dc2f16..3e9eb91c3c89 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -153,7 +153,6 @@ ssize_t btrfs_xattr_get(struct inode *inode, int name_index, return -ENOMEM; } - mutex_lock(&root->fs_info->fs_mutex); /* lookup the xattr by name */ di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, strlen(name), 0); @@ -181,7 +180,6 @@ ssize_t btrfs_xattr_get(struct inode *inode, int name_index, ret = btrfs_dir_data_len(leaf, di); out: - mutex_unlock(&root->fs_info->fs_mutex); kfree(name); btrfs_free_path(path); return ret; @@ -210,7 +208,6 @@ int btrfs_xattr_set(struct inode *inode, int name_index, return -ENOMEM; } - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -260,7 +257,6 @@ out: } btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); kfree(name); btrfs_free_path(path); @@ -297,8 +293,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) return -ENOMEM; path->reada = 2; - mutex_lock(&root->fs_info->fs_mutex); - /* search for our xattrs */ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) @@ -379,15 +373,13 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ret = total_size; err: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_free_path(path); return ret; } /* - * delete all the xattrs associated with the inode. fs_mutex should be - * held when we come into here + * delete all the xattrs associated with the inode. */ int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) From 051e1b9f748ae673b7325d3fc049bb838606cffa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: [PATCH 0572/2985] Drop locks in btrfs_search_slot when reading a tree block. One lock per btree block can make for significant congestion if everyone has to wait for IO at the high levels of the btree. This drops locks held by a path when doing reads during a tree search. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 74 +++++++++++++++++++++--------------------- fs/btrfs/ctree.h | 1 - fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 1 - 4 files changed, 38 insertions(+), 39 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index dff4da082d06..1b756fae2799 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -63,7 +63,6 @@ void btrfs_free_path(struct btrfs_path *p) void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; - int skip = p->skip_locking; int keep = p->keep_locks; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { @@ -76,7 +75,6 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) free_extent_buffer(p->nodes[i]); } memset(p, 0, sizeof(*p)); - p->skip_locking = skip; p->keep_locks = keep; } @@ -1137,7 +1135,6 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, return; node = path->nodes[level]; - WARN_ON(!path->skip_locking && !btrfs_tree_locked(node)); search = btrfs_node_blockptr(node, slot); blocksize = btrfs_level_size(root, level - 1); @@ -1192,6 +1189,7 @@ static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) { int i; int skip_level = level; + int no_skips = 0; struct extent_buffer *t; for (i = level; i < BTRFS_MAX_LEVEL; i++) { @@ -1199,27 +1197,24 @@ static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) break; if (!path->locks[i]) break; - if (path->slots[i] == 0) { + if (!no_skips && path->slots[i] == 0) { skip_level = i + 1; continue; } - if (path->keep_locks) { + if (!no_skips && path->keep_locks) { u32 nritems; t = path->nodes[i]; nritems = btrfs_header_nritems(t); - if (nritems < 2 || path->slots[i] >= nritems - 2) { -if (path->keep_locks) { -//printk("path %p skip level now %d\n", path, skip_level); -} + if (nritems < 1 || path->slots[i] >= nritems - 1) { skip_level = i + 1; continue; } } + if (skip_level < i && i >= lowest_unlock) + no_skips = 1; + t = path->nodes[i]; if (i >= lowest_unlock && i > skip_level && path->locks[i]) { -if (path->keep_locks) { -//printk("path %p unlocking level %d slot %d nritems %d skip_level %d\n", path, i, path->slots[i], btrfs_header_nritems(t), skip_level); -} btrfs_tree_unlock(t); path->locks[i] = 0; } @@ -1244,6 +1239,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow) { struct extent_buffer *b; + struct extent_buffer *tmp; int slot; int ret; int level; @@ -1263,10 +1259,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root if (ins_len < 0) lowest_unlock = 2; again: - if (!p->skip_locking) - b = btrfs_lock_root_node(root); - else - b = btrfs_root_node(root); + b = btrfs_lock_root_node(root); while (b) { level = btrfs_header_level(b); @@ -1286,8 +1279,7 @@ again: WARN_ON(1); level = btrfs_header_level(b); p->nodes[level] = b; - if (!p->skip_locking) - p->locks[level] = 1; + p->locks[level] = 1; ret = check_block(root, p, level); if (ret) return -1; @@ -1328,10 +1320,29 @@ again: reada_for_search(root, p, level, slot, key->objectid); - b = read_node_slot(root, b, slot); - if (!p->skip_locking) - btrfs_tree_lock(b); - unlock_up(p, level + 1, lowest_unlock); + tmp = btrfs_find_tree_block(root, + btrfs_node_blockptr(b, slot), + btrfs_level_size(root, level - 1)); + if (tmp && btrfs_buffer_uptodate(tmp, + btrfs_node_ptr_generation(b, slot))) { + b = tmp; + } else { + /* + * reduce lock contention at high levels + * of the btree by dropping locks before + * we read. + */ + if (level > 1) { + btrfs_release_path(NULL, p); + if (tmp) + free_extent_buffer(tmp); + goto again; + } else { + b = read_node_slot(root, b, slot); + } + } + btrfs_tree_lock(b); + unlock_up(p, level, lowest_unlock); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -3007,17 +3018,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, c, slot); - if (!path->skip_locking) { - if (!btrfs_tree_locked(c)) { - int i; - WARN_ON(1); -printk("path %p no lock on level %d\n", path, level); -for (i = 0; i < BTRFS_MAX_LEVEL; i++) { -printk("path %p level %d slot %d nritems %d\n", path, i, path->slots[i], btrfs_header_nritems(path->nodes[i])); -} - } - btrfs_tree_lock(next); - } + WARN_ON(!btrfs_tree_locked(c)); + btrfs_tree_lock(next); break; } path->slots[level] = slot; @@ -3035,10 +3037,8 @@ printk("path %p level %d slot %d nritems %d\n", path, i, path->slots[i], btrfs_h if (level == 1 && path->locks[1] && path->reada) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); - if (!path->skip_locking) { - WARN_ON(!btrfs_tree_locked(path->nodes[level])); - btrfs_tree_lock(next); - } + WARN_ON(!btrfs_tree_locked(path->nodes[level])); + btrfs_tree_lock(next); } done: unlock_up(path, 0, 1); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 692b8ea42de1..9ea12d42741c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -336,7 +336,6 @@ struct btrfs_path { /* keep some upper locks as we walk down */ int keep_locks; int lowest_level; - int skip_locking; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f638803549e0..ffc363d2fb24 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1684,6 +1684,7 @@ void btrfs_throttle(struct btrfs_root *root) #else blk_congestion_wait(WRITE, HZ/20); #endif + } } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 890b9e9d8e27..0905653dd3fc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -88,7 +88,6 @@ static int cache_block_group(struct btrfs_root *root, return -ENOMEM; path->reada = 2; - path->skip_locking = 1; first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; key.offset = 0; From 594a24eb0e7fa8413f8b443863be4b7c72bfde9f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: [PATCH 0573/2985] Fix btrfs_del_ordered_inode to allow forcing the drop during unlinks This allows us to delete an unlinked inode with dirty pages from the list instead of forcing commit to write these out before deleting the inode. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 18 +++++++++++++----- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 4 +--- fs/btrfs/ordered-data.c | 6 +++--- fs/btrfs/ordered-data.h | 2 +- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1b756fae2799..9601241e552b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1245,7 +1245,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int level; int should_reada = p->reada; int lowest_unlock = 1; + int blocksize; u8 lowest_level = 0; + u64 blocknr; + u64 gen; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len); @@ -1320,11 +1323,12 @@ again: reada_for_search(root, p, level, slot, key->objectid); - tmp = btrfs_find_tree_block(root, - btrfs_node_blockptr(b, slot), - btrfs_level_size(root, level - 1)); - if (tmp && btrfs_buffer_uptodate(tmp, - btrfs_node_ptr_generation(b, slot))) { + blocknr = btrfs_node_blockptr(b, slot); + gen = btrfs_node_ptr_generation(b, slot); + blocksize = btrfs_level_size(root, level - 1); + + tmp = btrfs_find_tree_block(root, blocknr, blocksize); + if (tmp && btrfs_buffer_uptodate(tmp, gen)) { b = tmp; } else { /* @@ -1334,6 +1338,10 @@ again: */ if (level > 1) { btrfs_release_path(NULL, p); + if (tmp) + free_extent_buffer(tmp); + tmp = read_tree_block(root, blocknr, + blocksize, gen); if (tmp) free_extent_buffer(tmp); goto again; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 18bbe108a0e6..b7f8f92daf8a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -976,7 +976,7 @@ out_nolock: int btrfs_release_file(struct inode * inode, struct file * filp) { - btrfs_del_ordered_inode(inode); + btrfs_del_ordered_inode(inode, 0); if (filp->private_data) btrfs_ioctl_trans_end(filp); return 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b2251e27ac84..cf27b5984627 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -852,7 +852,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) * we don't need to worry about * data=ordered */ - btrfs_del_ordered_inode(inode); + btrfs_del_ordered_inode(inode, 1); } btrfs_end_transaction(trans, root); @@ -1276,14 +1276,12 @@ void btrfs_delete_inode(struct inode *inode) btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return; no_delete_lock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); no_delete: clear_inode(inode); } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 8dd8180183ff..5e4c0d95ce43 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -254,7 +254,7 @@ static void __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, return; } -void btrfs_del_ordered_inode(struct inode *inode) +void btrfs_del_ordered_inode(struct inode *inode, int force) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 root_objectid = root->root_key.objectid; @@ -263,8 +263,8 @@ void btrfs_del_ordered_inode(struct inode *inode) return; } - if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || - mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)) + if (!force && (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || + mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) return; spin_lock(&root->fs_info->new_trans_lock); diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index c515c4b39996..4fa78736423e 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -38,6 +38,6 @@ int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, u64 *root_objectid, u64 *objectid, struct inode **inode); -void btrfs_del_ordered_inode(struct inode *inode); +void btrfs_del_ordered_inode(struct inode *inode, int force); int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode); #endif From 168fd7d271d9d8e81ff0b03eb08c36d82670c8a9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: [PATCH 0574/2985] Fix btrfs_next_leaf to check for new items after dropping locks Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9601241e552b..b8f7aecf68d7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2999,7 +2999,14 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) return ret; nritems = btrfs_header_nritems(path->nodes[0]); + /* + * by releasing the path above we dropped all our locks. A balance + * could have added more items next to the key that used to be + * at the very end of the block. So, check again here and + * advance the path if there are now more items available. + */ if (nritems > 0 && path->slots[0] < nritems - 1) { + path->slots[0]++; goto done; } From 5cd57b2cbbb06a350df2698314e4e6a80805fc2f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: [PATCH 0575/2985] Btrfs: Add a skip_locking parameter to struct path, and make various funcs honor it Allocations may need to read in block groups from the extent allocation tree, which will require a tree search and take locks on the extent allocation tree. But, those locks might already be held in other places, leading to deadlocks. Since the alloc_mutex serializes everything right now, it is safe to skip the btree locking while caching block groups. A better fix will be to either create a recursive lock or find a way to back off existing locks while caching block groups. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 32 ++++++++++++++++++-------------- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 6 ++++++ 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b8f7aecf68d7..fd68601b60b1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -64,6 +64,7 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; int keep = p->keep_locks; + int skip = p->skip_locking; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) @@ -76,6 +77,7 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) } memset(p, 0, sizeof(*p)); p->keep_locks = keep; + p->skip_locking = skip; } struct extent_buffer *btrfs_root_node(struct btrfs_root *root) @@ -1262,7 +1264,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root if (ins_len < 0) lowest_unlock = 2; again: - b = btrfs_lock_root_node(root); + if (p->skip_locking) + b = btrfs_root_node(root); + else + b = btrfs_lock_root_node(root); while (b) { level = btrfs_header_level(b); @@ -1282,7 +1287,8 @@ again: WARN_ON(1); level = btrfs_header_level(b); p->nodes[level] = b; - p->locks[level] = 1; + if (!p->skip_locking) + p->locks[level] = 1; ret = check_block(root, p, level); if (ret) return -1; @@ -1349,7 +1355,8 @@ again: b = read_node_slot(root, b, slot); } } - btrfs_tree_lock(b); + if (!p->skip_locking) + btrfs_tree_lock(b); unlock_up(p, level, lowest_unlock); } else { p->slots[level] = slot; @@ -1392,13 +1399,6 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, break; t = path->nodes[i]; btrfs_set_node_key(t, key, tslot); - if (!btrfs_tree_locked(path->nodes[i])) { - int ii; -printk("fixup without lock on level %d\n", btrfs_header_level(path->nodes[i])); - for (ii = 0; ii < BTRFS_MAX_LEVEL; ii++) { -printk("level %d slot %d\n", ii, path->slots[ii]); - } - } btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; @@ -3033,8 +3033,10 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, c, slot); - WARN_ON(!btrfs_tree_locked(c)); - btrfs_tree_lock(next); + if (!path->skip_locking) { + WARN_ON(!btrfs_tree_locked(c)); + btrfs_tree_lock(next); + } break; } path->slots[level] = slot; @@ -3052,8 +3054,10 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (level == 1 && path->locks[1] && path->reada) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); - WARN_ON(!btrfs_tree_locked(path->nodes[level])); - btrfs_tree_lock(next); + if (!path->skip_locking) { + WARN_ON(!btrfs_tree_locked(path->nodes[level])); + btrfs_tree_lock(next); + } } done: unlock_up(path, 0, 1); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9ea12d42741c..e9bbb53eda63 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -335,6 +335,7 @@ struct btrfs_path { int reada; /* keep some upper locks as we walk down */ int keep_locks; + int skip_locking; int lowest_level; }; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0905653dd3fc..544fc3f2fe6c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -88,6 +88,12 @@ static int cache_block_group(struct btrfs_root *root, return -ENOMEM; path->reada = 2; + /* + * we get into deadlocks with paths held by callers of this function. + * since the alloc_mutex is protecting things right now, just + * skip the locking here + */ + path->skip_locking = 1; first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; key.offset = 0; From 333db94cdde9e6dfdedab9290d04d812f83e0922 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: [PATCH 0576/2985] Btrfs: Fix snapshot deletion to release the alloc_mutex much more often. This lowers the impact of snapshot deletion on the rest of the FS. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- fs/btrfs/disk-io.c | 2 ++ fs/btrfs/extent-tree.c | 28 ++++++++++++++++++++-------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index fd68601b60b1..5edbcc09b3cc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1255,7 +1255,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len); WARN_ON(p->nodes[0] != NULL); - WARN_ON(root == root->fs_info->extent_root && + WARN_ON(cow && root == root->fs_info->extent_root && !mutex_is_locked(&root->fs_info->alloc_mutex)); WARN_ON(root == root->fs_info->chunk_root && !mutex_is_locked(&root->fs_info->chunk_mutex)); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ffc363d2fb24..3cc480b83819 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1674,6 +1674,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) void btrfs_throttle(struct btrfs_root *root) { +#if 0 struct backing_dev_info *bdi; bdi = &root->fs_info->bdi; @@ -1686,6 +1687,7 @@ void btrfs_throttle(struct btrfs_root *root) #endif } +#endif } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 544fc3f2fe6c..6274f30031db 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1223,8 +1223,8 @@ printk("space info full %Lu\n", flags); ret = btrfs_make_block_group(trans, extent_root, 0, flags, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); - mutex_unlock(&extent_root->fs_info->chunk_mutex); out: + mutex_unlock(&extent_root->fs_info->chunk_mutex); return 0; } @@ -2181,17 +2181,29 @@ static void noinline reada_walk_down(struct btrfs_root *root, continue; } } - mutex_unlock(&root->fs_info->alloc_mutex); ret = readahead_tree_block(root, bytenr, blocksize, btrfs_node_ptr_generation(node, i)); last = bytenr + blocksize; cond_resched(); - mutex_lock(&root->fs_info->alloc_mutex); if (ret) break; } } +/* + * we want to avoid as much random IO as we can with the alloc mutex + * held, so drop the lock and do the lookup, then do it again with the + * lock held. + */ +int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, + u32 *refs) +{ + mutex_unlock(&root->fs_info->alloc_mutex); + lookup_extent_ref(NULL, root, start, len, refs); + mutex_lock(&root->fs_info->alloc_mutex); + return lookup_extent_ref(NULL, root, start, len, refs); +} + /* * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. @@ -2215,8 +2227,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_extent_ref(trans, root, - path->nodes[*level]->start, + ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, path->nodes[*level]->len, &refs); BUG_ON(ret); if (refs > 1) @@ -2245,7 +2256,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = btrfs_level_size(root, *level - 1); - ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); + ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) { parent = path->nodes[*level]; @@ -2261,15 +2272,16 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); + mutex_unlock(&root->fs_info->alloc_mutex); + reada_walk_down(root, cur, path->slots[*level]); - mutex_unlock(&root->fs_info->alloc_mutex); next = read_tree_block(root, bytenr, blocksize, ptr_gen); mutex_lock(&root->fs_info->alloc_mutex); /* we've dropped the lock, double check */ - ret = lookup_extent_ref(trans, root, bytenr, + ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) { From 89ce8a63d0c761fbb02089850605360f389477d8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: [PATCH 0577/2985] Add btrfs_end_transaction_throttle to force writers to wait for pending commits The existing throttle mechanism was often not sufficient to prevent new writers from coming in and making a given transaction run forever. This adds an explicit wait at the end of most operations so they will allow the current transaction to close. There is no wait inside file_write, inode updates, or cow filling, all which have different deadlock possibilities. This is a temporary measure until better asynchronous commit support is added. This code leads to stalls as it waits for data=ordered writeback, and it really needs to be fixed. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 18 ------------ fs/btrfs/disk-io.h | 1 - fs/btrfs/file.c | 1 - fs/btrfs/inode.c | 24 +++++---------- fs/btrfs/ioctl.c | 2 -- fs/btrfs/transaction.c | 66 ++++++++++++++++++++++++++++-------------- fs/btrfs/transaction.h | 2 ++ 7 files changed, 55 insertions(+), 59 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3cc480b83819..52569b57692d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1672,24 +1672,6 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } -void btrfs_throttle(struct btrfs_root *root) -{ -#if 0 - struct backing_dev_info *bdi; - - bdi = &root->fs_info->bdi; - if (atomic_read(&root->fs_info->throttles) && - bdi_write_congested(bdi)) { -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) - congestion_wait(WRITE, HZ/20); -#else - blk_congestion_wait(WRITE, HZ/20); -#endif - - } -#endif -} - void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { /* diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 2bc64fefe6ea..deff6b4815a7 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -70,7 +70,6 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); -void btrfs_throttle(struct btrfs_root *root); int btrfs_open_device(struct btrfs_device *dev); int btrfs_verify_block_csum(struct btrfs_root *root, struct extent_buffer *buf); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b7f8f92daf8a..ece221cba90c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -934,7 +934,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) btrfs_btree_balance_dirty(root, 1); - btrfs_throttle(root); cond_resched(); } out: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cf27b5984627..bbba3350d023 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -855,10 +855,9 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) btrfs_del_ordered_inode(inode, 1); } - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return ret; } @@ -889,10 +888,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) } nr = trans->blocks_used; - ret = btrfs_end_transaction(trans, root); + ret = btrfs_end_transaction_throttle(trans, root); fail: btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); if (ret && !err) err = ret; @@ -1871,14 +1869,13 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); iput(inode); } btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return err; } @@ -1936,14 +1933,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); iput(inode); } btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return err; } @@ -1985,14 +1981,13 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); iput(inode); } btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return err; } @@ -2055,13 +2050,12 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); out_unlock: if (drop_on_err) iput(inode); btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return err; } @@ -2587,10 +2581,9 @@ static void btrfs_truncate(struct inode *inode) btrfs_update_inode(trans, root, inode); nr = trans->blocks_used; - ret = btrfs_end_transaction(trans, root); + ret = btrfs_end_transaction_throttle(trans, root); BUG_ON(ret); btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); } /* @@ -2912,14 +2905,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); out_fail: if (drop_inode) { inode_dec_link_count(inode); iput(inode); } btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return err; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6002eb64daf9..026039a2ac58 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -164,7 +164,6 @@ fail: ret = err; fail_commit: btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return ret; } @@ -206,7 +205,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) fail_unlock: btrfs_btree_balance_dirty(root, nr); - btrfs_throttle(root); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5a1ee0665ae8..69ed5f85a387 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -130,8 +130,27 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, return h; } -int btrfs_end_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static noinline int wait_for_commit(struct btrfs_root *root, + struct btrfs_transaction *commit) +{ + DEFINE_WAIT(wait); + mutex_lock(&root->fs_info->trans_mutex); + while(!commit->commit_done) { + prepare_to_wait(&commit->commit_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (commit->commit_done) + break; + mutex_unlock(&root->fs_info->trans_mutex); + schedule(); + mutex_lock(&root->fs_info->trans_mutex); + } + mutex_unlock(&root->fs_info->trans_mutex); + finish_wait(&commit->commit_wait, &wait); + return 0; +} + +static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int throttle) { struct btrfs_transaction *cur_trans; @@ -140,8 +159,18 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, WARN_ON(cur_trans != trans->transaction); WARN_ON(cur_trans->num_writers < 1); cur_trans->num_writers--; + if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); + + if (cur_trans->in_commit && throttle) { + int ret; + mutex_unlock(&root->fs_info->trans_mutex); + ret = wait_for_commit(root, cur_trans); + BUG_ON(ret); + mutex_lock(&root->fs_info->trans_mutex); + } + put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); memset(trans, 0, sizeof(*trans)); @@ -149,6 +178,18 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, return 0; } +int btrfs_end_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + return __btrfs_end_transaction(trans, root, 0); +} + +int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + return __btrfs_end_transaction(trans, root, 1); +} + int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) @@ -240,25 +281,6 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, return 0; } -static noinline int wait_for_commit(struct btrfs_root *root, - struct btrfs_transaction *commit) -{ - DEFINE_WAIT(wait); - mutex_lock(&root->fs_info->trans_mutex); - while(!commit->commit_done) { - prepare_to_wait(&commit->commit_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (commit->commit_done) - break; - mutex_unlock(&root->fs_info->trans_mutex); - schedule(); - mutex_lock(&root->fs_info->trans_mutex); - } - mutex_unlock(&root->fs_info->trans_mutex); - finish_wait(&commit->commit_wait, &wait); - return 0; -} - struct dirty_root { struct list_head list; struct btrfs_root *root; @@ -680,6 +702,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, root->fs_info->btree_inode->i_mapping, GFP_NOFS); trans->transaction->in_commit = 1; +printk("trans %Lu in commit\n", trans->transid); cur_trans = trans->transaction; if (cur_trans->list.prev != &root->fs_info->trans_list) { prev_trans = list_entry(cur_trans->list.prev, @@ -760,6 +783,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, kfree(pinned_copy); cur_trans->commit_done = 1; +printk("trans %Lu done in commit\n", cur_trans->transid); root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index c3172ddb3321..52559b51b181 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -101,4 +101,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif From a74a4b97b61beede185b4b3ad359d7d378b0d312 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: [PATCH 0578/2985] Btrfs: Replace the transaction work queue with kthreads This creates one kthread for commits and one kthread for deleting old snapshots. All the work queues are removed. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 +- fs/btrfs/ctree.h | 13 ++--- fs/btrfs/disk-io.c | 116 +++++++++++++++++++++++++++++++++++++---- fs/btrfs/extent-tree.c | 10 ++-- fs/btrfs/super.c | 16 +++--- fs/btrfs/transaction.c | 72 +------------------------ fs/btrfs/transaction.h | 10 ---- fs/btrfs/volumes.c | 12 +++-- 8 files changed, 136 insertions(+), 118 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5edbcc09b3cc..40f0e0cb804b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1352,6 +1352,8 @@ again: free_extent_buffer(tmp); goto again; } else { + if (tmp) + free_extent_buffer(tmp); b = read_node_slot(root, b, slot); } } @@ -3048,7 +3050,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; - path->locks[level] = 1; + if (!path->skip_locking) + path->locks[level] = 1; if (!level) break; if (level == 1 && path->locks[1] && path->reada) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e9bbb53eda63..244fe86bcc55 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -519,15 +518,14 @@ struct btrfs_fs_info { struct backing_dev_info bdi; spinlock_t hash_lock; struct mutex trans_mutex; + struct mutex transaction_kthread_mutex; + struct mutex cleaner_mutex; struct mutex alloc_mutex; struct mutex chunk_mutex; struct mutex drop_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; - struct list_head end_io_work_list; - struct work_struct end_io_work; - spinlock_t end_io_work_lock; atomic_t nr_async_submits; /* @@ -543,13 +541,10 @@ struct btrfs_fs_info { struct btrfs_workers workers; struct btrfs_workers endio_workers; struct btrfs_workers submit_workers; + struct task_struct *transaction_kthread; + struct task_struct *cleaner_kthread; int thread_pool_size; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - struct work_struct trans_work; -#else - struct delayed_work trans_work; -#endif struct kobject super_kobj; struct completion kobj_unregister; int do_barriers; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 52569b57692d..31ca9f89388d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include @@ -24,6 +25,12 @@ #include #include // for block_sync_page #include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) +# include +#else +# include +#endif #include "crc32c.h" #include "ctree.h" #include "disk-io.h" @@ -1100,6 +1107,87 @@ static void end_workqueue_fn(struct btrfs_work *work) #endif } +static int cleaner_kthread(void *arg) +{ + struct btrfs_root *root = arg; + + do { + smp_mb(); + if (root->fs_info->closing) + break; + + vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); + mutex_lock(&root->fs_info->cleaner_mutex); +printk("cleaner awake\n"); + btrfs_clean_old_snapshots(root); +printk("cleaner done\n"); + mutex_unlock(&root->fs_info->cleaner_mutex); + + if (freezing(current)) { + refrigerator(); + } else { + smp_mb(); + if (root->fs_info->closing) + break; + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + __set_current_state(TASK_RUNNING); + } + } while (!kthread_should_stop()); + return 0; +} + +static int transaction_kthread(void *arg) +{ + struct btrfs_root *root = arg; + struct btrfs_trans_handle *trans; + struct btrfs_transaction *cur; + unsigned long now; + unsigned long delay; + int ret; + + do { + smp_mb(); + if (root->fs_info->closing) + break; + + delay = HZ * 30; + vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); + mutex_lock(&root->fs_info->transaction_kthread_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + cur = root->fs_info->running_transaction; + if (!cur) { + mutex_unlock(&root->fs_info->trans_mutex); + goto sleep; + } + now = get_seconds(); + if (now < cur->start_time || now - cur->start_time < 30) { + mutex_unlock(&root->fs_info->trans_mutex); + delay = HZ * 5; + goto sleep; + } + mutex_unlock(&root->fs_info->trans_mutex); + btrfs_defrag_dirty_roots(root->fs_info); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_commit_transaction(trans, root); +sleep: + wake_up_process(root->fs_info->cleaner_kthread); + mutex_unlock(&root->fs_info->transaction_kthread_mutex); + + if (freezing(current)) { + refrigerator(); + } else { + if (root->fs_info->closing) + break; + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(delay); + __set_current_state(TASK_RUNNING); + } + } while (!kthread_should_stop()); + return 0; +} + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options) @@ -1189,11 +1277,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); -#else - INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); -#endif BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -1204,6 +1287,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->alloc_mutex); mutex_init(&fs_info->chunk_mutex); + mutex_init(&fs_info->transaction_kthread_mutex); + mutex_init(&fs_info->cleaner_mutex); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1247,7 +1332,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); - err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", @@ -1341,9 +1425,22 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->data_alloc_profile = (u64)-1; fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; + fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, + "btrfs-cleaner"); + if (!fs_info->cleaner_kthread) + goto fail_extent_root; + + fs_info->transaction_kthread = kthread_run(transaction_kthread, + tree_root, + "btrfs-transaction"); + if (!fs_info->transaction_kthread) + goto fail_trans_kthread; + return tree_root; +fail_trans_kthread: + kthread_stop(fs_info->cleaner_kthread); fail_extent_root: free_extent_buffer(extent_root->node); fail_tree_root: @@ -1562,8 +1659,11 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; smp_mb(); - btrfs_transaction_flush_work(root); + kthread_stop(root->fs_info->transaction_kthread); + kthread_stop(root->fs_info->cleaner_kthread); + btrfs_defrag_dirty_roots(root->fs_info); + btrfs_clean_old_snapshots(root); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -1574,8 +1674,6 @@ int close_ctree(struct btrfs_root *root) write_ctree_super(NULL, root); - btrfs_transaction_flush_work(root); - if (fs_info->delalloc_bytes) { printk("btrfs: at unmount delalloc count %Lu\n", fs_info->delalloc_bytes); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6274f30031db..89cc4f611869 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1216,15 +1216,16 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, if (ret == -ENOSPC) { printk("space info full %Lu\n", flags); space_info->full = 1; - goto out; + goto out_unlock; } BUG_ON(ret); ret = btrfs_make_block_group(trans, extent_root, 0, flags, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); -out: +out_unlock: mutex_unlock(&extent_root->fs_info->chunk_mutex); +out: return 0; } @@ -2274,7 +2275,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, free_extent_buffer(next); mutex_unlock(&root->fs_info->alloc_mutex); - reada_walk_down(root, cur, path->slots[*level]); + if (path->slots[*level] == 0) + reada_walk_down(root, cur, path->slots[*level]); next = read_tree_block(root, bytenr, blocksize, ptr_gen); @@ -2446,8 +2448,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; - ret = -EAGAIN; - break; } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b61ded7a20c9..726d6871fa13 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -340,7 +340,6 @@ static int btrfs_fill_super(struct super_block * sb, goto fail_close; sb->s_root = root_dentry; - btrfs_transaction_queue_work(tree_root, HZ * 30); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) save_mount_options(sb, data); @@ -416,9 +415,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, goto error_free_subvol_name; bdev = fs_devices->latest_bdev; - btrfs_lock_volumes(); s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); - btrfs_unlock_volumes(); if (IS_ERR(s)) goto error_s; @@ -530,13 +527,15 @@ out: static void btrfs_write_super_lockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); - btrfs_transaction_flush_work(root); + mutex_lock(&root->fs_info->transaction_kthread_mutex); + mutex_lock(&root->fs_info->cleaner_mutex); } static void btrfs_unlockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); - btrfs_transaction_queue_work(root, HZ * 30); + mutex_unlock(&root->fs_info->cleaner_mutex); + mutex_unlock(&root->fs_info->transaction_kthread_mutex); } static struct super_operations btrfs_super_ops = { @@ -589,10 +588,9 @@ static int __init init_btrfs_fs(void) if (err) return err; - btrfs_init_transaction_sys(); err = btrfs_init_cachep(); if (err) - goto free_transaction_sys; + goto free_sysfs; err = extent_io_init(); if (err) @@ -618,15 +616,13 @@ free_extent_io: extent_io_exit(); free_cachep: btrfs_destroy_cachep(); -free_transaction_sys: - btrfs_exit_transaction_sys(); +free_sysfs: btrfs_exit_sysfs(); return err; } static void __exit exit_btrfs_fs(void) { - btrfs_exit_transaction_sys(); btrfs_destroy_cachep(); extent_map_exit(); extent_io_exit(); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 69ed5f85a387..0c53ff775b92 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -29,8 +29,6 @@ static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; -static struct workqueue_struct *trans_wq; - #define BTRFS_ROOT_TRANS_TAG 0 #define BTRFS_ROOT_DEFRAG_TAG 1 @@ -807,81 +805,15 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) { struct list_head dirty_roots; INIT_LIST_HEAD(&dirty_roots); - +again: mutex_lock(&root->fs_info->trans_mutex); list_splice_init(&root->fs_info->dead_roots, &dirty_roots); mutex_unlock(&root->fs_info->trans_mutex); if (!list_empty(&dirty_roots)) { drop_dirty_roots(root, &dirty_roots); + goto again; } return 0; } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -void btrfs_transaction_cleaner(void *p) -#else -void btrfs_transaction_cleaner(struct work_struct *work) -#endif -{ -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - struct btrfs_fs_info *fs_info = p; -#else - struct btrfs_fs_info *fs_info = container_of(work, - struct btrfs_fs_info, - trans_work.work); - -#endif - struct btrfs_root *root = fs_info->tree_root; - struct btrfs_transaction *cur; - struct btrfs_trans_handle *trans; - unsigned long now; - unsigned long delay = HZ * 30; - int ret; - - smp_mb(); - if (root->fs_info->closing) - goto out; - - mutex_lock(&root->fs_info->trans_mutex); - cur = root->fs_info->running_transaction; - if (!cur) { - mutex_unlock(&root->fs_info->trans_mutex); - goto out; - } - now = get_seconds(); - if (now < cur->start_time || now - cur->start_time < 30) { - mutex_unlock(&root->fs_info->trans_mutex); - delay = HZ * 5; - goto out; - } - mutex_unlock(&root->fs_info->trans_mutex); - btrfs_defrag_dirty_roots(root->fs_info); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_commit_transaction(trans, root); -out: - btrfs_clean_old_snapshots(root); - btrfs_transaction_queue_work(root, delay); -} - -void btrfs_transaction_queue_work(struct btrfs_root *root, int delay) -{ - if (!root->fs_info->closing) - queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay); -} - -void btrfs_transaction_flush_work(struct btrfs_root *root) -{ - cancel_delayed_work(&root->fs_info->trans_work); - flush_workqueue(trans_wq); -} - -void __init btrfs_init_transaction_sys(void) -{ - trans_wq = create_workqueue("btrfs-transaction"); -} - -void btrfs_exit_transaction_sys(void) -{ - destroy_workqueue(trans_wq); -} diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 52559b51b181..e1e5a06b65f4 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -82,16 +82,6 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -void btrfs_transaction_cleaner(void *p); -#else -void btrfs_transaction_cleaner(struct work_struct *work); -#endif - -void btrfs_transaction_flush_work(struct btrfs_root *root); -void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); -void btrfs_init_transaction_sys(void); -void btrfs_exit_transaction_sys(void); int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, struct list_head *dead_list); int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 869864ddcc29..4e7cee27aab5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -271,13 +271,17 @@ again: list_for_each(cur, head) { device = list_entry(cur, struct btrfs_device, dev_list); if (!device->in_fs_metadata) { - if (device->bdev) { - close_bdev_excl(device->bdev); - fs_devices->open_devices--; - } + struct block_device *bdev; list_del(&device->dev_list); list_del(&device->dev_alloc_list); fs_devices->num_devices--; + if (device->bdev) { + bdev = device->bdev; + fs_devices->open_devices--; + mutex_unlock(&uuid_mutex); + close_bdev_excl(bdev); + mutex_lock(&uuid_mutex); + } kfree(device->name); kfree(device); goto again; From e7a84565bcdb239caad29ccbe559ef978090ac7e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: [PATCH 0579/2985] Btrfs: Add btree locking to the tree defragmentation code The online btree defragger is simplified and rewritten to use standard btree searches instead of a walk up / down mechanism. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 46 ++++++-- fs/btrfs/ctree.h | 2 + fs/btrfs/extent-tree.c | 7 ++ fs/btrfs/tree-defrag.c | 239 ++++++++--------------------------------- 4 files changed, 93 insertions(+), 201 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 40f0e0cb804b..7f4cc2b88d09 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -354,7 +354,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_key *progress) { struct extent_buffer *cur; - struct extent_buffer *tmp; u64 blocknr; u64 gen; u64 search_start = *last_ret; @@ -370,9 +369,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, int progress_passed = 0; struct btrfs_disk_key disk_key; - /* FIXME this code needs locking */ - return 0; - parent_level = btrfs_header_level(parent); if (cache_only && parent_level != 1) return 0; @@ -454,20 +450,23 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (search_start == 0) search_start = last_block; + btrfs_tree_lock(cur); err = __btrfs_cow_block(trans, root, cur, parent, i, - &tmp, search_start, + &cur, search_start, min(16 * blocksize, (end_slot - i) * blocksize)); if (err) { + btrfs_tree_unlock(cur); free_extent_buffer(cur); break; } - search_start = tmp->start; - last_block = tmp->start; + search_start = cur->start; + last_block = cur->start; *last_ret = search_start; if (parent_level == 1) - btrfs_clear_buffer_defrag(tmp); - free_extent_buffer(tmp); + btrfs_clear_buffer_defrag(cur); + btrfs_tree_unlock(cur); + free_extent_buffer(cur); } if (parent->map_token) { unmap_extent_buffer(parent, parent->map_token, @@ -2970,6 +2969,35 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) return 1; } +int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *key, int lowest_level) +{ + int level = lowest_level; + int slot; + struct extent_buffer *c; + + while(level < BTRFS_MAX_LEVEL) { + if (!path->nodes[level]) + return 1; + + slot = path->slots[level] + 1; + c = path->nodes[level]; + if (slot >= btrfs_header_nritems(c)) { + level++; + if (level == BTRFS_MAX_LEVEL) { + return 1; + } + continue; + } + if (level == 0) + btrfs_item_key_to_cpu(c, key, slot); + else + btrfs_node_key_to_cpu(c, key, slot); + return 0; + } + return 1; +} + /* * search the tree again to find a leaf with greater keys * returns 0 if it found something or 1 if there are no greater leaves. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 244fe86bcc55..ca8e6f15859e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1411,6 +1411,8 @@ int btrfs_previous_item(struct btrfs_root *root, struct extent_buffer *btrfs_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); +int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *key, int lowest_level); int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 89cc4f611869..a9b3a25a45b7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2201,6 +2201,7 @@ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, { mutex_unlock(&root->fs_info->alloc_mutex); lookup_extent_ref(NULL, root, start, len, refs); + cond_resched(); mutex_lock(&root->fs_info->alloc_mutex); return lookup_extent_ref(NULL, root, start, len, refs); } @@ -2280,6 +2281,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, next = read_tree_block(root, bytenr, blocksize, ptr_gen); + cond_resched(); mutex_lock(&root->fs_info->alloc_mutex); /* we've dropped the lock, double check */ @@ -2329,6 +2331,7 @@ out: *level += 1; BUG_ON(ret); mutex_unlock(&root->fs_info->alloc_mutex); + cond_resched(); return 0; } @@ -2448,6 +2451,10 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; + if (trans->transaction->in_commit) { + ret = -EAGAIN; + break; + } } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index fab851d85383..1677e4edaf6f 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -21,167 +21,26 @@ #include "disk-io.h" #include "print-tree.h" #include "transaction.h" - -static void reada_defrag(struct btrfs_root *root, - struct extent_buffer *node) -{ - int i; - u32 nritems; - u64 bytenr; - u64 gen; - u32 blocksize; - int ret; - - blocksize = btrfs_level_size(root, btrfs_header_level(node) - 1); - nritems = btrfs_header_nritems(node); - for (i = 0; i < nritems; i++) { - bytenr = btrfs_node_blockptr(node, i); - gen = btrfs_node_ptr_generation(node, i); - ret = readahead_tree_block(root, bytenr, blocksize, gen); - if (ret) - break; - } -} - -static int defrag_walk_down(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, int *level, - int cache_only, u64 *last_ret) -{ - struct extent_buffer *next; - struct extent_buffer *cur; - u64 bytenr; - u64 ptr_gen; - int ret = 0; - int is_extent = 0; - - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - - if (root->fs_info->extent_root == root) - is_extent = 1; - - if (*level == 1 && cache_only && path->nodes[1] && - !btrfs_buffer_defrag(path->nodes[1])) { - goto out; - } - while(*level > 0) { - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - cur = path->nodes[*level]; - - if (!cache_only && *level > 1 && path->slots[*level] == 0) - reada_defrag(root, cur); - - if (btrfs_header_level(cur) != *level) - WARN_ON(1); - - if (path->slots[*level] >= - btrfs_header_nritems(cur)) - break; - - if (*level == 1) { - WARN_ON(btrfs_header_generation(path->nodes[*level]) != - trans->transid); - ret = btrfs_realloc_node(trans, root, - path->nodes[*level], - path->slots[*level], - cache_only, last_ret, - &root->defrag_progress); - if (is_extent) - btrfs_extent_post_op(trans, root); - - break; - } - bytenr = btrfs_node_blockptr(cur, path->slots[*level]); - ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); - - if (cache_only) { - next = btrfs_find_tree_block(root, bytenr, - btrfs_level_size(root, *level - 1)); - if (!next || !btrfs_buffer_uptodate(next, ptr_gen) || - !btrfs_buffer_defrag(next)) { - free_extent_buffer(next); - path->slots[*level]++; - continue; - } - } else { - next = read_tree_block(root, bytenr, - btrfs_level_size(root, *level - 1), - ptr_gen); - } - ret = btrfs_cow_block(trans, root, next, path->nodes[*level], - path->slots[*level], &next); - BUG_ON(ret); - if (is_extent) - btrfs_extent_post_op(trans, root); - - WARN_ON(*level <= 0); - if (path->nodes[*level-1]) - free_extent_buffer(path->nodes[*level-1]); - path->nodes[*level-1] = next; - *level = btrfs_header_level(next); - path->slots[*level] = 0; - } - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - - btrfs_clear_buffer_defrag(path->nodes[*level]); -out: - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - *level += 1; - WARN_ON(ret && ret != -EAGAIN); - return ret; -} - -static int defrag_walk_up(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, int *level, - int cache_only) -{ - int i; - int slot; - struct extent_buffer *node; - - for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { - slot = path->slots[i]; - if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { - path->slots[i]++; - *level = i; - node = path->nodes[i]; - WARN_ON(i == 0); - btrfs_node_key_to_cpu(node, &root->defrag_progress, - path->slots[i]); - root->defrag_level = i; - return 0; - } else { - btrfs_clear_buffer_defrag(path->nodes[*level]); - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - *level = i + 1; - } - } - return 1; -} +#include "locking.h" int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only) { struct btrfs_path *path = NULL; - struct extent_buffer *tmp; + struct btrfs_key key; int ret = 0; int wret; int level; int orig_level; int i; int is_extent = 0; + int next_key_ret = 0; u64 last_ret = 0; - if (root->fs_info->extent_root == root) + if (root->fs_info->extent_root == root) { + mutex_lock(&root->fs_info->alloc_mutex); is_extent = 1; - - goto out; + } if (root->ref_cows == 0 && !is_extent) goto out; @@ -200,67 +59,63 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; } if (root->defrag_progress.objectid == 0) { + struct extent_buffer *root_node; u32 nritems; - nritems = btrfs_header_nritems(root->node); + root_node = btrfs_lock_root_node(root); + nritems = btrfs_header_nritems(root_node); root->defrag_max.objectid = 0; /* from above we know this is not a leaf */ - btrfs_node_key_to_cpu(root->node, &root->defrag_max, + btrfs_node_key_to_cpu(root_node, &root->defrag_max, nritems - 1); - extent_buffer_get(root->node); - ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - BUG_ON(ret); - path->nodes[level] = root->node; - path->slots[level] = 0; - if (is_extent) - btrfs_extent_post_op(trans, root); + btrfs_tree_unlock(root_node); + free_extent_buffer(root_node); + memset(&key, 0, sizeof(key)); } else { - level = root->defrag_level; - path->lowest_level = level; - wret = btrfs_search_slot(trans, root, &root->defrag_progress, - path, 0, 1); - - if (is_extent) - btrfs_extent_post_op(trans, root); - - if (wret < 0) { - ret = wret; - goto out; - } - - while(level > 0 && !path->nodes[level]) - level--; - - if (!path->nodes[level]) { - ret = 0; - goto out; - } + memcpy(&key, &root->defrag_progress, sizeof(key)); } - while(1) { - wret = defrag_walk_down(trans, root, path, &level, cache_only, - &last_ret); - if (wret > 0) - break; - if (wret < 0) - ret = wret; + path->lowest_level = 1; + path->keep_locks = 1; + wret = btrfs_search_slot(trans, root, &key, path, 0, 1); - wret = defrag_walk_up(trans, root, path, &level, cache_only); - if (wret > 0) - break; - if (wret < 0) - ret = wret; - else - ret = -EAGAIN; - break; + if (wret < 0) { + ret = wret; + goto out; } - for (i = 0; i <= orig_level; i++) { + if (!path->nodes[1]) { + ret = 0; + goto out; + } + path->slots[1] = btrfs_header_nritems(path->nodes[1]); + next_key_ret = btrfs_find_next_key(root, path, &key, 1); + ret = btrfs_realloc_node(trans, root, + path->nodes[1], 0, + cache_only, &last_ret, + &root->defrag_progress); + WARN_ON(ret && ret != -EAGAIN); + if (next_key_ret == 0) { + memcpy(&root->defrag_progress, &key, sizeof(key)); + ret = -EAGAIN; + } + + for (i = 1; i < BTRFS_MAX_LEVEL; i++) { + if (path->locks[i]) { + btrfs_tree_unlock(path->nodes[i]); + path->locks[i] = 0; + } if (path->nodes[i]) { free_extent_buffer(path->nodes[i]); path->nodes[i] = NULL; } } + if (is_extent) + btrfs_extent_post_op(trans, root); + out: + if (is_extent) + mutex_unlock(&root->fs_info->alloc_mutex); + if (path) btrfs_free_path(path); if (ret == -EAGAIN) { From 079899c2384023cd8efcd3806680b4f1d2abbd54 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: [PATCH 0580/2985] Btrfs: Change find_extent_buffer to use TestSetPageLocked This makes it possible for callers to check for extent_buffers in cache without deadlocking against any btree locks held. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 -- fs/btrfs/extent_io.c | 7 ++++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a9b3a25a45b7..dc3c03c6612d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1438,7 +1438,6 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, int err = 0; if (!pending) { -#if 0 struct extent_buffer *buf; buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { @@ -1460,7 +1459,6 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, } free_extent_buffer(buf); } -#endif update_pinned_extents(root, bytenr, num_bytes, 1); } else { set_extent_bits(&root->fs_info->pending_del, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index bd15cdcaba95..40a5f53cb040 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2808,10 +2808,15 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, goto lru_add; for (i = 0; i < num_pages; i++, index++) { - p = find_lock_page(mapping, index); + p = find_get_page(mapping, index); if (!p) { goto fail; } + if (TestSetPageLocked(p)) { + page_cache_release(p); + goto fail; + } + set_page_extent_mapped(p); mark_page_accessed(p); From 1b1e2135dc1e4efbcf25ac9ac9979316d4e1193e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: [PATCH 0581/2985] Btrfs: Add a per-inode csum mutex to avoid races creating csum items Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/file.c | 6 +++--- fs/btrfs/inode.c | 6 ++++++ fs/btrfs/ordered-data.c | 5 +++-- fs/btrfs/transaction.c | 2 +- fs/btrfs/tree-defrag.c | 7 +++++++ 6 files changed, 21 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 5ba83894c8b7..40b4e0c9cd09 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -30,6 +30,7 @@ struct btrfs_inode { struct extent_map_tree extent_tree; struct extent_io_tree io_tree; struct extent_io_tree io_failure_tree; + struct mutex csum_mutex; struct inode vfs_inode; atomic_t ordered_writeback; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ece221cba90c..8037792f8789 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -267,13 +267,13 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, /* FIXME...EIEIO, ENOSPC and more */ /* insert any holes we need to create */ - if (isize < end_pos) { + if (isize < start_pos) { u64 last_pos_in_file; u64 hole_size; u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; - hole_size = (end_pos - last_pos_in_file + mask) & ~mask; - if (last_pos_in_file < end_pos) { + hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + if (last_pos_in_file < start_pos) { err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bbba3350d023..d39433dfb2c7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -351,7 +351,9 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); + mutex_lock(&BTRFS_I(inode)->csum_mutex); btrfs_csum_file_blocks(trans, root, inode, bio, sums); + mutex_unlock(&BTRFS_I(inode)->csum_mutex); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); @@ -1400,6 +1402,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + mutex_init(&BTRFS_I(inode)->csum_mutex); atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); return 0; } @@ -1701,6 +1704,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + mutex_init(&BTRFS_I(inode)->csum_mutex); atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->root = root; @@ -1924,6 +1928,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; @@ -2862,6 +2867,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 5e4c0d95ce43..254da8225664 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -152,12 +152,13 @@ int btrfs_add_ordered_inode(struct inode *inode) inode->i_ino, &entry->rb_node); BTRFS_I(inode)->ordered_trans = transid; + if (!node) + igrab(inode); write_unlock(&tree->lock); + if (node) kfree(entry); - else - igrab(inode); return 0; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0c53ff775b92..8e909cb97c6d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -477,7 +477,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, if (err) ret = err; nr = trans->blocks_used; - ret = btrfs_end_transaction(trans, tree_root); + ret = btrfs_end_transaction_throttle(trans, tree_root); BUG_ON(ret); mutex_unlock(&root->fs_info->drop_mutex); diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 1677e4edaf6f..b17693f61fbc 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -38,8 +38,15 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, u64 last_ret = 0; if (root->fs_info->extent_root == root) { + /* + * there's recursion here right now in the tree locking, + * we can't defrag the extent root without deadlock + */ + goto out; +#if 0 mutex_lock(&root->fs_info->alloc_mutex); is_extent = 1; +#endif } if (root->ref_cows == 0 && !is_extent) From 3f157a2fd2ad731e1ed9964fecdc5f459f04a4a4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: [PATCH 0582/2985] Btrfs: Online btree defragmentation fixes The btree defragger wasn't making forward progress because the new key wasn't being saved by the btrfs_search_forward function. This also disables the automatic btree defrag, it wasn't scaling well to huge filesystems. The auto-defrag needs to be done differently. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 170 ++++++++++++++++++++++++++++++++++++++--- fs/btrfs/ctree.h | 7 +- fs/btrfs/disk-io.c | 61 +-------------- fs/btrfs/disk-io.h | 6 -- fs/btrfs/extent-tree.c | 2 - fs/btrfs/super.c | 1 - fs/btrfs/transaction.c | 35 +-------- fs/btrfs/transaction.h | 1 - fs/btrfs/tree-defrag.c | 36 ++++----- 9 files changed, 190 insertions(+), 129 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7f4cc2b88d09..0cb80f32a9c7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -63,10 +63,9 @@ void btrfs_free_path(struct btrfs_path *p) void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; - int keep = p->keep_locks; - int skip = p->skip_locking; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { + p->slots[i] = 0; if (!p->nodes[i]) continue; if (p->locks[i]) { @@ -74,10 +73,8 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) p->locks[i] = 0; } free_extent_buffer(p->nodes[i]); + p->nodes[i] = NULL; } - memset(p, 0, sizeof(*p)); - p->keep_locks = keep; - p->skip_locking = skip; } struct extent_buffer *btrfs_root_node(struct btrfs_root *root) @@ -463,8 +460,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, search_start = cur->start; last_block = cur->start; *last_ret = search_start; - if (parent_level == 1) - btrfs_clear_buffer_defrag(cur); btrfs_tree_unlock(cur); free_extent_buffer(cur); } @@ -2969,8 +2964,138 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) return 1; } +/* + * A helper function to walk down the tree starting at min_key, and looking + * for nodes or leaves that are either in cache or have a minimum + * transaction id. This is used by the btree defrag code, but could + * also be used to search for blocks that have changed since a given + * transaction id. + * + * This does not cow, but it does stuff the starting key it finds back + * into min_key, so you can call btrfs_search_slot with cow=1 on the + * key and get a writable path. + * + * This does lock as it descends, and path->keep_locks should be set + * to 1 by the caller. + * + * This honors path->lowest_level to prevent descent past a given level + * of the tree. + * + * returns zero if something useful was found, < 0 on error and 1 if there + * was nothing in the tree that matched the search criteria. + */ +int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_path *path, int cache_only, + u64 min_trans) +{ + struct extent_buffer *cur; + struct btrfs_key found_key; + int slot; + u32 nritems; + int level; + int ret = 1; + +again: + cur = btrfs_lock_root_node(root); + level = btrfs_header_level(cur); + path->nodes[level] = cur; + path->locks[level] = 1; + + if (btrfs_header_generation(cur) < min_trans) { + ret = 1; + goto out; + } + while(1) { + nritems = btrfs_header_nritems(cur); + level = btrfs_header_level(cur); + bin_search(cur, min_key, level, &slot); + + /* at level = 0, we're done, setup the path and exit */ + if (level == 0) { + ret = 0; + path->slots[level] = slot; + btrfs_item_key_to_cpu(cur, &found_key, slot); + goto out; + } + /* + * check this node pointer against the cache_only and + * min_trans parameters. If it isn't in cache or is too + * old, skip to the next one. + */ + while(slot < nritems) { + u64 blockptr; + u64 gen; + struct extent_buffer *tmp; + blockptr = btrfs_node_blockptr(cur, slot); + gen = btrfs_node_ptr_generation(cur, slot); + if (gen < min_trans) { + slot++; + continue; + } + if (!cache_only) + break; + + tmp = btrfs_find_tree_block(root, blockptr, + btrfs_level_size(root, level - 1)); + + if (tmp && btrfs_buffer_uptodate(tmp, gen)) { + free_extent_buffer(tmp); + break; + } + if (tmp) + free_extent_buffer(tmp); + slot++; + } + /* + * we didn't find a candidate key in this node, walk forward + * and find another one + */ + if (slot >= nritems) { + ret = btrfs_find_next_key(root, path, min_key, level, + cache_only, min_trans); + if (ret == 0) { + btrfs_release_path(root, path); + goto again; + } else { + goto out; + } + } + /* save our key for returning back */ + btrfs_node_key_to_cpu(cur, &found_key, slot); + path->slots[level] = slot; + if (level == path->lowest_level) { + ret = 0; + unlock_up(path, level, 1); + goto out; + } + cur = read_node_slot(root, cur, slot); + + btrfs_tree_lock(cur); + path->locks[level - 1] = 1; + path->nodes[level - 1] = cur; + unlock_up(path, level, 1); + } +out: + if (ret == 0) + memcpy(min_key, &found_key, sizeof(found_key)); + return ret; +} + +/* + * this is similar to btrfs_next_leaf, but does not try to preserve + * and fixup the path. It looks for and returns the next key in the + * tree based on the current path and the cache_only and min_trans + * parameters. + * + * 0 is returned if another key is found, < 0 if there are any errors + * and 1 is returned if there are no higher keys in the tree + * + * path->keep_locks should be set to 1 on the search made before + * calling this function. + */ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key, int lowest_level) + struct btrfs_key *key, int lowest_level, + int cache_only, u64 min_trans) { int level = lowest_level; int slot; @@ -2982,6 +3107,7 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, slot = path->slots[level] + 1; c = path->nodes[level]; +next: if (slot >= btrfs_header_nritems(c)) { level++; if (level == BTRFS_MAX_LEVEL) { @@ -2991,8 +3117,28 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, } if (level == 0) btrfs_item_key_to_cpu(c, key, slot); - else + else { + u64 blockptr = btrfs_node_blockptr(c, slot); + u64 gen = btrfs_node_ptr_generation(c, slot); + + if (cache_only) { + struct extent_buffer *cur; + cur = btrfs_find_tree_block(root, blockptr, + btrfs_level_size(root, level - 1)); + if (!cur || !btrfs_buffer_uptodate(cur, gen)) { + slot++; + if (cur) + free_extent_buffer(cur); + goto next; + } + free_extent_buffer(cur); + } + if (gen < min_trans) { + slot++; + goto next; + } btrfs_node_key_to_cpu(c, key, slot); + } return 0; } return 1; @@ -3095,6 +3241,12 @@ done: return 0; } +/* + * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps + * searching until it gets past min_objectid or finds an item of 'type' + * + * returns 0 if something is found, 1 if nothing was found and < 0 on error + */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, int type) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ca8e6f15859e..a28796482b4a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -609,6 +609,7 @@ struct btrfs_root { u64 last_inode_alloc; int ref_cows; int track_dirty; + u64 defrag_trans_start; struct btrfs_key defrag_progress; struct btrfs_key defrag_max; int defrag_running; @@ -1412,7 +1413,11 @@ int btrfs_previous_item(struct btrfs_root *root, struct extent_buffer *btrfs_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key, int lowest_level); + struct btrfs_key *key, int lowest_level, + int cache_only, u64 min_trans); +int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_path *path, int cache_only, + u64 min_trans); int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 31ca9f89388d..4cdc0b6a2672 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -295,7 +295,6 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); - btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", @@ -355,7 +354,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { ret = -EIO; @@ -736,6 +734,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); + root->defrag_trans_start = fs_info->generation; init_completion(&root->kobj_unregister); root->defrag_running = 0; root->defrag_level = 0; @@ -1168,7 +1167,6 @@ static int transaction_kthread(void *arg) goto sleep; } mutex_unlock(&root->fs_info->trans_mutex); - btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sleep: @@ -1434,12 +1432,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root, "btrfs-transaction"); if (!fs_info->transaction_kthread) - goto fail_trans_kthread; + goto fail_cleaner; return tree_root; -fail_trans_kthread: +fail_cleaner: kthread_stop(fs_info->cleaner_kthread); fail_extent_root: free_extent_buffer(extent_root->node); @@ -1662,7 +1660,6 @@ int close_ctree(struct btrfs_root *root) kthread_stop(root->fs_info->transaction_kthread); kthread_stop(root->fs_info->cleaner_kthread); - btrfs_defrag_dirty_roots(root->fs_info); btrfs_clean_old_snapshots(root); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -1794,58 +1791,6 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) return; } -void btrfs_set_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, - buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); -} - -void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, - buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, - GFP_NOFS); -} - -int btrfs_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); -} - -int btrfs_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG_DONE, 0); -} - -int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG_DONE, GFP_NOFS); -} - -int btrfs_clear_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG, GFP_NOFS); -} - int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index deff6b4815a7..353c3c50c957 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -61,12 +61,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); int btrfs_set_buffer_uptodate(struct extent_buffer *buf); int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf); -void btrfs_set_buffer_defrag(struct extent_buffer *buf); -void btrfs_set_buffer_defrag_done(struct extent_buffer *buf); -int btrfs_buffer_defrag(struct extent_buffer *buf); -int btrfs_buffer_defrag_done(struct extent_buffer *buf); -int btrfs_clear_buffer_defrag(struct extent_buffer *buf); -int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index dc3c03c6612d..5e0857ffbc35 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2095,8 +2095,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - if (!btrfs_test_opt(root, SSD)) - btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 726d6871fa13..5e28cf5c2e85 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -365,7 +365,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) return 0; } btrfs_clean_old_snapshots(root); - btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8e909cb97c6d..98f422d9ab07 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -30,7 +30,6 @@ extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; #define BTRFS_ROOT_TRANS_TAG 0 -#define BTRFS_ROOT_DEFRAG_TAG 1 static noinline void put_transaction(struct btrfs_transaction *transaction) { @@ -92,9 +91,6 @@ static noinline int record_root_in_trans(struct btrfs_root *root) radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); - radix_tree_tag_set(&root->fs_info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); root->commit_root = btrfs_root_node(root); } else { WARN_ON(1); @@ -403,44 +399,15 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) cond_resched(); trans = btrfs_start_transaction(root, 1); - if (ret != -EAGAIN) + if (root->fs_info->closing || ret != -EAGAIN) break; } root->defrag_running = 0; smp_mb(); - radix_tree_tag_clear(&info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); btrfs_end_transaction(trans, root); return 0; } -int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) -{ - struct btrfs_root *gang[1]; - struct btrfs_root *root; - int i; - int ret; - int err = 0; - u64 last = 0; - - while(1) { - ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix, - (void **)gang, last, - ARRAY_SIZE(gang), - BTRFS_ROOT_DEFRAG_TAG); - if (ret == 0) - break; - for (i = 0; i < ret; i++) { - root = gang[i]; - last = root->root_key.objectid + 1; - btrfs_defrag_root(root, 1); - } - } - btrfs_defrag_root(info->extent_root, 1); - return err; -} - static noinline int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e1e5a06b65f4..9ccd5a5b170f 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -84,7 +84,6 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, struct list_head *dead_list); -int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b17693f61fbc..cc2650b06952 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -32,10 +32,13 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int wret; int level; int orig_level; - int i; int is_extent = 0; int next_key_ret = 0; u64 last_ret = 0; + u64 min_trans = 0; + + if (cache_only) + goto out; if (root->fs_info->extent_root == root) { /* @@ -43,10 +46,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, * we can't defrag the extent root without deadlock */ goto out; -#if 0 - mutex_lock(&root->fs_info->alloc_mutex); - is_extent = 1; -#endif } if (root->ref_cows == 0 && !is_extent) @@ -84,6 +83,17 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, path->lowest_level = 1; path->keep_locks = 1; + if (cache_only) + min_trans = root->defrag_trans_start; + + ret = btrfs_search_forward(root, &key, path, cache_only, min_trans); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + goto out; + } + btrfs_release_path(root, path); wret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (wret < 0) { @@ -95,7 +105,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; } path->slots[1] = btrfs_header_nritems(path->nodes[1]); - next_key_ret = btrfs_find_next_key(root, path, &key, 1); + next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only, + min_trans); ret = btrfs_realloc_node(trans, root, path->nodes[1], 0, cache_only, &last_ret, @@ -106,19 +117,9 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, ret = -EAGAIN; } - for (i = 1; i < BTRFS_MAX_LEVEL; i++) { - if (path->locks[i]) { - btrfs_tree_unlock(path->nodes[i]); - path->locks[i] = 0; - } - if (path->nodes[i]) { - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - } - } + btrfs_release_path(root, path); if (is_extent) btrfs_extent_post_op(trans, root); - out: if (is_extent) mutex_unlock(&root->fs_info->alloc_mutex); @@ -138,6 +139,7 @@ done: if (ret != -EAGAIN) { memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + root->defrag_trans_start = trans->transid; } return ret; } From f9efa9c784aa3b801feb367f72c6867d26fb348e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:14:04 -0400 Subject: [PATCH 0583/2985] Btrfs: Reduce contention on the root node This calls unlock_up sooner in btrfs_search_slot in order to decrease the amount of work done with the higher level tree locks held. Also, it changes btrfs_tree_lock to spin for a big against the page lock before scheduling. This makes a big difference in context switch rate under highly contended workloads. Longer term, a better locking structure is needed than the page lock. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 17 +++++++++++------ fs/btrfs/locking.c | 10 ++++++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0cb80f32a9c7..c6759fc1004a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1313,16 +1313,13 @@ again: slot = p->slots[level]; BUG_ON(btrfs_header_nritems(b) == 1); } + unlock_up(p, level, lowest_unlock); + /* this is only true while dropping a snapshot */ if (level == lowest_level) { - unlock_up(p, level, lowest_unlock); break; } - if (should_reada) - reada_for_search(root, p, level, slot, - key->objectid); - blocknr = btrfs_node_blockptr(b, slot); gen = btrfs_node_ptr_generation(b, slot); blocksize = btrfs_level_size(root, level - 1); @@ -1340,6 +1337,11 @@ again: btrfs_release_path(NULL, p); if (tmp) free_extent_buffer(tmp); + if (should_reada) + reada_for_search(root, p, + level, slot, + key->objectid); + tmp = read_tree_block(root, blocknr, blocksize, gen); if (tmp) @@ -1348,12 +1350,15 @@ again: } else { if (tmp) free_extent_buffer(tmp); + if (should_reada) + reada_for_search(root, p, + level, slot, + key->objectid); b = read_node_slot(root, b, slot); } } if (!p->skip_locking) btrfs_tree_lock(b); - unlock_up(p, level, lowest_unlock); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 80813a307b4b..058a506a0dd8 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -27,6 +27,16 @@ int btrfs_tree_lock(struct extent_buffer *eb) { + int i; + + if (!TestSetPageLocked(eb->first_page)) + return 0; + for (i = 0; i < 512; i++) { + cpu_relax(); + if (!TestSetPageLocked(eb->first_page)) + return 0; + } + cpu_relax(); lock_page(eb->first_page); return 0; } From a7a16fd772620605c76e8ac8bdbc8ccc9e3df1a0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 26 Jun 2008 10:34:20 -0400 Subject: [PATCH 0584/2985] Btrfs: Fix deadlock while searching for dead roots on mount btrfs_find_dead_roots called btrfs_read_fs_root_no_radix, which means we end up calling btrfs_search_slot with a path already held. The fix is to remember the key inside btrfs_find_dead_roots and drop the path. Signed-off-by: Chris Mason --- fs/btrfs/root-tree.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 8bf21ba0a43b..a5c0e98b5aeb 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -154,6 +154,7 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_item *item; struct btrfs_root_item *ri; struct btrfs_key key; + struct btrfs_key found_key; struct btrfs_path *path; int ret; u32 nritems; @@ -166,6 +167,8 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + +again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; @@ -196,7 +199,11 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, if (btrfs_disk_root_refs(leaf, ri) != 0) goto next; - dead_root = btrfs_read_fs_root_no_radix(root->fs_info, &key); + memcpy(&found_key, &key, sizeof(key)); + key.offset++; + btrfs_release_path(root, path); + dead_root = btrfs_read_fs_root_no_radix(root->fs_info, + &found_key); if (IS_ERR(dead_root)) { ret = PTR_ERR(dead_root); goto err; @@ -206,6 +213,7 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, &root->fs_info->dead_roots); if (ret) goto err; + goto again; next: slot++; path->slots[0]++; From 7d9eb12c8739e7dc80c78c6b3596f912ecd8f941 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jul 2008 14:19:17 -0400 Subject: [PATCH 0585/2985] Btrfs: Add locking around volume management (device add/remove/balance) Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 4 --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 75 +++++++++++++++++++++++++++++++----------- fs/btrfs/ioctl.c | 6 ++-- fs/btrfs/volumes.c | 58 ++++++++++++++++++++++++-------- 6 files changed, 104 insertions(+), 41 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c6759fc1004a..bbf9bf374066 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1251,10 +1251,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(p->nodes[0] != NULL); WARN_ON(cow && root == root->fs_info->extent_root && !mutex_is_locked(&root->fs_info->alloc_mutex)); - WARN_ON(root == root->fs_info->chunk_root && - !mutex_is_locked(&root->fs_info->chunk_mutex)); - WARN_ON(root == root->fs_info->dev_root && - !mutex_is_locked(&root->fs_info->chunk_mutex)); if (ins_len < 0) lowest_unlock = 2; again: diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a28796482b4a..f3783dbd9b60 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -523,6 +523,7 @@ struct btrfs_fs_info { struct mutex alloc_mutex; struct mutex chunk_mutex; struct mutex drop_mutex; + struct mutex volume_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4cdc0b6a2672..8f4c40033e92 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1287,6 +1287,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); + mutex_init(&fs_info->volume_mutex); #if 0 ret = add_hasher(fs_info, "crc32c"); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5e0857ffbc35..8ebfa6be0790 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -245,6 +245,7 @@ static int noinline find_search_start(struct btrfs_root *root, u64 search_start = *start_ret; int wrapped = 0; + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); free_space_cache = &root->fs_info->free_space_cache; @@ -1242,6 +1243,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 start; u64 end; + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); while(total) { cache = btrfs_lookup_block_group(info, bytenr); if (!cache) { @@ -1297,6 +1299,7 @@ static int update_pinned_extents(struct btrfs_root *root, struct btrfs_block_group_cache *cache; struct btrfs_fs_info *fs_info = root->fs_info; + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); if (pin) { set_extent_dirty(&fs_info->pinned_extents, bytenr, bytenr + num - 1, GFP_NOFS); @@ -1391,6 +1394,7 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, int level; int err = 0; + WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); btrfs_set_stack_extent_refs(&extent_item, 1); btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); path = btrfs_alloc_path(); @@ -1437,6 +1441,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, { int err = 0; + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); if (!pending) { struct extent_buffer *buf; buf = btrfs_find_tree_block(root, bytenr, num_bytes); @@ -1490,6 +1495,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item *ei; u32 refs; + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_bytes; @@ -1619,6 +1625,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct struct extent_io_tree *pending_del; struct extent_io_tree *pinned_extents; + WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); pending_del = &extent_root->fs_info->pending_del; pinned_extents = &extent_root->fs_info->pinned_extents; @@ -2428,6 +2435,10 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_node_key(node, &found_key, path->slots[level]); WARN_ON(memcmp(&found_key, &root_item->drop_progress, sizeof(found_key))); + /* + * unlock our path, this is safe because only this + * function is allowed to delete this snapshot + */ for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (path->nodes[i] && path->locks[i]) { path->locks[i] = 0; @@ -2611,7 +2622,6 @@ static int find_root_for_ref(struct btrfs_root *root, u64 root_search_start = BTRFS_FS_TREE_OBJECTID; u64 found_bytenr; int ret; - int i; root_location.offset = (u64)-1; root_location.type = BTRFS_ROOT_ITEM_KEY; @@ -2635,12 +2645,6 @@ static int find_root_for_ref(struct btrfs_root *root, found_bytenr = path->nodes[level]->start; } - for (i = level; i < BTRFS_MAX_LEVEL; i++) { - if (!path->nodes[i]) - break; - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - } btrfs_release_path(cur_root, path); if (found_bytenr == bytenr) { @@ -2689,6 +2693,8 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, int ret; int level; + WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); + ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_ref); ref_root = btrfs_ref_root(path->nodes[0], ref); @@ -2707,6 +2713,7 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, &root_location); BUG_ON(!found_root); + mutex_unlock(&extent_root->fs_info->alloc_mutex); if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { found_key.objectid = ref_objectid; @@ -2748,9 +2755,9 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, /* this can happen if the reference is not against * the latest version of the tree root */ - if (is_bad_inode(inode)) { + if (is_bad_inode(inode)) goto out; - } + *last_file_objectid = inode->i_ino; *last_file_root = found_root->root_key.objectid; *last_file_offset = ref_offset; @@ -2760,7 +2767,7 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, } else { struct btrfs_trans_handle *trans; struct extent_buffer *eb; - int i; + int needs_lock = 0; eb = read_tree_block(found_root, extent_key->objectid, extent_key->offset, 0); @@ -2782,26 +2789,40 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, if (ret) goto out; + /* + * right here almost anything could happen to our key, + * but that's ok. The cow below will either relocate it + * or someone else will have relocated it. Either way, + * it is in a different spot than it was before and + * we're happy. + */ + trans = btrfs_start_transaction(found_root, 1); + if (found_root == extent_root->fs_info->extent_root || + found_root == extent_root->fs_info->chunk_root || + found_root == extent_root->fs_info->dev_root) { + needs_lock = 1; + mutex_lock(&extent_root->fs_info->alloc_mutex); + } + path->lowest_level = level; path->reada = 2; ret = btrfs_search_slot(trans, found_root, &found_key, path, 0, 1); path->lowest_level = 0; - for (i = level; i < BTRFS_MAX_LEVEL; i++) { - if (!path->nodes[i]) - break; - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - } btrfs_release_path(found_root, path); + if (found_root == found_root->fs_info->extent_root) btrfs_extent_post_op(trans, found_root); - btrfs_end_transaction(trans, found_root); - } + if (needs_lock) + mutex_unlock(&extent_root->fs_info->alloc_mutex); + btrfs_end_transaction(trans, found_root); + + } out: + mutex_lock(&extent_root->fs_info->alloc_mutex); return 0; } @@ -2943,7 +2964,10 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root, if (btrfs_block_group_used(&shrink_block_group->item) > 0) { + mutex_unlock(&root->fs_info->alloc_mutex); trans = btrfs_start_transaction(root, 1); + mutex_lock(&root->fs_info->alloc_mutex); + new_alloc_flags = update_block_group_flags(root, shrink_block_group->flags); if (new_alloc_flags != shrink_block_group->flags) { @@ -2954,7 +2978,10 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root, } do_chunk_alloc(trans, root->fs_info->extent_root, calc + 2 * 1024 * 1024, new_alloc_flags, force); + + mutex_unlock(&root->fs_info->alloc_mutex); btrfs_end_transaction(trans, root); + mutex_lock(&root->fs_info->alloc_mutex); } return 0; } @@ -3031,9 +3058,9 @@ again: if (ret < 0) goto out; +next: leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); -next: if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(root, path); if (ret < 0) @@ -3083,6 +3110,7 @@ next: printk("btrfs relocate found %llu last extent was %llu\n", (unsigned long long)total_found, (unsigned long long)found_key.objectid); + mutex_unlock(&root->fs_info->alloc_mutex); trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); @@ -3090,6 +3118,7 @@ next: trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); + mutex_lock(&root->fs_info->alloc_mutex); goto again; } @@ -3097,7 +3126,10 @@ next: * we've freed all the extents, now remove the block * group item from the tree */ + mutex_unlock(&root->fs_info->alloc_mutex); + trans = btrfs_start_transaction(root, 1); + mutex_lock(&root->fs_info->alloc_mutex); memcpy(&key, &shrink_block_group->key, sizeof(key)); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); @@ -3119,8 +3151,12 @@ next: kfree(shrink_block_group); btrfs_del_item(trans, root, path); + btrfs_release_path(root, path); + mutex_unlock(&root->fs_info->alloc_mutex); btrfs_commit_transaction(trans, root); + mutex_lock(&root->fs_info->alloc_mutex); + /* the code to unpin extents might set a few bits in the free * space cache for this range again */ @@ -3263,6 +3299,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *cache; struct extent_io_tree *block_group_cache; + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); extent_root = root->fs_info->extent_root; block_group_cache = &root->fs_info->block_group_cache; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 026039a2ac58..83f17a5cbd6a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -307,8 +307,7 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) goto out; } - mutex_lock(&root->fs_info->alloc_mutex); - mutex_lock(&root->fs_info->chunk_mutex); + mutex_lock(&root->fs_info->volume_mutex); sizestr = vol_args->name; devstr = strchr(sizestr, ':'); if (devstr) { @@ -378,8 +377,7 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) } out_unlock: - mutex_lock(&root->fs_info->alloc_mutex); - mutex_lock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->volume_mutex); out: kfree(vol_args); return ret; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4e7cee27aab5..5e6ee7a6f738 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -56,6 +56,18 @@ void btrfs_unlock_volumes(void) mutex_unlock(&uuid_mutex); } +static void lock_chunks(struct btrfs_root *root) +{ + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); +} + +static void unlock_chunks(struct btrfs_root *root) +{ + mutex_unlock(&root->fs_info->alloc_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); +} + int btrfs_cleanup_fs_uuids(void) { struct btrfs_fs_devices *fs_devices; @@ -822,6 +834,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; + lock_chunks(root); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) @@ -856,6 +869,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, total_bytes - 1); out: btrfs_free_path(path); + unlock_chunks(root); btrfs_commit_transaction(trans, root); return ret; } @@ -870,9 +884,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) u64 devid; int ret = 0; - mutex_lock(&root->fs_info->alloc_mutex); - mutex_lock(&root->fs_info->chunk_mutex); mutex_lock(&uuid_mutex); + mutex_lock(&root->fs_info->volume_mutex); all_avail = root->fs_info->avail_data_alloc_bits | root->fs_info->avail_system_alloc_bits | @@ -988,9 +1001,8 @@ error_close: if (bdev) close_bdev_excl(bdev); out: + mutex_unlock(&root->fs_info->volume_mutex); mutex_unlock(&uuid_mutex); - mutex_unlock(&root->fs_info->chunk_mutex); - mutex_unlock(&root->fs_info->alloc_mutex); return ret; } @@ -1010,10 +1022,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) return -EIO; } - mutex_lock(&root->fs_info->alloc_mutex); - mutex_lock(&root->fs_info->chunk_mutex); + mutex_lock(&root->fs_info->volume_mutex); trans = btrfs_start_transaction(root, 1); + lock_chunks(root); devices = &root->fs_info->fs_devices->devices; list_for_each(cur, devices) { device = list_entry(cur, struct btrfs_device, dev_list); @@ -1065,9 +1077,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->num_devices++; root->fs_info->fs_devices->open_devices++; out: + unlock_chunks(root); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->chunk_mutex); - mutex_unlock(&root->fs_info->alloc_mutex); + mutex_unlock(&root->fs_info->volume_mutex); return ret; @@ -1122,7 +1134,7 @@ out: return ret; } -int btrfs_grow_device(struct btrfs_trans_handle *trans, +static int __btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 new_size) { struct btrfs_super_block *super_copy = @@ -1134,6 +1146,16 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, return btrfs_update_device(trans, device); } +int btrfs_grow_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device, u64 new_size) +{ + int ret; + lock_chunks(device->dev_root); + ret = __btrfs_grow_device(trans, device, new_size); + unlock_chunks(device->dev_root); + return ret; +} + static int btrfs_free_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 chunk_tree, u64 chunk_objectid, @@ -1234,6 +1256,8 @@ int btrfs_relocate_chunk(struct btrfs_root *root, trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); + lock_chunks(root); + /* * step two, delete the device extents and the * chunk tree entries @@ -1278,6 +1302,7 @@ int btrfs_relocate_chunk(struct btrfs_root *root, /* once for us */ free_extent_map(em); + unlock_chunks(root); btrfs_end_transaction(trans, root); return 0; } @@ -1308,8 +1333,7 @@ int btrfs_balance(struct btrfs_root *dev_root) struct btrfs_key found_key; - BUG(); /* FIXME, needs locking */ - + mutex_lock(&dev_root->fs_info->volume_mutex); dev_root = dev_root->fs_info->dev_root; /* step one make some room on all the devices */ @@ -1355,13 +1379,14 @@ int btrfs_balance(struct btrfs_root *dev_root) ret = btrfs_previous_item(chunk_root, path, 0, BTRFS_CHUNK_ITEM_KEY); - if (ret) { + if (ret) break; - } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); if (found_key.objectid != key.objectid) break; + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_chunk); @@ -1370,16 +1395,17 @@ int btrfs_balance(struct btrfs_root *dev_root) if (key.offset == 0) break; + btrfs_release_path(chunk_root, path); ret = btrfs_relocate_chunk(chunk_root, chunk_root->root_key.objectid, found_key.objectid, found_key.offset); BUG_ON(ret); - btrfs_release_path(chunk_root, path); } ret = 0; error: btrfs_free_path(path); + mutex_unlock(&dev_root->fs_info->volume_mutex); return ret; } @@ -1419,14 +1445,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) path->reada = 2; + lock_chunks(root); + device->total_bytes = new_size; ret = btrfs_update_device(trans, device); if (ret) { + unlock_chunks(root); btrfs_end_transaction(trans, root); goto done; } WARN_ON(diff > old_total); btrfs_set_super_total_bytes(super_copy, old_total - diff); + unlock_chunks(root); btrfs_end_transaction(trans, root); key.objectid = device->devid; From 77a41afb7d0dd0f27b6f2f1a5bc701929c7034de Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jul 2008 14:32:12 -0400 Subject: [PATCH 0586/2985] Btrfs: Drop some verbose printks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 -- fs/btrfs/transaction.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8f4c40033e92..b01b3f4f92a9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1117,9 +1117,7 @@ static int cleaner_kthread(void *arg) vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->cleaner_mutex); -printk("cleaner awake\n"); btrfs_clean_old_snapshots(root); -printk("cleaner done\n"); mutex_unlock(&root->fs_info->cleaner_mutex); if (freezing(current)) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 98f422d9ab07..a8a3cb03de59 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -667,7 +667,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, root->fs_info->btree_inode->i_mapping, GFP_NOFS); trans->transaction->in_commit = 1; -printk("trans %Lu in commit\n", trans->transid); cur_trans = trans->transaction; if (cur_trans->list.prev != &root->fs_info->trans_list) { prev_trans = list_entry(cur_trans->list.prev, @@ -748,7 +747,6 @@ printk("trans %Lu in commit\n", trans->transid); kfree(pinned_copy); cur_trans->commit_done = 1; -printk("trans %Lu done in commit\n", cur_trans->transid); root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); From e6dcd2dc9c489108648e2ed543315dd134d50a9a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:53:50 -0400 Subject: [PATCH 0587/2985] Btrfs: New data=ordered implementation The old data=ordered code would force commit to wait until all the data extents from the transaction were fully on disk. This introduced large latencies into the commit and stalled new writers in the transaction for a long time. The new code changes the way data allocations and extents work: * When delayed allocation is filled, data extents are reserved, and the extent bit EXTENT_ORDERED is set on the entire range of the extent. A struct btrfs_ordered_extent is allocated an inserted into a per-inode rbtree to track the pending extents. * As each page is written EXTENT_ORDERED is cleared on the bytes corresponding to that page. * When all of the bytes corresponding to a single struct btrfs_ordered_extent are written, The previously reserved extent is inserted into the FS btree and into the extent allocation trees. The checksums for the file data are also updated. Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 4 +- fs/btrfs/ctree.h | 19 +- fs/btrfs/disk-io.c | 13 +- fs/btrfs/extent-tree.c | 132 ++++++++--- fs/btrfs/extent_io.c | 52 +++- fs/btrfs/extent_io.h | 14 +- fs/btrfs/extent_map.c | 5 +- fs/btrfs/file-item.c | 62 +++-- fs/btrfs/file.c | 67 ++++-- fs/btrfs/inode.c | 447 ++++++++++++++++++++++------------- fs/btrfs/ordered-data.c | 509 ++++++++++++++++++++++++---------------- fs/btrfs/ordered-data.h | 71 +++++- fs/btrfs/transaction.c | 67 +----- fs/btrfs/transaction.h | 4 - 14 files changed, 937 insertions(+), 529 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 40b4e0c9cd09..8d03687510e0 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -21,6 +21,7 @@ #include "extent_map.h" #include "extent_io.h" +#include "ordered-data.h" /* in memory btrfs inode */ struct btrfs_inode { @@ -32,9 +33,8 @@ struct btrfs_inode { struct extent_io_tree io_failure_tree; struct mutex csum_mutex; struct inode vfs_inode; - atomic_t ordered_writeback; + struct btrfs_ordered_inode_tree ordered_tree; - u64 ordered_trans; /* * transid of the trans_handle that last modified this inode */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f3783dbd9b60..ceebc052ddcb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "bit-radix.h" #include "extent_io.h" @@ -37,6 +38,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; +struct btrfs_ordered_sum; #define BTRFS_MAGIC "_B5RfS_M" @@ -510,6 +512,7 @@ struct btrfs_fs_info { u64 max_inline; u64 alloc_start; struct btrfs_transaction *running_transaction; + wait_queue_head_t transaction_throttle; struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; struct block_device *__bdev; @@ -541,6 +544,7 @@ struct btrfs_fs_info { */ struct btrfs_workers workers; struct btrfs_workers endio_workers; + struct btrfs_workers endio_write_workers; struct btrfs_workers submit_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; @@ -1384,6 +1388,17 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, u64 data); +int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins); +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root @@ -1556,9 +1571,9 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, u64 bytenr, int mod); int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct bio *bio, char *sums); + struct btrfs_ordered_sum *sums); int btrfs_csum_one_bio(struct btrfs_root *root, - struct bio *bio, char **sums_ret); + struct bio *bio, struct btrfs_ordered_sum **sums_ret); struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b01b3f4f92a9..4a5ebafb935a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -407,7 +407,11 @@ static int end_workqueue_bio(struct bio *bio, end_io_wq->error = err; end_io_wq->work.func = end_workqueue_fn; end_io_wq->work.flags = 0; - btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); + if (bio->bi_rw & (1 << BIO_RW)) + btrfs_queue_worker(&fs_info->endio_write_workers, + &end_io_wq->work); + else + btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; @@ -1286,6 +1290,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); + init_waitqueue_head(&fs_info->transaction_throttle); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1325,9 +1330,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->endio_write_workers, + fs_info->thread_pool_size); btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + btrfs_start_workers(&fs_info->endio_write_workers, + fs_info->thread_pool_size); err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { @@ -1447,6 +1456,7 @@ fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); fail_iput: iput(fs_info->btree_inode); @@ -1702,6 +1712,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); iput(fs_info->btree_inode); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8ebfa6be0790..343d1101c31c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1895,36 +1895,17 @@ error: return ret; } -/* - * finds a free extent and does all the dirty work required for allocation - * returns the key for the extent through ins, and a tree buffer for - * the first block of the extent through buf. - * - * returns 0 if everything worked, non-zero otherwise. - */ -int btrfs_alloc_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 num_bytes, u64 min_alloc_size, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, u64 data) +static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data) { int ret; - int pending_ret; - u64 super_used; - u64 root_used; u64 search_start = 0; u64 alloc_profile; - u32 sizes[2]; struct btrfs_fs_info *info = root->fs_info; - struct btrfs_root *extent_root = info->extent_root; - struct btrfs_extent_item *extent_item; - struct btrfs_extent_ref *ref; - struct btrfs_path *path; - struct btrfs_key keys[2]; - - maybe_lock_mutex(root); if (data) { alloc_profile = info->avail_data_alloc_bits & @@ -1974,11 +1955,48 @@ again: } if (ret) { printk("allocation failed flags %Lu\n", data); - } - if (ret) { BUG(); - goto out; } + clear_extent_dirty(&root->fs_info->free_space_cache, + ins->objectid, ins->objectid + ins->offset - 1, + GFP_NOFS); + return 0; +} + +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data) +{ + int ret; + maybe_lock_mutex(root); + ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, + empty_size, hint_byte, search_end, ins, + data); + maybe_unlock_mutex(root); + return ret; +} + +static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins) +{ + int ret; + int pending_ret; + u64 super_used; + u64 root_used; + u64 num_bytes = ins->offset; + u32 sizes[2]; + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_root *extent_root = info->extent_root; + struct btrfs_extent_item *extent_item; + struct btrfs_extent_ref *ref; + struct btrfs_path *path; + struct btrfs_key keys[2]; /* block accounting for super block */ spin_lock_irq(&info->delalloc_lock); @@ -1990,10 +2008,6 @@ again: root_used = btrfs_root_used(&root->root_item); btrfs_set_root_used(&root->root_item, root_used + num_bytes); - clear_extent_dirty(&root->fs_info->free_space_cache, - ins->objectid, ins->objectid + ins->offset - 1, - GFP_NOFS); - if (root == extent_root) { set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, @@ -2001,10 +2015,6 @@ again: goto update_block; } - WARN_ON(trans->alloc_exclude_nr); - trans->alloc_exclude_start = ins->objectid; - trans->alloc_exclude_nr = ins->offset; - memcpy(&keys[0], ins, sizeof(*ins)); keys[1].offset = hash_extent_ref(root_objectid, ref_generation, owner, owner_offset); @@ -2054,6 +2064,51 @@ update_block: BUG(); } out: + return ret; +} + +int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins) +{ + int ret; + maybe_lock_mutex(root); + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, + ref_generation, owner, + owner_offset, ins); + maybe_unlock_mutex(root); + return ret; +} +/* + * finds a free extent and does all the dirty work required for allocation + * returns the key for the extent through ins, and a tree buffer for + * the first block of the extent through buf. + * + * returns 0 if everything worked, non-zero otherwise. + */ +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, u64 data) +{ + int ret; + + maybe_lock_mutex(root); + + ret = __btrfs_reserve_extent(trans, root, num_bytes, + min_alloc_size, empty_size, hint_byte, + search_end, ins, data); + BUG_ON(ret); + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, + ref_generation, owner, + owner_offset, ins); + BUG_ON(ret); + maybe_unlock_mutex(root); return ret; } @@ -2288,8 +2343,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->alloc_mutex); /* we've dropped the lock, double check */ - ret = drop_snap_lookup_refcount(root, bytenr, - blocksize, &refs); + ret = lookup_extent_ref(NULL, root, bytenr, blocksize, + &refs); BUG_ON(ret); if (refs != 1) { parent = path->nodes[*level]; @@ -2584,7 +2639,6 @@ out_unlock: kfree(ra); trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); if (trans) { - btrfs_add_ordered_inode(inode); btrfs_end_transaction(trans, BTRFS_I(inode)->root); mark_inode_dirty(inode); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 40a5f53cb040..3f82a6e9ca4f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -793,6 +793,13 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_dirty); +int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); +} +EXPORT_SYMBOL(set_extent_ordered); + int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask) { @@ -812,8 +819,8 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, - mask); + EXTENT_DELALLOC | EXTENT_DIRTY, + 0, NULL, mask); } EXPORT_SYMBOL(set_extent_delalloc); @@ -825,6 +832,13 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(clear_extent_dirty); +int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); +} +EXPORT_SYMBOL(clear_extent_ordered); + int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { @@ -1395,10 +1409,9 @@ static int end_bio_extent_writepage(struct bio *bio, if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - if (tree->ops && tree->ops->writepage_end_io_hook) { ret = tree->ops->writepage_end_io_hook(page, start, - end, state); + end, state, uptodate); if (ret) uptodate = 0; } @@ -1868,9 +1881,14 @@ static int __extent_read_full_page(struct extent_io_tree *tree, unlock_extent(tree, cur, end, GFP_NOFS); break; } - extent_offset = cur - em->start; + if (extent_map_end(em) <= cur) { +printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur); + } BUG_ON(extent_map_end(em) <= cur); + if (end < cur) { +printk("2bad mapping end %Lu cur %Lu\n", end, cur); + } BUG_ON(end < cur); iosize = min(extent_map_end(em) - cur, end - cur + 1); @@ -1976,6 +1994,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 last_byte = i_size_read(inode); u64 block_start; u64 iosize; + u64 unlock_start; sector_t sector; struct extent_map *em; struct block_device *bdev; @@ -1988,7 +2007,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 nr_delalloc; u64 delalloc_end; - WARN_ON(!PageLocked(page)); page_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || @@ -2030,6 +2048,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_start = delalloc_end + 1; } lock_extent(tree, start, page_end, GFP_NOFS); + unlock_start = start; end = page_end; if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { @@ -2038,6 +2057,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (last_byte <= start) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); + unlock_extent(tree, start, page_end, GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, + page_end, NULL, 1); + unlock_start = page_end + 1; goto done; } @@ -2047,6 +2071,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, while (cur <= end) { if (cur >= last_byte) { clear_extent_dirty(tree, cur, page_end, GFP_NOFS); + unlock_extent(tree, unlock_start, page_end, GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, cur, + page_end, NULL, 1); + unlock_start = page_end + 1; break; } em = epd->get_extent(inode, page, page_offset, cur, @@ -2071,8 +2100,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, block_start == EXTENT_MAP_INLINE) { clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); + + unlock_extent(tree, unlock_start, cur + iosize -1, + GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, cur, + cur + iosize - 1, + NULL, 1); cur = cur + iosize; page_offset += iosize; + unlock_start = cur; continue; } @@ -2119,7 +2156,8 @@ done: set_page_writeback(page); end_page_writeback(page); } - unlock_extent(tree, start, page_end, GFP_NOFS); + if (unlock_start <= page_end) + unlock_extent(tree, unlock_start, page_end, GFP_NOFS); unlock_page(page); return 0; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f1960dafaa19..2268a7995896 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -13,6 +13,8 @@ #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) #define EXTENT_BUFFER_FILLED (1 << 8) +#define EXTENT_ORDERED (1 << 9) +#define EXTENT_ORDERED_METADATA (1 << 10) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* @@ -42,7 +44,7 @@ struct extent_io_ops { int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, - struct extent_state *state); + struct extent_state *state, int uptodate); int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits); int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, @@ -131,6 +133,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, int filled); int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask); +int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int wake, int delete, gfp_t mask); int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask); int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, @@ -141,8 +145,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start, + u64 end, gfp_t mask); int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, int bits); struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, @@ -209,6 +219,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, unsigned long start, unsigned long len); int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, struct extent_buffer *eb); +int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end); +int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); int clear_extent_buffer_dirty(struct extent_io_tree *tree, struct extent_buffer *eb); int set_extent_buffer_dirty(struct extent_io_tree *tree, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index f5a04eb9a2ac..81123277c2b8 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -206,10 +206,11 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *merge = NULL; struct rb_node *rb; + BUG_ON(spin_trylock(&tree->lock)); rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { - merge = rb_entry(rb, struct extent_map, rb_node); ret = -EEXIST; + free_extent_map(merge); goto out; } atomic_inc(&em->refs); @@ -268,6 +269,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, struct rb_node *next = NULL; u64 end = range_end(start, len); + BUG_ON(spin_trylock(&tree->lock)); em = tree->last; if (em && end > em->start && start < extent_map_end(em)) goto found; @@ -318,6 +320,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; + BUG_ON(spin_trylock(&tree->lock)); rb_erase(&em->rb_node, &tree->map); em->in_tree = 0; if (tree->last == em) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f537eb43c2c6..345caf8ff516 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -135,26 +135,37 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, } int btrfs_csum_one_bio(struct btrfs_root *root, - struct bio *bio, char **sums_ret) + struct bio *bio, struct btrfs_ordered_sum **sums_ret) { - u32 *sums; + struct btrfs_ordered_sum *sums; + struct btrfs_sector_sum *sector_sum; char *data; struct bio_vec *bvec = bio->bi_io_vec; int bio_index = 0; - sums = kmalloc(bio->bi_vcnt * BTRFS_CRC32_SIZE, GFP_NOFS); + WARN_ON(bio->bi_vcnt <= 0); + sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS); if (!sums) return -ENOMEM; - *sums_ret = (char *)sums; + *sums_ret = sums; + sector_sum = &sums->sums; + sums->file_offset = page_offset(bvec->bv_page); + sums->len = bio->bi_size; + INIT_LIST_HEAD(&sums->list); while(bio_index < bio->bi_vcnt) { data = kmap_atomic(bvec->bv_page, KM_USER0); - *sums = ~(u32)0; - *sums = btrfs_csum_data(root, data + bvec->bv_offset, - *sums, bvec->bv_len); + sector_sum->sum = ~(u32)0; + sector_sum->sum = btrfs_csum_data(root, + data + bvec->bv_offset, + sector_sum->sum, + bvec->bv_len); kunmap_atomic(data, KM_USER0); - btrfs_csum_final(*sums, (char *)sums); - sums++; + btrfs_csum_final(sector_sum->sum, + (char *)§or_sum->sum); + sector_sum->offset = page_offset(bvec->bv_page) + + bvec->bv_offset; + sector_sum++; bio_index++; bvec++; } @@ -163,7 +174,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct bio *bio, char *sums) + struct btrfs_ordered_sum *sums) { u64 objectid = inode->i_ino; u64 offset; @@ -171,17 +182,16 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_key file_key; struct btrfs_key found_key; u64 next_offset; + u64 total_bytes = 0; int found_next; struct btrfs_path *path; struct btrfs_csum_item *item; struct btrfs_csum_item *item_end; struct extent_buffer *leaf = NULL; u64 csum_offset; - u32 *sums32 = (u32 *)sums; + struct btrfs_sector_sum *sector_sum; u32 nritems; u32 ins_size; - int bio_index = 0; - struct bio_vec *bvec = bio->bi_io_vec; char *eb_map; char *eb_token; unsigned long map_len; @@ -189,10 +199,11 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); + sector_sum = &sums->sums; again: next_offset = (u64)-1; found_next = 0; - offset = page_offset(bvec->bv_page) + bvec->bv_offset; + offset = sector_sum->offset; file_key.objectid = objectid; file_key.offset = offset; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); @@ -303,7 +314,7 @@ found: item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + btrfs_item_size_nr(leaf, path->slots[0])); eb_token = NULL; -next_bvec: +next_sector: if (!eb_token || (unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) { @@ -321,21 +332,20 @@ next_bvec: } if (eb_token) { memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), - sums32, BTRFS_CRC32_SIZE); + §or_sum->sum, BTRFS_CRC32_SIZE); } else { - write_extent_buffer(leaf, sums32, (unsigned long)item, - BTRFS_CRC32_SIZE); + write_extent_buffer(leaf, §or_sum->sum, + (unsigned long)item, BTRFS_CRC32_SIZE); } - bio_index++; - bvec++; - sums32++; - if (bio_index < bio->bi_vcnt) { + total_bytes += root->sectorsize; + sector_sum++; + if (total_bytes < sums->len) { item = (struct btrfs_csum_item *)((char *)item + BTRFS_CRC32_SIZE); if (item < item_end && offset + PAGE_CACHE_SIZE == - page_offset(bvec->bv_page)) { - offset = page_offset(bvec->bv_page); - goto next_bvec; + sector_sum->offset) { + offset = sector_sum->offset; + goto next_sector; } } if (eb_token) { @@ -343,7 +353,7 @@ next_bvec: eb_token = NULL; } btrfs_mark_buffer_dirty(path->nodes[0]); - if (bio_index < bio->bi_vcnt) { + if (total_bytes < sums->len) { btrfs_release_path(root, path); goto again; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8037792f8789..12e765f7e0d4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -34,7 +34,6 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ordered-data.h" #include "ioctl.h" #include "print-tree.h" #include "compat.h" @@ -273,7 +272,9 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - if (last_pos_in_file < start_pos) { + if (hole_size > 0) { + btrfs_wait_ordered_range(inode, last_pos_in_file, + last_pos_in_file + hole_size); err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, @@ -303,19 +304,17 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size > root->fs_info->max_inline || (inline_size & (root->sectorsize -1)) == 0 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { - u64 last_end; - + /* check for reserved extents on each page, we don't want + * to reset the delalloc bit on things that already have + * extents reserved. + */ + set_extent_delalloc(io_tree, start_pos, + end_of_last_block, GFP_NOFS); for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); set_page_dirty(p); } - last_end = (u64)(pages[num_pages -1]->index) << - PAGE_CACHE_SHIFT; - last_end += PAGE_CACHE_SIZE - 1; - set_extent_delalloc(io_tree, start_pos, end_of_last_block, - GFP_NOFS); - btrfs_add_ordered_inode(inode); } else { u64 aligned_end; /* step one, delete the existing extents in this range */ @@ -350,10 +349,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map *split = NULL; struct extent_map *split2 = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *tmp; u64 len = end - start + 1; + u64 next_start; int ret; int testend = 1; + WARN_ON(end < start); if (end == (u64)-1) { len = (u64)-1; testend = 0; @@ -370,6 +372,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) spin_unlock(&em_tree->lock); break; } + tmp = rb_entry(&em->rb_node, struct extent_map, rb_node); + next_start = tmp->start; remove_extent_mapping(em_tree, em); if (em->block_start < EXTENT_MAP_LAST_BYTE && @@ -778,37 +782,58 @@ static int prepare_pages(struct btrfs_root *root, struct file *file, struct inode *inode = fdentry(file)->d_inode; int err = 0; u64 start_pos; + u64 last_pos; start_pos = pos & ~((u64)root->sectorsize - 1); + last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; memset(pages, 0, num_pages * sizeof(struct page *)); - +again: for (i = 0; i < num_pages; i++) { pages[i] = grab_cache_page(inode->i_mapping, index + i); if (!pages[i]) { err = -ENOMEM; BUG_ON(1); } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(pages[i]); -#else - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); -#endif wait_on_page_writeback(pages[i]); - set_page_extent_mapped(pages[i]); - WARN_ON(!PageLocked(pages[i])); } if (start_pos < inode->i_size) { - u64 last_pos; - last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; + struct btrfs_ordered_extent *ordered; lock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); + ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1); + if (ordered && + ordered->file_offset + ordered->len > start_pos && + ordered->file_offset < last_pos) { + btrfs_put_ordered_extent(ordered); + unlock_extent(&BTRFS_I(inode)->io_tree, + start_pos, last_pos - 1, GFP_NOFS); + for (i = 0; i < num_pages; i++) { + unlock_page(pages[i]); + page_cache_release(pages[i]); + } + btrfs_wait_ordered_range(inode, start_pos, + last_pos - start_pos); + goto again; + } + if (ordered) + btrfs_put_ordered_extent(ordered); + clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, GFP_NOFS); unlock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); } + for (i = 0; i < num_pages; i++) { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(pages[i]); +#else + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); +#endif + set_page_extent_mapped(pages[i]); + WARN_ON(!PageLocked(pages[i])); + } return 0; } @@ -969,13 +994,11 @@ out_nolock: (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; - btrfs_ordered_throttle(root, inode); return num_written ? num_written : err; } int btrfs_release_file(struct inode * inode, struct file * filp) { - btrfs_del_ordered_inode(inode, 0); if (filp->private_data) btrfs_ioctl_trans_end(filp); return 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d39433dfb2c7..c5a62f0b9595 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -43,6 +43,7 @@ #include "ioctl.h" #include "print-tree.h" #include "volumes.h" +#include "ordered-data.h" struct btrfs_iget_args { u64 ino; @@ -109,10 +110,11 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) u64 num_bytes; u64 cur_alloc_size; u64 blocksize = root->sectorsize; - u64 orig_start = start; u64 orig_num_bytes; struct btrfs_key ins; - int ret; + struct extent_map *em; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + int ret = 0; trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); @@ -120,33 +122,44 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) num_bytes = (end - start + blocksize) & ~(blocksize - 1); num_bytes = max(blocksize, num_bytes); - ret = btrfs_drop_extents(trans, root, inode, - start, start + num_bytes, start, &alloc_hint); orig_num_bytes = num_bytes; if (alloc_hint == EXTENT_MAP_INLINE) goto out; BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); + btrfs_drop_extent_cache(inode, start, start + num_bytes - 1); while(num_bytes > 0) { cur_alloc_size = min(num_bytes, root->fs_info->max_extent); - ret = btrfs_alloc_extent(trans, root, cur_alloc_size, - root->sectorsize, - root->root_key.objectid, - trans->transid, - inode->i_ino, start, 0, - alloc_hint, (u64)-1, &ins, 1); + ret = btrfs_reserve_extent(trans, root, cur_alloc_size, + root->sectorsize, 0, 0, + (u64)-1, &ins, 1); if (ret) { WARN_ON(1); goto out; } + em = alloc_extent_map(GFP_NOFS); + em->start = start; + em->len = ins.offset; + em->block_start = ins.objectid; + em->bdev = root->fs_info->fs_devices->latest_bdev; + while(1) { + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret != -EEXIST) { + free_extent_map(em); + break; + } + btrfs_drop_extent_cache(inode, start, + start + ins.offset - 1); + } + cur_alloc_size = ins.offset; - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start, ins.objectid, ins.offset, - ins.offset, 0); - inode->i_blocks += ins.offset >> 9; - btrfs_check_file(root, inode); + ret = btrfs_add_ordered_extent(inode, start, ins.objectid, + ins.offset); + BUG_ON(ret); if (num_bytes < cur_alloc_size) { printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, cur_alloc_size); @@ -156,10 +169,6 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } - btrfs_drop_extent_cache(inode, orig_start, - orig_start + orig_num_bytes - 1); - btrfs_add_ordered_inode(inode); - btrfs_update_inode(trans, root, inode); out: btrfs_end_transaction(trans, root); return ret; @@ -341,25 +350,15 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; int ret = 0; - char *sums = NULL; + struct btrfs_ordered_sum *sums; ret = btrfs_csum_one_bio(root, bio, &sums); BUG_ON(ret); - trans = btrfs_start_transaction(root, 1); - - btrfs_set_trans_block_group(trans, inode); - mutex_lock(&BTRFS_I(inode)->csum_mutex); - btrfs_csum_file_blocks(trans, root, inode, bio, sums); - mutex_unlock(&BTRFS_I(inode)->csum_mutex); - - ret = btrfs_end_transaction(trans, root); + ret = btrfs_add_ordered_sum(inode, sums); BUG_ON(ret); - kfree(sums); - return btrfs_map_bio(root, rw, bio, mirror_num, 1); } @@ -369,14 +368,10 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; - if (!(rw & (1 << BIO_RW))) { - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); - BUG_ON(ret); - goto mapit; - } + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + BUG_ON(ret); - if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) { + if (!(rw & (1 << BIO_RW))) { goto mapit; } @@ -387,6 +382,96 @@ mapit: return btrfs_map_bio(root, rw, bio, mirror_num, 0); } +static int add_pending_csums(struct btrfs_trans_handle *trans, + struct inode *inode, u64 file_offset, + struct list_head *list) +{ + struct list_head *cur; + struct btrfs_ordered_sum *sum; + + btrfs_set_trans_block_group(trans, inode); + while(!list_empty(list)) { + cur = list->next; + sum = list_entry(cur, struct btrfs_ordered_sum, list); + mutex_lock(&BTRFS_I(inode)->csum_mutex); + btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root, + inode, sum); + mutex_unlock(&BTRFS_I(inode)->csum_mutex); + list_del(&sum->list); + kfree(sum); + } + return 0; +} + +int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, + struct extent_state *state, int uptodate) +{ + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + struct btrfs_ordered_extent *ordered_extent; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + u64 alloc_hint = 0; + struct list_head list; + struct btrfs_key ins; + int ret; + + ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); + if (!ret) { + return 0; + } + + trans = btrfs_start_transaction(root, 1); + + ordered_extent = btrfs_lookup_ordered_extent(inode, start); + BUG_ON(!ordered_extent); + + lock_extent(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + ordered_extent->len - 1, + GFP_NOFS); + + INIT_LIST_HEAD(&list); + + ins.objectid = ordered_extent->start; + ins.offset = ordered_extent->len; + ins.type = BTRFS_EXTENT_ITEM_KEY; + ret = btrfs_alloc_reserved_extent(trans, root, root->root_key.objectid, + trans->transid, inode->i_ino, + ordered_extent->file_offset, &ins); + BUG_ON(ret); + ret = btrfs_drop_extents(trans, root, inode, + ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len, + ordered_extent->file_offset, &alloc_hint); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + ordered_extent->file_offset, + ordered_extent->start, + ordered_extent->len, + ordered_extent->len, 0); + BUG_ON(ret); + btrfs_drop_extent_cache(inode, ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1); + inode->i_blocks += ordered_extent->len >> 9; + unlock_extent(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + ordered_extent->len - 1, + GFP_NOFS); + add_pending_csums(trans, inode, ordered_extent->file_offset, + &ordered_extent->list); + + btrfs_remove_ordered_extent(inode, ordered_extent); + /* once for us */ + btrfs_put_ordered_extent(ordered_extent); + /* once for the tree */ + btrfs_put_ordered_extent(ordered_extent); + + btrfs_update_inode(trans, root, inode); + btrfs_end_transaction(trans, root); + return 0; +} + int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) { int ret = 0; @@ -409,7 +494,8 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) if (ret == -ENOENT || ret == -EFBIG) ret = 0; csum = 0; - printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start); + printk("no csum found for inode %lu start %Lu\n", inode->i_ino, + start); goto out; } read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, @@ -833,7 +919,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) { struct btrfs_root *root; struct btrfs_trans_handle *trans; - struct inode *inode = dentry->d_inode; int ret; unsigned long nr = 0; @@ -849,14 +934,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_trans(trans, root, dir, dentry); nr = trans->blocks_used; - if (inode->i_nlink == 0) { - /* if the inode isn't linked anywhere, - * we don't need to worry about - * data=ordered - */ - btrfs_del_ordered_inode(inode, 1); - } - btrfs_end_transaction_throttle(trans, root); fail: btrfs_btree_balance_dirty(root, nr); @@ -931,6 +1008,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int extent_type = -1; u64 mask = root->sectorsize - 1; + btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); path = btrfs_alloc_path(); path->reada = -1; @@ -1117,34 +1195,6 @@ error: return ret; } -static int btrfs_cow_one_page(struct inode *inode, struct page *page, - size_t zero_start) -{ - char *kaddr; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - int ret = 0; - - WARN_ON(!PageLocked(page)); - set_page_extent_mapped(page); - - lock_extent(io_tree, page_start, page_end, GFP_NOFS); - set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, - page_end, GFP_NOFS); - - if (zero_start != PAGE_CACHE_SIZE) { - kaddr = kmap(page); - memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); - flush_dcache_page(page); - kunmap(page); - } - set_page_dirty(page); - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); - - return ret; -} - /* * taken from block_truncate_page, but does cow as it zeros out * any bytes left in the last page in the file. @@ -1153,12 +1203,16 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_ordered_extent *ordered; + char *kaddr; u32 blocksize = root->sectorsize; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); struct page *page; int ret = 0; u64 page_start; + u64 page_end; if ((offset & (blocksize - 1)) == 0) goto out; @@ -1168,6 +1222,10 @@ again: page = grab_cache_page(mapping, index); if (!page) goto out; + + page_start = page_offset(page); + page_end = page_start + PAGE_CACHE_SIZE - 1; + if (!PageUptodate(page)) { ret = btrfs_readpage(NULL, page); lock_page(page); @@ -1181,10 +1239,32 @@ again: goto out; } } - - page_start = (u64)page->index << PAGE_CACHE_SHIFT; wait_on_page_writeback(page); - ret = btrfs_cow_one_page(inode, page, offset); + + lock_extent(io_tree, page_start, page_end, GFP_NOFS); + set_page_extent_mapped(page); + + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_page(page); + page_cache_release(page); + btrfs_wait_ordered_extent(inode, ordered); + btrfs_put_ordered_extent(ordered); + goto again; + } + + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, + page_end, GFP_NOFS); + ret = 0; + if (offset != PAGE_CACHE_SIZE) { + kaddr = kmap(page); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap(page); + } + set_page_dirty(page); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); page_cache_release(page); @@ -1222,8 +1302,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); - lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); hole_size = block_end - hole_start; + btrfs_wait_ordered_range(inode, hole_start, hole_size); + lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -1258,6 +1339,7 @@ void btrfs_delete_inode(struct inode *inode) unsigned long nr; int ret; + btrfs_wait_ordered_range(inode, 0, (u64)-1); truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) { goto no_delete; @@ -1403,7 +1485,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); return 0; } @@ -1705,7 +1786,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->root = root; @@ -1930,7 +2010,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -2066,64 +2145,18 @@ out_unlock: static int merge_extent_mapping(struct extent_map_tree *em_tree, struct extent_map *existing, - struct extent_map *em) + struct extent_map *em, + u64 map_start, u64 map_len) { u64 start_diff; - u64 new_end; - int ret = 0; - int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE; - if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE) - goto invalid; - - if (!real_blocks && em->block_start != existing->block_start) - goto invalid; - - new_end = max(existing->start + existing->len, em->start + em->len); - - if (existing->start >= em->start) { - if (em->start + em->len < existing->start) - goto invalid; - - start_diff = existing->start - em->start; - if (real_blocks && em->block_start + start_diff != - existing->block_start) - goto invalid; - - em->len = new_end - em->start; - - remove_extent_mapping(em_tree, existing); - /* free for the tree */ - free_extent_map(existing); - ret = add_extent_mapping(em_tree, em); - - } else if (em->start > existing->start) { - - if (existing->start + existing->len < em->start) - goto invalid; - - start_diff = em->start - existing->start; - if (real_blocks && existing->block_start + start_diff != - em->block_start) - goto invalid; - - remove_extent_mapping(em_tree, existing); - em->block_start = existing->block_start; - em->start = existing->start; - em->len = new_end - existing->start; - free_extent_map(existing); - - ret = add_extent_mapping(em_tree, em); - } else { - goto invalid; - } - return ret; - -invalid: - printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n", - existing->start, existing->len, existing->block_start, - em->start, em->len, em->block_start); - return -EIO; + BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); + start_diff = map_start - em->start; + em->start = map_start; + em->len = map_len; + if (em->block_start < EXTENT_MAP_LAST_BYTE) + em->block_start += start_diff; + return add_extent_mapping(em_tree, em); } struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, @@ -2170,10 +2203,9 @@ again: err = -ENOMEM; goto out; } - + em->bdev = root->fs_info->fs_devices->latest_bdev; em->start = EXTENT_MAP_HOLE; em->len = (u64)-1; - em->bdev = root->fs_info->fs_devices->latest_bdev; ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); if (ret < 0) { @@ -2314,6 +2346,9 @@ insert: */ if (ret == -EEXIST) { struct extent_map *existing; + + ret = 0; + existing = lookup_extent_mapping(em_tree, start, len); if (existing && (existing->start > start || existing->start + existing->len <= start)) { @@ -2325,7 +2360,8 @@ insert: em->len); if (existing) { err = merge_extent_mapping(em_tree, existing, - em); + em, start, + root->sectorsize); free_extent_map(existing); if (err) { free_extent_map(em); @@ -2341,6 +2377,7 @@ insert: } else { free_extent_map(em); em = existing; + err = 0; } } spin_unlock(&em_tree->lock); @@ -2348,8 +2385,9 @@ out: btrfs_free_path(path); if (trans) { ret = btrfs_end_transaction(trans, root); - if (!err) + if (!err) { err = ret; + } } if (err) { free_extent_map(em); @@ -2474,8 +2512,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping, return extent_readpages(tree, mapping, pages, nr_pages, btrfs_get_extent); } - -static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) +static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) { struct extent_io_tree *tree; struct extent_map_tree *map; @@ -2493,15 +2530,54 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) return ret; } +static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) +{ + struct btrfs_ordered_extent *ordered; + + ordered = btrfs_lookup_ordered_extent(page->mapping->host, + page_offset(page)); + if (ordered) { + btrfs_put_ordered_extent(ordered); + return 0; + } + return __btrfs_releasepage(page, gfp_flags); +} + static void btrfs_invalidatepage(struct page *page, unsigned long offset) { struct extent_io_tree *tree; + struct btrfs_ordered_extent *ordered; + u64 page_start = page_offset(page); + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + wait_on_page_writeback(page); tree = &BTRFS_I(page->mapping->host)->io_tree; - extent_invalidatepage(tree, page, offset); - btrfs_releasepage(page, GFP_NOFS); + if (offset) { + btrfs_releasepage(page, GFP_NOFS); + return; + } + + lock_extent(tree, page_start, page_end, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(page->mapping->host, + page_offset(page)); + if (ordered) { + clear_extent_bit(tree, page_start, page_end, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_LOCKED, 1, 0, GFP_NOFS); + btrfs_writepage_end_io_hook(page, page_start, + page_end, NULL, 1); + btrfs_put_ordered_extent(ordered); + lock_extent(tree, page_start, page_end, GFP_NOFS); + } + clear_extent_bit(tree, page_start, page_end, + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_ORDERED, + 1, 1, GFP_NOFS); + __btrfs_releasepage(page, GFP_NOFS); + if (PagePrivate(page)) { - invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); + invalidate_extent_lru(tree, page_offset(page), + PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -2527,35 +2603,63 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct inode *inode = fdentry(vma->vm_file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - unsigned long end; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_ordered_extent *ordered; + char *kaddr; + unsigned long zero_start; loff_t size; int ret; u64 page_start; + u64 page_end; ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); if (ret) goto out; ret = -EINVAL; - +again: lock_page(page); - wait_on_page_writeback(page); size = i_size_read(inode); - page_start = (u64)page->index << PAGE_CACHE_SHIFT; + page_start = page_offset(page); + page_end = page_start + PAGE_CACHE_SIZE - 1; if ((page->mapping != inode->i_mapping) || - (page_start > size)) { + (page_start >= size)) { /* page got truncated out from underneath us */ goto out_unlock; } + wait_on_page_writeback(page); + + lock_extent(io_tree, page_start, page_end, GFP_NOFS); + set_page_extent_mapped(page); + + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_page(page); + btrfs_wait_ordered_extent(inode, ordered); + btrfs_put_ordered_extent(ordered); + goto again; + } + + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, + page_end, GFP_NOFS); + ret = 0; /* page is wholly or partially inside EOF */ if (page_start + PAGE_CACHE_SIZE > size) - end = size & ~PAGE_CACHE_MASK; + zero_start = size & ~PAGE_CACHE_MASK; else - end = PAGE_CACHE_SIZE; + zero_start = PAGE_CACHE_SIZE; - ret = btrfs_cow_one_page(inode, page, end); + if (zero_start != PAGE_CACHE_SIZE) { + kaddr = kmap(page); + memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); + flush_dcache_page(page); + kunmap(page); + } + set_page_dirty(page); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); out_unlock: unlock_page(page); @@ -2662,15 +2766,28 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->last_trans = 0; - ei->ordered_trans = 0; + btrfs_ordered_inode_tree_init(&ei->ordered_tree); return &ei->vfs_inode; } void btrfs_destroy_inode(struct inode *inode) { + struct btrfs_ordered_extent *ordered; WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); + while(1) { + ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); + if (!ordered) + break; + else { + printk("found ordered extent %Lu %Lu\n", + ordered->file_offset, ordered->len); + btrfs_remove_ordered_extent(inode, ordered); + btrfs_put_ordered_extent(ordered); + btrfs_put_ordered_extent(ordered); + } + } btrfs_drop_extent_cache(inode, 0, (u64)-1); kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } @@ -2869,7 +2986,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; - atomic_set(&BTRFS_I(inode)->ordered_writeback, 0); BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -2921,6 +3037,20 @@ out_fail: return err; } +static int btrfs_set_page_dirty(struct page *page) +{ + struct inode *inode = page->mapping->host; + u64 page_start = page_offset(page); + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + + if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, + EXTENT_DELALLOC, 0)) { +printk("inode %lu page %Lu not delalloc\n", inode->i_ino, page_offset(page)); +WARN_ON(1); + } + return __set_page_dirty_nobuffers(page); +} + static int btrfs_permission(struct inode *inode, int mask, struct nameidata *nd) { @@ -2967,6 +3097,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { .merge_bio_hook = btrfs_merge_bio_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, + .writepage_end_io_hook = btrfs_writepage_end_io_hook, .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, @@ -2982,7 +3113,7 @@ static struct address_space_operations btrfs_aops = { .direct_IO = btrfs_direct_IO, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, - .set_page_dirty = __set_page_dirty_nobuffers, + .set_page_dirty = btrfs_set_page_dirty, }; static struct address_space_operations btrfs_symlink_aops = { diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 254da8225664..6513270f054c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -22,48 +22,30 @@ #include "ctree.h" #include "transaction.h" #include "btrfs_inode.h" +#include "extent_io.h" -struct tree_entry { - u64 root_objectid; - u64 objectid; - struct inode *inode; - struct rb_node rb_node; -}; -/* - * returns > 0 if entry passed (root, objectid) is > entry, - * < 0 if (root, objectid) < entry and zero if they are equal - */ -static int comp_entry(struct tree_entry *entry, u64 root_objectid, - u64 objectid) +static u64 entry_end(struct btrfs_ordered_extent *entry) { - if (root_objectid < entry->root_objectid) - return -1; - if (root_objectid > entry->root_objectid) - return 1; - if (objectid < entry->objectid) - return -1; - if (objectid > entry->objectid) - return 1; - return 0; + if (entry->file_offset + entry->len < entry->file_offset) + return (u64)-1; + return entry->file_offset + entry->len; } -static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, - u64 objectid, struct rb_node *node) +static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, + struct rb_node *node) { struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; - struct tree_entry *entry; - int comp; + struct btrfs_ordered_extent *entry; while(*p) { parent = *p; - entry = rb_entry(parent, struct tree_entry, rb_node); + entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node); - comp = comp_entry(entry, root_objectid, objectid); - if (comp < 0) + if (file_offset < entry->file_offset) p = &(*p)->rb_left; - else if (comp > 0) + else if (file_offset >= entry_end(entry)) p = &(*p)->rb_right; else return parent; @@ -74,24 +56,23 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, return NULL; } -static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, - u64 objectid, struct rb_node **prev_ret) +static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, + struct rb_node **prev_ret) { struct rb_node * n = root->rb_node; struct rb_node *prev = NULL; - struct tree_entry *entry; - struct tree_entry *prev_entry = NULL; - int comp; + struct rb_node *test; + struct btrfs_ordered_extent *entry; + struct btrfs_ordered_extent *prev_entry = NULL; while(n) { - entry = rb_entry(n, struct tree_entry, rb_node); + entry = rb_entry(n, struct btrfs_ordered_extent, rb_node); prev = n; prev_entry = entry; - comp = comp_entry(entry, root_objectid, objectid); - if (comp < 0) + if (file_offset < entry->file_offset) n = n->rb_left; - else if (comp > 0) + else if (file_offset >= entry_end(entry)) n = n->rb_right; else return n; @@ -99,195 +80,329 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, if (!prev_ret) return NULL; - while(prev && comp_entry(prev_entry, root_objectid, objectid) >= 0) { - prev = rb_next(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + while(prev && file_offset >= entry_end(prev_entry)) { + test = rb_next(prev); + if (!test) + break; + prev_entry = rb_entry(test, struct btrfs_ordered_extent, + rb_node); + if (file_offset < entry_end(prev_entry)) + break; + + prev = test; + } + if (prev) + prev_entry = rb_entry(prev, struct btrfs_ordered_extent, + rb_node); + while(prev && file_offset < entry_end(prev_entry)) { + test = rb_prev(prev); + if (!test) + break; + prev_entry = rb_entry(test, struct btrfs_ordered_extent, + rb_node); + prev = test; } *prev_ret = prev; return NULL; } -static inline struct rb_node *tree_search(struct rb_root *root, - u64 root_objectid, u64 objectid) +static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) { + if (file_offset < entry->file_offset || + entry->file_offset + entry->len <= file_offset) + return 0; + return 1; +} + +static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, + u64 file_offset) +{ + struct rb_root *root = &tree->tree; struct rb_node *prev; struct rb_node *ret; - ret = __tree_search(root, root_objectid, objectid, &prev); + struct btrfs_ordered_extent *entry; + + if (tree->last) { + entry = rb_entry(tree->last, struct btrfs_ordered_extent, + rb_node); + if (offset_in_entry(entry, file_offset)) + return tree->last; + } + ret = __tree_search(root, file_offset, &prev); if (!ret) - return prev; + ret = prev; + if (ret) + tree->last = ret; return ret; } -int btrfs_add_ordered_inode(struct inode *inode) +int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, + u64 start, u64 len) { - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 root_objectid = root->root_key.objectid; - u64 transid = root->fs_info->running_transaction->transid; - struct tree_entry *entry; - struct rb_node *node; struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry; - if (transid <= BTRFS_I(inode)->ordered_trans) - return 0; - - tree = &root->fs_info->running_transaction->ordered_inode_tree; - - read_lock(&tree->lock); - node = __tree_search(&tree->tree, root_objectid, inode->i_ino, NULL); - read_unlock(&tree->lock); - if (node) { - return 0; - } - - entry = kmalloc(sizeof(*entry), GFP_NOFS); + tree = &BTRFS_I(inode)->ordered_tree; + entry = kzalloc(sizeof(*entry), GFP_NOFS); if (!entry) return -ENOMEM; - write_lock(&tree->lock); - entry->objectid = inode->i_ino; - entry->root_objectid = root_objectid; + mutex_lock(&tree->mutex); + entry->file_offset = file_offset; + entry->start = start; + entry->len = len; entry->inode = inode; + /* one ref for the tree */ + atomic_set(&entry->refs, 1); + init_waitqueue_head(&entry->wait); + INIT_LIST_HEAD(&entry->list); - node = tree_insert(&tree->tree, root_objectid, - inode->i_ino, &entry->rb_node); + node = tree_insert(&tree->tree, file_offset, + &entry->rb_node); + if (node) { + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + atomic_inc(&entry->refs); + } + set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, + entry_end(entry) - 1, GFP_NOFS); - BTRFS_I(inode)->ordered_trans = transid; - if (!node) - igrab(inode); + set_bit(BTRFS_ORDERED_START, &entry->flags); + mutex_unlock(&tree->mutex); + BUG_ON(node); + return 0; +} - write_unlock(&tree->lock); +int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum) +{ + struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry; - if (node) + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = tree_search(tree, sum->file_offset); + if (!node) { +search_fail: +printk("add ordered sum failed to find a node for inode %lu offset %Lu\n", inode->i_ino, sum->file_offset); + node = rb_first(&tree->tree); + while(node) { + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + printk("entry %Lu %Lu %Lu\n", entry->file_offset, entry->file_offset + entry->len, entry->start); + node = rb_next(node); + } + BUG(); + } + BUG_ON(!node); + + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, sum->file_offset)) { + goto search_fail; + } + + list_add_tail(&sum->list, &entry->list); + mutex_unlock(&tree->mutex); + return 0; +} + +int btrfs_dec_test_ordered_pending(struct inode *inode, + u64 file_offset, u64 io_size) +{ + struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + int ret; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1, + GFP_NOFS); + node = tree_search(tree, file_offset); + if (!node) { + ret = 1; + goto out; + } + + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, file_offset)) { + ret = 1; + goto out; + } + + ret = test_range_bit(io_tree, entry->file_offset, + entry->file_offset + entry->len - 1, + EXTENT_ORDERED, 0); + if (!test_bit(BTRFS_ORDERED_START, &entry->flags)) { +printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file_offset, entry_end(entry)); + } + if (ret == 0) + ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); +out: + mutex_unlock(&tree->mutex); + return ret == 0; +} + +int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) +{ + if (atomic_dec_and_test(&entry->refs)) kfree(entry); return 0; } -int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode) +int btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) { - struct tree_entry *entry; + struct btrfs_ordered_inode_tree *tree; struct rb_node *node; - write_lock(&tree->lock); - node = tree_search(&tree->tree, *root_objectid, *objectid); - if (!node) { - write_unlock(&tree->lock); - return 0; - } - entry = rb_entry(node, struct tree_entry, rb_node); - - while(comp_entry(entry, *root_objectid, *objectid) >= 0) { - node = rb_next(node); - if (!node) - break; - entry = rb_entry(node, struct tree_entry, rb_node); - } - if (!node) { - write_unlock(&tree->lock); - return 0; - } - - *root_objectid = entry->root_objectid; - *inode = entry->inode; - atomic_inc(&entry->inode->i_count); - *objectid = entry->objectid; - write_unlock(&tree->lock); - return 1; -} - -int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode) -{ - struct tree_entry *entry; - struct rb_node *node; - - write_lock(&tree->lock); - node = tree_search(&tree->tree, *root_objectid, *objectid); - if (!node) { - write_unlock(&tree->lock); - return 0; - } - - entry = rb_entry(node, struct tree_entry, rb_node); - while(comp_entry(entry, *root_objectid, *objectid) >= 0) { - node = rb_next(node); - if (!node) - break; - entry = rb_entry(node, struct tree_entry, rb_node); - } - if (!node) { - write_unlock(&tree->lock); - return 0; - } - - *root_objectid = entry->root_objectid; - *objectid = entry->objectid; - *inode = entry->inode; - atomic_inc(&entry->inode->i_count); + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = &entry->rb_node; rb_erase(node, &tree->tree); - write_unlock(&tree->lock); - kfree(entry); - return 1; -} - -static void __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, - struct inode *inode, - u64 root_objectid, u64 objectid) -{ - struct tree_entry *entry; - struct rb_node *node; - struct rb_node *prev; - - write_lock(&tree->lock); - node = __tree_search(&tree->tree, root_objectid, objectid, &prev); - if (!node) { - write_unlock(&tree->lock); - return; - } - rb_erase(node, &tree->tree); - BTRFS_I(inode)->ordered_trans = 0; - write_unlock(&tree->lock); - atomic_dec(&inode->i_count); - entry = rb_entry(node, struct tree_entry, rb_node); - kfree(entry); - return; -} - -void btrfs_del_ordered_inode(struct inode *inode, int force) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 root_objectid = root->root_key.objectid; - - if (!BTRFS_I(inode)->ordered_trans) { - return; - } - - if (!force && (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || - mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) - return; - - spin_lock(&root->fs_info->new_trans_lock); - if (root->fs_info->running_transaction) { - struct btrfs_ordered_inode_tree *tree; - tree = &root->fs_info->running_transaction->ordered_inode_tree; - __btrfs_del_ordered_inode(tree, inode, root_objectid, - inode->i_ino); - } - spin_unlock(&root->fs_info->new_trans_lock); -} - -int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode) -{ - struct btrfs_transaction *cur = root->fs_info->running_transaction; - while(cur == root->fs_info->running_transaction && - atomic_read(&BTRFS_I(inode)->ordered_writeback)) { -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) - congestion_wait(WRITE, HZ/20); -#else - blk_congestion_wait(WRITE, HZ/20); -#endif - } + tree->last = NULL; + set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); + mutex_unlock(&tree->mutex); + wake_up(&entry->wait); return 0; } + +void btrfs_wait_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) +{ + u64 start = entry->file_offset; + u64 end = start + entry->len - 1; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); +#else + do_sync_mapping_range(inode->i_mapping, start, end, + SYNC_FILE_RANGE_WRITE); +#endif + wait_event(entry->wait, + test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags)); +} + +static void btrfs_start_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry, int wait) +{ + u64 start = entry->file_offset; + u64 end = start + entry->len - 1; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); +#else + do_sync_mapping_range(inode->i_mapping, start, end, + SYNC_FILE_RANGE_WRITE); +#endif + if (wait) + wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, + &entry->flags)); +} + +void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) +{ + u64 end; + struct btrfs_ordered_extent *ordered; + int found; + int should_wait = 0; + +again: + if (start + len < start) + end = (u64)-1; + else + end = start + len - 1; + found = 0; + while(1) { + ordered = btrfs_lookup_first_ordered_extent(inode, end); + if (!ordered) { + break; + } + if (ordered->file_offset >= start + len) { + btrfs_put_ordered_extent(ordered); + break; + } + if (ordered->file_offset + ordered->len < start) { + btrfs_put_ordered_extent(ordered); + break; + } + btrfs_start_ordered_extent(inode, ordered, should_wait); + found++; + end = ordered->file_offset; + btrfs_put_ordered_extent(ordered); + if (end == 0) + break; + end--; + } + if (should_wait && found) { + should_wait = 0; + goto again; + } +} + +int btrfs_add_ordered_pending(struct inode *inode, + struct btrfs_ordered_extent *ordered, + u64 start, u64 len) +{ + WARN_ON(1); + return 0; +#if 0 + int ret; + struct btrfs_ordered_inode_tree *tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) { + ret = -EAGAIN; + goto out; + } + set_extent_ordered(io_tree, start, start + len - 1, GFP_NOFS); + ret = 0; +out: + mutex_unlock(&tree->mutex); + return ret; +#endif +} + +struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, + u64 file_offset) +{ + struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry = NULL; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = tree_search(tree, file_offset); + if (!node) + goto out; + + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, file_offset)) + entry = NULL; + if (entry) + atomic_inc(&entry->refs); +out: + mutex_unlock(&tree->mutex); + return entry; +} + +struct btrfs_ordered_extent * +btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset) +{ + struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry = NULL; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + node = tree_search(tree, file_offset); + if (!node) + goto out; + + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + atomic_inc(&entry->refs); +out: + mutex_unlock(&tree->mutex); + return entry; +} diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 4fa78736423e..33292c5fe90c 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -20,24 +20,73 @@ #define __BTRFS_ORDERED_DATA__ struct btrfs_ordered_inode_tree { - rwlock_t lock; + struct mutex mutex; struct rb_root tree; + struct rb_node *last; }; +struct btrfs_sector_sum { + u64 offset; + u32 sum; +}; + +struct btrfs_ordered_sum { + u64 file_offset; + u64 len; + struct list_head list; + struct btrfs_sector_sum sums; +}; + +/* bits for the flags field */ +#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */ +#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */ +#define BTRFS_ORDERED_START 2 /* set when tree setup */ + +struct btrfs_ordered_extent { + u64 file_offset; + u64 start; + u64 len; + unsigned long flags; + atomic_t refs; + struct list_head list; + struct inode *inode; + wait_queue_head_t wait; + struct rb_node rb_node; +}; + + +static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes) +{ + unsigned long num_sectors = (bytes + root->sectorsize - 1) / + root->sectorsize; + return sizeof(struct btrfs_ordered_sum) + + num_sectors * sizeof(struct btrfs_sector_sum); +} + static inline void btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) { - rwlock_init(&t->lock); + mutex_init(&t->mutex); t->tree.rb_node = NULL; + t->last = NULL; } -int btrfs_add_ordered_inode(struct inode *inode); -int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode); -int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, - u64 *root_objectid, u64 *objectid, - struct inode **inode); -void btrfs_del_ordered_inode(struct inode *inode, int force); -int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode); +int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); +int btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry); +int btrfs_dec_test_ordered_pending(struct inode *inode, + u64 file_offset, u64 io_size); +int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, + u64 start, u64 len); +int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum); +struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, + u64 file_offset); +void btrfs_wait_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry); +void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); +struct btrfs_ordered_extent * +btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); +int btrfs_add_ordered_pending(struct inode *inode, + struct btrfs_ordered_extent *ordered, + u64 start, u64 len); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a8a3cb03de59..86a5acc19ce7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -67,7 +67,6 @@ static noinline int join_transaction(struct btrfs_root *root) cur_trans->start_time = get_seconds(); INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); - btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); extent_io_tree_init(&cur_trans->dirty_pages, root->fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -158,10 +157,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, wake_up(&cur_trans->writer_wait); if (cur_trans->in_commit && throttle) { - int ret; + DEFINE_WAIT(wait); mutex_unlock(&root->fs_info->trans_mutex); - ret = wait_for_commit(root, cur_trans); - BUG_ON(ret); + prepare_to_wait(&root->fs_info->transaction_throttle, &wait, + TASK_UNINTERRUPTIBLE); + schedule(); + finish_wait(&root->fs_info->transaction_throttle, &wait); mutex_lock(&root->fs_info->trans_mutex); } @@ -486,58 +487,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, return ret; } -int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - struct btrfs_transaction *cur_trans = trans->transaction; - struct inode *inode; - u64 root_objectid = 0; - u64 objectid = 0; - int ret; - - atomic_inc(&root->fs_info->throttles); - while(1) { - ret = btrfs_find_first_ordered_inode( - &cur_trans->ordered_inode_tree, - &root_objectid, &objectid, &inode); - if (!ret) - break; - - mutex_unlock(&root->fs_info->trans_mutex); - - if (S_ISREG(inode->i_mode)) { - atomic_inc(&BTRFS_I(inode)->ordered_writeback); - filemap_fdatawrite(inode->i_mapping); - atomic_dec(&BTRFS_I(inode)->ordered_writeback); - } - iput(inode); - - mutex_lock(&root->fs_info->trans_mutex); - } - while(1) { - root_objectid = 0; - objectid = 0; - ret = btrfs_find_del_first_ordered_inode( - &cur_trans->ordered_inode_tree, - &root_objectid, &objectid, &inode); - if (!ret) - break; - mutex_unlock(&root->fs_info->trans_mutex); - - if (S_ISREG(inode->i_mode)) { - atomic_inc(&BTRFS_I(inode)->ordered_writeback); - filemap_write_and_wait(inode->i_mapping); - atomic_dec(&BTRFS_I(inode)->ordered_writeback); - } - atomic_dec(&inode->i_count); - iput(inode); - - mutex_lock(&root->fs_info->trans_mutex); - } - atomic_dec(&root->fs_info->throttles); - return 0; -} - static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_pending_snapshot *pending) @@ -666,6 +615,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, extent_io_tree_init(pinned_copy, root->fs_info->btree_inode->i_mapping, GFP_NOFS); +printk("commit trans %Lu\n", trans->transid); trans->transaction->in_commit = 1; cur_trans = trans->transaction; if (cur_trans->list.prev != &root->fs_info->trans_list) { @@ -699,8 +649,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); - ret = btrfs_write_ordered_inodes(trans, root); - } while (cur_trans->num_writers > 1 || (cur_trans->num_joined != joined)); @@ -736,6 +684,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_copy_pinned(root, pinned_copy); + wake_up(&root->fs_info->transaction_throttle); + mutex_unlock(&root->fs_info->trans_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); @@ -758,6 +708,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); mutex_unlock(&root->fs_info->trans_mutex); +printk("done commit trans %Lu\n", trans->transid); kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->fs_info->closing) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 9ccd5a5b170f..910350cd4cf0 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -19,7 +19,6 @@ #ifndef __BTRFS_TRANSACTION__ #define __BTRFS_TRANSACTION__ #include "btrfs_inode.h" -#include "ordered-data.h" struct btrfs_transaction { u64 transid; @@ -31,7 +30,6 @@ struct btrfs_transaction { struct list_head list; struct extent_io_tree dirty_pages; unsigned long start_time; - struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; struct list_head pending_snapshots; @@ -88,8 +86,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, - struct btrfs_root *root); int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root); #endif From 247e743cbe6e655768c3679f84821e03c1577902 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:53:51 -0400 Subject: [PATCH 0588/2985] Btrfs: Use async helpers to deal with pages that have been improperly dirtied Higher layers sometimes call set_page_dirty without asking the filesystem to help. This causes many problems for the data=ordered and cow code. This commit detects pages that haven't been properly setup for IO and kicks off an async helper to deal with them. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 +++ fs/btrfs/disk-io.c | 4 ++ fs/btrfs/extent_io.c | 10 +++++ fs/btrfs/extent_io.h | 1 + fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 93 +++++++++++++++++++++++++++++++++++++++----- 6 files changed, 106 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ceebc052ddcb..4ddc8a8f82cd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -546,6 +546,12 @@ struct btrfs_fs_info { struct btrfs_workers endio_workers; struct btrfs_workers endio_write_workers; struct btrfs_workers submit_workers; + /* + * fixup workers take dirty pages that didn't properly go through + * the cow mechanism and make them safe to write. It happens + * for the sys_munmap function call path + */ + struct btrfs_workers fixup_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; int thread_pool_size; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4a5ebafb935a..66466d125c05 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1329,11 +1329,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, */ btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->fixup_workers, 1); btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_write_workers, fs_info->thread_pool_size); btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); + btrfs_start_workers(&fs_info->fixup_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_start_workers(&fs_info->endio_write_workers, fs_info->thread_pool_size); @@ -1454,6 +1456,7 @@ fail_tree_root: fail_sys_array: fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); + btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_write_workers); @@ -1710,6 +1713,7 @@ int close_ctree(struct btrfs_root *root) truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_write_workers); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3f82a6e9ca4f..feff16cb9b40 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2050,6 +2050,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, lock_extent(tree, start, page_end, GFP_NOFS); unlock_start = start; + if (tree->ops && tree->ops->writepage_start_hook) { + ret = tree->ops->writepage_start_hook(page, start, page_end); + if (ret == -EAGAIN) { + unlock_extent(tree, start, page_end, GFP_NOFS); + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } + } + end = page_end; if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { printk("found delalloc bits after lock_extent\n"); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2268a7995896..23affd27af5e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -30,6 +30,7 @@ typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, struct bio *bio, int mirror_num); struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); + int (*writepage_start_hook)(struct page *page, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); extent_submit_bio_hook_t *submit_bio_hook; int (*merge_bio_hook)(struct page *page, unsigned long offset, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 12e765f7e0d4..20928639d173 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -313,6 +313,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); + ClearPageChecked(p); set_page_dirty(p); } } else { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c5a62f0b9595..47a008c19308 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -403,6 +403,87 @@ static int add_pending_csums(struct btrfs_trans_handle *trans, return 0; } +struct btrfs_writepage_fixup { + struct page *page; + struct btrfs_work work; +}; + +/* see btrfs_writepage_start_hook for details on why this is required */ +void btrfs_writepage_fixup_worker(struct btrfs_work *work) +{ + struct btrfs_writepage_fixup *fixup; + struct btrfs_ordered_extent *ordered; + struct page *page; + struct inode *inode; + u64 page_start; + u64 page_end; + + fixup = container_of(work, struct btrfs_writepage_fixup, work); + page = fixup->page; + + lock_page(page); + if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { + ClearPageChecked(page); + goto out_page; + } + + inode = page->mapping->host; + page_start = page_offset(page); + page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; + + lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) + goto out; + + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, + GFP_NOFS); + ClearPageChecked(page); +out: + unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); +out_page: + unlock_page(page); + page_cache_release(page); +} + +/* + * There are a few paths in the higher layers of the kernel that directly + * set the page dirty bit without asking the filesystem if it is a + * good idea. This causes problems because we want to make sure COW + * properly happens and the data=ordered rules are followed. + * + * In our case any range that doesn't have the EXTENT_ORDERED bit set + * hasn't been properly setup for IO. We kick off an async process + * to fix it up. The async helper will wait for ordered extents, set + * the delalloc bit and make it safe to write the page. + */ +int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) +{ + struct inode *inode = page->mapping->host; + struct btrfs_writepage_fixup *fixup; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, + EXTENT_ORDERED, 0); + if (ret) + return 0; + + if (PageChecked(page)) + return -EAGAIN; + + fixup = kzalloc(sizeof(*fixup), GFP_NOFS); + if (!fixup) + return -EAGAIN; +printk("queueing worker to fixup page %lu %Lu\n", inode->i_ino, page_offset(page)); + SetPageChecked(page); + page_cache_get(page); + fixup->work.func = btrfs_writepage_fixup_worker; + fixup->page = page; + btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); + return -EAGAIN; +} + int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate) { @@ -1263,6 +1344,7 @@ again: flush_dcache_page(page); kunmap(page); } + ClearPageChecked(page); set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); @@ -2658,6 +2740,7 @@ again: flush_dcache_page(page); kunmap(page); } + ClearPageChecked(page); set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); @@ -3039,15 +3122,6 @@ out_fail: static int btrfs_set_page_dirty(struct page *page) { - struct inode *inode = page->mapping->host; - u64 page_start = page_offset(page); - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - - if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, - EXTENT_DELALLOC, 0)) { -printk("inode %lu page %Lu not delalloc\n", inode->i_ino, page_offset(page)); -WARN_ON(1); - } return __set_page_dirty_nobuffers(page); } @@ -3098,6 +3172,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, .writepage_end_io_hook = btrfs_writepage_end_io_hook, + .writepage_start_hook = btrfs_writepage_start_hook, .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, From dbe674a99c8af088faa4c95eddaeb271a3140ab6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:05 -0400 Subject: [PATCH 0589/2985] Btrfs: Update on disk i_size only after pending ordered extents are done This changes the ordered data code to update i_size after the extent is on disk. An on disk i_size is maintained in the in-memory btrfs inode structures, and this is updated as extents finish. Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 9 +++++ fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 28 ++++++++----- fs/btrfs/ordered-data.c | 89 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ordered-data.h | 2 + 5 files changed, 119 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 8d03687510e0..81c0444f37b6 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -40,11 +40,20 @@ struct btrfs_inode { */ u64 last_trans; u64 delalloc_bytes; + u64 disk_i_size; u32 flags; }; + static inline struct btrfs_inode *BTRFS_I(struct inode *inode) { return container_of(inode, struct btrfs_inode, vfs_inode); } +static inline void btrfs_i_size_write(struct inode *inode, u64 size) +{ + inode->i_size = size; + BTRFS_I(inode)->disk_i_size = size; +} + + #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 20928639d173..3e4e5c227c0c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -338,7 +338,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_update_inode(trans, root, inode); } failed: - err = btrfs_end_transaction(trans, root); + err = btrfs_end_transaction_throttle(trans, root); out_unlock: unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 47a008c19308..baf46017d0d3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -542,6 +542,7 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); + btrfs_ordered_update_i_size(inode, ordered_extent); btrfs_remove_ordered_extent(inode, ordered_extent); /* once for us */ btrfs_put_ordered_extent(ordered_extent); @@ -792,7 +793,7 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); inode->i_uid = btrfs_inode_uid(leaf, inode_item); inode->i_gid = btrfs_inode_gid(leaf, inode_item); - inode->i_size = btrfs_inode_size(leaf, inode_item); + btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); tspec = btrfs_inode_atime(inode_item); inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); @@ -860,7 +861,7 @@ static void fill_inode_item(struct extent_buffer *leaf, { btrfs_set_inode_uid(leaf, item, inode->i_uid); btrfs_set_inode_gid(leaf, item, inode->i_gid); - btrfs_set_inode_size(leaf, item, inode->i_size); + btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); btrfs_set_inode_mode(leaf, item, inode->i_mode); btrfs_set_inode_nlink(leaf, item, inode->i_nlink); @@ -982,7 +983,7 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, err: btrfs_free_path(path); if (!ret) { - dir->i_size -= name_len * 2; + btrfs_i_size_write(dir, dir->i_size - name_len * 2); dir->i_mtime = dir->i_ctime = CURRENT_TIME; btrfs_update_inode(trans, root, dir); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) @@ -1044,7 +1045,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) /* now the directory is empty */ err = btrfs_unlink_trans(trans, root, dir, dentry); if (!err) { - inode->i_size = 0; + btrfs_i_size_write(inode, 0); } nr = trans->blocks_used; @@ -1089,7 +1090,6 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int extent_type = -1; u64 mask = root->sectorsize - 1; - btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); path = btrfs_alloc_path(); path->reada = -1; @@ -1427,7 +1427,7 @@ void btrfs_delete_inode(struct inode *inode) goto no_delete; } - inode->i_size = 0; + btrfs_i_size_write(inode, 0); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -1561,6 +1561,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; BTRFS_I(inode)->delalloc_bytes = 0; + BTRFS_I(inode)->disk_i_size = 0; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); @@ -1869,6 +1870,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; + BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->root = root; if (mode & S_IFDIR) @@ -1964,7 +1966,8 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_parent->d_inode->i_ino); } parent_inode = dentry->d_parent->d_inode; - parent_inode->i_size += dentry->d_name.len * 2; + btrfs_i_size_write(parent_inode, parent_inode->i_size + + dentry->d_name.len * 2); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, dentry->d_parent->d_inode); @@ -2092,6 +2095,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; + BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -2199,7 +2203,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_fop = &btrfs_dir_file_operations; btrfs_set_trans_block_group(trans, inode); - inode->i_size = 0; + btrfs_i_size_write(inode, 0); err = btrfs_update_inode(trans, root, inode); if (err) goto out_fail; @@ -2756,6 +2760,7 @@ static void btrfs_truncate(struct inode *inode) int ret; struct btrfs_trans_handle *trans; unsigned long nr; + u64 mask = root->sectorsize - 1; if (!S_ISREG(inode->i_mode)) return; @@ -2766,6 +2771,8 @@ static void btrfs_truncate(struct inode *inode) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); + btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); + btrfs_i_size_write(inode, inode->i_size); /* FIXME, add redo link to tree so we don't leak on crash */ ret = btrfs_truncate_in_trans(trans, root, inode, @@ -2821,7 +2828,7 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid, new_dirid); inode->i_nlink = 1; - inode->i_size = 0; + btrfs_i_size_write(inode, 0); return btrfs_update_inode(trans, new_root, inode); } @@ -3069,6 +3076,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; + BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; @@ -3103,7 +3111,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_mapping->backing_dev_info = &root->fs_info->bdi; - inode->i_size = name_len - 1; + btrfs_i_size_write(inode, name_len - 1); err = btrfs_update_inode(trans, root, inode); if (err) drop_inode = 1; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 6513270f054c..d86a953ae51d 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -406,3 +406,92 @@ out: mutex_unlock(&tree->mutex); return entry; } + +int btrfs_ordered_update_i_size(struct inode *inode, + struct btrfs_ordered_extent *ordered) +{ + struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + u64 disk_i_size; + u64 new_i_size; + u64 i_size_test; + struct rb_node *node; + struct btrfs_ordered_extent *test; + + mutex_lock(&tree->mutex); + disk_i_size = BTRFS_I(inode)->disk_i_size; + + /* + * if the disk i_size is already at the inode->i_size, or + * this ordered extent is inside the disk i_size, we're done + */ + if (disk_i_size >= inode->i_size || + ordered->file_offset + ordered->len <= disk_i_size) { + goto out; + } + + /* + * we can't update the disk_isize if there are delalloc bytes + * between disk_i_size and this ordered extent + */ + if (test_range_bit(io_tree, disk_i_size, + ordered->file_offset + ordered->len - 1, + EXTENT_DELALLOC, 0)) { + goto out; + } + /* + * walk backward from this ordered extent to disk_i_size. + * if we find an ordered extent then we can't update disk i_size + * yet + */ + while(1) { + node = rb_prev(&ordered->rb_node); + if (!node) + break; + test = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (test->file_offset + test->len <= disk_i_size) + break; + if (test->file_offset >= inode->i_size) + break; + if (test->file_offset >= disk_i_size) + goto out; + } + new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); + + /* + * at this point, we know we can safely update i_size to at least + * the offset from this ordered extent. But, we need to + * walk forward and see if ios from higher up in the file have + * finished. + */ + node = rb_next(&ordered->rb_node); + i_size_test = 0; + if (node) { + /* + * do we have an area where IO might have finished + * between our ordered extent and the next one. + */ + test = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (test->file_offset > entry_end(ordered)) { + i_size_test = test->file_offset - 1; + } + } else { + i_size_test = i_size_read(inode); + } + + /* + * i_size_test is the end of a region after this ordered + * extent where there are no ordered extents. As long as there + * are no delalloc bytes in this area, it is safe to update + * disk_i_size to the end of the region. + */ + if (i_size_test > entry_end(ordered) && + !test_range_bit(io_tree, entry_end(ordered), i_size_test, + EXTENT_DELALLOC, 0)) { + new_i_size = min_t(u64, i_size_test, i_size_read(inode)); + } + BTRFS_I(inode)->disk_i_size = new_i_size; +out: + mutex_unlock(&tree->mutex); + return 0; +} diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 33292c5fe90c..40e9126ad954 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -89,4 +89,6 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); int btrfs_add_ordered_pending(struct inode *inode, struct btrfs_ordered_extent *ordered, u64 start, u64 len); +int btrfs_ordered_update_i_size(struct inode *inode, + struct btrfs_ordered_extent *ordered); #endif From f9295749388f82c8d2f485e99c72cd7c7876a99b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:14 -0400 Subject: [PATCH 0590/2985] btrfs_start_transaction: wait for commits in progress to finish btrfs_commit_transaction has to loop waiting for any writers in the transaction to finish before it can proceed. btrfs_start_transaction should be polite and not join a transaction that is in the process of being finished off. There are a few places that can't wait, basically the ones doing IO that might be needed to finish the transaction. For them, btrfs_join_transaction is added. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 10 +++++----- fs/btrfs/transaction.c | 43 +++++++++++++++++++++++++++++++++++++++--- fs/btrfs/transaction.h | 3 +++ 6 files changed, 51 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4ddc8a8f82cd..acbce542d291 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -513,6 +513,7 @@ struct btrfs_fs_info { u64 alloc_start; struct btrfs_transaction *running_transaction; wait_queue_head_t transaction_throttle; + wait_queue_head_t transaction_wait; struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; struct block_device *__bdev; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 66466d125c05..99bb385c2982 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1291,6 +1291,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); init_waitqueue_head(&fs_info->transaction_throttle); + init_waitqueue_head(&fs_info->transaction_wait); #if 0 ret = add_hasher(fs_info, "crc32c"); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3e4e5c227c0c..d6505892cd52 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -251,7 +251,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); if (!trans) { err = -ENOMEM; goto out_unlock; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index baf46017d0d3..0a687326c0b0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -116,7 +116,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); BUG_ON(!trans); btrfs_set_trans_block_group(trans, inode); @@ -502,7 +502,7 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, return 0; } - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); ordered_extent = btrfs_lookup_ordered_extent(inode, start); BUG_ON(!ordered_extent); @@ -1812,7 +1812,7 @@ int btrfs_write_inode(struct inode *inode, int wait) int ret = 0; if (wait) { - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); } @@ -1830,7 +1830,7 @@ void btrfs_dirty_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); @@ -2395,7 +2395,7 @@ again: free_extent_map(em); em = NULL; btrfs_release_path(root, path); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); goto again; } write_extent_buffer(leaf, map + pg_offset, ptr, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 86a5acc19ce7..05823904ecba 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -62,6 +62,7 @@ static noinline int join_transaction(struct btrfs_root *root) init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); cur_trans->in_commit = 0; + cur_trans->blocked = 0; cur_trans->use_count = 1; cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); @@ -99,14 +100,36 @@ static noinline int record_root_in_trans(struct btrfs_root *root) return 0; } -struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, - int num_blocks) +struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, + int num_blocks, int join) { struct btrfs_trans_handle *h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); + struct btrfs_transaction *cur_trans; int ret; mutex_lock(&root->fs_info->trans_mutex); + cur_trans = root->fs_info->running_transaction; + if (cur_trans && cur_trans->blocked && !join) { + DEFINE_WAIT(wait); + cur_trans->use_count++; + while(1) { + prepare_to_wait(&root->fs_info->transaction_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (cur_trans->blocked) { + mutex_unlock(&root->fs_info->trans_mutex); + schedule(); + mutex_lock(&root->fs_info->trans_mutex); + finish_wait(&root->fs_info->transaction_wait, + &wait); + } else { + finish_wait(&root->fs_info->transaction_wait, + &wait); + break; + } + } + put_transaction(cur_trans); + } ret = join_transaction(root); BUG_ON(ret); @@ -123,6 +146,17 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, return h; } +struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, + int num_blocks) +{ + return start_transaction(root, num_blocks, 0); +} +struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, + int num_blocks) +{ + return start_transaction(root, num_blocks, 1); +} + static noinline int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { @@ -156,7 +190,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); - if (cur_trans->in_commit && throttle) { + if (0 && cur_trans->in_commit && throttle) { DEFINE_WAIT(wait); mutex_unlock(&root->fs_info->trans_mutex); prepare_to_wait(&root->fs_info->transaction_throttle, &wait, @@ -617,6 +651,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, printk("commit trans %Lu\n", trans->transid); trans->transaction->in_commit = 1; + trans->transaction->blocked = 1; cur_trans = trans->transaction; if (cur_trans->list.prev != &root->fs_info->trans_list) { prev_trans = list_entry(cur_trans->list.prev, @@ -684,7 +719,9 @@ printk("commit trans %Lu\n", trans->transid); btrfs_copy_pinned(root, pinned_copy); + trans->transaction->blocked = 0; wake_up(&root->fs_info->transaction_throttle); + wake_up(&root->fs_info->transaction_wait); mutex_unlock(&root->fs_info->trans_mutex); ret = btrfs_write_and_wait_transaction(trans, root); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 910350cd4cf0..11fbdeceb26c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -27,6 +27,7 @@ struct btrfs_transaction { int in_commit; int use_count; int commit_done; + int blocked; struct list_head list; struct extent_io_tree dirty_pages; unsigned long start_time; @@ -75,6 +76,8 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks); +struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, + int num_blocks); int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, From ba1da2f442ec91a1534afa893f9bef7e33056ace Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:15 -0400 Subject: [PATCH 0591/2985] Btrfs: Don't pin pages in ram until the entire ordered extent is on disk. Checksum items are not inserted until the entire ordered extent is on disk, but individual pages might be clean and available for reclaim long before the whole extent is on disk. In order to allow those pages to be freed, we need to be able to search the list of ordered extents to find the checksum that is going to be inserted in the tree. This way if the page needs to be read back in before the checksums are in the btree, we'll be able to verify the checksum on the page. This commit adds the ability to search the pending ordered extents for a given offset in the file, and changes btrfs_releasepage to allow ordered pages to be freed. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 37 +++++++++++++++++-------------- fs/btrfs/ordered-data.c | 48 +++++++++++++++++++++++++++++++++++++++-- fs/btrfs/ordered-data.h | 1 + 4 files changed, 69 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d6505892cd52..3e4e5c227c0c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -251,7 +251,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_start_transaction(root, 1); if (!trans) { err = -ENOMEM; goto out_unlock; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0a687326c0b0..293355c92a4f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -382,7 +382,7 @@ mapit: return btrfs_map_bio(root, rw, bio, mirror_num, 0); } -static int add_pending_csums(struct btrfs_trans_handle *trans, +static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct inode *inode, u64 file_offset, struct list_head *list) { @@ -390,15 +390,12 @@ static int add_pending_csums(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum *sum; btrfs_set_trans_block_group(trans, inode); - while(!list_empty(list)) { - cur = list->next; + list_for_each(cur, list) { sum = list_entry(cur, struct btrfs_ordered_sum, list); mutex_lock(&BTRFS_I(inode)->csum_mutex); btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root, inode, sum); mutex_unlock(&BTRFS_I(inode)->csum_mutex); - list_del(&sum->list); - kfree(sum); } return 0; } @@ -498,9 +495,8 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, int ret; ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); - if (!ret) { + if (!ret) return 0; - } trans = btrfs_join_transaction(root, 1); @@ -571,6 +567,18 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) path = btrfs_alloc_path(); item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); if (IS_ERR(item)) { + /* + * It is possible there is an ordered extent that has + * not yet finished for this range in the file. If so, + * that extent will have a csum cached, and it will insert + * the sum after all the blocks in the extent are fully + * on disk. So, look for an ordered extent and use the + * sum if found. + */ + ret = btrfs_find_ordered_sum(inode, start, &csum); + if (ret == 0) + goto found; + ret = PTR_ERR(item); /* a csum that isn't present is a preallocated region. */ if (ret == -ENOENT || ret == -EFBIG) @@ -582,6 +590,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) } read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, BTRFS_CRC32_SIZE); +found: set_state_private(io_tree, start, csum); out: if (path) @@ -888,7 +897,7 @@ static void fill_inode_item(struct extent_buffer *leaf, BTRFS_I(inode)->block_group->key.objectid); } -int btrfs_update_inode(struct btrfs_trans_handle *trans, +int noinline btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { @@ -1567,6 +1576,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); mutex_init(&BTRFS_I(inode)->csum_mutex); return 0; } @@ -1868,6 +1878,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); mutex_init(&BTRFS_I(inode)->csum_mutex); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->disk_i_size = 0; @@ -2097,6 +2108,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -2618,14 +2630,6 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) { - struct btrfs_ordered_extent *ordered; - - ordered = btrfs_lookup_ordered_extent(page->mapping->host, - page_offset(page)); - if (ordered) { - btrfs_put_ordered_extent(ordered); - return 0; - } return __btrfs_releasepage(page, gfp_flags); } @@ -3078,6 +3082,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index d86a953ae51d..b739e3abebb9 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -245,8 +245,18 @@ out: int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) { - if (atomic_dec_and_test(&entry->refs)) + struct list_head *cur; + struct btrfs_ordered_sum *sum; + + if (atomic_dec_and_test(&entry->refs)) { + while(!list_empty(&entry->list)) { + cur = entry->list.next; + sum = list_entry(cur, struct btrfs_ordered_sum, list); + list_del(&sum->list); + kfree(sum); + } kfree(entry); + } return 0; } @@ -444,8 +454,9 @@ int btrfs_ordered_update_i_size(struct inode *inode, * if we find an ordered extent then we can't update disk i_size * yet */ + node = &ordered->rb_node; while(1) { - node = rb_prev(&ordered->rb_node); + node = rb_prev(node); if (!node) break; test = rb_entry(node, struct btrfs_ordered_extent, rb_node); @@ -495,3 +506,36 @@ out: mutex_unlock(&tree->mutex); return 0; } + +int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) +{ + struct btrfs_ordered_sum *ordered_sum; + struct btrfs_sector_sum *sector_sums; + struct btrfs_ordered_extent *ordered; + struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; + struct list_head *cur; + int ret = 1; + int index; + + ordered = btrfs_lookup_ordered_extent(inode, offset); + if (!ordered) + return 1; + + mutex_lock(&tree->mutex); + list_for_each_prev(cur, &ordered->list) { + ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list); + if (offset >= ordered_sum->file_offset && + offset < ordered_sum->file_offset + ordered_sum->len) { + index = (offset - ordered_sum->file_offset) / + BTRFS_I(inode)->root->sectorsize;; + sector_sums = &ordered_sum->sums; + *sum = sector_sums[index].sum; + ret = 0; + goto out; + } + } +out: + mutex_unlock(&tree->mutex); + return ret; +} + diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 40e9126ad954..33f0d9e91b11 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -91,4 +91,5 @@ int btrfs_add_ordered_pending(struct inode *inode, u64 start, u64 len); int btrfs_ordered_update_i_size(struct inode *inode, struct btrfs_ordered_extent *ordered); +int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum); #endif From ee6e6504e147a59a9f4d582662c105e9d72ae638 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:40 -0400 Subject: [PATCH 0592/2985] Add a per-inode lock around btrfs_drop_extents btrfs_drop_extents is always called with a range lock held on the inode. But, it may operate on extents outside that range as it drops and splits them. This patch adds a per-inode mutex that is held while calling btrfs_drop_extents and while inserting new extents into the tree. It prevents races from two procs working against adjacent ranges in the tree. Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/extent-tree.c | 2 ++ fs/btrfs/file.c | 8 ++++++++ fs/btrfs/inode.c | 10 ++++++++++ 4 files changed, 21 insertions(+) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 81c0444f37b6..3bf40591742a 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -32,6 +32,7 @@ struct btrfs_inode { struct extent_io_tree io_tree; struct extent_io_tree io_failure_tree; struct mutex csum_mutex; + struct mutex extent_mutex; struct inode vfs_inode; struct btrfs_ordered_inode_tree ordered_tree; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 343d1101c31c..4036c62b6671 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1671,6 +1671,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid, ref_generation, owner_objectid, owner_offset, pin, pin == 0); + + finish_current_insert(trans, root->fs_info->extent_root); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3e4e5c227c0c..40ad1b2958cb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -242,6 +242,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 end_of_last_block; u64 end_pos = pos + write_bytes; u64 inline_size; + int did_inline = 0; loff_t isize = i_size_read(inode); start_pos = pos & ~((u64)root->sectorsize - 1); @@ -275,6 +276,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, if (hole_size > 0) { btrfs_wait_ordered_range(inode, last_pos_in_file, last_pos_in_file + hole_size); + mutex_lock(&BTRFS_I(inode)->extent_mutex); err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, @@ -289,6 +291,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, 0, 0, hole_size, 0); btrfs_drop_extent_cache(inode, last_pos_in_file, last_pos_in_file + hole_size -1); + mutex_unlock(&BTRFS_I(inode)->extent_mutex); btrfs_check_file(root, inode); } if (err) @@ -321,6 +324,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, /* step one, delete the existing extents in this range */ aligned_end = (pos + write_bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + mutex_lock(&BTRFS_I(inode)->extent_mutex); err = btrfs_drop_extents(trans, root, inode, start_pos, aligned_end, aligned_end, &hint_byte); if (err) @@ -332,9 +336,13 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size, pages, 0, num_pages); btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); BUG_ON(err); + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + did_inline = 1; } if (end_pos > isize) { i_size_write(inode, end_pos); + if (did_inline) + BTRFS_I(inode)->disk_i_size = end_pos; btrfs_update_inode(trans, root, inode); } failed: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 293355c92a4f..0d1e2ba47a9c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -516,6 +516,8 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, trans->transid, inode->i_ino, ordered_extent->file_offset, &ins); BUG_ON(ret); + + mutex_lock(&BTRFS_I(inode)->extent_mutex); ret = btrfs_drop_extents(trans, root, inode, ordered_extent->file_offset, ordered_extent->file_offset + @@ -531,6 +533,8 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, btrfs_drop_extent_cache(inode, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1); + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + inode->i_blocks += ordered_extent->len >> 9; unlock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, @@ -1399,6 +1403,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); + mutex_lock(&BTRFS_I(inode)->extent_mutex); err = btrfs_drop_extents(trans, root, inode, hole_start, block_end, hole_start, &alloc_hint); @@ -1412,6 +1417,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) (u64)-1); btrfs_check_file(root, inode); } + mutex_unlock(&BTRFS_I(inode)->extent_mutex); btrfs_end_transaction(trans, root); unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); if (err) @@ -1578,6 +1584,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) inode->i_mapping, GFP_NOFS); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); mutex_init(&BTRFS_I(inode)->csum_mutex); + mutex_init(&BTRFS_I(inode)->extent_mutex); return 0; } @@ -1880,6 +1887,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mapping, GFP_NOFS); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); mutex_init(&BTRFS_I(inode)->csum_mutex); + mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->root = root; @@ -2105,6 +2113,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); + mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; @@ -3079,6 +3088,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); mutex_init(&BTRFS_I(inode)->csum_mutex); + mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; From 0bd40a718444b28793283b70286f9e29d464a189 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:43 -0400 Subject: [PATCH 0593/2985] btrfs_next_leaf: do readahead when skip_locking is turned on Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index bbf9bf374066..cdc713062b03 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3206,7 +3206,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) free_extent_buffer(next); } - if (level == 1 && path->locks[1] && path->reada) + if (level == 1 && (path->locks[1] || path->skip_locking) && + path->reada) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, c, slot); From 54641bd17db9fbfc13c7b1d4ee0dd2713bf3e076 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:48 -0400 Subject: [PATCH 0594/2985] Btrfs: Force caching of metadata block groups on mount to avoid deadlock This is a temporary change to avoid deadlocks until the extent tree locking is fixed up. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4036c62b6671..be2aef1cb7ae 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3333,6 +3333,11 @@ int btrfs_read_block_groups(struct btrfs_root *root) set_state_private(block_group_cache, found_key.objectid, (unsigned long)cache); + /* hack for now */ + if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { + cache_block_group(root->fs_info->extent_root, + cache); + } if (key.objectid >= btrfs_super_total_bytes(&info->super_copy)) break; From eb84ae039e10f1f80443d846ba1350122bbdc753 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 13:53:27 -0400 Subject: [PATCH 0595/2985] Btrfs: Cleanup and comment ordered-data.c Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 12 +++- fs/btrfs/ordered-data.c | 130 +++++++++++++++++++++------------------- fs/btrfs/ordered-data.h | 49 ++++++++++++--- 3 files changed, 121 insertions(+), 70 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0d1e2ba47a9c..f37e09e724f7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1343,7 +1343,7 @@ again: unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); page_cache_release(page); - btrfs_wait_ordered_extent(inode, ordered); + btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); goto again; } @@ -2660,6 +2660,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) ordered = btrfs_lookup_ordered_extent(page->mapping->host, page_offset(page)); if (ordered) { + /* + * IO on this page will never be started, so we need + * to account for any ordered extents now + */ clear_extent_bit(tree, page_start, page_end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_LOCKED, 1, 0, GFP_NOFS); @@ -2732,11 +2736,15 @@ again: lock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_extent_mapped(page); + /* + * we can't set the delalloc bits if there are pending ordered + * extents. Drop our locks and wait for them to finish + */ ordered = btrfs_lookup_ordered_extent(inode, page_start); if (ordered) { unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); - btrfs_wait_ordered_extent(inode, ordered); + btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); goto again; } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b739e3abebb9..230fd3ca6b2c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -136,6 +136,19 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, return ret; } +/* allocate and add a new ordered_extent into the per-inode tree. + * file_offset is the logical offset in the file + * + * start is the disk block number of an extent already reserved in the + * extent allocation tree + * + * len is the length of the extent + * + * This also sets the EXTENT_ORDERED bit on the range in the inode. + * + * The tree is given a single reference on the ordered extent that was + * inserted. + */ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, u64 start, u64 len) { @@ -152,7 +165,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->file_offset = file_offset; entry->start = start; entry->len = len; - entry->inode = inode; /* one ref for the tree */ atomic_set(&entry->refs, 1); init_waitqueue_head(&entry->wait); @@ -167,12 +179,15 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, entry_end(entry) - 1, GFP_NOFS); - set_bit(BTRFS_ORDERED_START, &entry->flags); mutex_unlock(&tree->mutex); BUG_ON(node); return 0; } +/* + * Add a struct btrfs_ordered_sum into the list of checksums to be inserted + * when an ordered extent is finished. + */ int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum) { struct btrfs_ordered_inode_tree *tree; @@ -182,29 +197,25 @@ int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum) tree = &BTRFS_I(inode)->ordered_tree; mutex_lock(&tree->mutex); node = tree_search(tree, sum->file_offset); - if (!node) { -search_fail: -printk("add ordered sum failed to find a node for inode %lu offset %Lu\n", inode->i_ino, sum->file_offset); - node = rb_first(&tree->tree); - while(node) { - entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); - printk("entry %Lu %Lu %Lu\n", entry->file_offset, entry->file_offset + entry->len, entry->start); - node = rb_next(node); - } - BUG(); - } BUG_ON(!node); entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); - if (!offset_in_entry(entry, sum->file_offset)) { - goto search_fail; - } + BUG_ON(!offset_in_entry(entry, sum->file_offset)); list_add_tail(&sum->list, &entry->list); mutex_unlock(&tree->mutex); return 0; } +/* + * this is used to account for finished IO across a given range + * of the file. The IO should not span ordered extents. If + * a given ordered_extent is completely done, 1 is returned, otherwise + * 0. + * + * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used + * to make sure this function only returns 1 once for a given ordered extent. + */ int btrfs_dec_test_ordered_pending(struct inode *inode, u64 file_offset, u64 io_size) { @@ -233,9 +244,6 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, ret = test_range_bit(io_tree, entry->file_offset, entry->file_offset + entry->len - 1, EXTENT_ORDERED, 0); - if (!test_bit(BTRFS_ORDERED_START, &entry->flags)) { -printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file_offset, entry_end(entry)); - } if (ret == 0) ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); out: @@ -243,6 +251,10 @@ out: return ret == 0; } +/* + * used to drop a reference on an ordered extent. This will free + * the extent if the last reference is dropped + */ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) { struct list_head *cur; @@ -260,6 +272,10 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) return 0; } +/* + * remove an ordered extent from the tree. No references are dropped + * but, anyone waiting on this extent is woken up. + */ int btrfs_remove_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry) { @@ -277,27 +293,25 @@ int btrfs_remove_ordered_extent(struct inode *inode, return 0; } -void btrfs_wait_ordered_extent(struct inode *inode, - struct btrfs_ordered_extent *entry) -{ - u64 start = entry->file_offset; - u64 end = start + entry->len - 1; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) - do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); -#else - do_sync_mapping_range(inode->i_mapping, start, end, - SYNC_FILE_RANGE_WRITE); -#endif - wait_event(entry->wait, - test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags)); -} - -static void btrfs_start_ordered_extent(struct inode *inode, - struct btrfs_ordered_extent *entry, int wait) +/* + * Used to start IO or wait for a given ordered extent to finish. + * + * If wait is one, this effectively waits on page writeback for all the pages + * in the extent, and it waits on the io completion code to insert + * metadata into the btree corresponding to the extent + */ +void btrfs_start_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry, + int wait) { u64 start = entry->file_offset; u64 end = start + entry->len - 1; + /* + * pages in the range can be dirty, clean or writeback. We + * start IO on any dirty ones so the wait doesn't stall waiting + * for pdflush to find them + */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); #else @@ -309,6 +323,9 @@ static void btrfs_start_ordered_extent(struct inode *inode, &entry->flags)); } +/* + * Used to wait on ordered extents across a large range of bytes. + */ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) { u64 end; @@ -349,31 +366,11 @@ again: } } -int btrfs_add_ordered_pending(struct inode *inode, - struct btrfs_ordered_extent *ordered, - u64 start, u64 len) -{ - WARN_ON(1); - return 0; -#if 0 - int ret; - struct btrfs_ordered_inode_tree *tree; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - - tree = &BTRFS_I(inode)->ordered_tree; - mutex_lock(&tree->mutex); - if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) { - ret = -EAGAIN; - goto out; - } - set_extent_ordered(io_tree, start, start + len - 1, GFP_NOFS); - ret = 0; -out: - mutex_unlock(&tree->mutex); - return ret; -#endif -} +/* + * find an ordered extent corresponding to file_offset. return NULL if + * nothing is found, otherwise take a reference on the extent and return it + */ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, u64 file_offset) { @@ -397,6 +394,10 @@ out: return entry; } +/* + * lookup and return any extent before 'file_offset'. NULL is returned + * if none is found + */ struct btrfs_ordered_extent * btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset) { @@ -417,6 +418,10 @@ out: return entry; } +/* + * After an extent is done, call this to conditionally update the on disk + * i_size. i_size is updated to cover any fully written part of the file. + */ int btrfs_ordered_update_i_size(struct inode *inode, struct btrfs_ordered_extent *ordered) { @@ -507,6 +512,11 @@ out: return 0; } +/* + * search the ordered extents for one corresponding to 'offset' and + * try to find a checksum. This is used because we allow pages to + * be reclaimed before their checksum is actually put into the btree + */ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) { struct btrfs_ordered_sum *ordered_sum; diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 33f0d9e91b11..98f491d1022b 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -19,12 +19,19 @@ #ifndef __BTRFS_ORDERED_DATA__ #define __BTRFS_ORDERED_DATA__ +/* one of these per inode */ struct btrfs_ordered_inode_tree { struct mutex mutex; struct rb_root tree; struct rb_node *last; }; +/* + * these are used to collect checksums done just before bios submission. + * They are attached via a list into the ordered extent, and + * checksum items are inserted into the tree after all the blocks in + * the ordered extent are on disk + */ struct btrfs_sector_sum { u64 offset; u32 sum; @@ -34,27 +41,56 @@ struct btrfs_ordered_sum { u64 file_offset; u64 len; struct list_head list; + /* last field is a variable length array of btrfs_sector_sums */ struct btrfs_sector_sum sums; }; -/* bits for the flags field */ +/* + * bits for the flags field: + * + * BTRFS_ORDERED_IO_DONE is set when all of the blocks are written. + * It is used to make sure metadata is inserted into the tree only once + * per extent. + * + * BTRFS_ORDERED_COMPLETE is set when the extent is removed from the + * rbtree, just before waking any waiters. It is used to indicate the + * IO is done and any metadata is inserted into the tree. + */ #define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */ + #define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */ -#define BTRFS_ORDERED_START 2 /* set when tree setup */ struct btrfs_ordered_extent { + /* logical offset in the file */ u64 file_offset; + + /* disk byte number */ u64 start; + + /* length of the extent in bytes */ u64 len; + + /* flags (described above) */ unsigned long flags; + + /* reference count */ atomic_t refs; + + /* list of checksums for insertion when the extent io is done */ struct list_head list; - struct inode *inode; + + /* used to wait for the BTRFS_ORDERED_COMPLETE bit */ wait_queue_head_t wait; + + /* our friendly rbtree entry */ struct rb_node rb_node; }; +/* + * calculates the total size you need to allocate for an ordered sum + * structure spanning 'bytes' in the file + */ static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes) { unsigned long num_sectors = (bytes + root->sectorsize - 1) / @@ -81,14 +117,11 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum); struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, u64 file_offset); -void btrfs_wait_ordered_extent(struct inode *inode, - struct btrfs_ordered_extent *entry); +void btrfs_start_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry, int wait); void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); struct btrfs_ordered_extent * btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); -int btrfs_add_ordered_pending(struct inode *inode, - struct btrfs_ordered_extent *ordered, - u64 start, u64 len); int btrfs_ordered_update_i_size(struct inode *inode, struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum); From 3edf7d33f4edb1e4a9bb0a4c0a84d95fb4d22a09 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Jul 2008 06:17:13 -0400 Subject: [PATCH 0596/2985] Btrfs: Handle data checksumming on bios that span multiple ordered extents Data checksumming is done right before the bio is sent down the IO stack, which means a single bio might span more than one ordered extent. In this case, the checksumming data is split between two ordered extents. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/file-item.c | 43 +++++++++++++++++++++++++++++++++++++---- fs/btrfs/inode.c | 6 +----- fs/btrfs/ordered-data.c | 36 +++++++++++++++++----------------- fs/btrfs/ordered-data.h | 11 +++++++++-- 5 files changed, 69 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index acbce542d291..96ab2797c09a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1579,8 +1579,8 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, struct btrfs_ordered_sum *sums); -int btrfs_csum_one_bio(struct btrfs_root *root, - struct bio *bio, struct btrfs_ordered_sum **sums_ret); +int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, + struct bio *bio); struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 345caf8ff516..e02f1e5acb0a 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -134,26 +134,53 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } -int btrfs_csum_one_bio(struct btrfs_root *root, - struct bio *bio, struct btrfs_ordered_sum **sums_ret) +int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, + struct bio *bio) { struct btrfs_ordered_sum *sums; struct btrfs_sector_sum *sector_sum; + struct btrfs_ordered_extent *ordered; char *data; struct bio_vec *bvec = bio->bi_io_vec; int bio_index = 0; + unsigned long total_bytes = 0; + unsigned long this_sum_bytes = 0; + u64 offset; WARN_ON(bio->bi_vcnt <= 0); sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS); if (!sums) return -ENOMEM; - *sums_ret = sums; + sector_sum = &sums->sums; - sums->file_offset = page_offset(bvec->bv_page); + sums->file_offset = page_offset(bvec->bv_page) + bvec->bv_offset; sums->len = bio->bi_size; INIT_LIST_HEAD(&sums->list); + ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset); + BUG_ON(!ordered); while(bio_index < bio->bi_vcnt) { + offset = page_offset(bvec->bv_page) + bvec->bv_offset; + if (offset >= ordered->file_offset + ordered->len) { + unsigned long bytes_left; + sums->len = this_sum_bytes; + this_sum_bytes = 0; + btrfs_add_ordered_sum(inode, ordered, sums); + btrfs_put_ordered_extent(ordered); + + bytes_left = bio->bi_size - total_bytes; + + sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), + GFP_NOFS); + BUG_ON(!sums); + sector_sum = &sums->sums; + sums->len = bytes_left; + sums->file_offset = offset; + ordered = btrfs_lookup_ordered_extent(inode, + sums->file_offset); + BUG_ON(!ordered); + } + data = kmap_atomic(bvec->bv_page, KM_USER0); sector_sum->sum = ~(u32)0; sector_sum->sum = btrfs_csum_data(root, @@ -165,10 +192,18 @@ int btrfs_csum_one_bio(struct btrfs_root *root, (char *)§or_sum->sum); sector_sum->offset = page_offset(bvec->bv_page) + bvec->bv_offset; + sector_sum++; bio_index++; + total_bytes += bvec->bv_len; + this_sum_bytes += bvec->bv_len; bvec++; } + btrfs_add_ordered_sum(inode, ordered, sums); + btrfs_put_ordered_extent(ordered); + if (total_bytes != bio->bi_size) { +printk("warning, total bytes %lu bio size %u\n", total_bytes, bio->bi_size); + } return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f37e09e724f7..4d729d90d2b8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -351,12 +351,8 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, { struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; - struct btrfs_ordered_sum *sums; - ret = btrfs_csum_one_bio(root, bio, &sums); - BUG_ON(ret); - - ret = btrfs_add_ordered_sum(inode, sums); + ret = btrfs_csum_one_bio(root, inode, bio); BUG_ON(ret); return btrfs_map_bio(root, rw, bio, mirror_num, 1); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 230fd3ca6b2c..1ddb7bceea99 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -186,22 +186,17 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, /* * Add a struct btrfs_ordered_sum into the list of checksums to be inserted - * when an ordered extent is finished. + * when an ordered extent is finished. If the list covers more than one + * ordered extent, it is split across multiples. */ -int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum) +int btrfs_add_ordered_sum(struct inode *inode, + struct btrfs_ordered_extent *entry, + struct btrfs_ordered_sum *sum) { struct btrfs_ordered_inode_tree *tree; - struct rb_node *node; - struct btrfs_ordered_extent *entry; tree = &BTRFS_I(inode)->ordered_tree; mutex_lock(&tree->mutex); - node = tree_search(tree, sum->file_offset); - BUG_ON(!node); - - entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); - BUG_ON(!offset_in_entry(entry, sum->file_offset)); - list_add_tail(&sum->list, &entry->list); mutex_unlock(&tree->mutex); return 0; @@ -524,8 +519,10 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) struct btrfs_ordered_extent *ordered; struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; struct list_head *cur; + unsigned long num_sectors; + unsigned long i; + u32 sectorsize = BTRFS_I(inode)->root->sectorsize; int ret = 1; - int index; ordered = btrfs_lookup_ordered_extent(inode, offset); if (!ordered) @@ -534,14 +531,17 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) mutex_lock(&tree->mutex); list_for_each_prev(cur, &ordered->list) { ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list); - if (offset >= ordered_sum->file_offset && - offset < ordered_sum->file_offset + ordered_sum->len) { - index = (offset - ordered_sum->file_offset) / - BTRFS_I(inode)->root->sectorsize;; + if (offset >= ordered_sum->file_offset) { + num_sectors = ordered_sum->len / sectorsize; sector_sums = &ordered_sum->sums; - *sum = sector_sums[index].sum; - ret = 0; - goto out; + for (i = 0; i < num_sectors; i++) { + if (sector_sums[i].offset == offset) { +printk("find ordered sum inode %lu offset %Lu\n", inode->i_ino, offset); + *sum = sector_sums[i].sum; + ret = 0; + goto out; + } + } } } out: diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 98f491d1022b..1794efd13ca3 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -39,7 +39,11 @@ struct btrfs_sector_sum { struct btrfs_ordered_sum { u64 file_offset; - u64 len; + /* + * this is the length in bytes covered by the sums array below. + * But, the sums array may not be contiguous in the file. + */ + unsigned long len; struct list_head list; /* last field is a variable length array of btrfs_sector_sums */ struct btrfs_sector_sum sums; @@ -95,6 +99,7 @@ static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes) { unsigned long num_sectors = (bytes + root->sectorsize - 1) / root->sectorsize; + num_sectors++; return sizeof(struct btrfs_ordered_sum) + num_sectors * sizeof(struct btrfs_sector_sum); } @@ -114,7 +119,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, u64 file_offset, u64 io_size); int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, u64 start, u64 len); -int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum); +int btrfs_add_ordered_sum(struct inode *inode, + struct btrfs_ordered_extent *entry, + struct btrfs_ordered_sum *sum); struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, u64 file_offset); void btrfs_start_ordered_extent(struct inode *inode, From 211f90e68b679d27fe23c5505f86d6ce62c98bae Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Jul 2008 11:56:15 -0400 Subject: [PATCH 0597/2985] Btrfs: Don't allow releasepage to succeed if EXTENT_ORDERED is set Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 3 ++- fs/btrfs/inode.c | 14 +++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index feff16cb9b40..3153b4fdd0b3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2542,7 +2542,8 @@ int try_release_extent_state(struct extent_map_tree *map, u64 end = start + PAGE_CACHE_SIZE - 1; int ret = 1; - if (test_range_bit(tree, start, end, EXTENT_IOBITS, 0)) + if (test_range_bit(tree, start, end, + EXTENT_IOBITS | EXTENT_ORDERED, 0)) ret = 0; else { if ((mask & GFP_NOFS) == GFP_NOFS) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4d729d90d2b8..8803abc89bb8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -477,10 +477,8 @@ printk("queueing worker to fixup page %lu %Lu\n", inode->i_ino, page_offset(page return -EAGAIN; } -int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, - struct extent_state *state, int uptodate) +static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) { - struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent; @@ -550,6 +548,12 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, return 0; } +int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, + struct extent_state *state, int uptodate) +{ + return btrfs_finish_ordered_io(page->mapping->host, start, end); +} + int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) { int ret = 0; @@ -2663,8 +2667,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) clear_extent_bit(tree, page_start, page_end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_LOCKED, 1, 0, GFP_NOFS); - btrfs_writepage_end_io_hook(page, page_start, - page_end, NULL, 1); + btrfs_finish_ordered_io(page->mapping->host, + page_start, page_end); btrfs_put_ordered_extent(ordered); lock_extent(tree, page_start, page_end, GFP_NOFS); } From 7f3c74fb831fa19bafe087e817c0a5ff3883f1ea Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Jul 2008 12:01:11 -0400 Subject: [PATCH 0598/2985] Btrfs: Keep extent mappings in ram until pending ordered extents are done It was possible for stale mappings from disk to be used instead of the new pending ordered extent. This adds a flag to the extent map struct to keep it pinned until the pending ordered extent is actually on disk. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 27 +++++++++++++++------------ fs/btrfs/extent_map.c | 4 ++++ fs/btrfs/extent_map.h | 3 +++ fs/btrfs/file-item.c | 5 +---- fs/btrfs/file.c | 14 ++++++++++---- fs/btrfs/inode.c | 15 +++++++++++++++ 6 files changed, 48 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3153b4fdd0b3..d4a63ae7ed1b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2000,7 +2000,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, struct block_device *bdev; int ret; int nr = 0; - size_t page_offset = 0; + size_t pg_offset = 0; size_t blocksize; loff_t i_size = i_size_read(inode); unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; @@ -2008,9 +2008,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 delalloc_end; WARN_ON(!PageLocked(page)); - page_offset = i_size & (PAGE_CACHE_SIZE - 1); + pg_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || - (page->index == end_index && !page_offset)) { + (page->index == end_index && !pg_offset)) { page->mapping->a_ops->invalidatepage(page, 0); unlock_page(page); return 0; @@ -2020,12 +2020,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, char *userpage; userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, - PAGE_CACHE_SIZE - page_offset); + memset(userpage + pg_offset, 0, + PAGE_CACHE_SIZE - pg_offset); kunmap_atomic(userpage, KM_USER0); flush_dcache_page(page); } - page_offset = 0; + pg_offset = 0; set_page_extent_mapped(page); @@ -2088,7 +2088,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unlock_start = page_end + 1; break; } - em = epd->get_extent(inode, page, page_offset, cur, + em = epd->get_extent(inode, page, pg_offset, cur, end - cur + 1, 1); if (IS_ERR(em) || !em) { SetPageError(page); @@ -2113,12 +2113,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unlock_extent(tree, unlock_start, cur + iosize -1, GFP_NOFS); + if (tree->ops && tree->ops->writepage_end_io_hook) tree->ops->writepage_end_io_hook(page, cur, cur + iosize - 1, NULL, 1); cur = cur + iosize; - page_offset += iosize; + pg_offset += iosize; unlock_start = cur; continue; } @@ -2127,7 +2128,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (0 && !test_range_bit(tree, cur, cur + iosize - 1, EXTENT_DIRTY, 0)) { cur = cur + iosize; - page_offset += iosize; + pg_offset += iosize; continue; } clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -2141,6 +2142,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, SetPageError(page); } else { unsigned long max_nr = end_index + 1; + set_range_writeback(tree, cur, cur + iosize - 1); if (!PageWriteback(page)) { printk("warning page %lu not writeback, " @@ -2150,14 +2152,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, } ret = submit_extent_page(WRITE, tree, page, sector, - iosize, page_offset, bdev, + iosize, pg_offset, bdev, &epd->bio, max_nr, end_bio_extent_writepage, 0); if (ret) SetPageError(page); } cur = cur + iosize; - page_offset += iosize; + pg_offset += iosize; nr++; } done: @@ -2579,7 +2581,8 @@ int try_release_extent_mapping(struct extent_map_tree *map, spin_unlock(&map->lock); break; } - if (em->start != start) { + if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || + em->start != start) { spin_unlock(&map->lock); free_extent_map(em); break; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 81123277c2b8..71b1ac155355 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -173,6 +173,9 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) static int mergable_maps(struct extent_map *prev, struct extent_map *next) { + if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) + return 0; + if (extent_map_end(prev) == next->start && prev->flags == next->flags && prev->bdev == next->bdev && @@ -320,6 +323,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; + WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); BUG_ON(spin_trylock(&tree->lock)); rb_erase(&em->rb_node, &tree->map); em->in_tree = 0; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 56314217cfc0..a3978ec27846 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -8,6 +8,9 @@ #define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_DELALLOC (u64)-1 +/* bits for the flags field */ +#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ + struct extent_map { struct rb_node rb_node; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index e02f1e5acb0a..d9c69e16d368 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -192,7 +192,6 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, (char *)§or_sum->sum); sector_sum->offset = page_offset(bvec->bv_page) + bvec->bv_offset; - sector_sum++; bio_index++; total_bytes += bvec->bv_len; @@ -201,9 +200,6 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, } btrfs_add_ordered_sum(inode, ordered, sums); btrfs_put_ordered_extent(ordered); - if (total_bytes != bio->bi_size) { -printk("warning, total bytes %lu bio size %u\n", total_bytes, bio->bi_size); - } return 0; } @@ -372,6 +368,7 @@ next_sector: write_extent_buffer(leaf, §or_sum->sum, (unsigned long)item, BTRFS_CRC32_SIZE); } + total_bytes += root->sectorsize; sector_sum++; if (total_bytes < sums->len) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 40ad1b2958cb..eccdb9562ba8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -358,9 +358,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map *split = NULL; struct extent_map *split2 = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct extent_map *tmp; u64 len = end - start + 1; - u64 next_start; int ret; int testend = 1; @@ -381,8 +379,16 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) spin_unlock(&em_tree->lock); break; } - tmp = rb_entry(&em->rb_node, struct extent_map, rb_node); - next_start = tmp->start; + if (test_bit(EXTENT_FLAG_PINNED, &em->flags)) { + start = em->start + em->len; + free_extent_map(em); + spin_unlock(&em_tree->lock); + if (start < end) { + len = end - start + 1; + continue; + } + break; + } remove_extent_mapping(em_tree, em); if (em->block_start < EXTENT_MAP_LAST_BYTE && diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8803abc89bb8..08dbe738b512 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -144,6 +144,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) em->len = ins.offset; em->block_start = ins.objectid; em->bdev = root->fs_info->fs_devices->latest_bdev; + set_bit(EXTENT_FLAG_PINNED, &em->flags); while(1) { spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); @@ -483,6 +484,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *em; u64 alloc_hint = 0; struct list_head list; struct btrfs_key ins; @@ -524,6 +527,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->len, ordered_extent->len, 0); BUG_ON(ret); + + + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, ordered_extent->file_offset, + ordered_extent->len); + if (em) { + clear_bit(EXTENT_FLAG_PINNED, &em->flags); + free_extent_map(em); + } + spin_unlock(&em_tree->lock); + btrfs_drop_extent_cache(inode, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1); @@ -538,6 +552,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) btrfs_ordered_update_i_size(inode, ordered_extent); btrfs_remove_ordered_extent(inode, ordered_extent); + /* once for us */ btrfs_put_ordered_extent(ordered_extent); /* once for the tree */ From e5a2217ef6ff088d08a27208929a6f9c635d672c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Jul 2008 20:42:20 -0400 Subject: [PATCH 0599/2985] Fix btrfs_wait_ordered_extent_range to properly wait Signed-off-by: Chris Mason --- fs/btrfs/file-item.c | 3 ++- fs/btrfs/inode.c | 26 +++++++++++++++++++------ fs/btrfs/ordered-data.c | 43 +++++++++++++++++++++++++---------------- 3 files changed, 48 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index d9c69e16d368..45127e4797c8 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -161,7 +161,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, while(bio_index < bio->bi_vcnt) { offset = page_offset(bvec->bv_page) + bvec->bv_offset; - if (offset >= ordered->file_offset + ordered->len) { + if (offset >= ordered->file_offset + ordered->len || + offset < ordered->file_offset) { unsigned long bytes_left; sums->len = this_sum_bytes; this_sum_bytes = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 08dbe738b512..50ee4befac8e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -128,7 +128,9 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) goto out; BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); + mutex_lock(&BTRFS_I(inode)->extent_mutex); btrfs_drop_extent_cache(inode, start, start + num_bytes - 1); + mutex_unlock(&BTRFS_I(inode)->extent_mutex); while(num_bytes > 0) { cur_alloc_size = min(num_bytes, root->fs_info->max_extent); @@ -144,6 +146,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) em->len = ins.offset; em->block_start = ins.objectid; em->bdev = root->fs_info->fs_devices->latest_bdev; + mutex_lock(&BTRFS_I(inode)->extent_mutex); set_bit(EXTENT_FLAG_PINNED, &em->flags); while(1) { spin_lock(&em_tree->lock); @@ -156,6 +159,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) btrfs_drop_extent_cache(inode, start, start + ins.offset - 1); } + mutex_unlock(&BTRFS_I(inode)->extent_mutex); cur_alloc_size = ins.offset; ret = btrfs_add_ordered_extent(inode, start, ins.objectid, @@ -487,6 +491,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em; u64 alloc_hint = 0; + u64 clear_start; + u64 clear_end; struct list_head list; struct btrfs_key ins; int ret; @@ -509,12 +515,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ins.objectid = ordered_extent->start; ins.offset = ordered_extent->len; ins.type = BTRFS_EXTENT_ITEM_KEY; + ret = btrfs_alloc_reserved_extent(trans, root, root->root_key.objectid, trans->transid, inode->i_ino, ordered_extent->file_offset, &ins); BUG_ON(ret); mutex_lock(&BTRFS_I(inode)->extent_mutex); + ret = btrfs_drop_extents(trans, root, inode, ordered_extent->file_offset, ordered_extent->file_offset + @@ -528,13 +536,19 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->len, 0); BUG_ON(ret); - spin_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, ordered_extent->file_offset, - ordered_extent->len); - if (em) { - clear_bit(EXTENT_FLAG_PINNED, &em->flags); - free_extent_map(em); + clear_start = ordered_extent->file_offset; + clear_end = ordered_extent->file_offset + ordered_extent->len; + while(clear_start < clear_end) { + em = lookup_extent_mapping(em_tree, clear_start, + clear_end - clear_start); + if (em) { + clear_bit(EXTENT_FLAG_PINNED, &em->flags); + clear_start = em->start + em->len; + free_extent_map(em); + } else { + break; + } } spin_unlock(&em_tree->lock); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 1ddb7bceea99..c2b4a9c4ddb6 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -324,22 +324,37 @@ void btrfs_start_ordered_extent(struct inode *inode, void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) { u64 end; + u64 orig_end; + u64 wait_end; struct btrfs_ordered_extent *ordered; - int found; - int should_wait = 0; + u64 mask = BTRFS_I(inode)->root->sectorsize - 1; + + if (start + len < start) { + wait_end = (inode->i_size + mask) & ~mask; + orig_end = (u64)-1; + } else { + orig_end = start + len - 1; + wait_end = orig_end; + } + + /* start IO across the range first to instantiate any delalloc + * extents + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + do_sync_file_range(file, start, wait_end, SYNC_FILE_RANGE_WRITE); +#else + do_sync_mapping_range(inode->i_mapping, start, wait_end, + SYNC_FILE_RANGE_WRITE); +#endif + end = orig_end; + wait_on_extent_writeback(&BTRFS_I(inode)->io_tree, start, orig_end); -again: - if (start + len < start) - end = (u64)-1; - else - end = start + len - 1; - found = 0; while(1) { ordered = btrfs_lookup_first_ordered_extent(inode, end); if (!ordered) { break; } - if (ordered->file_offset >= start + len) { + if (ordered->file_offset > orig_end) { btrfs_put_ordered_extent(ordered); break; } @@ -347,21 +362,15 @@ again: btrfs_put_ordered_extent(ordered); break; } - btrfs_start_ordered_extent(inode, ordered, should_wait); - found++; + btrfs_start_ordered_extent(inode, ordered, 1); end = ordered->file_offset; btrfs_put_ordered_extent(ordered); - if (end == 0) + if (end == 0 || end == start) break; end--; } - if (should_wait && found) { - should_wait = 0; - goto again; - } } - /* * find an ordered extent corresponding to file_offset. return NULL if * nothing is found, otherwise take a reference on the extent and return it From 4a09675279674041862d2210635b0cc1f60be28e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 21 Jul 2008 10:29:44 -0400 Subject: [PATCH 0600/2985] Btrfs: Data ordered fixes * In btrfs_delete_inode, wait for ordered extents after calling truncate_inode_pages. This is much faster, and more correct * Properly clear our the PageChecked bit everywhere we redirty the page. * Change the writepage fixup handler to lock the page range and check to see if an ordered extent had been inserted since the improperly dirtied page was discovered * Wait for ordered extents outside the transaction. This isn't required for locking rules but does improve transaction latencies * Reduce contention on the alloc_mutex by dropping it while incrementing refs on a node/leaf and while dropping refs on a leaf. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 18 +++++++++++++++--- fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 23 ++++++++++++++++++----- fs/btrfs/ordered-data.c | 11 +++++++++-- 4 files changed, 43 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index be2aef1cb7ae..ccd49322f793 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -934,7 +934,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (!root->ref_cows) return 0; - mutex_lock(&root->fs_info->alloc_mutex); level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); for (i = 0; i < nritems; i++) { @@ -951,29 +950,36 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); if (disk_bytenr == 0) continue; + + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(buf, fi), root->root_key.objectid, trans->transid, key.objectid, key.offset); + mutex_unlock(&root->fs_info->alloc_mutex); if (ret) { faili = i; + WARN_ON(1); goto fail; } } else { bytenr = btrfs_node_blockptr(buf, i); btrfs_node_key_to_cpu(buf, &key, i); + + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_inc_extent_ref(trans, root, bytenr, btrfs_level_size(root, level - 1), root->root_key.objectid, trans->transid, level - 1, key.objectid); + mutex_unlock(&root->fs_info->alloc_mutex); if (ret) { faili = i; + WARN_ON(1); goto fail; } } } - mutex_unlock(&root->fs_info->alloc_mutex); return 0; fail: WARN_ON(1); @@ -1004,7 +1010,6 @@ fail: } } #endif - mutex_unlock(&root->fs_info->alloc_mutex); return ret; } @@ -2180,6 +2185,8 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, leaf_owner = btrfs_header_owner(leaf); leaf_generation = btrfs_header_generation(leaf); + mutex_unlock(&root->fs_info->alloc_mutex); + for (i = 0; i < nritems; i++) { u64 disk_bytenr; @@ -2197,12 +2204,17 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); if (disk_bytenr == 0) continue; + + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), leaf_owner, leaf_generation, key.objectid, key.offset, 0); + mutex_unlock(&root->fs_info->alloc_mutex); BUG_ON(ret); } + + mutex_lock(&root->fs_info->alloc_mutex); return 0; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index eccdb9562ba8..591a30208acd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -75,6 +75,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) for (i = 0; i < num_pages; i++) { if (!pages[i]) break; + ClearPageChecked(pages[i]); unlock_page(pages[i]); mark_page_accessed(pages[i]); page_cache_release(pages[i]); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 50ee4befac8e..8fb6dc25e7a5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -418,7 +418,7 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work) fixup = container_of(work, struct btrfs_writepage_fixup, work); page = fixup->page; - +again: lock_page(page); if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { ClearPageChecked(page); @@ -430,9 +430,21 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work) page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); - ordered = btrfs_lookup_ordered_extent(inode, page_start); - if (ordered) + + /* already ordered? We're done */ + if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, + EXTENT_ORDERED, 0)) { goto out; + } + + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(&BTRFS_I(inode)->io_tree, page_start, + page_end, GFP_NOFS); + unlock_page(page); + btrfs_start_ordered_extent(inode, ordered, 1); + goto again; + } set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); @@ -1465,11 +1477,11 @@ void btrfs_delete_inode(struct inode *inode) unsigned long nr; int ret; - btrfs_wait_ordered_range(inode, 0, (u64)-1); truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) { goto no_delete; } + btrfs_wait_ordered_range(inode, 0, (u64)-1); btrfs_i_size_write(inode, 0); trans = btrfs_start_transaction(root, 1); @@ -2707,6 +2719,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) 1, 1, GFP_NOFS); __btrfs_releasepage(page, GFP_NOFS); + ClearPageChecked(page); if (PagePrivate(page)) { invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); @@ -2818,10 +2831,10 @@ static void btrfs_truncate(struct inode *inode) return; btrfs_truncate_page(inode->i_mapping, inode->i_size); + btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); - btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_i_size_write(inode, inode->i_size); /* FIXME, add redo link to tree so we don't leak on crash */ diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index c2b4a9c4ddb6..0d87795fdd8f 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -336,7 +336,7 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) orig_end = start + len - 1; wait_end = orig_end; } - +again: /* start IO across the range first to instantiate any delalloc * extents */ @@ -369,6 +369,14 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) break; end--; } + if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, + EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { + printk("inode %lu still ordered or delalloc after wait " + "%llu %llu\n", inode->i_ino, + (unsigned long long)start, + (unsigned long long)orig_end); + goto again; + } } /* @@ -545,7 +553,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) sector_sums = &ordered_sum->sums; for (i = 0; i < num_sectors; i++) { if (sector_sums[i].offset == offset) { -printk("find ordered sum inode %lu offset %Lu\n", inode->i_ino, offset); *sum = sector_sums[i].sum; ret = 0; goto out; From 6af118ce51b52ceda357c671550c79628b9c4a65 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 11:18:07 -0400 Subject: [PATCH 0601/2985] Btrfs: Index extent buffers in an rbtree Before, extent buffers were a temporary object, meant to map a number of pages at once and collect operations on them. But, a few extra fields have crept in, and they are also the best place to store a per-tree block lock field as well. This commit puts the extent buffers into an rbtree, and ensures a single extent buffer for each tree block. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 26 ++-- fs/btrfs/extent_io.c | 309 ++++++++++++++++--------------------------- fs/btrfs/extent_io.h | 11 +- fs/btrfs/inode.c | 3 - 4 files changed, 129 insertions(+), 220 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 99bb385c2982..86e84a8579e3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -381,7 +381,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, end = min_t(u64, eb->len, PAGE_CACHE_SIZE); end = eb->start + end - 1; - release_extent_buffer_tail_pages(eb); err: free_extent_buffer(eb); out: @@ -563,21 +562,21 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) struct extent_map_tree *map; int ret; - if (page_count(page) > 3) { - /* once for page->private, once for the caller, once - * once for the page cache - */ - return 0; - } tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_state(map, tree, page, gfp_flags); + if (!ret) { + return 0; + } + + ret = try_release_extent_buffer(tree, page); if (ret == 1) { - invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); } + return ret; } @@ -588,7 +587,8 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); if (PagePrivate(page)) { - invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); + printk("warning page private not zero on page %Lu\n", + page_offset(page)); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -1456,7 +1456,6 @@ fail_tree_root: free_extent_buffer(tree_root->node); fail_sys_array: fail_sb_buffer: - extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); @@ -1705,13 +1704,6 @@ int close_ctree(struct btrfs_root *root) filemap_write_and_wait(fs_info->btree_inode->i_mapping); - extent_io_tree_empty_lru(&fs_info->free_space_cache); - extent_io_tree_empty_lru(&fs_info->block_group_cache); - extent_io_tree_empty_lru(&fs_info->pinned_extents); - extent_io_tree_empty_lru(&fs_info->pending_del); - extent_io_tree_empty_lru(&fs_info->extent_ins); - extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); - truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); btrfs_stop_workers(&fs_info->fixup_workers); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d4a63ae7ed1b..32bb4ed3723d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -91,29 +91,16 @@ void extent_io_tree_init(struct extent_io_tree *tree, struct address_space *mapping, gfp_t mask) { tree->state.rb_node = NULL; + tree->buffer.rb_node = NULL; tree->ops = NULL; tree->dirty_bytes = 0; spin_lock_init(&tree->lock); - spin_lock_init(&tree->lru_lock); + spin_lock_init(&tree->buffer_lock); tree->mapping = mapping; - INIT_LIST_HEAD(&tree->buffer_lru); - tree->lru_size = 0; tree->last = NULL; } EXPORT_SYMBOL(extent_io_tree_init); -void extent_io_tree_empty_lru(struct extent_io_tree *tree) -{ - struct extent_buffer *eb; - while(!list_empty(&tree->buffer_lru)) { - eb = list_entry(tree->buffer_lru.next, struct extent_buffer, - lru); - list_del_init(&eb->lru); - free_extent_buffer(eb); - } -} -EXPORT_SYMBOL(extent_io_tree_empty_lru); - struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; @@ -245,6 +232,50 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree, return ret; } +static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree, + u64 offset, struct rb_node *node) +{ + struct rb_root *root = &tree->buffer; + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct extent_buffer *eb; + + while(*p) { + parent = *p; + eb = rb_entry(parent, struct extent_buffer, rb_node); + + if (offset < eb->start) + p = &(*p)->rb_left; + else if (offset > eb->start) + p = &(*p)->rb_right; + else + return eb; + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct extent_buffer *buffer_search(struct extent_io_tree *tree, + u64 offset) +{ + struct rb_root *root = &tree->buffer; + struct rb_node * n = root->rb_node; + struct extent_buffer *eb; + + while(n) { + eb = rb_entry(n, struct extent_buffer, rb_node); + if (offset < eb->start) + n = n->rb_left; + else if (offset > eb->start) + n = n->rb_right; + else + return eb; + } + return NULL; +} + /* * utility function to look for merge candidates inside a given range. * Any extents with matching state are merged together into a single @@ -1817,9 +1848,8 @@ void set_page_extent_mapped(struct page *page) { if (!PagePrivate(page)) { SetPagePrivate(page); - WARN_ON(!page->mapping->a_ops->invalidatepage); - set_page_private(page, EXTENT_PAGE_PRIVATE); page_cache_get(page); + set_page_private(page, EXTENT_PAGE_PRIVATE); } } @@ -2627,51 +2657,6 @@ out: return sector; } -static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb) -{ - if (list_empty(&eb->lru)) { - extent_buffer_get(eb); - list_add(&eb->lru, &tree->buffer_lru); - tree->lru_size++; - if (tree->lru_size >= BUFFER_LRU_MAX) { - struct extent_buffer *rm; - rm = list_entry(tree->buffer_lru.prev, - struct extent_buffer, lru); - tree->lru_size--; - list_del_init(&rm->lru); - free_extent_buffer(rm); - } - } else - list_move(&eb->lru, &tree->buffer_lru); - return 0; -} -static struct extent_buffer *find_lru(struct extent_io_tree *tree, - u64 start, unsigned long len) -{ - struct list_head *lru = &tree->buffer_lru; - struct list_head *cur = lru->next; - struct extent_buffer *eb; - - if (list_empty(lru)) - return NULL; - - do { - eb = list_entry(cur, struct extent_buffer, lru); - if (eb->start == start && eb->len == len) { - extent_buffer_get(eb); - return eb; - } - cur = cur->next; - } while (cur != lru); - return NULL; -} - -static inline unsigned long num_extent_pages(u64 start, u64 len) -{ - return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT); -} - static inline struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i) { @@ -2688,44 +2673,10 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, return p; } -int release_extent_buffer_tail_pages(struct extent_buffer *eb) +static inline unsigned long num_extent_pages(u64 start, u64 len) { - unsigned long num_pages = num_extent_pages(eb->start, eb->len); - struct page *page; - unsigned long i; - - if (num_pages == 1) - return 0; - for (i = 1; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - page_cache_release(page); - } - return 0; -} - - -int invalidate_extent_lru(struct extent_io_tree *tree, u64 start, - unsigned long len) -{ - struct list_head *lru = &tree->buffer_lru; - struct list_head *cur = lru->next; - struct extent_buffer *eb; - int found = 0; - - spin_lock(&tree->lru_lock); - if (list_empty(lru)) - goto out; - - do { - eb = list_entry(cur, struct extent_buffer, lru); - if (eb->start <= start && eb->start + eb->len > start) { - eb->flags &= ~EXTENT_UPTODATE; - } - cur = cur->next; - } while (cur != lru); -out: - spin_unlock(&tree->lru_lock); - return found; + return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT); } static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, @@ -2736,15 +2687,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, struct extent_buffer *eb = NULL; unsigned long flags; - spin_lock(&tree->lru_lock); - eb = find_lru(tree, start, len); - spin_unlock(&tree->lru_lock); - if (eb) { - return eb; - } - eb = kmem_cache_zalloc(extent_buffer_cache, mask); - INIT_LIST_HEAD(&eb->lru); eb->start = start; eb->len = len; spin_lock_irqsave(&leak_lock, flags); @@ -2773,17 +2716,24 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; + struct extent_buffer *exists = NULL; struct page *p; struct address_space *mapping = tree->mapping; int uptodate = 1; + spin_lock(&tree->buffer_lock); + eb = buffer_search(tree, start); + if (eb) { + atomic_inc(&eb->refs); + spin_unlock(&tree->buffer_lock); + return eb; + } + spin_unlock(&tree->buffer_lock); + eb = __alloc_extent_buffer(tree, start, len, mask); if (!eb) return NULL; - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - if (page0) { eb->first_page = page0; i = 1; @@ -2800,7 +2750,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); if (!p) { WARN_ON(1); - goto fail; + goto free_eb; } set_page_extent_mapped(p); mark_page_accessed(p); @@ -2818,25 +2768,28 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, eb->flags |= EXTENT_UPTODATE; eb->flags |= EXTENT_BUFFER_FILLED; -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); + spin_lock(&tree->buffer_lock); + exists = buffer_tree_insert(tree, start, &eb->rb_node); + if (exists) { + /* add one reference for the caller */ + atomic_inc(&exists->refs); + spin_unlock(&tree->buffer_lock); + goto free_eb; + } + spin_unlock(&tree->buffer_lock); + + /* add one reference for the tree */ + atomic_inc(&eb->refs); return eb; -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); +free_eb: if (!atomic_dec_and_test(&eb->refs)) - return NULL; - for (index = 1; index < i; index++) { + return exists; + for (index = 1; index < i; index++) page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); + page_cache_release(extent_buffer_page(eb, 0)); __free_extent_buffer(eb); - return NULL; + return exists; } EXPORT_SYMBOL(alloc_extent_buffer); @@ -2844,89 +2797,27 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len, gfp_t mask) { - unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; - struct page *p; - struct address_space *mapping = tree->mapping; - int uptodate = 1; - eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb) - return NULL; + spin_lock(&tree->buffer_lock); + eb = buffer_search(tree, start); + if (eb) + atomic_inc(&eb->refs); + spin_unlock(&tree->buffer_lock); - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - - for (i = 0; i < num_pages; i++, index++) { - p = find_get_page(mapping, index); - if (!p) { - goto fail; - } - if (TestSetPageLocked(p)) { - page_cache_release(p); - goto fail; - } - - set_page_extent_mapped(p); - mark_page_accessed(p); - - if (i == 0) { - eb->first_page = p; - set_page_extent_head(p, len); - } else { - set_page_private(p, EXTENT_PAGE_PRIVATE); - } - - if (!PageUptodate(p)) - uptodate = 0; - unlock_page(p); - } - if (uptodate) - eb->flags |= EXTENT_UPTODATE; - eb->flags |= EXTENT_BUFFER_FILLED; - -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); return eb; -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); - if (!atomic_dec_and_test(&eb->refs)) - return NULL; - for (index = 1; index < i; index++) { - page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); - return NULL; } EXPORT_SYMBOL(find_extent_buffer); void free_extent_buffer(struct extent_buffer *eb) { - unsigned long i; - unsigned long num_pages; - if (!eb) return; if (!atomic_dec_and_test(&eb->refs)) return; - WARN_ON(!list_empty(&eb->lru)); - num_pages = num_extent_pages(eb->start, eb->len); - - for (i = 1; i < num_pages; i++) { - page_cache_release(extent_buffer_page(eb, i)); - } - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); + WARN_ON(1); } EXPORT_SYMBOL(free_extent_buffer); @@ -3583,3 +3474,35 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, } } EXPORT_SYMBOL(memmove_extent_buffer); + +int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) +{ + u64 start = page_offset(page); + struct extent_buffer *eb; + int ret = 1; + unsigned long i; + unsigned long num_pages; + + spin_lock(&tree->buffer_lock); + eb = buffer_search(tree, start); + if (!eb) + goto out; + + if (atomic_read(&eb->refs) > 1) { + ret = 0; + goto out; + } + /* at this point we can safely release the extent buffer */ + num_pages = num_extent_pages(eb->start, eb->len); + for (i = 0; i < num_pages; i++) { + struct page *page = extent_buffer_page(eb, i); + page_cache_release(page); + } + rb_erase(&eb->rb_node, &tree->buffer); + __free_extent_buffer(eb); +out: + spin_unlock(&tree->buffer_lock); + return ret; +} +EXPORT_SYMBOL(try_release_extent_buffer); + diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 23affd27af5e..dd367617d780 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -54,13 +54,12 @@ struct extent_io_ops { struct extent_io_tree { struct rb_root state; + struct rb_root buffer; struct address_space *mapping; u64 dirty_bytes; spinlock_t lock; + spinlock_t buffer_lock; struct extent_io_ops *ops; - spinlock_t lru_lock; - struct list_head buffer_lru; - int lru_size; struct extent_state *last; }; @@ -87,10 +86,10 @@ struct extent_buffer { unsigned long map_start; unsigned long map_len; struct page *first_page; - struct list_head lru; atomic_t refs; int flags; struct list_head leak_list; + struct rb_node rb_node; }; struct extent_map_tree; @@ -112,10 +111,10 @@ typedef struct extent_map *(get_extent_t)(struct inode *inode, void extent_io_tree_init(struct extent_io_tree *tree, struct address_space *mapping, gfp_t mask); -void extent_io_tree_empty_lru(struct extent_io_tree *tree); int try_release_extent_mapping(struct extent_map_tree *map, struct extent_io_tree *tree, struct page *page, gfp_t mask); +int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page); int try_release_extent_state(struct extent_map_tree *map, struct extent_io_tree *tree, struct page *page, gfp_t mask); @@ -241,8 +240,6 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long *map_start, unsigned long *map_len, int km); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); -int invalidate_extent_lru(struct extent_io_tree *tree, u64 start, - unsigned long len); int release_extent_buffer_tail_pages(struct extent_buffer *eb); int extent_range_uptodate(struct extent_io_tree *tree, u64 start, u64 end); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8fb6dc25e7a5..60852ada658e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2670,7 +2670,6 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) map = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(map, tree, page, gfp_flags); if (ret == 1) { - invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -2721,8 +2720,6 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) ClearPageChecked(page); if (PagePrivate(page)) { - invalidate_extent_lru(tree, page_offset(page), - PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); From a61e6f29dc7c9d56a776a518eed92bbc61848263 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 11:18:08 -0400 Subject: [PATCH 0602/2985] Btrfs: Use a mutex in the extent buffer for tree block locking This replaces the use of the page cache lock bit for locking, which wasn't suitable for block size < page size and couldn't be used recursively. The mutexes alone don't fix either problem, but they are the first step. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 7 +------ fs/btrfs/extent_io.c | 9 +++++++++ fs/btrfs/extent_io.h | 1 + fs/btrfs/locking.c | 13 ++++++------- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ccd49322f793..c51cd11de206 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1451,7 +1451,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, struct extent_buffer *buf; buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { - if (!btrfs_try_tree_lock(buf) && + if (btrfs_try_tree_lock(buf) && btrfs_buffer_uptodate(buf, 0)) { u64 transid = root->fs_info->running_transaction->transid; @@ -3345,11 +3345,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) set_state_private(block_group_cache, found_key.objectid, (unsigned long)cache); - /* hack for now */ - if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { - cache_block_group(root->fs_info->extent_root, - cache); - } if (key.objectid >= btrfs_super_total_bytes(&info->super_copy)) break; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 32bb4ed3723d..7380449cb5b3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2690,6 +2690,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, eb = kmem_cache_zalloc(extent_buffer_cache, mask); eb->start = start; eb->len = len; + mutex_init(&eb->mutex); spin_lock_irqsave(&leak_lock, flags); list_add(&eb->leak_list, &buffers); spin_unlock_irqrestore(&leak_lock, flags); @@ -2837,6 +2838,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); + lock_page(page); if (i == 0) set_page_extent_head(page, eb->len); else @@ -2854,6 +2856,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, end = start + PAGE_CACHE_SIZE - 1; if (test_range_bit(tree, start, end, EXTENT_DIRTY, 0)) { + unlock_page(page); continue; } } @@ -2865,6 +2868,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, PAGECACHE_TAG_DIRTY); } read_unlock_irq(&page->mapping->tree_lock); + unlock_page(page); } return 0; } @@ -2893,12 +2897,17 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, * on us if the page isn't already dirty. */ if (i == 0) { + lock_page(page); set_page_extent_head(page, eb->len); } else if (PagePrivate(page) && page->private != EXTENT_PAGE_PRIVATE) { + lock_page(page); set_page_extent_mapped(page); + unlock_page(page); } __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); + if (i == 0) + unlock_page(page); } return set_extent_dirty(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index dd367617d780..6c03e6a19938 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -90,6 +90,7 @@ struct extent_buffer { int flags; struct list_head leak_list; struct rb_node rb_node; + struct mutex mutex; }; struct extent_map_tree; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 058a506a0dd8..01a9ac2be3f7 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -29,32 +29,31 @@ int btrfs_tree_lock(struct extent_buffer *eb) { int i; - if (!TestSetPageLocked(eb->first_page)) + if (mutex_trylock(&eb->mutex)) return 0; for (i = 0; i < 512; i++) { cpu_relax(); - if (!TestSetPageLocked(eb->first_page)) + if (mutex_trylock(&eb->mutex)) return 0; } cpu_relax(); - lock_page(eb->first_page); + mutex_lock(&eb->mutex); return 0; } int btrfs_try_tree_lock(struct extent_buffer *eb) { - return TestSetPageLocked(eb->first_page); + return mutex_trylock(&eb->mutex); } int btrfs_tree_unlock(struct extent_buffer *eb) { - WARN_ON(!PageLocked(eb->first_page)); - unlock_page(eb->first_page); + mutex_unlock(&eb->mutex); return 0; } int btrfs_tree_locked(struct extent_buffer *eb) { - return PageLocked(eb->first_page); + return mutex_is_locked(&eb->mutex); } From f421950f86bf96a11fef932e167ab2e70d4c43a0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 11:18:09 -0400 Subject: [PATCH 0603/2985] Btrfs: Fix some data=ordered related data corruptions Stress testing was showing data checksum errors, most of which were caused by a lookup bug in the extent_map tree. The tree was caching the last pointer returned, and searches would check the last pointer first. But, search callers also expect the search to return the very first matching extent in the range, which wasn't always true with the last pointer usage. For now, the code to cache the last return value is just removed. It is easy to fix, but I think lookups are rare enough that it isn't required anymore. This commit also replaces do_sync_mapping_range with a local copy of the related functions. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 + fs/btrfs/extent_io.c | 20 ------- fs/btrfs/extent_io.h | 1 - fs/btrfs/extent_map.c | 9 ---- fs/btrfs/extent_map.h | 1 - fs/btrfs/file.c | 15 +++--- fs/btrfs/inode.c | 54 ++++++++++--------- fs/btrfs/ordered-data.c | 115 +++++++++++++++++++++++++++++++++------- fs/btrfs/ordered-data.h | 4 ++ fs/btrfs/transaction.c | 2 - 10 files changed, 140 insertions(+), 83 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 96ab2797c09a..f8fccdac3055 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1590,6 +1590,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +int btrfs_writepages(struct address_space *mapping, + struct writeback_control *wbc); int btrfs_create_subvol_root(struct btrfs_root *new_root, struct btrfs_trans_handle *trans, u64 new_dirid, struct btrfs_block_group_cache *block_group); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7380449cb5b3..9965993748d0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -97,7 +97,6 @@ void extent_io_tree_init(struct extent_io_tree *tree, spin_lock_init(&tree->lock); spin_lock_init(&tree->buffer_lock); tree->mapping = mapping; - tree->last = NULL; } EXPORT_SYMBOL(extent_io_tree_init); @@ -173,12 +172,6 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, struct tree_entry *entry; struct tree_entry *prev_entry = NULL; - if (tree->last) { - struct extent_state *state; - state = tree->last; - if (state->start <= offset && offset <= state->end) - return &tree->last->rb_node; - } while(n) { entry = rb_entry(n, struct tree_entry, rb_node); prev = n; @@ -189,7 +182,6 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, else if (offset > entry->end) n = n->rb_right; else { - tree->last = rb_entry(n, struct extent_state, rb_node); return n; } } @@ -223,10 +215,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree, ret = __etree_search(tree, offset, &prev, NULL); if (!ret) { - if (prev) { - tree->last = rb_entry(prev, struct extent_state, - rb_node); - } return prev; } return ret; @@ -301,8 +289,6 @@ static int merge_state(struct extent_io_tree *tree, other->state == state->state) { state->start = other->start; other->tree = NULL; - if (tree->last == other) - tree->last = state; rb_erase(&other->rb_node, &tree->state); free_extent_state(other); } @@ -314,8 +300,6 @@ static int merge_state(struct extent_io_tree *tree, other->state == state->state) { other->start = state->start; state->tree = NULL; - if (tree->last == state) - tree->last = other; rb_erase(&state->rb_node, &tree->state); free_extent_state(state); } @@ -378,7 +362,6 @@ static int insert_state(struct extent_io_tree *tree, return -EEXIST; } state->tree = tree; - tree->last = state; merge_state(tree, state); return 0; } @@ -444,9 +427,6 @@ static int clear_state_bit(struct extent_io_tree *tree, if (delete || state->state == 0) { if (state->tree) { clear_state_cb(tree, state, state->state); - if (tree->last == state) { - tree->last = extent_state_next(state); - } rb_erase(&state->rb_node, &tree->state); state->tree = NULL; free_extent_state(state); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6c03e6a19938..315cfceae312 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -60,7 +60,6 @@ struct extent_io_tree { spinlock_t lock; spinlock_t buffer_lock; struct extent_io_ops *ops; - struct extent_state *last; }; struct extent_state { diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 71b1ac155355..8a502ee2f231 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -42,7 +42,6 @@ void extent_map_exit(void) void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) { tree->map.rb_node = NULL; - tree->last = NULL; spin_lock_init(&tree->lock); } EXPORT_SYMBOL(extent_map_tree_init); @@ -239,7 +238,6 @@ int add_extent_mapping(struct extent_map_tree *tree, merge->in_tree = 0; free_extent_map(merge); } - tree->last = em; out: return ret; } @@ -273,10 +271,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 end = range_end(start, len); BUG_ON(spin_trylock(&tree->lock)); - em = tree->last; - if (em && end > em->start && start < extent_map_end(em)) - goto found; - rb_node = __tree_search(&tree->map, start, &prev, &next); if (!rb_node && prev) { em = rb_entry(prev, struct extent_map, rb_node); @@ -305,7 +299,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, found: atomic_inc(&em->refs); - tree->last = em; out: return em; } @@ -327,8 +320,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) BUG_ON(spin_trylock(&tree->lock)); rb_erase(&em->rb_node, &tree->map); em->in_tree = 0; - if (tree->last == em) - tree->last = NULL; return ret; } EXPORT_SYMBOL(remove_extent_mapping); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index a3978ec27846..26ac6fe0b268 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -26,7 +26,6 @@ struct extent_map { struct extent_map_tree { struct rb_root map; - struct extent_map *last; spinlock_t lock; }; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 591a30208acd..e5ffb66ad320 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -381,14 +381,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) break; } if (test_bit(EXTENT_FLAG_PINNED, &em->flags)) { - start = em->start + em->len; - free_extent_map(em); - spin_unlock(&em_tree->lock); - if (start < end) { - len = end - start + 1; - continue; - } - break; + printk(KERN_CRIT "inode %lu trying to drop pinned " + "extent start %llu end %llu, em [%llu %llu]\n", + inode->i_ino, + (unsigned long long)start, + (unsigned long long)end, + (unsigned long long)em->start, + (unsigned long long)em->len); } remove_extent_mapping(em_tree, em); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 60852ada658e..3da12a4d913d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -485,7 +485,7 @@ int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) fixup = kzalloc(sizeof(*fixup), GFP_NOFS); if (!fixup) return -EAGAIN; -printk("queueing worker to fixup page %lu %Lu\n", inode->i_ino, page_offset(page)); + SetPageChecked(page); page_cache_get(page); fixup->work.func = btrfs_writepage_fixup_worker; @@ -502,11 +502,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em; + struct extent_map *em_orig; u64 alloc_hint = 0; u64 clear_start; u64 clear_end; struct list_head list; struct btrfs_key ins; + struct rb_node *rb; int ret; ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); @@ -535,6 +537,22 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) mutex_lock(&BTRFS_I(inode)->extent_mutex); + spin_lock(&em_tree->lock); + clear_start = ordered_extent->file_offset; + clear_end = ordered_extent->file_offset + ordered_extent->len; + em = lookup_extent_mapping(em_tree, clear_start, + ordered_extent->len); + em_orig = em; + while(em && clear_start < extent_map_end(em) && clear_end > em->start) { + clear_bit(EXTENT_FLAG_PINNED, &em->flags); + rb = rb_next(&em->rb_node); + if (!rb) + break; + em = rb_entry(rb, struct extent_map, rb_node); + } + free_extent_map(em_orig); + spin_unlock(&em_tree->lock); + ret = btrfs_drop_extents(trans, root, inode, ordered_extent->file_offset, ordered_extent->file_offset + @@ -548,22 +566,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->len, 0); BUG_ON(ret); - spin_lock(&em_tree->lock); - clear_start = ordered_extent->file_offset; - clear_end = ordered_extent->file_offset + ordered_extent->len; - while(clear_start < clear_end) { - em = lookup_extent_mapping(em_tree, clear_start, - clear_end - clear_start); - if (em) { - clear_bit(EXTENT_FLAG_PINNED, &em->flags); - clear_start = em->start + em->len; - free_extent_map(em); - } else { - break; - } - } - spin_unlock(&em_tree->lock); - btrfs_drop_extent_cache(inode, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1); @@ -2318,7 +2320,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, u64 extent_end = 0; u64 objectid = inode->i_ino; u32 found_type; - struct btrfs_path *path; + struct btrfs_path *path = NULL; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; struct extent_buffer *leaf; @@ -2328,9 +2330,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_trans_handle *trans = NULL; - path = btrfs_alloc_path(); - BUG_ON(!path); - again: spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); @@ -2354,6 +2353,12 @@ again: em->bdev = root->fs_info->fs_devices->latest_bdev; em->start = EXTENT_MAP_HOLE; em->len = (u64)-1; + + if (!path) { + path = btrfs_alloc_path(); + BUG_ON(!path); + } + ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); if (ret < 0) { @@ -2530,7 +2535,8 @@ insert: } spin_unlock(&em_tree->lock); out: - btrfs_free_path(path); + if (path) + btrfs_free_path(path); if (trans) { ret = btrfs_end_transaction(trans, root); if (!err) { @@ -2643,8 +2649,8 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } -static int btrfs_writepages(struct address_space *mapping, - struct writeback_control *wbc) +int btrfs_writepages(struct address_space *mapping, + struct writeback_control *wbc) { struct extent_io_tree *tree; tree = &BTRFS_I(mapping->host)->io_tree; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 0d87795fdd8f..830dbaea6853 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include "ctree.h" #include "transaction.h" #include "btrfs_inode.h" @@ -307,12 +309,7 @@ void btrfs_start_ordered_extent(struct inode *inode, * start IO on any dirty ones so the wait doesn't stall waiting * for pdflush to find them */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) - do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); -#else - do_sync_mapping_range(inode->i_mapping, start, end, - SYNC_FILE_RANGE_WRITE); -#endif + btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE); if (wait) wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags)); @@ -327,28 +324,26 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) u64 orig_end; u64 wait_end; struct btrfs_ordered_extent *ordered; - u64 mask = BTRFS_I(inode)->root->sectorsize - 1; if (start + len < start) { - wait_end = (inode->i_size + mask) & ~mask; - orig_end = (u64)-1; + orig_end = INT_LIMIT(loff_t); } else { orig_end = start + len - 1; - wait_end = orig_end; + if (orig_end > INT_LIMIT(loff_t)) + orig_end = INT_LIMIT(loff_t); } + wait_end = orig_end; again: /* start IO across the range first to instantiate any delalloc * extents */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) - do_sync_file_range(file, start, wait_end, SYNC_FILE_RANGE_WRITE); -#else - do_sync_mapping_range(inode->i_mapping, start, wait_end, - SYNC_FILE_RANGE_WRITE); -#endif - end = orig_end; - wait_on_extent_writeback(&BTRFS_I(inode)->io_tree, start, orig_end); + btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); + btrfs_wait_on_page_writeback_range(inode->i_mapping, + start >> PAGE_CACHE_SHIFT, + orig_end >> PAGE_CACHE_SHIFT); + + end = orig_end; while(1) { ordered = btrfs_lookup_first_ordered_extent(inode, end); if (!ordered) { @@ -565,3 +560,87 @@ out: return ret; } + +/** + * taken from mm/filemap.c because it isn't exported + * + * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range + * @mapping: address space structure to write + * @start: offset in bytes where the range starts + * @end: offset in bytes where the range ends (inclusive) + * @sync_mode: enable synchronous operation + * + * Start writeback against all of a mapping's dirty pages that lie + * within the byte offsets inclusive. + * + * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as + * opposed to a regular memory cleansing writeback. The difference between + * these two operations is that if a dirty page/buffer is encountered, it must + * be waited upon, and not just skipped over. + */ +int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, + loff_t end, int sync_mode) +{ + struct writeback_control wbc = { + .sync_mode = sync_mode, + .nr_to_write = mapping->nrpages * 2, + .range_start = start, + .range_end = end, + .for_writepages = 1, + }; + return btrfs_writepages(mapping, &wbc); +} + +/** + * taken from mm/filemap.c because it isn't exported + * + * wait_on_page_writeback_range - wait for writeback to complete + * @mapping: target address_space + * @start: beginning page index + * @end: ending page index + * + * Wait for writeback to complete against pages indexed by start->end + * inclusive + */ +int btrfs_wait_on_page_writeback_range(struct address_space *mapping, + pgoff_t start, pgoff_t end) +{ + struct pagevec pvec; + int nr_pages; + int ret = 0; + pgoff_t index; + + if (end < start) + return 0; + + pagevec_init(&pvec, 0); + index = start; + while ((index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_WRITEBACK, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { + unsigned i; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* until radix tree lookup accepts end_index */ + if (page->index > end) + continue; + + wait_on_page_writeback(page); + if (PageError(page)) + ret = -EIO; + } + pagevec_release(&pvec); + cond_resched(); + } + + /* Check for outstanding write errors */ + if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) + ret = -ENOSPC; + if (test_and_clear_bit(AS_EIO, &mapping->flags)) + ret = -EIO; + + return ret; +} diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 1794efd13ca3..8e8e3c0404f3 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -132,4 +132,8 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); int btrfs_ordered_update_i_size(struct inode *inode, struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum); +int btrfs_wait_on_page_writeback_range(struct address_space *mapping, + pgoff_t start, pgoff_t end); +int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, + loff_t end, int sync_mode); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 05823904ecba..38c75a0256cb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -649,7 +649,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, extent_io_tree_init(pinned_copy, root->fs_info->btree_inode->i_mapping, GFP_NOFS); -printk("commit trans %Lu\n", trans->transid); trans->transaction->in_commit = 1; trans->transaction->blocked = 1; cur_trans = trans->transaction; @@ -745,7 +744,6 @@ printk("commit trans %Lu\n", trans->transid); list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); mutex_unlock(&root->fs_info->trans_mutex); -printk("done commit trans %Lu\n", trans->transid); kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->fs_info->closing) { From 6dddcbeb28b34620ad033f1e8d9f6960bafdd7d2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 11:18:09 -0400 Subject: [PATCH 0604/2985] Btrfs: Use mutex_lock_nested for tree locking Lockdep has the notion of locking subclasses so that you can identify locks you expect to be taken after other locks of the same class. This changes the per-extent buffer btree locking routines to use a subclass based on the level in the tree. Unfortunately, lockdep can only handle 8 total subclasses, and the btrfs max level is also 8. So when lockdep is on, use a lower max level. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 +++++- fs/btrfs/locking.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f8fccdac3055..eeb5afa6e9b1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -42,7 +42,11 @@ struct btrfs_ordered_sum; #define BTRFS_MAGIC "_B5RfS_M" -#define BTRFS_MAX_LEVEL 8 +#ifdef CONFIG_LOCKDEP +# define BTRFS_MAX_LEVEL 7 +#else +# define BTRFS_MAX_LEVEL 8 +#endif /* holds pointers to all of the tree roots */ #define BTRFS_ROOT_TREE_OBJECTID 1ULL diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 01a9ac2be3f7..27a02376ab14 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -37,7 +37,7 @@ int btrfs_tree_lock(struct extent_buffer *eb) return 0; } cpu_relax(); - mutex_lock(&eb->mutex); + mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); return 0; } From e34a5b4f77b8448cf2863ad0cbac35e2c2a86a0a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 12:08:37 -0400 Subject: [PATCH 0605/2985] Btrfs: Add some conditional schedules near the alloc_mutex This helps prevent stalls, especially while the snapshot cleaner is running hard Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c51cd11de206..72fa28236e5d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -937,6 +937,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); for (i = 0; i < nritems; i++) { + cond_resched(); if (level == 0) { u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); @@ -2189,6 +2190,7 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, for (i = 0; i < nritems; i++) { u64 disk_bytenr; + cond_resched(); btrfs_item_key_to_cpu(leaf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) From c286ac48ed7aaf53586f575af6053ae2a0f8554a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 23:06:41 -0400 Subject: [PATCH 0606/2985] Btrfs: alloc_mutex latency reduction This releases the alloc_mutex in a few places that hold it for over long operations. btrfs_lookup_block_group is changed so that it doesn't need the mutex at all. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 100 ++++++++++++++++++++++++++++++++--------- 2 files changed, 81 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index eeb5afa6e9b1..90504ba7f838 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -483,6 +483,7 @@ struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; struct btrfs_space_info *space_info; + spinlock_t lock; u64 pinned; u64 flags; int cached; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 72fa28236e5d..febc6295c7a9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -319,7 +319,7 @@ no_cache: cache = btrfs_lookup_first_block_group(root->fs_info, last); } cache_miss = 0; - cache = __btrfs_find_block_group(root, cache, last, data, 0); + cache = btrfs_find_block_group(root, cache, last, data, 0); if (!cache) goto no_cache; *cache_ret = cache; @@ -379,19 +379,25 @@ __btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *shint; shint = btrfs_lookup_first_block_group(info, search_start); if (shint && block_group_bits(shint, data) && !shint->ro) { + spin_lock(&shint->lock); used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < div_factor(shint->key.offset, factor)) { + spin_unlock(&shint->lock); return shint; } + spin_unlock(&shint->lock); } } if (hint && !hint->ro && block_group_bits(hint, data)) { + spin_lock(&hint->lock); used = btrfs_block_group_used(&hint->item); if (used + hint->pinned < div_factor(hint->key.offset, factor)) { + spin_unlock(&hint->lock); return hint; } + spin_unlock(&hint->lock); last = hint->key.objectid + hint->key.offset; } else { if (hint) @@ -413,6 +419,7 @@ again: } cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; + spin_lock(&cache->lock); last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); @@ -420,9 +427,11 @@ again: free_check = div_factor(cache->key.offset, factor); if (used + cache->pinned < free_check) { found_group = cache; + spin_unlock(&cache->lock); goto found; } } + spin_unlock(&cache->lock); cond_resched(); } if (!wrapped) { @@ -447,9 +456,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, { struct btrfs_block_group_cache *ret; - mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_find_block_group(root, hint, search_start, data, owner); - mutex_unlock(&root->fs_info->alloc_mutex); return ret; } static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, @@ -1262,21 +1269,25 @@ static int update_block_group(struct btrfs_trans_handle *trans, set_extent_bits(&info->block_group_cache, start, end, BLOCK_GROUP_DIRTY, GFP_NOFS); + spin_lock(&cache->lock); old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { old_val += num_bytes; cache->space_info->bytes_used += num_bytes; + btrfs_set_block_group_used(&cache->item, old_val); + spin_unlock(&cache->lock); } else { old_val -= num_bytes; cache->space_info->bytes_used -= num_bytes; + btrfs_set_block_group_used(&cache->item, old_val); + spin_unlock(&cache->lock); if (mark_free) { set_extent_dirty(&info->free_space_cache, bytenr, bytenr + num_bytes - 1, GFP_NOFS); } } - btrfs_set_block_group_used(&cache->item, old_val); total -= num_bytes; bytenr += num_bytes; } @@ -1325,14 +1336,18 @@ static int update_pinned_extents(struct btrfs_root *root, } if (pin) { if (cache) { + spin_lock(&cache->lock); cache->pinned += len; cache->space_info->bytes_pinned += len; + spin_unlock(&cache->lock); } fs_info->total_pinned += len; } else { if (cache) { + spin_lock(&cache->lock); cache->pinned -= len; cache->space_info->bytes_pinned -= len; + spin_unlock(&cache->lock); } fs_info->total_pinned -= len; } @@ -1380,6 +1395,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); set_extent_dirty(free_space_cache, start, end, GFP_NOFS); + if (need_resched()) { + mutex_unlock(&root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&root->fs_info->alloc_mutex); + } } mutex_unlock(&root->fs_info->alloc_mutex); return 0; @@ -1417,8 +1437,16 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, &extent_item, sizeof(extent_item)); clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, GFP_NOFS); - eb = read_tree_block(extent_root, ins.objectid, ins.offset, - trans->transid); + + eb = btrfs_find_tree_block(extent_root, ins.objectid, + ins.offset); + + if (!btrfs_buffer_uptodate(eb, trans->transid)) { + mutex_unlock(&extent_root->fs_info->alloc_mutex); + btrfs_read_buffer(eb, trans->transid); + mutex_lock(&extent_root->fs_info->alloc_mutex); + } + btrfs_tree_lock(eb); level = btrfs_header_level(eb); if (level == 0) { @@ -1437,6 +1465,11 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, 0, level, btrfs_disk_key_objectid(&first)); BUG_ON(err); + if (need_resched()) { + mutex_unlock(&extent_root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&extent_root->fs_info->alloc_mutex); + } } btrfs_free_path(path); return 0; @@ -1640,15 +1673,28 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct EXTENT_LOCKED); if (ret) break; - update_pinned_extents(extent_root, start, end + 1 - start, 1); clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, GFP_NOFS); - ret = __free_extent(trans, extent_root, - start, end + 1 - start, - extent_root->root_key.objectid, - 0, 0, 0, 0, 0); + if (!test_range_bit(&extent_root->fs_info->extent_ins, + start, end, EXTENT_LOCKED, 0)) { + update_pinned_extents(extent_root, start, + end + 1 - start, 1); + ret = __free_extent(trans, extent_root, + start, end + 1 - start, + extent_root->root_key.objectid, + 0, 0, 0, 0, 0); + } else { + clear_extent_bits(&extent_root->fs_info->extent_ins, + start, end, EXTENT_LOCKED, GFP_NOFS); + } if (ret) err = ret; + + if (need_resched()) { + mutex_unlock(&extent_root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&extent_root->fs_info->alloc_mutex); + } } return err; } @@ -1768,12 +1814,12 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, block_group = btrfs_lookup_first_block_group(info, hint_byte); if (!block_group) hint_byte = search_start; - block_group = __btrfs_find_block_group(root, block_group, + block_group = btrfs_find_block_group(root, block_group, hint_byte, data, 1); if (last_ptr && *last_ptr == 0 && block_group) hint_byte = block_group->key.objectid; } else { - block_group = __btrfs_find_block_group(root, + block_group = btrfs_find_block_group(root, trans->block_group, search_start, data, 1); } @@ -1895,7 +1941,7 @@ enospc: } block_group = btrfs_lookup_first_block_group(info, search_start); cond_resched(); - block_group = __btrfs_find_block_group(root, block_group, + block_group = btrfs_find_block_group(root, block_group, search_start, data, 0); goto check_failed; @@ -3032,11 +3078,14 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root, u64 new_alloc_flags; u64 calc; + spin_lock(&shrink_block_group->lock); if (btrfs_block_group_used(&shrink_block_group->item) > 0) { - + spin_unlock(&shrink_block_group->lock); mutex_unlock(&root->fs_info->alloc_mutex); + trans = btrfs_start_transaction(root, 1); mutex_lock(&root->fs_info->alloc_mutex); + spin_lock(&shrink_block_group->lock); new_alloc_flags = update_block_group_flags(root, shrink_block_group->flags); @@ -3046,13 +3095,16 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root, } else { calc = shrink_block_group->key.offset; } + spin_unlock(&shrink_block_group->lock); + do_chunk_alloc(trans, root->fs_info->extent_root, calc + 2 * 1024 * 1024, new_alloc_flags, force); mutex_unlock(&root->fs_info->alloc_mutex); btrfs_end_transaction(trans, root); mutex_lock(&root->fs_info->alloc_mutex); - } + } else + spin_unlock(&shrink_block_group->lock); return 0; } @@ -3199,6 +3251,7 @@ next: mutex_unlock(&root->fs_info->alloc_mutex); trans = btrfs_start_transaction(root, 1); + mutex_lock(&root->fs_info->alloc_mutex); memcpy(&key, &shrink_block_group->key, sizeof(key)); @@ -3316,6 +3369,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) break; } + spin_lock_init(&cache->lock); read_extent_buffer(leaf, &cache->item, btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); @@ -3343,10 +3397,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) /* use EXTENT_LOCKED to prevent merging */ set_extent_bits(block_group_cache, found_key.objectid, found_key.objectid + found_key.offset - 1, - bit | EXTENT_LOCKED, GFP_NOFS); + EXTENT_LOCKED, GFP_NOFS); set_state_private(block_group_cache, found_key.objectid, (unsigned long)cache); - + set_extent_bits(block_group_cache, found_key.objectid, + found_key.objectid + found_key.offset - 1, + bit | EXTENT_LOCKED, GFP_NOFS); if (key.objectid >= btrfs_super_total_bytes(&info->super_copy)) break; @@ -3377,6 +3433,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, BUG_ON(!cache); cache->key.objectid = chunk_offset; cache->key.offset = size; + spin_lock_init(&cache->lock); btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); btrfs_set_block_group_used(&cache->item, bytes_used); @@ -3391,10 +3448,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, bit = block_group_state_bits(type); set_extent_bits(block_group_cache, chunk_offset, chunk_offset + size - 1, - bit | EXTENT_LOCKED, GFP_NOFS); - + EXTENT_LOCKED, GFP_NOFS); set_state_private(block_group_cache, chunk_offset, (unsigned long)cache); + set_extent_bits(block_group_cache, chunk_offset, + chunk_offset + size - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, sizeof(cache->item)); BUG_ON(ret); From ed98b56a6393c5e150fd5095b9eb7fd7d3cfb041 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 23:06:42 -0400 Subject: [PATCH 0607/2985] Btrfs: Take the csum mutex while reading checksums Signed-off-by: Chris Mason --- fs/btrfs/file-item.c | 8 +++++--- fs/btrfs/inode.c | 2 ++ fs/btrfs/ordered-data.c | 2 +- fs/btrfs/ordered-data.h | 2 +- fs/btrfs/transaction.c | 3 +++ 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 45127e4797c8..afe42d00b5a6 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -152,7 +152,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, if (!sums) return -ENOMEM; - sector_sum = &sums->sums; + sector_sum = sums->sums; sums->file_offset = page_offset(bvec->bv_page) + bvec->bv_offset; sums->len = bio->bi_size; INIT_LIST_HEAD(&sums->list); @@ -174,7 +174,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), GFP_NOFS); BUG_ON(!sums); - sector_sum = &sums->sums; + sector_sum = sums->sums; sums->len = bytes_left; sums->file_offset = offset; ordered = btrfs_lookup_ordered_extent(inode, @@ -193,12 +193,14 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, (char *)§or_sum->sum); sector_sum->offset = page_offset(bvec->bv_page) + bvec->bv_offset; + sector_sum++; bio_index++; total_bytes += bvec->bv_len; this_sum_bytes += bvec->bv_len; bvec++; } + this_sum_bytes = 0; btrfs_add_ordered_sum(inode, ordered, sums); btrfs_put_ordered_extent(ordered); return 0; @@ -231,7 +233,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - sector_sum = &sums->sums; + sector_sum = sums->sums; again: next_offset = (u64)-1; found_next = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3da12a4d913d..28e667052ec3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -612,6 +612,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) return 0; path = btrfs_alloc_path(); + mutex_lock(&BTRFS_I(inode)->csum_mutex); item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); if (IS_ERR(item)) { /* @@ -640,6 +641,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) found: set_state_private(io_tree, start, csum); out: + mutex_unlock(&BTRFS_I(inode)->csum_mutex); if (path) btrfs_free_path(path); return ret; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 830dbaea6853..b695f5b29c45 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -545,7 +545,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list); if (offset >= ordered_sum->file_offset) { num_sectors = ordered_sum->len / sectorsize; - sector_sums = &ordered_sum->sums; + sector_sums = ordered_sum->sums; for (i = 0; i < num_sectors; i++) { if (sector_sums[i].offset == offset) { *sum = sector_sums[i].sum; diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 8e8e3c0404f3..36e63f1f79b3 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -46,7 +46,7 @@ struct btrfs_ordered_sum { unsigned long len; struct list_head list; /* last field is a variable length array of btrfs_sector_sums */ - struct btrfs_sector_sum sums; + struct btrfs_sector_sum sums[]; }; /* diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 38c75a0256cb..0f756e0175c0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -382,6 +382,9 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, memcpy(dirty->root, root, sizeof(*root)); dirty->root->node = root->commit_root; dirty->latest_root = root; + spin_lock_init(&dirty->root->node_lock); + mutex_init(&dirty->root->objectid_mutex); + root->commit_root = NULL; root->root_key.offset = root->fs_info->generation; From 9ba4611a3a7902c6bad70c5c205de5161fcfc17b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 23 Jul 2008 09:26:26 -0400 Subject: [PATCH 0608/2985] Btrfs: Fix 32 bit compiles by using an unsigned long byte count in the ordered extent The ordered extents have to fit in memory, so an unsigned long is sufficient. Signed-off-by: Chris Mason --- fs/btrfs/ordered-data.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 36e63f1f79b3..199cb0b4f1d9 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -95,7 +95,8 @@ struct btrfs_ordered_extent { * calculates the total size you need to allocate for an ordered sum * structure spanning 'bytes' in the file */ -static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes) +static inline int btrfs_ordered_sum_size(struct btrfs_root *root, + unsigned long bytes) { unsigned long num_sectors = (bytes + root->sectorsize - 1) / root->sectorsize; From 89642229a582a5c2b6d2ed8ec16986387d9a9047 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jul 2008 09:41:53 -0400 Subject: [PATCH 0609/2985] Btrfs: Search data ordered extents first for checksums on read Checksum items are not inserted into the tree until all of the io from a given extent is complete. This means one dirty page from an extent may be written, freed, and then read again before the entire extent is on disk and the checksum item is inserted. The checksums themselves are stored in the ordered extent so they can be inserted in bulk when IO is complete. On read, if a checksum item isn't found, the ordered extents were being searched for a checksum record. This all worked most of the time, but the checksum insertion code tries to reduce the number of tree operations by pre-inserting checksum items based on i_size and a few other factors. This means the read code might find a checksum item that hasn't yet really been filled in. This commit changes things to check the ordered extents first and only dive into the btree if nothing was found. This removes the need for extra locking and is more reliable. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 ++++++++- fs/btrfs/extent_io.c | 8 ++++---- fs/btrfs/inode.c | 33 ++++++++++++++++++--------------- fs/btrfs/ordered-data.c | 1 + 4 files changed, 31 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 86e84a8579e3..7ce3f83c5dd6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1011,9 +1011,16 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); spin_unlock(&em_tree->lock); - if (!em) + if (!em) { + __unplug_io_fn(bdi, page); return; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + free_extent_map(em); + __unplug_io_fn(bdi, page); + return; + } offset = offset - em->start; btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree, em->block_start + offset, page); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9965993748d0..e3547a992d5c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1949,18 +1949,18 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur); cur + iosize - 1); } if (!ret) { - unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1; - nr -= page->index; + unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + pnr -= page->index; ret = submit_extent_page(READ, tree, page, sector, iosize, page_offset, - bdev, bio, nr, + bdev, bio, pnr, end_bio_extent_readpage, mirror_num); + nr++; } if (ret) SetPageError(page); cur = cur + iosize; page_offset += iosize; - nr++; } if (!nr) { if (!PageError(page)) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 28e667052ec3..0e90315ea803 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -611,22 +611,25 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) btrfs_test_flag(inode, NODATASUM)) return 0; + /* + * It is possible there is an ordered extent that has + * not yet finished for this range in the file. If so, + * that extent will have a csum cached, and it will insert + * the sum after all the blocks in the extent are fully + * on disk. So, look for an ordered extent and use the + * sum if found. We have to do this before looking in the + * btree because csum items are pre-inserted based on + * the file size. btrfs_lookup_csum might find an item + * that still hasn't been fully filled. + */ + ret = btrfs_find_ordered_sum(inode, start, &csum); + if (ret == 0) + goto found; + + ret = 0; path = btrfs_alloc_path(); - mutex_lock(&BTRFS_I(inode)->csum_mutex); item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); if (IS_ERR(item)) { - /* - * It is possible there is an ordered extent that has - * not yet finished for this range in the file. If so, - * that extent will have a csum cached, and it will insert - * the sum after all the blocks in the extent are fully - * on disk. So, look for an ordered extent and use the - * sum if found. - */ - ret = btrfs_find_ordered_sum(inode, start, &csum); - if (ret == 0) - goto found; - ret = PTR_ERR(item); /* a csum that isn't present is a preallocated region. */ if (ret == -ENOENT || ret == -EFBIG) @@ -641,7 +644,6 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) found: set_state_private(io_tree, start, csum); out: - mutex_unlock(&BTRFS_I(inode)->csum_mutex); if (path) btrfs_free_path(path); return ret; @@ -1375,7 +1377,7 @@ again: } if (!PageUptodate(page)) { ret = -EIO; - goto out; + goto out_unlock; } } wait_on_page_writeback(page); @@ -1406,6 +1408,7 @@ again: set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); +out_unlock: unlock_page(page); page_cache_release(page); out: diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b695f5b29c45..e42fd233e04c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -557,6 +557,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) } out: mutex_unlock(&tree->mutex); + btrfs_put_ordered_extent(ordered); return ret; } From 4881ee5a2e995c6a8999b56de70aa3834369d8ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jul 2008 09:51:08 -0400 Subject: [PATCH 0610/2985] Btrfs: Fix some build problems on 2.6.18 based enterprise kernels Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 8 ++++++++ fs/btrfs/locking.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 90504ba7f838..8ecac2e77a43 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1595,6 +1595,14 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ + +/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ +#ifdef ClearPageFsMisc +#define ClearPageChecked ClearPageFsMisc +#define SetPageChecked SetPageFsMisc +#define PageChecked PageFsMisc +#endif + int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); int btrfs_create_subvol_root(struct btrfs_root *new_root, diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 27a02376ab14..d617c29787fa 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include "ctree.h" #include "extent_io.h" #include "locking.h" From b3c3da71ed88c06bc18a4d8919c6e4af3b933566 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 23 Jul 2008 12:12:13 -0400 Subject: [PATCH 0611/2985] Btrfs: Add version strings on module load Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 6 +++++- fs/btrfs/super.c | 3 +++ fs/btrfs/version.sh | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/version.sh diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 0e2dcc718de2..f8a38b0da7fb 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -14,8 +14,12 @@ else # Normal Makefile KERNELDIR := /lib/modules/`uname -r`/build -all: +all: version $(MAKE) -C $(KERNELDIR) M=`pwd` modules + +version: + bash version.sh + modules_install: $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install clean: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5e28cf5c2e85..4cb6aac5122e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -45,6 +45,7 @@ #include "print-tree.h" #include "xattr.h" #include "volumes.h" +#include "version.h" #define BTRFS_SUPER_MAGIC 0x9123683E @@ -605,6 +606,8 @@ static int __init init_btrfs_fs(void) err = register_filesystem(&btrfs_fs_type); if (err) goto unregister_ioctl; + + printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION); return 0; unregister_ioctl: diff --git a/fs/btrfs/version.sh b/fs/btrfs/version.sh new file mode 100644 index 000000000000..fd9b53d39860 --- /dev/null +++ b/fs/btrfs/version.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# +# determine-version -- report a useful version for releases +# +# Copyright 2008, Aron Griffis +# Copyright 2008, Oracle +# Released under the GNU GPLv2 + +v="Btrfs v0.15" + +which hg > /dev/null +if [ $? == 0 ]; then + last=$(hg tags | grep -m1 -o '^v[0-9.]\+') + + # now check if the repo has commits since then... + if [[ $(hg id -t) == $last || \ + $(hg di -r "$last:." | awk '/^diff/{print $NF}' | sort -u) == .hgtags ]] + then + # check if it's dirty + if [[ $(hg id | cut -d' ' -f1) == *+ ]]; then + v=$last+ + else + v=$last + fi + else + # includes dirty flag + v=$last+$(hg id -i) + fi +fi + +echo "#ifndef __BUILD_VERSION" > .build-version.h +echo "#define __BUILD_VERSION" >> .build-version.h +echo "#define BTRFS_BUILD_VERSION \"Btrfs $v\"" >> .build-version.h +echo "#endif" >> .build-version.h + +diff -q version.h .build-version.h >& /dev/null + +if [ $? == 0 ]; then + rm .build-version.h + exit 0 +fi + +mv .build-version.h version.h From 64f26f745084872b916cd1bef6054e21b15c5784 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 24 Jul 2008 10:09:43 -0400 Subject: [PATCH 0612/2985] Btrfs: Use assert_spin_locked instead of spin_trylock On UP systems spin_trylock always succeeds Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 8a502ee2f231..954b047639ab 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -208,7 +208,7 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *merge = NULL; struct rb_node *rb; - BUG_ON(spin_trylock(&tree->lock)); + assert_spin_locked(&tree->lock); rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { ret = -EEXIST; @@ -270,7 +270,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, struct rb_node *next = NULL; u64 end = range_end(start, len); - BUG_ON(spin_trylock(&tree->lock)); + assert_spin_locked(&tree->lock); rb_node = __tree_search(&tree->map, start, &prev, &next); if (!rb_node && prev) { em = rb_entry(prev, struct extent_map, rb_node); @@ -317,7 +317,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) int ret = 0; WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); - BUG_ON(spin_trylock(&tree->lock)); + assert_spin_locked(&tree->lock); rb_erase(&em->rb_node, &tree->map); em->in_tree = 0; return ret; From 3eaa2885276fd6dac7b076a793932428b7168e74 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jul 2008 11:57:52 -0400 Subject: [PATCH 0613/2985] Btrfs: Fix the defragmention code and the block relocation code for data=ordered Before setting an extent to delalloc, the code needs to wait for pending ordered extents. Also, the relocation code needs to wait for ordered IO before scanning the block group again. This is because the extents are not removed until the IO for the new extents is finished Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 7 ++++++ fs/btrfs/disk-io.c | 3 +++ fs/btrfs/extent-tree.c | 39 ++++++++++++++++++++-------- fs/btrfs/ioctl.c | 21 ++++++++++------ fs/btrfs/ordered-data.c | 56 +++++++++++++++++++++++++++++++++++++++-- fs/btrfs/ordered-data.h | 7 ++++++ 6 files changed, 113 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8ecac2e77a43..6675e916ebcd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -538,6 +538,13 @@ struct btrfs_fs_info { struct list_head dead_roots; atomic_t nr_async_submits; + /* + * this is used by the balancing code to wait for all the pending + * ordered extents + */ + spinlock_t ordered_extent_lock; + struct list_head ordered_extents; + /* * there is a pool of worker threads for checksumming during writes * and a pool for checksumming after reads. This is because readers diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7ce3f83c5dd6..ec01062eb41d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1252,6 +1252,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_nlink = 1; fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); + INIT_LIST_HEAD(&fs_info->ordered_extents); + spin_lock_init(&fs_info->ordered_extent_lock); + sb->s_blocksize = 4096; sb->s_blocksize_bits = blksize_bits(4096); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index febc6295c7a9..f92b297e7da5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2640,6 +2640,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, struct file_ra_state *ra; unsigned long total_read = 0; unsigned long ra_pages; + struct btrfs_ordered_extent *ordered; struct btrfs_trans_handle *trans; ra = kzalloc(sizeof(*ra), GFP_NOFS); @@ -2658,9 +2659,9 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, calc_ra(i, last_index, ra_pages)); } total_read++; - if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size) +again: + if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) goto truncate_racing; - page = grab_cache_page(inode->i_mapping, i); if (!page) { goto out_unlock; @@ -2674,18 +2675,24 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, goto out_unlock; } } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(page); -#else - cancel_dirty_page(page, PAGE_CACHE_SIZE); -#endif wait_on_page_writeback(page); - set_page_extent_mapped(page); + page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(io_tree, page_start, page_end, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_page(page); + page_cache_release(page); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + goto again; + } + set_page_extent_mapped(page); + + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); @@ -2694,10 +2701,18 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, unlock_page(page); page_cache_release(page); } - balance_dirty_pages_ratelimited_nr(inode->i_mapping, - total_read); out_unlock: + /* we have to start the IO in order to get the ordered extents + * instantiated. This allows the relocation to code to wait + * for all the ordered extents to hit the disk. + * + * Otherwise, it would constantly loop over the same extents + * because the old ones don't get deleted until the IO is + * started + */ + btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1, + WB_SYNC_NONE); kfree(ra); trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); if (trans) { @@ -3238,6 +3253,8 @@ next: btrfs_clean_old_snapshots(tree_root); + btrfs_wait_ordered_extents(tree_root); + trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); mutex_lock(&root->fs_info->alloc_mutex); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 83f17a5cbd6a..a61f2e7e2db5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -213,6 +213,7 @@ int btrfs_defrag_file(struct file *file) struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_ordered_extent *ordered; struct page *page; unsigned long last_index; unsigned long ra_pages = root->fs_info->bdi.ra_pages; @@ -234,6 +235,7 @@ int btrfs_defrag_file(struct file *file) min(last_index, i + ra_pages - 1)); } total_read++; +again: page = grab_cache_page(inode->i_mapping, i); if (!page) goto out_unlock; @@ -247,18 +249,23 @@ int btrfs_defrag_file(struct file *file) } } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(page); -#else - cancel_dirty_page(page, PAGE_CACHE_SIZE); -#endif wait_on_page_writeback(page); - set_page_extent_mapped(page); page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(io_tree, page_start, page_end, GFP_NOFS); + + ordered = btrfs_lookup_ordered_extent(inode, page_start); + if (ordered) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_page(page); + page_cache_release(page); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + goto again; + } + set_page_extent_mapped(page); + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index e42fd233e04c..676e4bd65c52 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -167,20 +167,28 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->file_offset = file_offset; entry->start = start; entry->len = len; + entry->inode = inode; + /* one ref for the tree */ atomic_set(&entry->refs, 1); init_waitqueue_head(&entry->wait); INIT_LIST_HEAD(&entry->list); + INIT_LIST_HEAD(&entry->root_extent_list); node = tree_insert(&tree->tree, file_offset, &entry->rb_node); if (node) { - entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); - atomic_inc(&entry->refs); + printk("warning dup entry from add_ordered_extent\n"); + BUG(); } set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, entry_end(entry) - 1, GFP_NOFS); + spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + list_add_tail(&entry->root_extent_list, + &BTRFS_I(inode)->root->fs_info->ordered_extents); + spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + mutex_unlock(&tree->mutex); BUG_ON(node); return 0; @@ -285,11 +293,55 @@ int btrfs_remove_ordered_extent(struct inode *inode, rb_erase(node, &tree->tree); tree->last = NULL; set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); + + spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + list_del_init(&entry->root_extent_list); + spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + mutex_unlock(&tree->mutex); wake_up(&entry->wait); return 0; } +int btrfs_wait_ordered_extents(struct btrfs_root *root) +{ + struct list_head splice; + struct list_head *cur; + struct btrfs_ordered_extent *ordered; + struct inode *inode; + + INIT_LIST_HEAD(&splice); + + spin_lock(&root->fs_info->ordered_extent_lock); + list_splice_init(&root->fs_info->ordered_extents, &splice); + while(!list_empty(&splice)) { + cur = splice.next; + ordered = list_entry(cur, struct btrfs_ordered_extent, + root_extent_list); + list_del_init(&ordered->root_extent_list); + atomic_inc(&ordered->refs); + inode = ordered->inode; + + /* + * the inode can't go away until all the pages are gone + * and the pages won't go away while there is still + * an ordered extent and the ordered extent won't go + * away until it is off this list. So, we can safely + * increment i_count here and call iput later + */ + atomic_inc(&inode->i_count); + spin_unlock(&root->fs_info->ordered_extent_lock); + + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + iput(inode); + + spin_lock(&root->fs_info->ordered_extent_lock); + } + spin_unlock(&root->fs_info->ordered_extent_lock); + return 0; +} + /* * Used to start IO or wait for a given ordered extent to finish. * diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 199cb0b4f1d9..5efe6b63c74c 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -80,6 +80,9 @@ struct btrfs_ordered_extent { /* reference count */ atomic_t refs; + /* the inode we belong to */ + struct inode *inode; + /* list of checksums for insertion when the extent io is done */ struct list_head list; @@ -88,6 +91,9 @@ struct btrfs_ordered_extent { /* our friendly rbtree entry */ struct rb_node rb_node; + + /* a per root list of all the pending ordered extents */ + struct list_head root_extent_list; }; @@ -137,4 +143,5 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, pgoff_t start, pgoff_t end); int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); +int btrfs_wait_ordered_extents(struct btrfs_root *root); #endif From aec7477b3b0e8ec93f6d274f25ba40b0665134d4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 24 Jul 2008 12:12:38 -0400 Subject: [PATCH 0614/2985] Btrfs: Implement new dir index format Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 15 +++++ fs/btrfs/ctree.h | 8 ++- fs/btrfs/dir-item.c | 4 +- fs/btrfs/inode-item.c | 10 ++- fs/btrfs/inode.c | 143 ++++++++++++++++++++++++++++++++++------- fs/btrfs/ioctl.c | 4 +- fs/btrfs/transaction.c | 4 +- 7 files changed, 155 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 3bf40591742a..b7bd60e4fdd7 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -43,6 +43,21 @@ struct btrfs_inode { u64 delalloc_bytes; u64 disk_i_size; u32 flags; + + /* + * if this is a directory then index_cnt is the counter for the index + * number for new files that are created + */ + u64 index_cnt; + + /* + * index holds the directory index for this inode on creation, so + * add_link can do what its supposed to. This isn't populated when the + * inode is read because there isn't really a reason to know this unless + * we are creating the directory index or deleting it, and deletion + * reads the index off of the inode reference at unlink time. + */ + u64 index; }; static inline struct btrfs_inode *BTRFS_I(struct inode *inode) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6675e916ebcd..beb05b1de54c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -372,6 +372,7 @@ struct btrfs_dev_extent { } __attribute__ ((__packed__)); struct btrfs_inode_ref { + __le64 index; __le16 name_len; /* name goes here */ } __attribute__ ((__packed__)); @@ -902,6 +903,7 @@ BTRFS_SETGET_STACK_FUNCS(block_group_flags, /* struct btrfs_inode_ref */ BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); +BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); /* struct btrfs_inode_item */ BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); @@ -1528,7 +1530,7 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, /* dir-item.c */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, - struct btrfs_key *location, u8 type); + struct btrfs_key *location, u8 type, u64 index); struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, @@ -1566,11 +1568,11 @@ int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid); int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, - u64 inode_objectid, u64 ref_objectid); + u64 inode_objectid, u64 ref_objectid, u64 index); int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, - u64 inode_objectid, u64 ref_objectid); + u64 inode_objectid, u64 ref_objectid, u64 *index); int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 7a73dc59dc4d..eb4dd3d75cf9 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -110,7 +110,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, - struct btrfs_key *location, u8 type) + struct btrfs_key *location, u8 type, u64 index) { int ret = 0; int ret2 = 0; @@ -156,7 +156,7 @@ second_insert: btrfs_release_path(root, path); btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); - key.offset = location->objectid; + key.offset = index; dir_item = insert_with_overflow(trans, root, path, &key, data_size, name, name_len); if (IS_ERR(dir_item)) { diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index cba30b6cc6fe..d93451c66ba1 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -52,7 +52,7 @@ int find_name_in_backref(struct btrfs_path *path, const char * name, int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, - u64 inode_objectid, u64 ref_objectid) + u64 inode_objectid, u64 ref_objectid, u64 *index) { struct btrfs_path *path; struct btrfs_key key; @@ -86,6 +86,10 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, } leaf = path->nodes[0]; item_size = btrfs_item_size_nr(leaf, path->slots[0]); + + if (index) + *index = btrfs_inode_ref_index(leaf, ref); + if (del_len == item_size) { ret = btrfs_del_item(trans, root, path); goto out; @@ -106,7 +110,7 @@ out: int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, - u64 inode_objectid, u64 ref_objectid) + u64 inode_objectid, u64 ref_objectid, u64 index) { struct btrfs_path *path; struct btrfs_key key; @@ -138,6 +142,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_inode_ref); ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); + btrfs_set_inode_ref_index(path->nodes[0], ref, index); ptr = (unsigned long)(ref + 1); ret = 0; } else if (ret < 0) { @@ -146,6 +151,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_ref); btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); + btrfs_set_inode_ref_index(path->nodes[0], ref, index); ptr = (unsigned long)(ref + 1); } write_extent_buffer(path->nodes[0], name, ptr, name_len); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0e90315ea803..8d371d6fe551 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -872,6 +872,8 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_rdev = 0; rdev = btrfs_inode_rdev(leaf, inode_item); + BTRFS_I(inode)->index_cnt = (u64)-1; + alloc_group_block = btrfs_inode_block_group(leaf, inode_item); BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, alloc_group_block); @@ -993,6 +995,7 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_dir_item *di; struct btrfs_key key; + u64 index; path = btrfs_alloc_path(); if (!path) { @@ -1017,8 +1020,19 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, goto err; btrfs_release_path(root, path); + ret = btrfs_del_inode_ref(trans, root, name, name_len, + dentry->d_inode->i_ino, + dentry->d_parent->d_inode->i_ino, &index); + if (ret) { + printk("failed to delete reference to %.*s, " + "inode %lu parent %lu\n", name_len, name, + dentry->d_inode->i_ino, + dentry->d_parent->d_inode->i_ino); + goto err; + } + di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, - key.objectid, name, name_len, -1); + index, name, name_len, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); goto err; @@ -1031,15 +1045,6 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, btrfs_release_path(root, path); dentry->d_inode->i_ctime = dir->i_ctime; - ret = btrfs_del_inode_ref(trans, root, name, name_len, - dentry->d_inode->i_ino, - dentry->d_parent->d_inode->i_ino); - if (ret) { - printk("failed to delete reference to %.*s, " - "inode %lu parent %lu\n", name_len, name, - dentry->d_inode->i_ino, - dentry->d_parent->d_inode->i_ino); - } err: btrfs_free_path(path); if (!ret) { @@ -1625,6 +1630,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) BTRFS_I(inode)->root = args->root; BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->disk_i_size = 0; + BTRFS_I(inode)->index_cnt = (u64)-1; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); @@ -1901,8 +1907,77 @@ void btrfs_dirty_inode(struct inode *inode) btrfs_end_transaction(trans, root); } +static int btrfs_set_inode_index_count(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_key key, found_key; + struct btrfs_path *path; + struct extent_buffer *leaf; + int ret; + + key.objectid = inode->i_ino; + btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.offset = (u64)-1; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + /* FIXME: we should be able to handle this */ + if (ret == 0) + goto out; + ret = 0; + + /* + * MAGIC NUMBER EXPLANATION: + * since we search a directory based on f_pos we have to start at 2 + * since '.' and '..' have f_pos of 0 and 1 respectively, so everybody + * else has to start at 2 + */ + if (path->slots[0] == 0) { + BTRFS_I(inode)->index_cnt = 2; + goto out; + } + + path->slots[0]--; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != inode->i_ino || + btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) { + BTRFS_I(inode)->index_cnt = 2; + goto out; + } + + BTRFS_I(inode)->index_cnt = found_key.offset + 1; +out: + btrfs_free_path(path); + return ret; +} + +static int btrfs_set_inode_index(struct inode *dir, struct inode *inode) +{ + int ret = 0; + + if (BTRFS_I(dir)->index_cnt == (u64)-1) { + ret = btrfs_set_inode_index_count(dir); + if (ret) + return ret; + } + + BTRFS_I(inode)->index = BTRFS_I(dir)->index_cnt; + BTRFS_I(dir)->index_cnt++; + + return ret; +} + static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct inode *dir, const char *name, int name_len, u64 ref_objectid, u64 objectid, @@ -1928,6 +2003,20 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); + if (dir) { + ret = btrfs_set_inode_index(dir, inode); + if (ret) + return ERR_PTR(ret); + } else { + BTRFS_I(inode)->index = 0; + } + /* + * index_cnt is ignored for everything but a dir, + * btrfs_get_inode_index_count has an explanation for the magic + * number + */ + BTRFS_I(inode)->index_cnt = 2; + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); @@ -1984,6 +2073,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, struct btrfs_inode_ref); btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); + btrfs_set_inode_ref_index(path->nodes[0], ref, BTRFS_I(inode)->index); ptr = (unsigned long)(ref + 1); write_extent_buffer(path->nodes[0], name, ptr, name_len); @@ -1998,6 +2088,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, insert_inode_hash(inode); return inode; fail: + if (dir) + BTRFS_I(dir)->index_cnt--; btrfs_free_path(path); return ERR_PTR(ret); } @@ -2014,7 +2106,7 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, int ret; struct btrfs_key key; struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; - struct inode *parent_inode; + struct inode *parent_inode = dentry->d_parent->d_inode; key.objectid = inode->i_ino; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); @@ -2023,16 +2115,17 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_insert_dir_item(trans, root, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, - &key, btrfs_inode_type(inode)); + &key, btrfs_inode_type(inode), + BTRFS_I(inode)->index); if (ret == 0) { if (add_backref) { ret = btrfs_insert_inode_ref(trans, root, dentry->d_name.name, dentry->d_name.len, inode->i_ino, - dentry->d_parent->d_inode->i_ino); + parent_inode->i_ino, + BTRFS_I(inode)->index); } - parent_inode = dentry->d_parent->d_inode; btrfs_i_size_write(parent_inode, parent_inode->i_size + dentry->d_name.len * 2); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; @@ -2083,7 +2176,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock; } - inode = btrfs_new_inode(trans, root, dentry->d_name.name, + inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, objectid, BTRFS_I(dir)->block_group, mode); @@ -2138,7 +2231,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } - inode = btrfs_new_inode(trans, root, dentry->d_name.name, + inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, objectid, BTRFS_I(dir)->block_group, mode); @@ -2203,10 +2296,15 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; + err = btrfs_set_inode_index(dir, inode); + if (err) + goto fail; + trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); atomic_inc(&inode->i_count); + err = btrfs_add_nondir(trans, dentry, inode, 1); if (err) @@ -2258,7 +2356,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_unlock; } - inode = btrfs_new_inode(trans, root, dentry->d_name.name, + inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, objectid, BTRFS_I(dir)->block_group, S_IFDIR | mode); @@ -2886,9 +2984,8 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, struct btrfs_block_group_cache *block_group) { struct inode *inode; - int ret; - inode = btrfs_new_inode(trans, new_root, "..", 2, new_dirid, + inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, new_dirid, block_group, S_IFDIR | 0700); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -2896,8 +2993,6 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, inode->i_fop = &btrfs_dir_file_operations; new_root->inode = inode; - ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid, - new_dirid); inode->i_nlink = 1; btrfs_i_size_write(inode, 0); @@ -3078,6 +3173,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, if (ret) goto out_fail; } + ret = btrfs_set_inode_index(new_dir, old_inode); + if (ret) + goto out_fail; + ret = btrfs_add_link(trans, new_dentry, old_inode, 1); if (ret) goto out_fail; @@ -3123,7 +3222,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; } - inode = btrfs_new_inode(trans, root, dentry->d_name.name, + inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, objectid, BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a61f2e7e2db5..faf081302d02 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -129,13 +129,13 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, dir = root->fs_info->sb->s_root->d_inode; ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, name, namelen, dir->i_ino, &key, - BTRFS_FT_DIR); + BTRFS_FT_DIR, 0); if (ret) goto fail; ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, name, namelen, objectid, - root->fs_info->sb->s_root->d_inode->i_ino); + root->fs_info->sb->s_root->d_inode->i_ino, 0); if (ret) goto fail; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0f756e0175c0..768b0d223e68 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -577,14 +577,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, pending->name, namelen, root->fs_info->sb->s_root->d_inode->i_ino, - &key, BTRFS_FT_DIR); + &key, BTRFS_FT_DIR, 0); if (ret) goto fail; ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, pending->name, strlen(pending->name), objectid, - root->fs_info->sb->s_root->d_inode->i_ino); + root->fs_info->sb->s_root->d_inode->i_ino, 0); /* Invalidate existing dcache entry for new snapshot. */ btrfs_invalidate_dcache_root(root, pending->name, namelen); From 6099afe88fe64b2f47c43a8a71c13be3a416bbf7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 24 Jul 2008 12:16:03 -0400 Subject: [PATCH 0615/2985] Btrfs: Remove unused xattr code Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +-- fs/btrfs/xattr.c | 51 ------------------------------------------------ 2 files changed, 1 insertion(+), 53 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index beb05b1de54c..53d315bdd16d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1689,8 +1689,7 @@ void btrfs_sysfs_del_super(struct btrfs_fs_info *root); /* xattr.c */ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); -int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode); + /* super.c */ u64 btrfs_parse_size(char *str); int btrfs_parse_options(struct btrfs_root *root, char *options); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 3e9eb91c3c89..6730b5958844 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -378,57 +378,6 @@ err: return ret; } -/* - * delete all the xattrs associated with the inode. - */ -int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode) -{ - struct btrfs_path *path; - struct btrfs_key key, found_key; - struct btrfs_item *item; - struct extent_buffer *leaf; - int ret; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - path->reada = -1; - key.objectid = inode->i_ino; - btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); - key.offset = (u64)-1; - - while(1) { - /* look for our next xattr */ - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto out; - BUG_ON(ret == 0); - - if (path->slots[0] == 0) - break; - - path->slots[0]--; - leaf = path->nodes[0]; - item = btrfs_item_nr(leaf, path->slots[0]); - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - if (found_key.objectid != key.objectid) - break; - if (btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY) - break; - - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); - btrfs_release_path(root, path); - } - ret = 0; -out: - btrfs_free_path(path); - - return ret; -} - /* * Handler functions */ From 33268eaf0b3db5e2bd12c0ada81a8e8f87a46d68 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 24 Jul 2008 12:16:36 -0400 Subject: [PATCH 0616/2985] Btrfs: Add ACL support Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 296 ++++++++++++++++++++++++++++++++++++----- fs/btrfs/btrfs_inode.h | 3 + fs/btrfs/ctree.h | 7 + fs/btrfs/inode.c | 42 +++++- fs/btrfs/super.c | 9 +- fs/btrfs/xattr.c | 26 ++-- 6 files changed, 335 insertions(+), 48 deletions(-) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index ff0fcc72848a..b192659b4f5f 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -20,76 +20,302 @@ #include #include #include +#include #include + #include "ctree.h" +#include "btrfs_inode.h" #include "xattr.h" -#ifndef is_owner_or_cap -#define is_owner_or_cap(inode) \ - ((current->fsuid == (inode)->i_uid) || capable(CAP_FOWNER)) -#endif + +static void btrfs_update_cached_acl(struct inode *inode, + struct posix_acl **p_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED) + posix_acl_release(*p_acl); + *p_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + +static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) +{ + int size, name_index; + char *value = NULL; + struct posix_acl *acl = NULL, **p_acl; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS; + p_acl = &BTRFS_I(inode)->i_acl; + break; + case ACL_TYPE_DEFAULT: + name_index = BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT; + p_acl = &BTRFS_I(inode)->i_default_acl; + break; + default: + return ERR_PTR(-EINVAL); + } + + spin_lock(&inode->i_lock); + if (*p_acl != BTRFS_ACL_NOT_CACHED) + acl = posix_acl_dup(*p_acl); + spin_unlock(&inode->i_lock); + + if (acl) + return acl; + + + size = btrfs_xattr_get(inode, name_index, "", NULL, 0); + if (size > 0) { + value = kzalloc(size, GFP_NOFS); + if (!value) + return ERR_PTR(-ENOMEM); + size = btrfs_xattr_get(inode, name_index, "", value, size); + if (size > 0) { + acl = posix_acl_from_xattr(value, size); + btrfs_update_cached_acl(inode, p_acl, acl); + } + kfree(value); + } else if (size == -ENOENT) { + acl = NULL; + btrfs_update_cached_acl(inode, p_acl, acl); + } + + return acl; +} + +static int btrfs_xattr_get_acl(struct inode *inode, int type, + void *value, size_t size) +{ + struct posix_acl *acl; + int ret = 0; + + acl = btrfs_get_acl(inode, type); + + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + ret = posix_acl_to_xattr(acl, value, size); + posix_acl_release(acl); + + return ret; +} + +/* + * Needs to be called with fs_mutex held + */ +static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + int ret, name_index = 0, size = 0; + struct posix_acl **p_acl; + char *value = NULL; + mode_t mode; + + if (acl) { + ret = posix_acl_valid(acl); + if (ret < 0) + return ret; + ret = 0; + } + + switch (type) { + case ACL_TYPE_ACCESS: + mode = inode->i_mode; + ret = posix_acl_equiv_mode(acl, &mode); + if (ret < 0) + return ret; + ret = 0; + inode->i_mode = mode; + name_index = BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS; + p_acl = &BTRFS_I(inode)->i_acl; + break; + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) + return acl ? -EINVAL : 0; + name_index = BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT; + p_acl = &BTRFS_I(inode)->i_default_acl; + break; + default: + return -EINVAL; + } + + if (acl) { + size = posix_acl_xattr_size(acl->a_count); + value = kmalloc(size, GFP_NOFS); + if (!value) { + ret = -ENOMEM; + goto out; + } + + ret = posix_acl_to_xattr(acl, value, size); + if (ret < 0) + goto out; + } + + ret = btrfs_xattr_set(inode, name_index, "", value, size, 0); + +out: + if (value) + kfree(value); + + if (!ret) + btrfs_update_cached_acl(inode, p_acl, acl); + + return ret; +} static int btrfs_xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) { int ret = 0; - struct posix_acl *acl; + struct posix_acl *acl = NULL; - if (!is_owner_or_cap(inode)) - return -EPERM; if (value) { acl = posix_acl_from_xattr(value, size); if (acl == NULL) { value = NULL; size = 0; } else if (IS_ERR(acl)) { - ret = PTR_ERR(acl); - } else { - ret = posix_acl_valid(acl); - posix_acl_release(acl); + return PTR_ERR(acl); } - if (ret) - return ret; } - return btrfs_xattr_set(inode, type, "", value, size, 0); + + ret = btrfs_set_acl(inode, acl, type); + + posix_acl_release(acl); + + return ret; } -static int btrfs_xattr_get_acl(struct inode *inode, int type, - void *value, size_t size) -{ - return btrfs_xattr_get(inode, type, "", value, size); -} + static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name, void *value, size_t size) { - if (*name != '\0') - return -EINVAL; - return btrfs_xattr_get_acl(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, - value, size); + return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size); } + static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { - if (*name != '\0') - return -EINVAL; - return btrfs_xattr_set_acl(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, - value, size); + return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); } + static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name, void *value, size_t size) { - if (*name != '\0') - return -EINVAL; - return btrfs_xattr_get_acl(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, - value, size); + return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size); } + static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { - if (*name != '\0') - return -EINVAL; - return btrfs_xattr_set_acl(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, - value, size); + return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); } + +int btrfs_check_acl(struct inode *inode, int mask) +{ + struct posix_acl *acl; + int error = -EAGAIN; + + acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); + + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + } + + return error; +} + +/* + * btrfs_init_acl is already generally called under fs_mutex, so the locking + * stuff has been fixed to work with that. If the locking stuff changes, we + * need to re-evaluate the acl locking stuff. + */ +int btrfs_init_acl(struct inode *inode, struct inode *dir) +{ + struct posix_acl *acl = NULL; + int ret = 0; + + /* this happens with subvols */ + if (!dir) + return 0; + + if (!S_ISLNK(inode->i_mode)) { + if (IS_POSIXACL(dir)) { + acl = btrfs_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + + if (!acl) + inode->i_mode &= ~current->fs->umask; + } + + if (IS_POSIXACL(dir) && acl) { + struct posix_acl *clone; + mode_t mode; + + if (S_ISDIR(inode->i_mode)) { + ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT); + if (ret) + goto failed; + } + clone = posix_acl_clone(acl, GFP_NOFS); + ret = -ENOMEM; + if (!clone) + goto failed; + + mode = inode->i_mode; + ret = posix_acl_create_masq(clone, &mode); + if (ret >= 0) { + inode->i_mode = mode; + if (ret > 0) { + /* we need an acl */ + ret = btrfs_set_acl(inode, clone, + ACL_TYPE_ACCESS); + } + } + } +failed: + posix_acl_release(acl); + + return ret; +} + +int btrfs_acl_chmod(struct inode *inode) +{ + struct posix_acl *acl, *clone; + int ret = 0; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + if (!IS_POSIXACL(inode)) + return 0; + + acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + + ret = posix_acl_chmod_masq(clone, inode->i_mode); + if (!ret) + ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS); + + posix_acl_release(clone); + + return ret; +} + struct xattr_handler btrfs_xattr_acl_default_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, .list = btrfs_xattr_generic_list, diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index b7bd60e4fdd7..9f2a4ef944a7 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -36,6 +36,9 @@ struct btrfs_inode { struct inode vfs_inode; struct btrfs_ordered_inode_tree ordered_tree; + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; + /* * transid of the trans_handle that last modified this inode */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 53d315bdd16d..f87d7263f2d7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -42,6 +42,8 @@ struct btrfs_ordered_sum; #define BTRFS_MAGIC "_B5RfS_M" +#define BTRFS_ACL_NOT_CACHED ((void *)-1) + #ifdef CONFIG_LOCKDEP # define BTRFS_MAX_LEVEL 7 #else @@ -1694,4 +1696,9 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); u64 btrfs_parse_size(char *str); int btrfs_parse_options(struct btrfs_root *root, char *options); int btrfs_sync_fs(struct super_block *sb, int wait); + +/* acl.c */ +int btrfs_check_acl(struct inode *inode, int mask); +int btrfs_init_acl(struct inode *inode, struct inode *dir); +int btrfs_acl_chmod(struct inode *inode); #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8d371d6fe551..2d8853543a71 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -1478,6 +1479,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) } out: err = inode_setattr(inode, attr); + + if (!err && ((attr->ia_valid & ATTR_MODE))) + err = btrfs_acl_chmod(inode); fail: return err; } @@ -2184,6 +2188,12 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; + err = btrfs_init_acl(inode, dir); + if (err) { + drop_inode = 1; + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dentry, inode, 0); if (err) @@ -2239,6 +2249,12 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; + err = btrfs_init_acl(inode, dir); + if (err) { + drop_inode = 1; + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dentry, inode, 0); if (err) @@ -2366,6 +2382,11 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) } drop_on_err = 1; + + err = btrfs_init_acl(inode, dir); + if (err) + goto out_fail; + inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; btrfs_set_trans_block_group(trans, inode); @@ -3023,6 +3044,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) return NULL; ei->last_trans = 0; btrfs_ordered_inode_tree_init(&ei->ordered_tree); + ei->i_acl = BTRFS_ACL_NOT_CACHED; + ei->i_default_acl = BTRFS_ACL_NOT_CACHED; return &ei->vfs_inode; } @@ -3032,6 +3055,13 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); + if (BTRFS_I(inode)->i_acl && + BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED) + posix_acl_release(BTRFS_I(inode)->i_acl); + if (BTRFS_I(inode)->i_default_acl && + BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) + posix_acl_release(BTRFS_I(inode)->i_default_acl); + while(1) { ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); if (!ordered) @@ -3230,6 +3260,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; + err = btrfs_init_acl(inode, dir); + if (err) { + drop_inode = 1; + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dentry, inode, 0); if (err) @@ -3310,7 +3346,7 @@ static int btrfs_permission(struct inode *inode, int mask, { if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE)) return -EACCES; - return generic_permission(inode, mask, NULL); + return generic_permission(inode, mask, btrfs_check_acl); } static struct inode_operations btrfs_dir_inode_operations = { @@ -3392,6 +3428,10 @@ static struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, .setattr = btrfs_setattr, .permission = btrfs_permission, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = btrfs_listxattr, + .removexattr = generic_removexattr, }; static struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4cb6aac5122e..a6a418b6894b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -68,7 +68,7 @@ static void btrfs_put_super (struct super_block * sb) enum { Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, - Opt_ssd, Opt_thread_pool, Opt_err, + Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_err, }; static match_table_t tokens = { @@ -83,7 +83,8 @@ static match_table_t tokens = { {Opt_alloc_start, "alloc_start=%s"}, {Opt_thread_pool, "thread_pool=%d"}, {Opt_ssd, "ssd"}, - {Opt_err, NULL} + {Opt_noacl, "noacl"}, + {Opt_err, NULL}, }; u64 btrfs_parse_size(char *str) @@ -215,6 +216,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) info->alloc_start); } break; + case Opt_noacl: + root->fs_info->sb->s_flags &= ~MS_POSIXACL; + break; default: break; } @@ -301,6 +305,7 @@ static int btrfs_fill_super(struct super_block * sb, sb->s_op = &btrfs_super_ops; sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; + sb->s_flags |= MS_POSIXACL; tree_root = open_ctree(sb, fs_devices, (char *)data); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 6730b5958844..121c9550314f 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -26,25 +26,27 @@ #include "transaction.h" #include "xattr.h" #include "disk-io.h" + static struct xattr_handler *btrfs_xattr_handler_map[] = { [BTRFS_XATTR_INDEX_USER] = &btrfs_xattr_user_handler, #ifdef CONFIG_FS_POSIX_ACL -// [BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &btrfs_xattr_acl_access_handler, -// [BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &btrfs_xattr_acl_default_handler, + [BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &btrfs_xattr_acl_access_handler, + [BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &btrfs_xattr_acl_default_handler, #endif [BTRFS_XATTR_INDEX_TRUSTED] = &btrfs_xattr_trusted_handler, [BTRFS_XATTR_INDEX_SECURITY] = &btrfs_xattr_security_handler, -// [BTRFS_XATTR_INDEX_SYSTEM] = &btrfs_xattr_system_handler, + [BTRFS_XATTR_INDEX_SYSTEM] = &btrfs_xattr_system_handler, }; + struct xattr_handler *btrfs_xattr_handlers[] = { &btrfs_xattr_user_handler, #ifdef CONFIG_FS_POSIX_ACL -// &btrfs_xattr_acl_access_handler, -// &btrfs_xattr_acl_default_handler, + &btrfs_xattr_acl_access_handler, + &btrfs_xattr_acl_default_handler, #endif &btrfs_xattr_trusted_handler, &btrfs_xattr_security_handler, -// &btrfs_xattr_system_handler, + &btrfs_xattr_system_handler, NULL, }; @@ -237,10 +239,14 @@ int btrfs_xattr_set(struct inode *inode, int name_index, mod = 1; goto out; } - } else if (flags & XATTR_REPLACE) { - /* we couldn't find the attr to replace, so error out */ - ret = -ENODATA; - goto out; + } else { + btrfs_release_path(root, path); + + if (flags & XATTR_REPLACE) { + /* we couldn't find the attr to replace */ + ret = -ENODATA; + goto out; + } } /* ok we have to create a completely new xattr */ From 7b1287662304c3cb05cb38f5e3e2d69f386e8f10 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 24 Jul 2008 12:17:14 -0400 Subject: [PATCH 0617/2985] Btrfs: Create orphan inode records to prevent lost files after a crash Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/btrfs_inode.h | 3 + fs/btrfs/ctree.c | 2 +- fs/btrfs/ctree.h | 14 +++ fs/btrfs/disk-io.c | 2 + fs/btrfs/inode.c | 237 ++++++++++++++++++++++++++++++++++++++++- fs/btrfs/orphan.c | 67 ++++++++++++ 7 files changed, 322 insertions(+), 5 deletions(-) create mode 100644 fs/btrfs/orphan.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index f8a38b0da7fb..5a0fd7b0e3e7 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,7 +6,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ - extent_io.o volumes.o async-thread.o ioctl.o locking.o + extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o else diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 9f2a4ef944a7..df624fd735c1 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -39,6 +39,9 @@ struct btrfs_inode { struct posix_acl *i_acl; struct posix_acl *i_default_acl; + /* for keeping track of orphaned inodes */ + struct list_head i_orphan; + /* * transid of the trans_handle that last modified this inode */ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index cdc713062b03..f2a94999c371 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2622,7 +2622,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, total_data += data_size[i]; } - total_size = total_data + (nr - 1) * sizeof(struct btrfs_item); + total_size = total_data + (nr * sizeof(struct btrfs_item)); ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); if (ret == 0) { return -EEXIST; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f87d7263f2d7..012ad529cb18 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -74,6 +74,9 @@ struct btrfs_ordered_sum; /* directory objectid inside the root tree */ #define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL +/* orhpan objectid for tracking unlinked/truncated files */ +#define BTRFS_ORPHAN_OBJECTID -5ULL + /* * All files have objectids higher than this. */ @@ -646,6 +649,9 @@ struct btrfs_root { /* the dirty list is only used by non-reference counted roots */ struct list_head dirty_list; + + spinlock_t orphan_lock; + struct list_head orphan_list; }; /* @@ -657,6 +663,7 @@ struct btrfs_root { #define BTRFS_INODE_ITEM_KEY 1 #define BTRFS_INODE_REF_KEY 2 #define BTRFS_XATTR_ITEM_KEY 8 +#define BTRFS_ORPHAN_ITEM_KEY 9 /* reserve 2-15 close to the inode for later flexibility */ /* @@ -1560,6 +1567,13 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, struct btrfs_path *path, u64 dir, const char *name, u16 name_len, int mod); + +/* orphan.c */ +int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 offset); +int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 offset); + /* inode-map.c */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ec01062eb41d..d60923967347 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -732,7 +732,9 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->in_sysfs = 0; INIT_LIST_HEAD(&root->dirty_list); + INIT_LIST_HEAD(&root->orphan_list); spin_lock_init(&root->node_lock); + spin_lock_init(&root->orphan_lock); mutex_init(&root->objectid_mutex); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2d8853543a71..0c9ec8aa304a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -78,6 +78,8 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; +static void btrfs_truncate(struct inode *inode); + int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int for_del) { @@ -826,6 +828,190 @@ zeroit: return -EIO; } +/* + * This creates an orphan entry for the given inode in case something goes + * wrong in the middle of an unlink/truncate. + */ +int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret = 0; + + spin_lock(&root->orphan_lock); + + /* already on the orphan list, we're good */ + if (!list_empty(&BTRFS_I(inode)->i_orphan)) { + spin_unlock(&root->orphan_lock); + return 0; + } + + list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); + + spin_unlock(&root->orphan_lock); + + /* + * insert an orphan item to track this unlinked/truncated file + */ + ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); + + return ret; +} + +/* + * We have done the truncate/delete so we can go ahead and remove the orphan + * item for this particular inode. + */ +int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret = 0; + + spin_lock(&root->orphan_lock); + + if (list_empty(&BTRFS_I(inode)->i_orphan)) { + spin_unlock(&root->orphan_lock); + return 0; + } + + list_del_init(&BTRFS_I(inode)->i_orphan); + if (!trans) { + spin_unlock(&root->orphan_lock); + return 0; + } + + spin_unlock(&root->orphan_lock); + + ret = btrfs_del_orphan_item(trans, root, inode->i_ino); + + return ret; +} + +/* + * this cleans up any orphans that may be left on the list from the last use + * of this root. + */ +void btrfs_orphan_cleanup(struct btrfs_root *root) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_item *item; + struct btrfs_key key, found_key; + struct btrfs_trans_handle *trans; + struct inode *inode; + int ret = 0, nr_unlink = 0, nr_truncate = 0; + + /* don't do orphan cleanup if the fs is readonly. */ + if (root->inode->i_sb->s_flags & MS_RDONLY) + return; + + path = btrfs_alloc_path(); + if (!path) + return; + path->reada = -1; + + key.objectid = BTRFS_ORPHAN_OBJECTID; + btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); + key.offset = (u64)-1; + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, root->inode); + + while (1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + printk(KERN_ERR "Error searching slot for orphan: %d" + "\n", ret); + break; + } + + /* + * if ret == 0 means we found what we were searching for, which + * is weird, but possible, so only screw with path if we didnt + * find the key and see if we have stuff that matches + */ + if (ret > 0) { + if (path->slots[0] == 0) + break; + path->slots[0]--; + } + + /* pull out the item */ + leaf = path->nodes[0]; + item = btrfs_item_nr(leaf, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + /* make sure the item matches what we want */ + if (found_key.objectid != BTRFS_ORPHAN_OBJECTID) + break; + if (btrfs_key_type(&found_key) != BTRFS_ORPHAN_ITEM_KEY) + break; + + /* release the path since we're done with it */ + btrfs_release_path(root, path); + + /* + * this is where we are basically btrfs_lookup, without the + * crossing root thing. we store the inode number in the + * offset of the orphan item. + */ + inode = btrfs_iget_locked(root->inode->i_sb, + found_key.offset, root); + if (!inode) + break; + + if (inode->i_state & I_NEW) { + BTRFS_I(inode)->root = root; + + /* have to set the location manually */ + BTRFS_I(inode)->location.objectid = inode->i_ino; + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.offset = 0; + + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + } + + /* + * add this inode to the orphan list so btrfs_orphan_del does + * the proper thing when we hit it + */ + spin_lock(&root->orphan_lock); + list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); + spin_unlock(&root->orphan_lock); + + /* + * if this is a bad inode, means we actually succeeded in + * removing the inode, but not the orphan record, which means + * we need to manually delete the orphan since iput will just + * do a destroy_inode + */ + if (is_bad_inode(inode)) { + btrfs_orphan_del(trans, inode); + iput(inode); + continue; + } + + /* if we have links, this was a truncate, lets do that */ + if (inode->i_nlink) { + nr_truncate++; + btrfs_truncate(inode); + } else { + nr_unlink++; + } + + /* this will do delete_inode and everything for us */ + iput(inode); + } + + if (nr_unlink) + printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); + if (nr_truncate) + printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); + + btrfs_free_path(path); + btrfs_end_transaction(trans, root); +} + void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -1067,6 +1253,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) { struct btrfs_root *root; struct btrfs_trans_handle *trans; + struct inode *inode = dentry->d_inode; int ret; unsigned long nr = 0; @@ -1080,6 +1267,10 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) btrfs_set_trans_block_group(trans, dir); ret = btrfs_unlink_trans(trans, root, dir, dentry); + + if (inode->i_nlink == 0) + ret = btrfs_orphan_add(trans, inode); + nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); @@ -1108,12 +1299,17 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); + err = btrfs_orphan_add(trans, inode); + if (err) + goto fail_trans; + /* now the directory is empty */ err = btrfs_unlink_trans(trans, root, dir, dentry); if (!err) { btrfs_i_size_write(inode, 0); } +fail_trans: nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); fail: @@ -1131,6 +1327,9 @@ fail: * * csum items that cross the new i_size are truncated to the new size * as well. + * + * min_type is the minimum key type to truncate down to. If set to 0, this + * will kill all the items on this inode, including the INODE_ITEM_KEY. */ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -1495,6 +1694,7 @@ void btrfs_delete_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) { + btrfs_orphan_del(NULL, inode); goto no_delete; } btrfs_wait_ordered_range(inode, 0, (u64)-1); @@ -1504,8 +1704,12 @@ void btrfs_delete_inode(struct inode *inode) btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode, 0); - if (ret) + if (ret) { + btrfs_orphan_del(NULL, inode); goto no_delete_lock; + } + + btrfs_orphan_del(trans, inode); nr = trans->blocks_used; clear_inode(inode); @@ -1688,7 +1892,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct btrfs_root *root = bi->root; struct btrfs_root *sub_root = root; struct btrfs_key location; - int ret; + int ret, do_orphan = 0; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); @@ -1706,6 +1910,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(ret); if (ret > 0) return ERR_PTR(-ENOENT); + inode = btrfs_iget_locked(dir->i_sb, location.objectid, sub_root); if (!inode) @@ -1715,6 +1920,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (sub_root != root) { igrab(inode); sub_root->inode = inode; + do_orphan = 1; } BTRFS_I(inode)->root = sub_root; memcpy(&BTRFS_I(inode)->location, &location, @@ -1723,6 +1929,10 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, unlock_new_inode(inode); } } + + if (unlikely(do_orphan)) + btrfs_orphan_cleanup(sub_root); + return d_splice_alias(inode, dentry); } @@ -2964,12 +3174,19 @@ static void btrfs_truncate(struct inode *inode) btrfs_set_trans_block_group(trans, inode); btrfs_i_size_write(inode, inode->i_size); + ret = btrfs_orphan_add(trans, inode); + if (ret) + goto out; /* FIXME, add redo link to tree so we don't leak on crash */ ret = btrfs_truncate_in_trans(trans, root, inode, BTRFS_EXTENT_DATA_KEY); btrfs_update_inode(trans, root, inode); - nr = trans->blocks_used; + ret = btrfs_orphan_del(trans, inode); + BUG_ON(ret); + +out: + nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); BUG_ON(ret); btrfs_btree_balance_dirty(root, nr); @@ -3046,6 +3263,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) btrfs_ordered_inode_tree_init(&ei->ordered_tree); ei->i_acl = BTRFS_ACL_NOT_CACHED; ei->i_default_acl = BTRFS_ACL_NOT_CACHED; + INIT_LIST_HEAD(&ei->i_orphan); return &ei->vfs_inode; } @@ -3062,6 +3280,14 @@ void btrfs_destroy_inode(struct inode *inode) BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) posix_acl_release(BTRFS_I(inode)->i_default_acl); + spin_lock(&BTRFS_I(inode)->root->orphan_lock); + if (!list_empty(&BTRFS_I(inode)->i_orphan)) { + printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" + " list\n", inode->i_ino); + dump_stack(); + } + spin_unlock(&BTRFS_I(inode)->root->orphan_lock); + while(1) { ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); if (!ordered) @@ -3202,6 +3428,11 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry); if (ret) goto out_fail; + if (new_inode->i_nlink == 0) { + ret = btrfs_orphan_add(trans, new_inode); + if (ret) + goto out_fail; + } } ret = btrfs_set_inode_index(new_dir, old_inode); if (ret) diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c new file mode 100644 index 000000000000..3c0d52af4f80 --- /dev/null +++ b/fs/btrfs/orphan.c @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2008 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include "ctree.h" +#include "disk-io.h" + +int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 offset) +{ + struct btrfs_path *path; + struct btrfs_key key; + int ret = 0; + + key.objectid = BTRFS_ORPHAN_OBJECTID; + btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); + key.offset = offset; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_insert_empty_item(trans, root, path, &key, 0); + + btrfs_free_path(path); + return ret; +} + +int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 offset) +{ + struct btrfs_path *path; + struct btrfs_key key; + int ret = 0; + + key.objectid = BTRFS_ORPHAN_OBJECTID; + btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); + key.offset = offset; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret) + goto out; + + ret = btrfs_del_item(trans, root, path); + +out: + btrfs_free_path(path); + return ret; +} From 8e8a1e31f2780b7865d40a8c5142a04f2bcdcb86 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 24 Jul 2008 12:17:14 -0400 Subject: [PATCH 0618/2985] Btrfs: Fix a few functions that exit without stopping their transaction Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 +++- fs/btrfs/ioctl.c | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f92b297e7da5..e6a3ba9dd107 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3275,8 +3275,10 @@ next: ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) ret = -EIO; - if (ret < 0) + if (ret < 0) { + btrfs_end_transaction(trans, root); goto out; + } clear_extent_bits(&info->block_group_cache, key.objectid, key.objectid + key.offset - 1, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index faf081302d02..7d40778a90e4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -78,8 +78,10 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, leaf = btrfs_alloc_free_block(trans, root, root->leafsize, objectid, trans->transid, 0, 0, 0, 0); - if (IS_ERR(leaf)) - return PTR_ERR(leaf); + if (IS_ERR(leaf)) { + ret = PTR_ERR(leaf); + goto fail; + } btrfs_set_header_nritems(leaf, 0); btrfs_set_header_level(leaf, 0); From 45467261edb590fd5a2d676d9686edb82762fb3f Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Thu, 24 Jul 2008 12:17:15 -0400 Subject: [PATCH 0619/2985] Btrfs: Remove unused variable in fixup_tree_root_location Remove a unused variable 'path' in fixup_tree_root_location. Signed-off-by: Balaji Rao Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0c9ec8aa304a..8bc981c4f5ef 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1805,7 +1805,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, struct btrfs_root **sub_root, struct dentry *dentry) { - struct btrfs_path *path; struct btrfs_root_item *ri; if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) @@ -1813,9 +1812,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) return 0; - path = btrfs_alloc_path(); - BUG_ON(!path); - *sub_root = btrfs_read_fs_root(root->fs_info, location, dentry->d_name.name, dentry->d_name.len); @@ -1827,7 +1823,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); location->offset = 0; - btrfs_free_path(path); return 0; } From 974e35a82d022434efa50fe5f805ae14c542ba8c Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 24 Jul 2008 12:18:16 -0400 Subject: [PATCH 0620/2985] Btrfs: Properly release lock in pin_down_bytes When buffer isn't uptodate, pin_down_bytes may leave the tree locked after it returns. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e6a3ba9dd107..cdfb4ff4b459 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1485,8 +1485,8 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, struct extent_buffer *buf; buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { - if (btrfs_try_tree_lock(buf) && - btrfs_buffer_uptodate(buf, 0)) { + if (btrfs_buffer_uptodate(buf, 0) && + btrfs_try_tree_lock(buf)) { u64 transid = root->fs_info->running_transaction->transid; u64 header_transid = From 445dceb78f3445b9bcade90e93ca35cae6120172 Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 24 Jul 2008 12:19:32 -0400 Subject: [PATCH 0621/2985] Btrfs: Fix .. lookup corner case Inode ref item can be in the next leaf when we find "path->slots[0] == btrfs_header_nritems(...)". Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8bc981c4f5ef..cf9534b79aba 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1752,21 +1752,18 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, if (namelen == 2 && strcmp(name, "..") == 0) { struct btrfs_key key; struct extent_buffer *leaf; - u32 nritems; int slot; key.objectid = dir->i_ino; + key.offset = (u64)-1; btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); - key.offset = 0; + if (ret < 0 || path->slots[0] == 0) + goto out_err; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); BUG_ON(ret == 0); ret = 0; - leaf = path->nodes[0]; - slot = path->slots[0]; - nritems = btrfs_header_nritems(leaf); - if (slot >= nritems) - goto out_err; + slot = path->slots[0] - 1; btrfs_item_key_to_cpu(leaf, &key, slot); if (key.objectid != dir->i_ino || @@ -1980,16 +1977,15 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* special case for .., just use the back ref */ if (filp->f_pos == 1) { btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); - key.offset = 0; + key.offset = (u64)-1; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - BUG_ON(ret == 0); - leaf = path->nodes[0]; - slot = path->slots[0]; - nritems = btrfs_header_nritems(leaf); - if (slot >= nritems) { + if (ret < 0 || path->slots[0] == 0) { btrfs_release_path(root, path); goto read_dir_items; } + BUG_ON(ret == 0); + leaf = path->nodes[0]; + slot = path->slots[0] - 1; btrfs_item_key_to_cpu(leaf, &found_key, slot); btrfs_release_path(root, path); if (found_key.objectid != key.objectid || From 9652480bf48500885a30754b4a5c436b5b34456d Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 24 Jul 2008 12:19:49 -0400 Subject: [PATCH 0622/2985] Fix path slots selection in btrfs_search_forward We should decrease the found slot by one as btrfs_search_slot does when bin_search return 1 and node level > 0. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f2a94999c371..ab4ac0365c7f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2992,6 +2992,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, struct extent_buffer *cur; struct btrfs_key found_key; int slot; + int sret; u32 nritems; int level; int ret = 1; @@ -3009,7 +3010,7 @@ again: while(1) { nritems = btrfs_header_nritems(cur); level = btrfs_header_level(cur); - bin_search(cur, min_key, level, &slot); + sret = bin_search(cur, min_key, level, &slot); /* at level = 0, we're done, setup the path and exit */ if (level == 0) { @@ -3018,6 +3019,8 @@ again: btrfs_item_key_to_cpu(cur, &found_key, slot); goto out; } + if (sret && slot > 0) + slot--; /* * check this node pointer against the cache_only and * min_trans parameters. If it isn't in cache or is too From 5516e5957f4b99b19fffffa53bf9fbe7cc793249 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 24 Jul 2008 12:20:14 -0400 Subject: [PATCH 0623/2985] Btrfs: Null terminate strings passed in from userspace The 'char name[BTRFS_PATH_NAME_MAX]' member of struct btrfs_ioctl_vol_args is passed directly to strlen() after being copied from user. I haven't verified this, but in theory a userspace program could pass in an unterminated string and cause a kernel crash as strlen walks off the end of the array. This patch terminates the ->name string in all btrfs ioctl functions which currently use a 'struct btrfs_ioctl_vol_args'. Since the string is now properly terminated, it's length will never be longer than BTRFS_PATH_NAME_MAX so that error check has been removed. By the way, it might be better overall to just have the ioctl pass an unterminated string + length structure but I didn't bother with that since it'd change the kernel/user interface. Signed-off-by: Mark Fasheh Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7d40778a90e4..5e627746c4e8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -310,11 +310,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) ret = -EFAULT; goto out; } + + vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; namelen = strlen(vol_args->name); - if (namelen > BTRFS_VOL_NAME_MAX) { - ret = -EINVAL; - goto out; - } mutex_lock(&root->fs_info->volume_mutex); sizestr = vol_args->name; @@ -412,11 +410,8 @@ static noinline int btrfs_ioctl_snap_create(struct btrfs_root *root, goto out; } + vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; namelen = strlen(vol_args->name); - if (namelen > BTRFS_VOL_NAME_MAX) { - ret = -EINVAL; - goto out; - } if (strchr(vol_args->name, '/')) { ret = -EINVAL; goto out; @@ -487,6 +482,7 @@ long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) ret = -EFAULT; goto out; } + vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; ret = btrfs_init_new_device(root, vol_args->name); out: @@ -508,6 +504,7 @@ long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) ret = -EFAULT; goto out; } + vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; ret = btrfs_rm_device(root, vol_args->name); out: From 3a115f520f391b4ab14041bdd6eedb370d944fa6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jul 2008 12:25:50 -0400 Subject: [PATCH 0624/2985] Btrfs: Rev the disk format magic Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 012ad529cb18..07d321552dbe 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -40,7 +40,7 @@ extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; struct btrfs_ordered_sum; -#define BTRFS_MAGIC "_B5RfS_M" +#define BTRFS_MAGIC "_B6RfS_M" #define BTRFS_ACL_NOT_CACHED ((void *)-1) From 31153d81284934601d08110ac7698fd9a535e4c0 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 28 Jul 2008 15:32:19 -0400 Subject: [PATCH 0625/2985] Btrfs: Add a leaf reference cache Much of the IO done while dropping snapshots is done looking up leaves in the filesystem trees to see if they point to any extents and to drop the references on any extents found. This creates a cache so that IO isn't required. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/ctree.c | 4 +- fs/btrfs/ctree.h | 8 +- fs/btrfs/disk-io.c | 14 +++ fs/btrfs/extent-tree.c | 115 +++++++++++++++++++-- fs/btrfs/ref-cache.c | 226 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ref-cache.h | 72 +++++++++++++ fs/btrfs/transaction.c | 67 ++++++++---- 8 files changed, 476 insertions(+), 33 deletions(-) create mode 100644 fs/btrfs/ref-cache.c create mode 100644 fs/btrfs/ref-cache.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 5a0fd7b0e3e7..a4b38177abdb 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,7 +6,8 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ - extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o + extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ + ref-cache.o btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o else diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ab4ac0365c7f..245eb00435dd 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -165,7 +165,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); WARN_ON(btrfs_header_generation(buf) > trans->transid); - ret = btrfs_inc_ref(trans, new_root, buf); + ret = btrfs_inc_ref(trans, new_root, buf, 0); kfree(new_root); if (ret) @@ -232,7 +232,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(buf) > trans->transid); if (btrfs_header_generation(buf) != trans->transid) { different_trans = 1; - ret = btrfs_inc_ref(trans, root, buf); + ret = btrfs_inc_ref(trans, root, buf, 1); if (ret) return ret; } else { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 07d321552dbe..34ed23d64eb5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -592,6 +592,10 @@ struct btrfs_fs_info { u64 last_alloc; u64 last_data_alloc; + spinlock_t ref_cache_lock; + u64 total_ref_cache_size; + u64 running_ref_cache_size; + u64 avail_data_alloc_bits; u64 avail_metadata_alloc_bits; u64 avail_system_alloc_bits; @@ -613,6 +617,8 @@ struct btrfs_root { spinlock_t node_lock; struct extent_buffer *commit_root; + struct btrfs_leaf_ref_tree *ref_tree; + struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; @@ -1430,7 +1436,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, u64 search_end, struct btrfs_key *ins, u64 data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf); + struct extent_buffer *buf, int cache_ref); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 root_objectid, u64 ref_generation, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d60923967347..4f0e1d06c384 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -716,6 +716,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; + root->ref_tree = NULL; root->sectorsize = sectorsize; root->nodesize = nodesize; root->leafsize = leafsize; @@ -1165,12 +1166,19 @@ static int transaction_kthread(void *arg) vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); + printk("btrfs: total reference cache size %Lu\n", + root->fs_info->total_ref_cache_size); + mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; if (!cur) { mutex_unlock(&root->fs_info->trans_mutex); goto sleep; } + + printk("btrfs: running reference cache size %Lu\n", + root->fs_info->running_ref_cache_size); + now = get_seconds(); if (now < cur->start_time || now - cur->start_time < 30) { mutex_unlock(&root->fs_info->trans_mutex); @@ -1233,6 +1241,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); + spin_lock_init(&fs_info->ref_cache_lock); init_completion(&fs_info->kobj_unregister); fs_info->tree_root = tree_root; @@ -1699,6 +1708,11 @@ int close_ctree(struct btrfs_root *root) printk("btrfs: at unmount delalloc count %Lu\n", fs_info->delalloc_bytes); } + if (fs_info->total_ref_cache_size) { + printk("btrfs: at umount reference cache size %Lu\n", + fs_info->total_ref_cache_size); + } + if (fs_info->extent_root->node) free_extent_buffer(fs_info->extent_root->node); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cdfb4ff4b459..7b24f1511654 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -26,6 +26,7 @@ #include "transaction.h" #include "volumes.h" #include "locking.h" +#include "ref-cache.h" #define BLOCK_GROUP_DATA EXTENT_WRITEBACK #define BLOCK_GROUP_METADATA EXTENT_UPTODATE @@ -927,7 +928,7 @@ out: } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf) + struct extent_buffer *buf, int cache_ref) { u64 bytenr; u32 nritems; @@ -937,6 +938,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int level; int ret; int faili; + int nr_file_extents = 0; if (!root->ref_cows) return 0; @@ -959,6 +961,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (disk_bytenr == 0) continue; + if (buf != root->commit_root) + nr_file_extents++; + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(buf, fi), @@ -988,6 +993,53 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } } } + /* cache orignal leaf block's references */ + if (level == 0 && cache_ref && buf != root->commit_root) { + struct btrfs_leaf_ref *ref; + struct btrfs_extent_info *info; + + ref = btrfs_alloc_leaf_ref(nr_file_extents); + if (!ref) { + WARN_ON(1); + goto out; + } + + btrfs_item_key_to_cpu(buf, &ref->key, 0); + + ref->bytenr = buf->start; + ref->owner = btrfs_header_owner(buf); + ref->generation = btrfs_header_generation(buf); + ref->nritems = nr_file_extents; + info = ref->extents; + + for (i = 0; nr_file_extents > 0 && i < nritems; i++) { + u64 disk_bytenr; + btrfs_item_key_to_cpu(buf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf, i, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(buf, fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (disk_bytenr == 0) + continue; + + info->bytenr = disk_bytenr; + info->num_bytes = + btrfs_file_extent_disk_num_bytes(buf, fi); + info->objectid = key.objectid; + info->offset = key.offset; + info++; + } + + BUG_ON(!root->ref_tree); + ret = btrfs_add_leaf_ref(root, ref); + WARN_ON(ret); + btrfs_free_leaf_ref(ref); + } +out: return 0; fail: WARN_ON(1); @@ -2215,9 +2267,9 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } -static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *leaf) +static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *leaf) { u64 leaf_owner; u64 leaf_generation; @@ -2266,6 +2318,30 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } +static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_leaf_ref *ref) +{ + int i; + int ret; + struct btrfs_extent_info *info = ref->extents; + + mutex_unlock(&root->fs_info->alloc_mutex); + for (i = 0; i < ref->nritems; i++) { + mutex_lock(&root->fs_info->alloc_mutex); + ret = __btrfs_free_extent(trans, root, + info->bytenr, info->num_bytes, + ref->owner, ref->generation, + info->objectid, info->offset, 0); + mutex_unlock(&root->fs_info->alloc_mutex); + BUG_ON(ret); + info++; + } + mutex_lock(&root->fs_info->alloc_mutex); + + return 0; +} + static void noinline reada_walk_down(struct btrfs_root *root, struct extent_buffer *node, int slot) @@ -2341,6 +2417,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, struct extent_buffer *next; struct extent_buffer *cur; struct extent_buffer *parent; + struct btrfs_leaf_ref *ref; u32 blocksize; int ret; u32 refs; @@ -2370,7 +2447,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, btrfs_header_nritems(cur)) break; if (*level == 0) { - ret = drop_leaf_ref(trans, root, cur); + ret = drop_leaf_ref_no_cache(trans, root, cur); BUG_ON(ret); break; } @@ -2391,6 +2468,21 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, BUG_ON(ret); continue; } + + if (*level == 1) { + struct btrfs_key key; + btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); + ref = btrfs_lookup_leaf_ref(root, &key); + if (ref) { + ret = drop_leaf_ref(trans, root, ref); + BUG_ON(ret); + btrfs_remove_leaf_ref(root, ref); + btrfs_free_leaf_ref(ref); + *level = 0; + break; + } + } + next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); @@ -2398,7 +2490,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, if (path->slots[*level] == 0) reada_walk_down(root, cur, path->slots[*level]); - next = read_tree_block(root, bytenr, blocksize, ptr_gen); cond_resched(); @@ -2435,17 +2526,19 @@ out: WARN_ON(*level >= BTRFS_MAX_LEVEL); if (path->nodes[*level] == root->node) { - root_owner = root->root_key.objectid; parent = path->nodes[*level]; + bytenr = path->nodes[*level]->start; } else { parent = path->nodes[*level + 1]; - root_owner = btrfs_header_owner(parent); + bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); } + blocksize = btrfs_level_size(root, *level); + root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); - ret = __btrfs_free_extent(trans, root, path->nodes[*level]->start, - path->nodes[*level]->len, - root_owner, root_gen, 0, 0, 1); + + ret = __btrfs_free_extent(trans, root, bytenr, blocksize, + root_owner, root_gen, 0, 0, 1); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c new file mode 100644 index 000000000000..95a9faeb9dc4 --- /dev/null +++ b/fs/btrfs/ref-cache.c @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include "ctree.h" +#include "ref-cache.h" +#include "transaction.h" + +struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents) +{ + struct btrfs_leaf_ref *ref; + + ref = kmalloc(btrfs_leaf_ref_size(nr_extents), GFP_NOFS); + if (ref) { + memset(ref, 0, sizeof(*ref)); + atomic_set(&ref->usage, 1); + } + return ref; +} + +void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref) +{ + if (!ref) + return; + WARN_ON(atomic_read(&ref->usage) == 0); + if (atomic_dec_and_test(&ref->usage)) { + BUG_ON(ref->in_tree); + kfree(ref); + } +} + +static int comp_keys(struct btrfs_key *k1, struct btrfs_key *k2) +{ + if (k1->objectid > k2->objectid) + return 1; + if (k1->objectid < k2->objectid) + return -1; + if (k1->type > k2->type) + return 1; + if (k1->type < k2->type) + return -1; + if (k1->offset > k2->offset) + return 1; + if (k1->offset < k2->offset) + return -1; + return 0; +} + +static struct rb_node *tree_insert(struct rb_root *root, struct btrfs_key *key, + struct rb_node *node) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct btrfs_leaf_ref *entry; + int ret; + + while(*p) { + parent = *p; + entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); + WARN_ON(!entry->in_tree); + + ret = comp_keys(key, &entry->key); + if (ret < 0) + p = &(*p)->rb_left; + else if (ret > 0) + p = &(*p)->rb_right; + else + return parent; + } + + entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); + entry->in_tree = 1; + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *tree_search(struct rb_root *root, struct btrfs_key *key) +{ + struct rb_node * n = root->rb_node; + struct btrfs_leaf_ref *entry; + int ret; + + while(n) { + entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); + WARN_ON(!entry->in_tree); + + ret = comp_keys(key, &entry->key); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else + return n; + } + return NULL; +} + +int btrfs_remove_leaf_refs(struct btrfs_root *root) +{ + struct rb_node *rb; + struct btrfs_leaf_ref *ref = NULL; + struct btrfs_leaf_ref_tree *tree = root->ref_tree; + + if (!tree) + return 0; + + spin_lock(&tree->lock); + while(!btrfs_leaf_ref_tree_empty(tree)) { + tree->last = NULL; + rb = rb_first(&tree->root); + ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); + rb_erase(&ref->rb_node, &tree->root); + ref->in_tree = 0; + + spin_unlock(&tree->lock); + + btrfs_free_leaf_ref(ref); + + cond_resched(); + spin_lock(&tree->lock); + } + spin_unlock(&tree->lock); + return 0; +} + +struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, + struct btrfs_key *key) +{ + struct rb_node *rb; + struct btrfs_leaf_ref *ref = NULL; + struct btrfs_leaf_ref_tree *tree = root->ref_tree; + + if (!tree) + return NULL; + + spin_lock(&tree->lock); + if (tree->last && comp_keys(key, &tree->last->key) == 0) { + ref = tree->last; + } else { + rb = tree_search(&tree->root, key); + if (rb) { + ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); + tree->last = ref; + } + } + if (ref) + atomic_inc(&ref->usage); + spin_unlock(&tree->lock); + return ref; +} + +int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) +{ + int ret = 0; + struct rb_node *rb; + size_t size = btrfs_leaf_ref_size(ref->nritems); + struct btrfs_leaf_ref_tree *tree = root->ref_tree; + struct btrfs_transaction *trans = root->fs_info->running_transaction; + + spin_lock(&tree->lock); + rb = tree_insert(&tree->root, &ref->key, &ref->rb_node); + if (rb) { + ret = -EEXIST; + } else { + spin_lock(&root->fs_info->ref_cache_lock); + root->fs_info->total_ref_cache_size += size; + if (trans && tree->generation == trans->transid) + root->fs_info->running_ref_cache_size += size; + spin_unlock(&root->fs_info->ref_cache_lock); + + tree->last = ref; + atomic_inc(&ref->usage); + } + spin_unlock(&tree->lock); + return ret; +} + +int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) +{ + size_t size = btrfs_leaf_ref_size(ref->nritems); + struct btrfs_leaf_ref_tree *tree = root->ref_tree; + struct btrfs_transaction *trans = root->fs_info->running_transaction; + + BUG_ON(!ref->in_tree); + spin_lock(&tree->lock); + + spin_lock(&root->fs_info->ref_cache_lock); + root->fs_info->total_ref_cache_size -= size; + if (trans && tree->generation == trans->transid) + root->fs_info->running_ref_cache_size -= size; + spin_unlock(&root->fs_info->ref_cache_lock); + + if (tree->last == ref) { + struct rb_node *next = rb_next(&ref->rb_node); + if (next) { + tree->last = rb_entry(next, struct btrfs_leaf_ref, + rb_node); + } else + tree->last = NULL; + } + + rb_erase(&ref->rb_node, &tree->root); + ref->in_tree = 0; + + spin_unlock(&tree->lock); + + btrfs_free_leaf_ref(ref); + return 0; +} + diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h new file mode 100644 index 000000000000..79ecc47110f2 --- /dev/null +++ b/fs/btrfs/ref-cache.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +struct btrfs_extent_info { + u64 bytenr; + u64 num_bytes; + u64 objectid; + u64 offset; +}; + +struct btrfs_leaf_ref { + struct rb_node rb_node; + struct btrfs_key key; + int in_tree; + atomic_t usage; + + u64 bytenr; + u64 owner; + u64 generation; + int nritems; + struct btrfs_extent_info extents[]; +}; + +struct btrfs_leaf_ref_tree { + struct rb_root root; + struct btrfs_leaf_ref *last; + u64 generation; + spinlock_t lock; +}; + +static inline size_t btrfs_leaf_ref_size(int nr_extents) +{ + return sizeof(struct btrfs_leaf_ref) + + sizeof(struct btrfs_extent_info) * nr_extents; +} + +static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree) +{ + tree->root.rb_node = NULL; + tree->last = NULL; + tree->generation = 0; + spin_lock_init(&tree->lock); +} + +static inline int btrfs_leaf_ref_tree_empty(struct btrfs_leaf_ref_tree *tree) +{ + return RB_EMPTY_ROOT(&tree->root); +} + +void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree); +struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents); +void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref); +struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, + struct btrfs_key *key); +int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); +int btrfs_remove_leaf_refs(struct btrfs_root *root); +int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 768b0d223e68..543e5ee4033a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -24,6 +24,7 @@ #include "disk-io.h" #include "transaction.h" #include "locking.h" +#include "ref-cache.h" static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; @@ -31,6 +32,13 @@ extern struct kmem_cache *btrfs_transaction_cachep; #define BTRFS_ROOT_TRANS_TAG 0 +struct dirty_root { + struct list_head list; + struct btrfs_root *root; + struct btrfs_root *latest_root; + struct btrfs_leaf_ref_tree ref_tree; +}; + static noinline void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(transaction->use_count == 0); @@ -84,6 +92,7 @@ static noinline int join_transaction(struct btrfs_root *root) static noinline int record_root_in_trans(struct btrfs_root *root) { + struct dirty_root *dirty; u64 running_trans_id = root->fs_info->running_transaction->transid; if (root->ref_cows && root->last_trans < running_trans_id) { WARN_ON(root == root->fs_info->extent_root); @@ -91,7 +100,25 @@ static noinline int record_root_in_trans(struct btrfs_root *root) radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); + + dirty = kmalloc(sizeof(*dirty), GFP_NOFS); + BUG_ON(!dirty); + dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); + BUG_ON(!dirty->root); + + dirty->latest_root = root; + INIT_LIST_HEAD(&dirty->list); + btrfs_leaf_ref_tree_init(&dirty->ref_tree); + dirty->ref_tree.generation = running_trans_id; + root->commit_root = btrfs_root_node(root); + root->ref_tree = &dirty->ref_tree; + + memcpy(dirty->root, root, sizeof(*root)); + spin_lock_init(&dirty->root->node_lock); + mutex_init(&dirty->root->objectid_mutex); + dirty->root->node = root->commit_root; + dirty->root->commit_root = NULL; } else { WARN_ON(1); } @@ -310,12 +337,6 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, return 0; } -struct dirty_root { - struct list_head list; - struct btrfs_root *root; - struct btrfs_root *latest_root; -}; - int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, struct list_head *dead_list) @@ -325,8 +346,10 @@ int btrfs_add_dead_root(struct btrfs_root *root, dirty = kmalloc(sizeof(*dirty), GFP_NOFS); if (!dirty) return -ENOMEM; + btrfs_leaf_ref_tree_init(&dirty->ref_tree); dirty->root = root; dirty->latest_root = latest; + root->ref_tree = NULL; list_add(&dirty->list, dead_list); return 0; } @@ -354,11 +377,23 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, radix_tree_tag_clear(radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); + + BUG_ON(!root->ref_tree); + dirty = container_of(root->ref_tree, struct dirty_root, + ref_tree); + if (root->commit_root == root->node) { WARN_ON(root->node->start != btrfs_root_bytenr(&root->root_item)); + + BUG_ON(!btrfs_leaf_ref_tree_empty( + root->ref_tree)); free_extent_buffer(root->commit_root); root->commit_root = NULL; + root->ref_tree = NULL; + + kfree(dirty->root); + kfree(dirty); /* make sure to update the root on disk * so we get any updates to the block used @@ -370,23 +405,12 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, &root->root_item); continue; } - dirty = kmalloc(sizeof(*dirty), GFP_NOFS); - BUG_ON(!dirty); - dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); - BUG_ON(!dirty->root); memset(&root->root_item.drop_progress, 0, sizeof(struct btrfs_disk_key)); root->root_item.drop_level = 0; - - memcpy(dirty->root, root, sizeof(*root)); - dirty->root->node = root->commit_root; - dirty->latest_root = root; - spin_lock_init(&dirty->root->node_lock); - mutex_init(&dirty->root->objectid_mutex); - root->commit_root = NULL; - + root->ref_tree = NULL; root->root_key.offset = root->fs_info->generation; btrfs_set_root_bytenr(&root->root_item, root->node->start); @@ -409,6 +433,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, list_add(&dirty->list, list); } else { WARN_ON(1); + free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); } @@ -514,6 +539,8 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); + btrfs_remove_leaf_refs(dirty->root); + free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); @@ -698,6 +725,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, &dirty_fs_roots); BUG_ON(ret); + spin_lock(&root->fs_info->ref_cache_lock); + root->fs_info->running_ref_cache_size = 0; + spin_unlock(&root->fs_info->ref_cache_lock); + ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); From 017e5369eb353559d68a11d4a718faa634533821 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 28 Jul 2008 15:32:51 -0400 Subject: [PATCH 0626/2985] Btrfs: Leaf reference cache update This changes the reference cache to make a single cache per root instead of one cache per transaction, and to key by the byte number of the disk block instead of the keys inside. This makes it much less likely to have cache misses if a snapshot or something has an extra reference on a higher node or a leaf while the first transaction that added the leaf into the cache is dropping. Some throttling is added to functions that free blocks heavily so they wait for old transactions to drop. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 11 ++++++- fs/btrfs/disk-io.c | 8 +++-- fs/btrfs/extent-tree.c | 18 ++++------- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 8 ++--- fs/btrfs/ref-cache.c | 71 +++++++++--------------------------------- fs/btrfs/ref-cache.h | 18 +++++------ fs/btrfs/transaction.c | 34 +++++++------------- 8 files changed, 61 insertions(+), 109 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 34ed23d64eb5..4eca0aa1ce74 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -594,7 +594,6 @@ struct btrfs_fs_info { spinlock_t ref_cache_lock; u64 total_ref_cache_size; - u64 running_ref_cache_size; u64 avail_data_alloc_bits; u64 avail_metadata_alloc_bits; @@ -606,10 +605,18 @@ struct btrfs_fs_info { void *bdev_holder; }; +struct btrfs_leaf_ref_tree { + struct rb_root root; + struct btrfs_leaf_ref *last; + struct list_head list; + spinlock_t lock; +}; + /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. */ +struct dirty_root; struct btrfs_root { struct extent_buffer *node; @@ -618,6 +625,8 @@ struct btrfs_root { struct extent_buffer *commit_root; struct btrfs_leaf_ref_tree *ref_tree; + struct btrfs_leaf_ref_tree ref_tree_struct; + struct dirty_root *dirty_root; struct btrfs_root_item root_item; struct btrfs_key root_key; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4f0e1d06c384..eccdf13a95ac 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -40,6 +40,7 @@ #include "print-tree.h" #include "async-thread.h" #include "locking.h" +#include "ref-cache.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -737,6 +738,10 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->node_lock); spin_lock_init(&root->orphan_lock); mutex_init(&root->objectid_mutex); + + btrfs_leaf_ref_tree_init(&root->ref_tree_struct); + root->ref_tree = &root->ref_tree_struct; + memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -1176,9 +1181,6 @@ static int transaction_kthread(void *arg) goto sleep; } - printk("btrfs: running reference cache size %Lu\n", - root->fs_info->running_ref_cache_size); - now = get_seconds(); if (now < cur->start_time || now - cur->start_time < 30) { mutex_unlock(&root->fs_info->trans_mutex); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7b24f1511654..0e294cfaa60c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1004,8 +1004,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, goto out; } - btrfs_item_key_to_cpu(buf, &ref->key, 0); - ref->bytenr = buf->start; ref->owner = btrfs_header_owner(buf); ref->generation = btrfs_header_generation(buf); @@ -2387,19 +2385,15 @@ static void noinline reada_walk_down(struct btrfs_root *root, } } -/* - * we want to avoid as much random IO as we can with the alloc mutex - * held, so drop the lock and do the lookup, then do it again with the - * lock held. - */ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, u32 *refs) { + int ret; mutex_unlock(&root->fs_info->alloc_mutex); - lookup_extent_ref(NULL, root, start, len, refs); + ret = lookup_extent_ref(NULL, root, start, len, refs); cond_resched(); mutex_lock(&root->fs_info->alloc_mutex); - return lookup_extent_ref(NULL, root, start, len, refs); + return ret; } /* @@ -2468,11 +2462,11 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, BUG_ON(ret); continue; } - + if (*level == 1) { struct btrfs_key key; btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); - ref = btrfs_lookup_leaf_ref(root, &key); + ref = btrfs_lookup_leaf_ref(root, bytenr); if (ref) { ret = drop_leaf_ref(trans, root, ref); BUG_ON(ret); @@ -2482,7 +2476,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, break; } } - next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); @@ -2672,6 +2665,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = -EAGAIN; break; } + wake_up(&root->fs_info->transaction_throttle); } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e5ffb66ad320..3efec25e34b0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -347,7 +347,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_update_inode(trans, root, inode); } failed: - err = btrfs_end_transaction_throttle(trans, root); + err = btrfs_end_transaction(trans, root); out_unlock: unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cf9534b79aba..4f977ea5497e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2482,7 +2482,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); @@ -2535,7 +2535,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); @@ -2609,7 +2609,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); out_unlock: if (drop_on_err) @@ -3548,7 +3548,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction(trans, root); out_fail: if (drop_inode) { inode_dec_link_count(inode); diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index 95a9faeb9dc4..ec9587784a3d 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -29,6 +29,7 @@ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents) if (ref) { memset(ref, 0, sizeof(*ref)); atomic_set(&ref->usage, 1); + INIT_LIST_HEAD(&ref->list); } return ref; } @@ -44,40 +45,21 @@ void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref) } } -static int comp_keys(struct btrfs_key *k1, struct btrfs_key *k2) -{ - if (k1->objectid > k2->objectid) - return 1; - if (k1->objectid < k2->objectid) - return -1; - if (k1->type > k2->type) - return 1; - if (k1->type < k2->type) - return -1; - if (k1->offset > k2->offset) - return 1; - if (k1->offset < k2->offset) - return -1; - return 0; -} - -static struct rb_node *tree_insert(struct rb_root *root, struct btrfs_key *key, +static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, struct rb_node *node) { struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; struct btrfs_leaf_ref *entry; - int ret; while(*p) { parent = *p; entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); WARN_ON(!entry->in_tree); - ret = comp_keys(key, &entry->key); - if (ret < 0) + if (bytenr < entry->bytenr) p = &(*p)->rb_left; - else if (ret > 0) + else if (bytenr > entry->bytenr) p = &(*p)->rb_right; else return parent; @@ -90,20 +72,18 @@ static struct rb_node *tree_insert(struct rb_root *root, struct btrfs_key *key, return NULL; } -static struct rb_node *tree_search(struct rb_root *root, struct btrfs_key *key) +static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) { struct rb_node * n = root->rb_node; struct btrfs_leaf_ref *entry; - int ret; while(n) { entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); WARN_ON(!entry->in_tree); - ret = comp_keys(key, &entry->key); - if (ret < 0) + if (bytenr < entry->bytenr) n = n->rb_left; - else if (ret > 0) + else if (bytenr > entry->bytenr) n = n->rb_right; else return n; @@ -122,11 +102,11 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root) spin_lock(&tree->lock); while(!btrfs_leaf_ref_tree_empty(tree)) { - tree->last = NULL; rb = rb_first(&tree->root); ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); rb_erase(&ref->rb_node, &tree->root); ref->in_tree = 0; + list_del_init(&ref->list); spin_unlock(&tree->lock); @@ -140,7 +120,7 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root) } struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, - struct btrfs_key *key) + u64 bytenr) { struct rb_node *rb; struct btrfs_leaf_ref *ref = NULL; @@ -150,15 +130,9 @@ struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, return NULL; spin_lock(&tree->lock); - if (tree->last && comp_keys(key, &tree->last->key) == 0) { - ref = tree->last; - } else { - rb = tree_search(&tree->root, key); - if (rb) { - ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); - tree->last = ref; - } - } + rb = tree_search(&tree->root, bytenr); + if (rb) + ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); if (ref) atomic_inc(&ref->usage); spin_unlock(&tree->lock); @@ -171,21 +145,17 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) struct rb_node *rb; size_t size = btrfs_leaf_ref_size(ref->nritems); struct btrfs_leaf_ref_tree *tree = root->ref_tree; - struct btrfs_transaction *trans = root->fs_info->running_transaction; spin_lock(&tree->lock); - rb = tree_insert(&tree->root, &ref->key, &ref->rb_node); + rb = tree_insert(&tree->root, ref->bytenr, &ref->rb_node); if (rb) { ret = -EEXIST; } else { spin_lock(&root->fs_info->ref_cache_lock); root->fs_info->total_ref_cache_size += size; - if (trans && tree->generation == trans->transid) - root->fs_info->running_ref_cache_size += size; spin_unlock(&root->fs_info->ref_cache_lock); - - tree->last = ref; atomic_inc(&ref->usage); + list_add_tail(&ref->list, &tree->list); } spin_unlock(&tree->lock); return ret; @@ -195,28 +165,17 @@ int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { size_t size = btrfs_leaf_ref_size(ref->nritems); struct btrfs_leaf_ref_tree *tree = root->ref_tree; - struct btrfs_transaction *trans = root->fs_info->running_transaction; BUG_ON(!ref->in_tree); spin_lock(&tree->lock); spin_lock(&root->fs_info->ref_cache_lock); root->fs_info->total_ref_cache_size -= size; - if (trans && tree->generation == trans->transid) - root->fs_info->running_ref_cache_size -= size; spin_unlock(&root->fs_info->ref_cache_lock); - if (tree->last == ref) { - struct rb_node *next = rb_next(&ref->rb_node); - if (next) { - tree->last = rb_entry(next, struct btrfs_leaf_ref, - rb_node); - } else - tree->last = NULL; - } - rb_erase(&ref->rb_node, &tree->root); ref->in_tree = 0; + list_del_init(&ref->list); spin_unlock(&tree->lock); diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h index 79ecc47110f2..823c049f72f1 100644 --- a/fs/btrfs/ref-cache.h +++ b/fs/btrfs/ref-cache.h @@ -15,6 +15,8 @@ * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. */ +#ifndef __REFCACHE__ +#define __REFCACHE__ struct btrfs_extent_info { u64 bytenr; @@ -25,7 +27,6 @@ struct btrfs_extent_info { struct btrfs_leaf_ref { struct rb_node rb_node; - struct btrfs_key key; int in_tree; atomic_t usage; @@ -33,14 +34,9 @@ struct btrfs_leaf_ref { u64 owner; u64 generation; int nritems; - struct btrfs_extent_info extents[]; -}; -struct btrfs_leaf_ref_tree { - struct rb_root root; - struct btrfs_leaf_ref *last; - u64 generation; - spinlock_t lock; + struct list_head list; + struct btrfs_extent_info extents[]; }; static inline size_t btrfs_leaf_ref_size(int nr_extents) @@ -53,7 +49,7 @@ static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree) { tree->root.rb_node = NULL; tree->last = NULL; - tree->generation = 0; + INIT_LIST_HEAD(&tree->list); spin_lock_init(&tree->lock); } @@ -66,7 +62,9 @@ void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree); struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents); void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref); struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, - struct btrfs_key *key); + u64 bytenr); int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); int btrfs_remove_leaf_refs(struct btrfs_root *root); int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); + +#endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 543e5ee4033a..fcef3cae0c92 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -36,7 +36,6 @@ struct dirty_root { struct list_head list; struct btrfs_root *root; struct btrfs_root *latest_root; - struct btrfs_leaf_ref_tree ref_tree; }; static noinline void put_transaction(struct btrfs_transaction *transaction) @@ -108,13 +107,13 @@ static noinline int record_root_in_trans(struct btrfs_root *root) dirty->latest_root = root; INIT_LIST_HEAD(&dirty->list); - btrfs_leaf_ref_tree_init(&dirty->ref_tree); - dirty->ref_tree.generation = running_trans_id; root->commit_root = btrfs_root_node(root); - root->ref_tree = &dirty->ref_tree; + root->dirty_root = dirty; memcpy(dirty->root, root, sizeof(*root)); + dirty->root->ref_tree = &root->ref_tree_struct; + spin_lock_init(&dirty->root->node_lock); mutex_init(&dirty->root->objectid_mutex); dirty->root->node = root->commit_root; @@ -217,12 +216,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); - if (0 && cur_trans->in_commit && throttle) { + if (throttle && atomic_read(&root->fs_info->throttles)) { DEFINE_WAIT(wait); mutex_unlock(&root->fs_info->trans_mutex); prepare_to_wait(&root->fs_info->transaction_throttle, &wait, TASK_UNINTERRUPTIBLE); - schedule(); + if (atomic_read(&root->fs_info->throttles)) + schedule(); finish_wait(&root->fs_info->transaction_throttle, &wait); mutex_lock(&root->fs_info->trans_mutex); } @@ -333,6 +333,8 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, list_del_init(next); root = list_entry(next, struct btrfs_root, dirty_list); update_cowonly_root(trans, root); + if (root->fs_info->closing) + btrfs_remove_leaf_refs(root); } return 0; } @@ -346,10 +348,8 @@ int btrfs_add_dead_root(struct btrfs_root *root, dirty = kmalloc(sizeof(*dirty), GFP_NOFS); if (!dirty) return -ENOMEM; - btrfs_leaf_ref_tree_init(&dirty->ref_tree); dirty->root = root; dirty->latest_root = latest; - root->ref_tree = NULL; list_add(&dirty->list, dead_list); return 0; } @@ -379,18 +379,14 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, BTRFS_ROOT_TRANS_TAG); BUG_ON(!root->ref_tree); - dirty = container_of(root->ref_tree, struct dirty_root, - ref_tree); + dirty = root->dirty_root; if (root->commit_root == root->node) { WARN_ON(root->node->start != btrfs_root_bytenr(&root->root_item)); - BUG_ON(!btrfs_leaf_ref_tree_empty( - root->ref_tree)); free_extent_buffer(root->commit_root); root->commit_root = NULL; - root->ref_tree = NULL; kfree(dirty->root); kfree(dirty); @@ -410,7 +406,6 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, sizeof(struct btrfs_disk_key)); root->root_item.drop_level = 0; root->commit_root = NULL; - root->ref_tree = NULL; root->root_key.offset = root->fs_info->generation; btrfs_set_root_bytenr(&root->root_item, root->node->start); @@ -485,7 +480,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, while(!list_empty(list)) { struct btrfs_root *root; - dirty = list_entry(list->next, struct dirty_root, list); + dirty = list_entry(list->prev, struct dirty_root, list); list_del_init(&dirty->list); num_bytes = btrfs_root_used(&dirty->root->root_item); @@ -507,7 +502,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, if (err) ret = err; nr = trans->blocks_used; - ret = btrfs_end_transaction_throttle(trans, tree_root); + ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); mutex_unlock(&root->fs_info->drop_mutex); @@ -517,6 +512,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, } BUG_ON(ret); atomic_dec(&root->fs_info->throttles); + wake_up(&root->fs_info->transaction_throttle); mutex_lock(&root->fs_info->alloc_mutex); num_bytes -= btrfs_root_used(&dirty->root->root_item); @@ -539,8 +535,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); - btrfs_remove_leaf_refs(dirty->root); - free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); @@ -725,10 +719,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, &dirty_fs_roots); BUG_ON(ret); - spin_lock(&root->fs_info->ref_cache_lock); - root->fs_info->running_ref_cache_size = 0; - spin_unlock(&root->fs_info->ref_cache_lock); - ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); From 1a3f5d0400d786aec41ede15fec5710ad1a1d18b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jul 2008 16:11:35 -0400 Subject: [PATCH 0627/2985] Btrfs: Fix version.sh when used outside of an hg repo Signed-off-by: Chris Mason --- fs/btrfs/version.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/version.sh b/fs/btrfs/version.sh index fd9b53d39860..4d06497d2ec5 100644 --- a/fs/btrfs/version.sh +++ b/fs/btrfs/version.sh @@ -6,10 +6,10 @@ # Copyright 2008, Oracle # Released under the GNU GPLv2 -v="Btrfs v0.15" +v="v0.15" which hg > /dev/null -if [ $? == 0 ]; then +if [ -d .hg ] && [ $? == 0 ]; then last=$(hg tags | grep -m1 -o '^v[0-9.]\+') # now check if the repo has commits since then... From ab78c84de1ce4db1b2a2cef361625ad80abbab3f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jul 2008 16:15:18 -0400 Subject: [PATCH 0628/2985] Btrfs: Throttle operations if the reference cache gets too large A large reference cache is directly related to a lot of work pending for the cleaner thread. This throttles back new operations based on the size of the reference cache so the cleaner thread will be able to keep up. Overall, this actually makes the FS faster because the cleaner thread will be more likely to find things in cache. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 7 +++-- fs/btrfs/extent-tree.c | 1 + fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 10 +++---- fs/btrfs/transaction.c | 59 +++++++++++++++++++++++++++++++----------- fs/btrfs/transaction.h | 1 + 7 files changed, 58 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4eca0aa1ce74..5517dfc6f71c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -580,6 +580,7 @@ struct btrfs_fs_info { int do_barriers; int closing; atomic_t throttles; + atomic_t throttle_gen; u64 total_pinned; struct list_head dirty_cowonly_roots; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eccdf13a95ac..27ffa9b7ddc8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1171,8 +1171,10 @@ static int transaction_kthread(void *arg) vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); - printk("btrfs: total reference cache size %Lu\n", - root->fs_info->total_ref_cache_size); + if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { + printk("btrfs: total reference cache size %Lu\n", + root->fs_info->total_ref_cache_size); + } mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; @@ -1256,6 +1258,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); atomic_set(&fs_info->throttles, 0); + atomic_set(&fs_info->throttle_gen, 0); fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0e294cfaa60c..6290cf41d647 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2650,6 +2650,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root } } while(1) { + atomic_inc(&root->fs_info->throttle_gen); wret = walk_down_tree(trans, root, path, &level); if (wret > 0) break; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3efec25e34b0..ded5281f8463 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -974,6 +974,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) btrfs_btree_balance_dirty(root, 1); + btrfs_throttle(root); cond_resched(); } out: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4f977ea5497e..7c87f863d6f7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2482,7 +2482,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); @@ -2535,7 +2535,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { inode_dec_link_count(inode); @@ -2609,7 +2609,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); out_unlock: if (drop_on_err) @@ -3434,7 +3434,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, goto out_fail; out_fail: - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); out_unlock: return ret; } @@ -3548,7 +3548,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); out_fail: if (drop_inode) { inode_dec_link_count(inode); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index fcef3cae0c92..b8be6703189a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -202,35 +202,64 @@ static noinline int wait_for_commit(struct btrfs_root *root, return 0; } +void btrfs_throttle(struct btrfs_root *root) +{ + struct btrfs_fs_info *info = root->fs_info; + +harder: + if (atomic_read(&info->throttles)) { + DEFINE_WAIT(wait); + int thr; + int harder_count = 0; + thr = atomic_read(&info->throttle_gen); + + do { + prepare_to_wait(&info->transaction_throttle, + &wait, TASK_UNINTERRUPTIBLE); + if (!atomic_read(&info->throttles)) { + finish_wait(&info->transaction_throttle, &wait); + break; + } + schedule(); + finish_wait(&info->transaction_throttle, &wait); + } while (thr == atomic_read(&info->throttle_gen)); + + if (harder_count < 5 && + info->total_ref_cache_size > 5 * 1024 * 1024) { + harder_count++; + goto harder; + } + + if (harder_count < 10 && + info->total_ref_cache_size > 10 * 1024 * 1024) { + harder_count++; + goto harder; + } + } +} + static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, int throttle) { struct btrfs_transaction *cur_trans; + struct btrfs_fs_info *info = root->fs_info; - mutex_lock(&root->fs_info->trans_mutex); - cur_trans = root->fs_info->running_transaction; + mutex_lock(&info->trans_mutex); + cur_trans = info->running_transaction; WARN_ON(cur_trans != trans->transaction); WARN_ON(cur_trans->num_writers < 1); cur_trans->num_writers--; if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); - - if (throttle && atomic_read(&root->fs_info->throttles)) { - DEFINE_WAIT(wait); - mutex_unlock(&root->fs_info->trans_mutex); - prepare_to_wait(&root->fs_info->transaction_throttle, &wait, - TASK_UNINTERRUPTIBLE); - if (atomic_read(&root->fs_info->throttles)) - schedule(); - finish_wait(&root->fs_info->transaction_throttle, &wait); - mutex_lock(&root->fs_info->trans_mutex); - } - put_transaction(cur_trans); - mutex_unlock(&root->fs_info->trans_mutex); + mutex_unlock(&info->trans_mutex); memset(trans, 0, sizeof(*trans)); kmem_cache_free(btrfs_trans_handle_cachep, trans); + + if (throttle) + btrfs_throttle(root); + return 0; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 11fbdeceb26c..df2ca2aad1c0 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -91,4 +91,5 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root); +void btrfs_throttle(struct btrfs_root *root); #endif From 3bf10418675cb424724b5cb9d7725b234defe1fd Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 30 Jul 2008 09:24:37 -0400 Subject: [PATCH 0629/2985] Btrfs: async-thread: fix possible memory leak When kthread_run() returns failure, this worker hasn't been added to the list, so btrfs_stop_workers() won't free it. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 8d4cc4679d51..5fe6a0d532ed 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -187,6 +187,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) worker->task = kthread_run(worker_loop, worker, "btrfs"); worker->workers = workers; if (IS_ERR(worker->task)) { + kfree(worker); ret = PTR_ERR(worker->task); goto fail; } From f321e4910398cf7922265d269fb17fd26f312571 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 30 Jul 2008 09:26:11 -0400 Subject: [PATCH 0630/2985] Btrfs: Update and fix mount -o nodatacow To check whether a given file extent is referenced by multiple snapshots, the checker walks down the fs tree through dead root and checks all tree blocks in the path. We can easily detect whether a given tree block is directly referenced by other snapshot. We can also detect any indirect reference from other snapshot by checking reference's generation. The checker can always detect multiple references, but can't reliably detect cases of single reference. So btrfs may do file data cow even there is only one reference. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 9 +- fs/btrfs/extent-tree.c | 214 +++++++++++++++++++++++++---------------- fs/btrfs/inode.c | 6 +- fs/btrfs/transaction.c | 16 +-- fs/btrfs/transaction.h | 5 + 5 files changed, 148 insertions(+), 102 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5517dfc6f71c..83422088c629 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -617,7 +617,7 @@ struct btrfs_leaf_ref_tree { * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. */ -struct dirty_root; +struct btrfs_dirty_root; struct btrfs_root { struct extent_buffer *node; @@ -627,7 +627,7 @@ struct btrfs_root { struct extent_buffer *commit_root; struct btrfs_leaf_ref_tree *ref_tree; struct btrfs_leaf_ref_tree ref_tree_struct; - struct dirty_root *dirty_root; + struct btrfs_dirty_root *dirty_root; struct btrfs_root_item root_item; struct btrfs_key root_key; @@ -1399,9 +1399,8 @@ static inline struct dentry *fdentry(struct file *file) { } /* extent-tree.c */ -u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, - struct btrfs_path *count_path, - u64 expected_owner, u64 first_extent); +int btrfs_cross_ref_exists(struct btrfs_root *root, + struct btrfs_key *key, u64 bytenr); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6290cf41d647..fe1ddbd2bfd6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -802,70 +802,57 @@ out: return 0; } -u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, - struct btrfs_path *count_path, - u64 expected_owner, - u64 first_extent) + +static int get_reference_status(struct btrfs_root *root, u64 bytenr, + u64 parent_gen, u64 ref_objectid, + u64 *min_generation, u32 *ref_count) { struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_path *path; - u64 bytenr; - u64 found_objectid; - u64 found_owner; - u64 root_objectid = root->root_key.objectid; - u32 total_count = 0; - u32 extent_refs; - u32 cur_count; - u32 nritems; - int ret; + struct extent_buffer *leaf; + struct btrfs_extent_ref *ref_item; struct btrfs_key key; struct btrfs_key found_key; - struct extent_buffer *l; - struct btrfs_extent_item *item; - struct btrfs_extent_ref *ref_item; - int level = -1; + u64 root_objectid = root->root_key.objectid; + u64 ref_generation; + u32 nritems; + int ret; - /* FIXME, needs locking */ - BUG(); - - mutex_lock(&root->fs_info->alloc_mutex); - path = btrfs_alloc_path(); -again: - if (level == -1) - bytenr = first_extent; - else - bytenr = count_path->nodes[level]->start; - - cur_count = 0; key.objectid = bytenr; key.offset = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + path = btrfs_alloc_path(); + mutex_lock(&root->fs_info->alloc_mutex); ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto out; BUG_ON(ret == 0); - l = path->nodes[0]; - btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != bytenr || found_key.type != BTRFS_EXTENT_ITEM_KEY) { + ret = 1; goto out; } - item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - extent_refs = btrfs_extent_refs(l, item); + *ref_count = 0; + *min_generation = (u64)-1; + while (1) { - l = path->nodes[0]; - nritems = btrfs_header_nritems(l); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(extent_root, path); + if (ret < 0) + goto out; if (ret == 0) continue; break; } - btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != bytenr) break; @@ -874,57 +861,120 @@ again: continue; } - cur_count++; - ref_item = btrfs_item_ptr(l, path->slots[0], + ref_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); - found_objectid = btrfs_ref_root(l, ref_item); + ref_generation = btrfs_ref_generation(leaf, ref_item); + /* + * For (parent_gen > 0 && parent_gen > ref_gen): + * + * we reach here through the oldest root, therefore + * all other reference from same snapshot should have + * a larger generation. + */ + if ((root_objectid != btrfs_ref_root(leaf, ref_item)) || + (parent_gen > 0 && parent_gen > ref_generation) || + (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID && + ref_objectid != btrfs_ref_objectid(leaf, ref_item))) { + if (ref_count) + *ref_count = 2; + break; + } + + *ref_count = 1; + if (*min_generation > ref_generation) + *min_generation = ref_generation; - if (found_objectid != root_objectid) { - total_count = 2; - goto out; - } - if (level == -1) { - found_owner = btrfs_ref_objectid(l, ref_item); - if (found_owner != expected_owner) { - total_count = 2; - goto out; - } - /* - * nasty. we don't count a reference held by - * the running transaction. This allows nodatacow - * to avoid cow most of the time - */ - if (found_owner >= BTRFS_FIRST_FREE_OBJECTID && - btrfs_ref_generation(l, ref_item) == - root->fs_info->generation) { - extent_refs--; - } - } - total_count = 1; path->slots[0]++; } - /* - * if there is more than one reference against a data extent, - * we have to assume the other ref is another snapshot - */ - if (level == -1 && extent_refs > 1) { - total_count = 2; - goto out; - } - if (cur_count == 0) { - total_count = 0; - goto out; - } - if (level >= 0 && root->node == count_path->nodes[level]) - goto out; - level++; - btrfs_release_path(root, path); - goto again; - + ret = 0; out: - btrfs_free_path(path); mutex_unlock(&root->fs_info->alloc_mutex); - return total_count; + btrfs_free_path(path); + return ret; +} + +int btrfs_cross_ref_exists(struct btrfs_root *root, + struct btrfs_key *key, u64 bytenr) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *old_root; + struct btrfs_path *path = NULL; + struct extent_buffer *eb; + struct btrfs_file_extent_item *item; + u64 ref_generation; + u64 min_generation; + u64 extent_start; + u32 ref_count; + int level; + int ret; + + BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY); + ret = get_reference_status(root, bytenr, 0, key->objectid, + &min_generation, &ref_count); + if (ret) + return ret; + + if (ref_count != 1) + return 1; + + trans = btrfs_start_transaction(root, 0); + old_root = root->dirty_root->root; + ref_generation = old_root->root_key.offset; + + /* all references are created in running transaction */ + if (min_generation > ref_generation) { + ret = 0; + goto out; + } + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + path->skip_locking = 1; + /* if no item found, the extent is referenced by other snapshot */ + ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0); + if (ret) + goto out; + + eb = path->nodes[0]; + item = btrfs_item_ptr(eb, path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG || + btrfs_file_extent_disk_bytenr(eb, item) != bytenr) { + ret = 1; + goto out; + } + + for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) { + if (level >= 0) { + eb = path->nodes[level]; + if (!eb) + continue; + extent_start = eb->start; + } else + extent_start = bytenr; + + ret = get_reference_status(root, extent_start, ref_generation, + 0, &min_generation, &ref_count); + if (ret) + goto out; + + if (ref_count != 1) { + ret = 1; + goto out; + } + if (level >= 0) + ref_generation = btrfs_header_generation(eb); + } + ret = 0; +out: + if (path) + btrfs_free_path(path); + btrfs_end_transaction(trans, root); + return ret; } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7c87f863d6f7..3aa82cec6bf7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -249,11 +249,8 @@ again: if (bytenr == 0) goto not_found; - if (btrfs_count_snapshots_in_path(root, path, inode->i_ino, - bytenr) != 1) { + if (btrfs_cross_ref_exists(root, &found_key, bytenr)) goto not_found; - } - /* * we may be called by the resizer, make sure we're inside * the limits of the FS @@ -277,6 +274,7 @@ loop: goto again; not_found: + btrfs_release_path(root, path); cow_file_range(inode, start, end); start = end + 1; goto loop; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b8be6703189a..216f31571620 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -32,12 +32,6 @@ extern struct kmem_cache *btrfs_transaction_cachep; #define BTRFS_ROOT_TRANS_TAG 0 -struct dirty_root { - struct list_head list; - struct btrfs_root *root; - struct btrfs_root *latest_root; -}; - static noinline void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(transaction->use_count == 0); @@ -91,7 +85,7 @@ static noinline int join_transaction(struct btrfs_root *root) static noinline int record_root_in_trans(struct btrfs_root *root) { - struct dirty_root *dirty; + struct btrfs_dirty_root *dirty; u64 running_trans_id = root->fs_info->running_transaction->transid; if (root->ref_cows && root->last_trans < running_trans_id) { WARN_ON(root == root->fs_info->extent_root); @@ -372,7 +366,7 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, struct list_head *dead_list) { - struct dirty_root *dirty; + struct btrfs_dirty_root *dirty; dirty = kmalloc(sizeof(*dirty), GFP_NOFS); if (!dirty) @@ -387,7 +381,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, struct radix_tree_root *radix, struct list_head *list) { - struct dirty_root *dirty; + struct btrfs_dirty_root *dirty; struct btrfs_root *gang[8]; struct btrfs_root *root; int i; @@ -498,7 +492,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) static noinline int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { - struct dirty_root *dirty; + struct btrfs_dirty_root *dirty; struct btrfs_trans_handle *trans; unsigned long nr; u64 num_bytes; @@ -509,7 +503,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, while(!list_empty(list)) { struct btrfs_root *root; - dirty = list_entry(list->prev, struct dirty_root, list); + dirty = list_entry(list->prev, struct btrfs_dirty_root, list); list_del_init(&dirty->list); num_bytes = btrfs_root_used(&dirty->root->root_item); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index df2ca2aad1c0..f5adb23151fb 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -52,6 +52,11 @@ struct btrfs_pending_snapshot { struct list_head list; }; +struct btrfs_dirty_root { + struct list_head list; + struct btrfs_root *root; + struct btrfs_root *latest_root; +}; static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, struct inode *inode) From 33958dc6d38fb4ca7e62273855fcb2db7e616263 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 30 Jul 2008 10:29:12 -0400 Subject: [PATCH 0631/2985] Btrfs: Fix verify_parent_transid It was incorrectly clearing the up to date flag on the buffer even when the buffer properly verified. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent_io.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 27ffa9b7ddc8..ec1ba8ddb35f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -235,8 +235,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, (unsigned long long)parent_transid, (unsigned long long)btrfs_header_generation(eb)); ret = 1; -out: clear_extent_buffer_uptodate(io_tree, eb); +out: unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); return ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e3547a992d5c..964ec1622d66 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2647,6 +2647,8 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, return eb->first_page; i += eb->start >> PAGE_CACHE_SHIFT; mapping = eb->first_page->mapping; + if (!mapping) + return NULL; read_lock_irq(&mapping->tree_lock); p = radix_tree_lookup(&mapping->page_tree, i); read_unlock_irq(&mapping->tree_lock); @@ -2908,7 +2910,8 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, GFP_NOFS); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); - ClearPageUptodate(page); + if (page) + ClearPageUptodate(page); } return 0; } From bcc63abbf3e9bf948a1b0129b3e6120ec7d7f698 Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 30 Jul 2008 16:29:20 -0400 Subject: [PATCH 0632/2985] Btrfs: implement memory reclaim for leaf reference cache The memory reclaiming issue happens when snapshot exists. In that case, some cache entries may not be used during old snapshot dropping, so they will remain in the cache until umount. The patch adds a field to struct btrfs_leaf_ref to record create time. Besides, the patch makes all dead roots of a given snapshot linked together in order of create time. After a old snapshot was completely dropped, we check the dead root list and remove all cache entries created before the oldest dead root in the list. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 1 - fs/btrfs/ctree.h | 3 ++- fs/btrfs/dir-item.c | 1 - fs/btrfs/disk-io.c | 5 +++-- fs/btrfs/extent-tree.c | 18 ++++++++-------- fs/btrfs/extent_io.c | 1 - fs/btrfs/file-item.c | 1 - fs/btrfs/file.c | 1 - fs/btrfs/inode.c | 22 +++++++++---------- fs/btrfs/locking.c | 1 - fs/btrfs/print-tree.c | 1 - fs/btrfs/ref-cache.c | 48 ++++++++++++++++++++++-------------------- fs/btrfs/ref-cache.h | 11 +++++----- fs/btrfs/transaction.c | 40 ++++++++++++++++++++++++++--------- fs/btrfs/volumes.c | 1 - 15 files changed, 86 insertions(+), 69 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 245eb00435dd..c4792062dd53 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3275,4 +3275,3 @@ int btrfs_previous_item(struct btrfs_root *root, } return 1; } - diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 83422088c629..be16cd49ef69 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -666,7 +666,8 @@ struct btrfs_root { /* the dirty list is only used by non-reference counted roots */ struct list_head dirty_list; - spinlock_t orphan_lock; + spinlock_t list_lock; + struct list_head dead_list; struct list_head orphan_list; }; diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index eb4dd3d75cf9..125094617fe8 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -340,4 +340,3 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, } return 0; } - diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ec1ba8ddb35f..e826730d750f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -735,8 +735,9 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, INIT_LIST_HEAD(&root->dirty_list); INIT_LIST_HEAD(&root->orphan_list); + INIT_LIST_HEAD(&root->dead_list); spin_lock_init(&root->node_lock); - spin_lock_init(&root->orphan_lock); + spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); btrfs_leaf_ref_tree_init(&root->ref_tree_struct); @@ -1717,7 +1718,7 @@ int close_ctree(struct btrfs_root *root) printk("btrfs: at umount reference cache size %Lu\n", fs_info->total_ref_cache_size); } - + if (fs_info->extent_root->node) free_extent_buffer(fs_info->extent_root->node); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe1ddbd2bfd6..37ca8df30c30 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -867,8 +867,8 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, /* * For (parent_gen > 0 && parent_gen > ref_gen): * - * we reach here through the oldest root, therefore - * all other reference from same snapshot should have + * we reach here through the oldest root, therefore + * all other reference from same snapshot should have * a larger generation. */ if ((root_objectid != btrfs_ref_root(leaf, ref_item)) || @@ -954,7 +954,7 @@ int btrfs_cross_ref_exists(struct btrfs_root *root, if (!eb) continue; extent_start = eb->start; - } else + } else extent_start = bytenr; ret = get_reference_status(root, extent_start, ref_generation, @@ -1048,7 +1048,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_leaf_ref *ref; struct btrfs_extent_info *info; - ref = btrfs_alloc_leaf_ref(nr_file_extents); + ref = btrfs_alloc_leaf_ref(root, nr_file_extents); if (!ref) { WARN_ON(1); goto out; @@ -1059,7 +1059,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, ref->generation = btrfs_header_generation(buf); ref->nritems = nr_file_extents; info = ref->extents; - + for (i = 0; nr_file_extents > 0 && i < nritems; i++) { u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); @@ -1085,7 +1085,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, BUG_ON(!root->ref_tree); ret = btrfs_add_leaf_ref(root, ref); WARN_ON(ret); - btrfs_free_leaf_ref(ref); + btrfs_free_leaf_ref(root, ref); } out: return 0; @@ -2316,7 +2316,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, } static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, struct extent_buffer *leaf) { u64 leaf_owner; @@ -2367,7 +2367,7 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, } static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, struct btrfs_leaf_ref *ref) { int i; @@ -2521,7 +2521,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, ret = drop_leaf_ref(trans, root, ref); BUG_ON(ret); btrfs_remove_leaf_ref(root, ref); - btrfs_free_leaf_ref(ref); + btrfs_free_leaf_ref(root, ref); *level = 0; break; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 964ec1622d66..5368e3b6eb96 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3497,4 +3497,3 @@ out: return ret; } EXPORT_SYMBOL(try_release_extent_buffer); - diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index afe42d00b5a6..2311061f070e 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -422,4 +422,3 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, BUG_ON(ret); return ret; } - diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ded5281f8463..412ab4a26382 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1095,4 +1095,3 @@ struct file_operations btrfs_file_operations = { .compat_ioctl = btrfs_ioctl, #endif }; - diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3aa82cec6bf7..7af8be076ee5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -835,17 +835,17 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; - spin_lock(&root->orphan_lock); + spin_lock(&root->list_lock); /* already on the orphan list, we're good */ if (!list_empty(&BTRFS_I(inode)->i_orphan)) { - spin_unlock(&root->orphan_lock); + spin_unlock(&root->list_lock); return 0; } list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); - spin_unlock(&root->orphan_lock); + spin_unlock(&root->list_lock); /* * insert an orphan item to track this unlinked/truncated file @@ -864,20 +864,20 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; - spin_lock(&root->orphan_lock); + spin_lock(&root->list_lock); if (list_empty(&BTRFS_I(inode)->i_orphan)) { - spin_unlock(&root->orphan_lock); + spin_unlock(&root->list_lock); return 0; } list_del_init(&BTRFS_I(inode)->i_orphan); if (!trans) { - spin_unlock(&root->orphan_lock); + spin_unlock(&root->list_lock); return 0; } - spin_unlock(&root->orphan_lock); + spin_unlock(&root->list_lock); ret = btrfs_del_orphan_item(trans, root, inode->i_ino); @@ -973,9 +973,9 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) * add this inode to the orphan list so btrfs_orphan_del does * the proper thing when we hit it */ - spin_lock(&root->orphan_lock); + spin_lock(&root->list_lock); list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); - spin_unlock(&root->orphan_lock); + spin_unlock(&root->list_lock); /* * if this is a bad inode, means we actually succeeded in @@ -3269,13 +3269,13 @@ void btrfs_destroy_inode(struct inode *inode) BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) posix_acl_release(BTRFS_I(inode)->i_default_acl); - spin_lock(&BTRFS_I(inode)->root->orphan_lock); + spin_lock(&BTRFS_I(inode)->root->list_lock); if (!list_empty(&BTRFS_I(inode)->i_orphan)) { printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" " list\n", inode->i_ino); dump_stack(); } - spin_unlock(&BTRFS_I(inode)->root->orphan_lock); + spin_unlock(&BTRFS_I(inode)->root->list_lock); while(1) { ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index d617c29787fa..d43e14c7471a 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -56,4 +56,3 @@ int btrfs_tree_locked(struct extent_buffer *eb) { return mutex_is_locked(&eb->mutex); } - diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 14d863720302..f1374d597a17 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -198,4 +198,3 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) free_extent_buffer(next); } } - diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index ec9587784a3d..272b9890c982 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -21,12 +21,18 @@ #include "ref-cache.h" #include "transaction.h" -struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents) +struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, + int nr_extents) { struct btrfs_leaf_ref *ref; + size_t size = btrfs_leaf_ref_size(nr_extents); - ref = kmalloc(btrfs_leaf_ref_size(nr_extents), GFP_NOFS); + ref = kmalloc(size, GFP_NOFS); if (ref) { + spin_lock(&root->fs_info->ref_cache_lock); + root->fs_info->total_ref_cache_size += size; + spin_unlock(&root->fs_info->ref_cache_lock); + memset(ref, 0, sizeof(*ref)); atomic_set(&ref->usage, 1); INIT_LIST_HEAD(&ref->list); @@ -34,14 +40,20 @@ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents) return ref; } -void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref) +void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { if (!ref) return; WARN_ON(atomic_read(&ref->usage) == 0); if (atomic_dec_and_test(&ref->usage)) { + size_t size = btrfs_leaf_ref_size(ref->nritems); + BUG_ON(ref->in_tree); kfree(ref); + + spin_lock(&root->fs_info->ref_cache_lock); + root->fs_info->total_ref_cache_size -= size; + spin_unlock(&root->fs_info->ref_cache_lock); } } @@ -64,7 +76,7 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, else return parent; } - + entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); entry->in_tree = 1; rb_link_node(node, parent, p); @@ -91,9 +103,8 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) return NULL; } -int btrfs_remove_leaf_refs(struct btrfs_root *root) +int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen) { - struct rb_node *rb; struct btrfs_leaf_ref *ref = NULL; struct btrfs_leaf_ref_tree *tree = root->ref_tree; @@ -101,17 +112,18 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root) return 0; spin_lock(&tree->lock); - while(!btrfs_leaf_ref_tree_empty(tree)) { - rb = rb_first(&tree->root); - ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); + while(!list_empty(&tree->list)) { + ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list); + BUG_ON(!ref->in_tree); + if (ref->root_gen > max_root_gen) + break; + rb_erase(&ref->rb_node, &tree->root); ref->in_tree = 0; list_del_init(&ref->list); spin_unlock(&tree->lock); - - btrfs_free_leaf_ref(ref); - + btrfs_free_leaf_ref(root, ref); cond_resched(); spin_lock(&tree->lock); } @@ -143,7 +155,6 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { int ret = 0; struct rb_node *rb; - size_t size = btrfs_leaf_ref_size(ref->nritems); struct btrfs_leaf_ref_tree *tree = root->ref_tree; spin_lock(&tree->lock); @@ -151,9 +162,6 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) if (rb) { ret = -EEXIST; } else { - spin_lock(&root->fs_info->ref_cache_lock); - root->fs_info->total_ref_cache_size += size; - spin_unlock(&root->fs_info->ref_cache_lock); atomic_inc(&ref->usage); list_add_tail(&ref->list, &tree->list); } @@ -163,15 +171,10 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { - size_t size = btrfs_leaf_ref_size(ref->nritems); struct btrfs_leaf_ref_tree *tree = root->ref_tree; BUG_ON(!ref->in_tree); spin_lock(&tree->lock); - - spin_lock(&root->fs_info->ref_cache_lock); - root->fs_info->total_ref_cache_size -= size; - spin_unlock(&root->fs_info->ref_cache_lock); rb_erase(&ref->rb_node, &tree->root); ref->in_tree = 0; @@ -179,7 +182,6 @@ int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) spin_unlock(&tree->lock); - btrfs_free_leaf_ref(ref); + btrfs_free_leaf_ref(root, ref); return 0; } - diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h index 823c049f72f1..c361b321c0c3 100644 --- a/fs/btrfs/ref-cache.h +++ b/fs/btrfs/ref-cache.h @@ -30,6 +30,7 @@ struct btrfs_leaf_ref { int in_tree; atomic_t usage; + u64 root_gen; u64 bytenr; u64 owner; u64 generation; @@ -41,14 +42,13 @@ struct btrfs_leaf_ref { static inline size_t btrfs_leaf_ref_size(int nr_extents) { - return sizeof(struct btrfs_leaf_ref) + + return sizeof(struct btrfs_leaf_ref) + sizeof(struct btrfs_extent_info) * nr_extents; } static inline void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree) { tree->root.rb_node = NULL; - tree->last = NULL; INIT_LIST_HEAD(&tree->list); spin_lock_init(&tree->lock); } @@ -59,12 +59,13 @@ static inline int btrfs_leaf_ref_tree_empty(struct btrfs_leaf_ref_tree *tree) } void btrfs_leaf_ref_tree_init(struct btrfs_leaf_ref_tree *tree); -struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(int nr_extents); -void btrfs_free_leaf_ref(struct btrfs_leaf_ref *ref); +struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, + int nr_extents); +void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, u64 bytenr); int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); -int btrfs_remove_leaf_refs(struct btrfs_root *root); +int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen); int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 216f31571620..52c5524896a3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -98,20 +98,24 @@ static noinline int record_root_in_trans(struct btrfs_root *root) BUG_ON(!dirty); dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); BUG_ON(!dirty->root); - dirty->latest_root = root; INIT_LIST_HEAD(&dirty->list); root->commit_root = btrfs_root_node(root); - root->dirty_root = dirty; memcpy(dirty->root, root, sizeof(*root)); - dirty->root->ref_tree = &root->ref_tree_struct; - spin_lock_init(&dirty->root->node_lock); + spin_lock_init(&dirty->root->list_lock); mutex_init(&dirty->root->objectid_mutex); + INIT_LIST_HEAD(&dirty->root->dead_list); dirty->root->node = root->commit_root; dirty->root->commit_root = NULL; + + spin_lock(&root->list_lock); + list_add(&dirty->root->dead_list, &root->dead_list); + spin_unlock(&root->list_lock); + + root->dirty_root = dirty; } else { WARN_ON(1); } @@ -356,8 +360,6 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, list_del_init(next); root = list_entry(next, struct btrfs_root, dirty_list); update_cowonly_root(trans, root); - if (root->fs_info->closing) - btrfs_remove_leaf_refs(root); } return 0; } @@ -410,7 +412,11 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, free_extent_buffer(root->commit_root); root->commit_root = NULL; - + + spin_lock(&root->list_lock); + list_del_init(&dirty->root->dead_list); + spin_unlock(&root->list_lock); + kfree(dirty->root); kfree(dirty); @@ -497,6 +503,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, unsigned long nr; u64 num_bytes; u64 bytes_used; + u64 max_useless; int ret = 0; int err; @@ -554,10 +561,25 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, } mutex_unlock(&root->fs_info->drop_mutex); + spin_lock(&root->list_lock); + list_del_init(&dirty->root->dead_list); + if (!list_empty(&root->dead_list)) { + struct btrfs_root *oldest; + oldest = list_entry(root->dead_list.prev, + struct btrfs_root, dead_list); + max_useless = oldest->root_key.offset - 1; + } else { + max_useless = root->root_key.offset - 1; + } + spin_unlock(&root->list_lock); + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); + ret = btrfs_remove_leaf_refs(root, max_useless); + BUG_ON(ret); + free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); @@ -785,10 +807,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); put_transaction(cur_trans); + list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); if (root->fs_info->closing) list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots); - else - list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); mutex_unlock(&root->fs_info->trans_mutex); kmem_cache_free(btrfs_trans_handle_cachep, trans); @@ -814,4 +835,3 @@ again: } return 0; } - diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5e6ee7a6f738..18db4cbe2794 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2527,4 +2527,3 @@ again: error: return ret; } - From 0ee0fda06b943d7ef65f3cec50ab9d427a2baf97 Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Wed, 30 Jul 2008 16:54:26 -0400 Subject: [PATCH 0633/2985] Btrfs: Add compatibility for kernels >= 2.6.27-rc1 Add a couple of #if's to follow API changes. Signed-off-by: Sven Wegener Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 23 +++++++++++++++++++++++ fs/btrfs/file.c | 4 ++++ fs/btrfs/inode.c | 12 ++++++++++-- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5368e3b6eb96..1cf4baba4342 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2649,9 +2649,24 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, mapping = eb->first_page->mapping; if (!mapping) return NULL; + + /* + * extent_buffer_page is only called after pinning the page + * by increasing the reference count. So we know the page must + * be in the radix tree. + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) + rcu_read_lock(); +#else read_lock_irq(&mapping->tree_lock); +#endif p = radix_tree_lookup(&mapping->page_tree, i); + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) + rcu_read_unlock(); +#else read_unlock_irq(&mapping->tree_lock); +#endif return p; } @@ -2843,13 +2858,21 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, } } clear_page_dirty_for_io(page); +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) + spin_lock_irq(&page->mapping->tree_lock); +#else read_lock_irq(&page->mapping->tree_lock); +#endif if (!PageDirty(page)) { radix_tree_tag_clear(&page->mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) + spin_unlock_irq(&page->mapping->tree_lock); +#else read_unlock_irq(&page->mapping->tree_lock); +#endif unlock_page(page); } return 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 412ab4a26382..388ac397c2c8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -886,7 +886,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, #ifdef REMOVE_SUID_PATH err = remove_suid(&file->f_path); #else +# if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) + err = file_remove_suid(file); +# else err = remove_suid(fdentry(file)); +# endif #endif if (err) goto out_nolock; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7af8be076ee5..c4afa9d78da9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3293,7 +3293,9 @@ void btrfs_destroy_inode(struct inode *inode) kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) +static void init_once(void *foo) +#elif LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) static void init_once(struct kmem_cache * cachep, void *foo) #else static void init_once(void * foo, struct kmem_cache * cachep, @@ -3321,7 +3323,9 @@ void btrfs_destroy_cachep(void) struct kmem_cache *btrfs_cache_create(const char *name, size_t size, unsigned long extra_flags, -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) + void (*ctor)(void *) +#elif LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) void (*ctor)(struct kmem_cache *, void *) #else void (*ctor)(void *, struct kmem_cache *, @@ -3561,8 +3565,12 @@ static int btrfs_set_page_dirty(struct page *page) return __set_page_dirty_nobuffers(page); } +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) +static int btrfs_permission(struct inode *inode, int mask) +#else static int btrfs_permission(struct inode *inode, int mask, struct nameidata *nd) +#endif { if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE)) return -EACCES; From 47ac14fa0f5306c6b97203f4f086bf1fa21dfddb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jul 2008 09:46:18 -0400 Subject: [PATCH 0634/2985] Btrfs: Add missing hunk from Yan Zheng's cache reclaim patch Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 37ca8df30c30..4765248000fd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1054,6 +1054,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, goto out; } + ref->root_gen = root->root_key.offset; ref->bytenr = buf->start; ref->owner = btrfs_header_owner(buf); ref->generation = btrfs_header_generation(buf); From 37d1aeee3990385e9bb436c50c2f7e120a668df6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jul 2008 10:48:37 -0400 Subject: [PATCH 0635/2985] Btrfs: Throttle tuning This avoids waiting for transactions with pages locked by breaking out the code to wait for the current transaction to close into a function called by btrfs_throttle. It also lowers the limits for where we start throttling. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 +++ fs/btrfs/file.c | 2 +- fs/btrfs/transaction.c | 38 +++++++++++++++++++++++++++----------- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4765248000fd..b003b4364ddb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2526,6 +2526,9 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, *level = 0; break; } + if (printk_ratelimit()) + printk("leaf ref miss for bytenr %llu\n", + (unsigned long long)bytenr); } next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 388ac397c2c8..d3f2fe0b7c6c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -253,7 +253,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); if (!trans) { err = -ENOMEM; goto out_unlock; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 52c5524896a3..66af5140c8ce 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -124,17 +124,12 @@ static noinline int record_root_in_trans(struct btrfs_root *root) return 0; } -struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, - int num_blocks, int join) +static void wait_current_trans(struct btrfs_root *root) { - struct btrfs_trans_handle *h = - kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); struct btrfs_transaction *cur_trans; - int ret; - mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; - if (cur_trans && cur_trans->blocked && !join) { + if (cur_trans && cur_trans->blocked) { DEFINE_WAIT(wait); cur_trans->use_count++; while(1) { @@ -154,6 +149,18 @@ struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, } put_transaction(cur_trans); } +} + +struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, + int num_blocks, int join) +{ + struct btrfs_trans_handle *h = + kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); + int ret; + + mutex_lock(&root->fs_info->trans_mutex); + if (!join) + wait_current_trans(root); ret = join_transaction(root); BUG_ON(ret); @@ -200,7 +207,7 @@ static noinline int wait_for_commit(struct btrfs_root *root, return 0; } -void btrfs_throttle(struct btrfs_root *root) +static void throttle_on_drops(struct btrfs_root *root) { struct btrfs_fs_info *info = root->fs_info; @@ -223,19 +230,28 @@ harder: } while (thr == atomic_read(&info->throttle_gen)); if (harder_count < 5 && - info->total_ref_cache_size > 5 * 1024 * 1024) { + info->total_ref_cache_size > 1 * 1024 * 1024) { harder_count++; goto harder; } if (harder_count < 10 && - info->total_ref_cache_size > 10 * 1024 * 1024) { + info->total_ref_cache_size > 5 * 1024 * 1024) { harder_count++; goto harder; } } } +void btrfs_throttle(struct btrfs_root *root) +{ + mutex_lock(&root->fs_info->trans_mutex); + wait_current_trans(root); + mutex_unlock(&root->fs_info->trans_mutex); + + throttle_on_drops(root); +} + static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, int throttle) { @@ -256,7 +272,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); if (throttle) - btrfs_throttle(root); + throttle_on_drops(root); return 0; } From 61b4944018449003ac5f9757f4d125dce519cf51 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jul 2008 15:42:53 -0400 Subject: [PATCH 0636/2985] Btrfs: Fix streaming read performance with checksumming on Large streaming reads make for large bios, which means each entry on the list async work queues represents a large amount of data. IO congestion throttling on the device was kicking in before the async worker threads decided a single thread was busy and needed some help. The end result was that a streaming read would result in a single CPU running at 100% instead of balancing the work off to other CPUs. This patch also changes the pre-IO checksum lookup done by reads to work on a per-bio basis instead of a per-page. This results in many extra btree lookups on large streaming reads. Doing the checksum lookup right before bio submit allows us to reuse searches while processing adjacent offsets. Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 2 +- fs/btrfs/ctree.h | 2 ++ fs/btrfs/disk-io.c | 15 ++++++++ fs/btrfs/file-item.c | 77 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/inode.c | 57 +++--------------------------- 5 files changed, 99 insertions(+), 54 deletions(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 5fe6a0d532ed..bc2980c433ef 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -160,7 +160,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, int max) INIT_LIST_HEAD(&workers->idle_list); spin_lock_init(&workers->lock); workers->max_workers = max; - workers->idle_thresh = 64; + workers->idle_thresh = 32; } /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index be16cd49ef69..d788ab0dcd96 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1613,6 +1613,8 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key *location, int mod); /* file-item.c */ +int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, + struct bio *bio); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, u64 disk_offset, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e826730d750f..d2d1cc87e8ad 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1357,10 +1357,25 @@ struct btrfs_root *open_ctree(struct super_block *sb, */ btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); + + /* a higher idle thresh on the submit workers makes it much more + * likely that bios will be send down in a sane order to the + * devices + */ + fs_info->submit_workers.idle_thresh = 64; + btrfs_init_workers(&fs_info->fixup_workers, 1); btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_write_workers, fs_info->thread_pool_size); + + /* + * endios are largely parallel and should have a very + * low idle thresh + */ + fs_info->endio_workers.idle_thresh = 4; + fs_info->endio_write_workers.idle_thresh = 4; + btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->fixup_workers, 1); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 2311061f070e..a5ff19b34b21 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -134,6 +134,83 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } +int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, + struct bio *bio) +{ + u32 sum; + struct bio_vec *bvec = bio->bi_io_vec; + int bio_index = 0; + u64 offset; + u64 item_start_offset = 0; + u64 item_last_offset = 0; + u32 diff; + int ret; + struct btrfs_path *path; + struct btrfs_csum_item *item = NULL; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + + path = btrfs_alloc_path(); + path->reada = 2; + + WARN_ON(bio->bi_vcnt <= 0); + + while(bio_index < bio->bi_vcnt) { + offset = page_offset(bvec->bv_page) + bvec->bv_offset; + ret = btrfs_find_ordered_sum(inode, offset, &sum); + if (ret == 0) + goto found; + + if (!item || offset < item_start_offset || + offset >= item_last_offset) { + struct btrfs_key found_key; + u32 item_size; + + if (item) + btrfs_release_path(root, path); + item = btrfs_lookup_csum(NULL, root, path, + inode->i_ino, offset, 0); + if (IS_ERR(item)) { + ret = PTR_ERR(item); + if (ret == -ENOENT || ret == -EFBIG) + ret = 0; + sum = 0; + printk("no csum found for inode %lu start " + "%llu\n", inode->i_ino, + (unsigned long long)offset); + goto found; + } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + + item_start_offset = found_key.offset; + item_size = btrfs_item_size_nr(path->nodes[0], + path->slots[0]); + item_last_offset = item_start_offset + + (item_size / BTRFS_CRC32_SIZE) * + root->sectorsize; + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_csum_item); + } + /* + * this byte range must be able to fit inside + * a single leaf so it will also fit inside a u32 + */ + diff = offset - item_start_offset; + diff = diff / root->sectorsize; + diff = diff * BTRFS_CRC32_SIZE; + + read_extent_buffer(path->nodes[0], &sum, + (unsigned long)item + diff, + BTRFS_CRC32_SIZE); +found: + set_state_private(io_tree, offset, sum); + bio_index++; + bvec++; + } + btrfs_free_path(path); + return 0; +} + int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c4afa9d78da9..31d52c51acc3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -374,6 +374,10 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, BUG_ON(ret); if (!(rw & (1 << BIO_RW))) { + if (!btrfs_test_opt(root, NODATASUM) && + !btrfs_test_flag(inode, NODATASUM)) { + btrfs_lookup_bio_sums(root, inode, bio); + } goto mapit; } @@ -598,58 +602,6 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, return btrfs_finish_ordered_io(page->mapping->host, start, end); } -int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) -{ - int ret = 0; - struct inode *inode = page->mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct btrfs_csum_item *item; - struct btrfs_path *path = NULL; - u32 csum; - - if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) - return 0; - - /* - * It is possible there is an ordered extent that has - * not yet finished for this range in the file. If so, - * that extent will have a csum cached, and it will insert - * the sum after all the blocks in the extent are fully - * on disk. So, look for an ordered extent and use the - * sum if found. We have to do this before looking in the - * btree because csum items are pre-inserted based on - * the file size. btrfs_lookup_csum might find an item - * that still hasn't been fully filled. - */ - ret = btrfs_find_ordered_sum(inode, start, &csum); - if (ret == 0) - goto found; - - ret = 0; - path = btrfs_alloc_path(); - item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); - if (IS_ERR(item)) { - ret = PTR_ERR(item); - /* a csum that isn't present is a preallocated region. */ - if (ret == -ENOENT || ret == -EFBIG) - ret = 0; - csum = 0; - printk("no csum found for inode %lu start %Lu\n", inode->i_ino, - start); - goto out; - } - read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, - BTRFS_CRC32_SIZE); -found: - set_state_private(io_tree, start, csum); -out: - if (path) - btrfs_free_path(path); - return ret; -} - struct io_failure_record { struct page *page; u64 start; @@ -3613,7 +3565,6 @@ static struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, .submit_bio_hook = btrfs_submit_bio_hook, .merge_bio_hook = btrfs_merge_bio_hook, - .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, .writepage_end_io_hook = btrfs_writepage_end_io_hook, .writepage_start_hook = btrfs_writepage_start_hook, From 3ce7e67a069b919be774a341b82fc20978b7f69d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jul 2008 15:42:54 -0400 Subject: [PATCH 0637/2985] Btrfs: Drop some debugging around the extent_map pinned flag Signed-off-by: Chris Mason --- fs/btrfs/file.c | 10 +--------- fs/btrfs/inode.c | 22 ---------------------- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d3f2fe0b7c6c..c78f184ee5cc 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -380,15 +380,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) spin_unlock(&em_tree->lock); break; } - if (test_bit(EXTENT_FLAG_PINNED, &em->flags)) { - printk(KERN_CRIT "inode %lu trying to drop pinned " - "extent start %llu end %llu, em [%llu %llu]\n", - inode->i_ino, - (unsigned long long)start, - (unsigned long long)end, - (unsigned long long)em->start, - (unsigned long long)em->len); - } + clear_bit(EXTENT_FLAG_PINNED, &em->flags); remove_extent_mapping(em_tree, em); if (em->block_start < EXTENT_MAP_LAST_BYTE && diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 31d52c51acc3..76365ad9bfbf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -505,15 +505,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct extent_map *em; - struct extent_map *em_orig; u64 alloc_hint = 0; - u64 clear_start; - u64 clear_end; struct list_head list; struct btrfs_key ins; - struct rb_node *rb; int ret; ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); @@ -542,22 +536,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) mutex_lock(&BTRFS_I(inode)->extent_mutex); - spin_lock(&em_tree->lock); - clear_start = ordered_extent->file_offset; - clear_end = ordered_extent->file_offset + ordered_extent->len; - em = lookup_extent_mapping(em_tree, clear_start, - ordered_extent->len); - em_orig = em; - while(em && clear_start < extent_map_end(em) && clear_end > em->start) { - clear_bit(EXTENT_FLAG_PINNED, &em->flags); - rb = rb_next(&em->rb_node); - if (!rb) - break; - em = rb_entry(rb, struct extent_map, rb_node); - } - free_extent_map(em_orig); - spin_unlock(&em_tree->lock); - ret = btrfs_drop_extents(trans, root, inode, ordered_extent->file_offset, ordered_extent->file_offset + From 492bb6deee3416ad792dcd8584ebd95c463af1a6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jul 2008 16:29:02 -0400 Subject: [PATCH 0638/2985] Btrfs: Hold a reference on bios during submit_bio, add some extra bio checks Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 18db4cbe2794..0de51e37e789 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -180,7 +180,11 @@ loop: pending = pending->bi_next; cur->bi_next = NULL; atomic_dec(&device->dev_root->fs_info->nr_async_submits); + + BUG_ON(atomic_read(&cur->bi_cnt) == 0); + bio_get(cur); submit_bio(cur->bi_rw, cur); + bio_put(cur); num_run++; /* @@ -188,10 +192,11 @@ loop: * is now congested. Back off and let other work structs * run instead */ - if (pending && num_run && bdi_write_congested(bdi)) { + if (pending && bdi_write_congested(bdi)) { struct bio *old_head; spin_lock(&device->io_lock); + old_head = device->pending_bios; device->pending_bios = pending; if (device->pending_bio_tail) @@ -2125,7 +2130,9 @@ int schedule_bio(struct btrfs_root *root, struct btrfs_device *device, /* don't bother with additional async steps for reads, right now */ if (!(rw & (1 << BIO_RW))) { + bio_get(bio); submit_bio(rw, bio); + bio_put(bio); return 0; } @@ -2136,6 +2143,7 @@ int schedule_bio(struct btrfs_root *root, struct btrfs_device *device, * on a queue for later */ atomic_inc(&root->fs_info->nr_async_submits); + WARN_ON(bio->bi_next); bio->bi_next = NULL; bio->bi_rw |= rw; From f87f057b49ee52cf5c627ab27a706e3252767c9f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Aug 2008 11:27:23 -0400 Subject: [PATCH 0639/2985] Btrfs: Improve and cleanup locking done by walk_down_tree While dropping snapshots, walk_down_tree does most of the work of checking reference counts and limiting tree traversal to just the blocks that we are freeing. It dropped and held the allocation mutex in strange and confusing ways, this commit changes it to only hold the mutex while actually freeing a block. The rest of the checks around reference counts should be safe without the lock because we only allow one process in btrfs_drop_snapshot at a time. Other processes dropping reference counts should not drop it to 1 because their tree roots already have an extra ref on the block. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 85 ++++++++++++++++++++++++++++-------------- fs/btrfs/file.c | 13 ++++--- fs/btrfs/ioctl.c | 6 +++ 3 files changed, 70 insertions(+), 34 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b003b4364ddb..58bceeeda9c5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2333,8 +2333,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, leaf_owner = btrfs_header_owner(leaf); leaf_generation = btrfs_header_generation(leaf); - mutex_unlock(&root->fs_info->alloc_mutex); - for (i = 0; i < nritems; i++) { u64 disk_bytenr; cond_resched(); @@ -2362,8 +2360,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->alloc_mutex); BUG_ON(ret); } - - mutex_lock(&root->fs_info->alloc_mutex); return 0; } @@ -2375,7 +2371,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, int ret; struct btrfs_extent_info *info = ref->extents; - mutex_unlock(&root->fs_info->alloc_mutex); for (i = 0; i < ref->nritems; i++) { mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, @@ -2386,7 +2381,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, BUG_ON(ret); info++; } - mutex_lock(&root->fs_info->alloc_mutex); return 0; } @@ -2440,10 +2434,39 @@ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, u32 *refs) { int ret; - mutex_unlock(&root->fs_info->alloc_mutex); + ret = lookup_extent_ref(NULL, root, start, len, refs); + BUG_ON(ret); + +#if 0 // some debugging code in case we see problems here + /* if the refs count is one, it won't get increased again. But + * if the ref count is > 1, someone may be decreasing it at + * the same time we are. + */ + if (*refs != 1) { + struct extent_buffer *eb = NULL; + eb = btrfs_find_create_tree_block(root, start, len); + if (eb) + btrfs_tree_lock(eb); + + mutex_lock(&root->fs_info->alloc_mutex); + ret = lookup_extent_ref(NULL, root, start, len, refs); + BUG_ON(ret); + mutex_unlock(&root->fs_info->alloc_mutex); + + if (eb) { + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + } + if (*refs == 1) { + printk("block %llu went down to one during drop_snap\n", + (unsigned long long)start); + } + + } +#endif + cond_resched(); - mutex_lock(&root->fs_info->alloc_mutex); return ret; } @@ -2467,8 +2490,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, int ret; u32 refs; - mutex_lock(&root->fs_info->alloc_mutex); - WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, @@ -2507,13 +2528,21 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); path->slots[*level]++; + + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, root_owner, root_gen, 0, 0, 1); BUG_ON(ret); + mutex_unlock(&root->fs_info->alloc_mutex); continue; } - + /* + * at this point, we have a single ref, and since the + * only place referencing this extent is a dead root + * the reference count should never go higher. + * So, we don't need to check it again + */ if (*level == 1) { struct btrfs_key key; btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); @@ -2533,33 +2562,23 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); - mutex_unlock(&root->fs_info->alloc_mutex); if (path->slots[*level] == 0) reada_walk_down(root, cur, path->slots[*level]); next = read_tree_block(root, bytenr, blocksize, ptr_gen); cond_resched(); - mutex_lock(&root->fs_info->alloc_mutex); - - /* we've dropped the lock, double check */ +#if 0 + /* + * this is a debugging check and can go away + * the ref should never go all the way down to 1 + * at this point + */ ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs); BUG_ON(ret); - if (refs != 1) { - parent = path->nodes[*level]; - root_owner = btrfs_header_owner(parent); - root_gen = btrfs_header_generation(parent); - - path->slots[*level]++; - free_extent_buffer(next); - ret = __btrfs_free_extent(trans, root, bytenr, - blocksize, - root_owner, - root_gen, 0, 0, 1); - BUG_ON(ret); - continue; - } + WARN_ON(refs != 1); +#endif } WARN_ON(*level <= 0); if (path->nodes[*level-1]) @@ -2584,6 +2603,8 @@ out: root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); + + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, root_owner, root_gen, 0, 0, 1); free_extent_buffer(path->nodes[*level]); @@ -2591,6 +2612,7 @@ out: *level += 1; BUG_ON(ret); mutex_unlock(&root->fs_info->alloc_mutex); + cond_resched(); return 0; } @@ -2834,6 +2856,11 @@ again: } set_page_extent_mapped(page); + /* + * make sure page_mkwrite is called for this page if userland + * wants to change it from mmap + */ + clear_page_dirty_for_io(page); set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c78f184ee5cc..8915f2dc1bce 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -338,6 +338,13 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); BUG_ON(err); mutex_unlock(&BTRFS_I(inode)->extent_mutex); + + /* + * an ugly way to do all the prop accounting around + * the page bits and mapping tags + */ + set_page_writeback(pages[0]); + end_page_writeback(pages[0]); did_inline = 1; } if (end_pos > isize) { @@ -833,11 +840,7 @@ again: start_pos, last_pos - 1, GFP_NOFS); } for (i = 0; i < num_pages; i++) { -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(pages[i]); -#else - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); -#endif + clear_page_dirty_for_io(pages[i]); set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5e627746c4e8..224da287b3ed 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -268,6 +268,12 @@ again: } set_page_extent_mapped(page); + /* + * this makes sure page_mkwrite is called on the + * page if it is dirtied again later + */ + clear_page_dirty_for_io(page); + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); From 18e35e0ab337ec99c7e03e9ae917745a352c0bb1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Aug 2008 13:11:41 -0400 Subject: [PATCH 0640/2985] Btrfs: Throttle less often waiting for snapshots to delete Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 7 +++++-- fs/btrfs/transaction.c | 14 -------------- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 58bceeeda9c5..74bcd48a9c43 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2535,6 +2535,10 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, root_gen, 0, 0, 1); BUG_ON(ret); mutex_unlock(&root->fs_info->alloc_mutex); + + atomic_inc(&root->fs_info->throttle_gen); + wake_up(&root->fs_info->transaction_throttle); + continue; } /* @@ -2603,7 +2607,6 @@ out: root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); - mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, root_owner, root_gen, 0, 0, 1); @@ -2726,7 +2729,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root } } while(1) { - atomic_inc(&root->fs_info->throttle_gen); wret = walk_down_tree(trans, root, path, &level); if (wret > 0) break; @@ -2742,6 +2744,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = -EAGAIN; break; } + atomic_inc(&root->fs_info->throttle_gen); wake_up(&root->fs_info->transaction_throttle); } for (i = 0; i <= orig_level; i++) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 66af5140c8ce..a68779499302 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -211,11 +211,9 @@ static void throttle_on_drops(struct btrfs_root *root) { struct btrfs_fs_info *info = root->fs_info; -harder: if (atomic_read(&info->throttles)) { DEFINE_WAIT(wait); int thr; - int harder_count = 0; thr = atomic_read(&info->throttle_gen); do { @@ -228,18 +226,6 @@ harder: schedule(); finish_wait(&info->transaction_throttle, &wait); } while (thr == atomic_read(&info->throttle_gen)); - - if (harder_count < 5 && - info->total_ref_cache_size > 1 * 1024 * 1024) { - harder_count++; - goto harder; - } - - if (harder_count < 10 && - info->total_ref_cache_size > 5 * 1024 * 1024) { - harder_count++; - goto harder; - } } } From 65b51a009e29e64c0951f21ea17fdc66bbb0fbd7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Aug 2008 15:11:20 -0400 Subject: [PATCH 0641/2985] btrfs_search_slot: reduce lock contention by cowing in two stages A btree block cow has two parts, the first is to allocate a destination block and the second is to copy the old bock over. The first part needs locks in the extent allocation tree, and may need to do IO. This changeset splits that into a separate function that can be called without any tree locks held. btrfs_search_slot is changed to drop its path and start over if it has to COW a contended block. This often means that many writers will pre-alloc a new destination for a the same contended block, but they cache their prealloc for later use on lower levels in the tree. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 147 +++++++++++++++++++++++++++++++++-------- fs/btrfs/ctree.h | 6 +- fs/btrfs/extent-tree.c | 49 ++++++++------ fs/btrfs/locking.c | 16 +++++ fs/btrfs/locking.h | 1 + fs/btrfs/transaction.c | 2 +- 6 files changed, 173 insertions(+), 48 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c4792062dd53..7114faafa9d4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -181,7 +181,8 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret, - u64 search_start, u64 empty_size) + u64 search_start, u64 empty_size, + u64 prealloc_dest) { u64 root_gen; struct extent_buffer *cow; @@ -216,10 +217,27 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, } else { first_key.objectid = 0; } - cow = btrfs_alloc_free_block(trans, root, buf->len, - root->root_key.objectid, - root_gen, first_key.objectid, level, - search_start, empty_size); + if (prealloc_dest) { + struct btrfs_key ins; + + ins.objectid = prealloc_dest; + ins.offset = buf->len; + ins.type = BTRFS_EXTENT_ITEM_KEY; + + ret = btrfs_alloc_reserved_extent(trans, root, + root->root_key.objectid, + root_gen, level, + first_key.objectid, + &ins); + BUG_ON(ret); + cow = btrfs_init_new_buffer(trans, root, prealloc_dest, + buf->len); + } else { + cow = btrfs_alloc_free_block(trans, root, buf->len, + root->root_key.objectid, + root_gen, first_key.objectid, + level, search_start, empty_size); + } if (IS_ERR(cow)) return PTR_ERR(cow); @@ -279,7 +297,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, - struct extent_buffer **cow_ret) + struct extent_buffer **cow_ret, u64 prealloc_dest) { u64 search_start; u64 header_trans; @@ -302,12 +320,14 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { *cow_ret = buf; spin_unlock(&root->fs_info->hash_lock); + WARN_ON(prealloc_dest); return 0; } spin_unlock(&root->fs_info->hash_lock); search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); ret = __btrfs_cow_block(trans, root, buf, parent, - parent_slot, cow_ret, search_start, 0); + parent_slot, cow_ret, search_start, 0, + prealloc_dest); return ret; } @@ -451,7 +471,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, err = __btrfs_cow_block(trans, root, cur, parent, i, &cur, search_start, min(16 * blocksize, - (end_slot - i) * blocksize)); + (end_slot - i) * blocksize), 0); if (err) { btrfs_tree_unlock(cur); free_extent_buffer(cur); @@ -803,7 +823,7 @@ static int balance_level(struct btrfs_trans_handle *trans, child = read_node_slot(root, mid, 0); btrfs_tree_lock(child); BUG_ON(!child); - ret = btrfs_cow_block(trans, root, child, mid, 0, &child); + ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); BUG_ON(ret); spin_lock(&root->node_lock); @@ -836,7 +856,7 @@ static int balance_level(struct btrfs_trans_handle *trans, if (left) { btrfs_tree_lock(left); wret = btrfs_cow_block(trans, root, left, - parent, pslot - 1, &left); + parent, pslot - 1, &left, 0); if (wret) { ret = wret; goto enospc; @@ -846,7 +866,7 @@ static int balance_level(struct btrfs_trans_handle *trans, if (right) { btrfs_tree_lock(right); wret = btrfs_cow_block(trans, root, right, - parent, pslot + 1, &right); + parent, pslot + 1, &right, 0); if (wret) { ret = wret; goto enospc; @@ -1021,7 +1041,7 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, wret = 1; } else { ret = btrfs_cow_block(trans, root, left, parent, - pslot - 1, &left); + pslot - 1, &left, 0); if (ret) wret = 1; else { @@ -1069,7 +1089,7 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, } else { ret = btrfs_cow_block(trans, root, right, parent, pslot + 1, - &right); + &right, 0); if (ret) wret = 1; else { @@ -1245,6 +1265,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root u8 lowest_level = 0; u64 blocknr; u64 gen; + struct btrfs_key prealloc_block; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len); @@ -1253,6 +1274,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root !mutex_is_locked(&root->fs_info->alloc_mutex)); if (ins_len < 0) lowest_unlock = 2; + + prealloc_block.objectid = 0; + again: if (p->skip_locking) b = btrfs_root_node(root); @@ -1261,27 +1285,82 @@ again: while (b) { level = btrfs_header_level(b); + + /* + * setup the path here so we can release it under lock + * contention with the cow code + */ + p->nodes[level] = b; + if (!p->skip_locking) + p->locks[level] = 1; + if (cow) { int wret; + + /* is a cow on this block not required */ + spin_lock(&root->fs_info->hash_lock); + if (btrfs_header_generation(b) == trans->transid && + !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { + spin_unlock(&root->fs_info->hash_lock); + goto cow_done; + } + spin_unlock(&root->fs_info->hash_lock); + + /* ok, we have to cow, is our old prealloc the right + * size? + */ + if (prealloc_block.objectid && + prealloc_block.offset != b->len) { + btrfs_free_reserved_extent(root, + prealloc_block.objectid, + prealloc_block.offset); + prealloc_block.objectid = 0; + } + + /* + * for higher level blocks, try not to allocate blocks + * with the block and the parent locks held. + */ + if (level > 1 && !prealloc_block.objectid && + btrfs_path_lock_waiting(p, level)) { + u32 size = b->len; + u64 hint = b->start; + + btrfs_release_path(root, p); + ret = btrfs_reserve_extent(trans, root, + size, size, 0, + hint, (u64)-1, + &prealloc_block, 0); + BUG_ON(ret); + goto again; + } + wret = btrfs_cow_block(trans, root, b, p->nodes[level + 1], p->slots[level + 1], - &b); + &b, prealloc_block.objectid); + prealloc_block.objectid = 0; if (wret) { free_extent_buffer(b); - return wret; + ret = wret; + goto done; } } +cow_done: BUG_ON(!cow && ins_len); if (level != btrfs_header_level(b)) WARN_ON(1); level = btrfs_header_level(b); + p->nodes[level] = b; if (!p->skip_locking) p->locks[level] = 1; + ret = check_block(root, p, level); - if (ret) - return -1; + if (ret) { + ret = -1; + goto done; + } ret = bin_search(b, key, level, &slot); if (level != 0) { @@ -1292,15 +1371,19 @@ again: BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); - if (sret) - return sret; + if (sret) { + ret = sret; + goto done; + } b = p->nodes[level]; slot = p->slots[level]; } else if (ins_len < 0) { int sret = balance_level(trans, root, p, level); - if (sret) - return sret; + if (sret) { + ret = sret; + goto done; + } b = p->nodes[level]; if (!b) { btrfs_release_path(NULL, p); @@ -1362,14 +1445,24 @@ again: int sret = split_leaf(trans, root, key, p, ins_len, ret == 0); BUG_ON(sret > 0); - if (sret) - return sret; + if (sret) { + ret = sret; + goto done; + } } unlock_up(p, level, lowest_unlock); - return ret; + goto done; } } - return 1; + ret = 1; +done: + if (prealloc_block.objectid) { + btrfs_free_reserved_extent(root, + prealloc_block.objectid, + prealloc_block.offset); + } + + return ret; } /* @@ -1840,7 +1933,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root /* cow and double check */ ret = btrfs_cow_block(trans, root, right, upper, - slot + 1, &right); + slot + 1, &right, 0); if (ret) goto out_unlock; @@ -2021,7 +2114,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root /* cow and double check */ ret = btrfs_cow_block(trans, root, left, - path->nodes[1], slot - 1, &left); + path->nodes[1], slot - 1, &left, 0); if (ret) { /* we hit -ENOSPC, but it isn't fatal here */ ret = 1; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d788ab0dcd96..9b025960bbde 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1421,6 +1421,9 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int level, u64 hint, u64 empty_size); +struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u32 blocksize); int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -1451,6 +1454,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 root_objectid, u64 ref_generation, u64 owner_objectid, u64 owner_offset, int pin); +int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_io_tree *unpin); @@ -1484,7 +1488,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, - struct extent_buffer **cow_ret); + struct extent_buffer **cow_ret, u64 prealloc_dest); int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 74bcd48a9c43..98a1c0faedae 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2118,6 +2118,15 @@ again: return 0; } +int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) +{ + maybe_lock_mutex(root); + set_extent_dirty(&root->fs_info->free_space_cache, + start, start + len - 1, GFP_NOFS); + maybe_unlock_mutex(root); + return 0; +} + int btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, @@ -2267,6 +2276,26 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, maybe_unlock_mutex(root); return ret; } + +struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u32 blocksize) +{ + struct extent_buffer *buf; + + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); + if (!buf) + return ERR_PTR(-ENOMEM); + btrfs_set_header_generation(buf, trans->transid); + btrfs_tree_lock(buf); + clean_tree_block(trans, root, buf); + btrfs_set_buffer_uptodate(buf); + set_extent_dirty(&trans->transaction->dirty_pages, buf->start, + buf->start + buf->len - 1, GFP_NOFS); + trans->blocks_used++; + return buf; +} + /* * helper function to allocate a block for a given tree * returns the tree buffer or NULL. @@ -2293,26 +2322,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG_ON(ret > 0); return ERR_PTR(ret); } - buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize); - if (!buf) { - btrfs_free_extent(trans, root, ins.objectid, blocksize, - root->root_key.objectid, ref_generation, - 0, 0, 0); - return ERR_PTR(-ENOMEM); - } - btrfs_set_header_generation(buf, trans->transid); - btrfs_tree_lock(buf); - clean_tree_block(trans, root, buf); - btrfs_set_buffer_uptodate(buf); - if (PageDirty(buf->first_page)) { - printk("page %lu dirty\n", buf->first_page->index); - WARN_ON(1); - } - - set_extent_dirty(&trans->transaction->dirty_pages, buf->start, - buf->start + buf->len - 1, GFP_NOFS); - trans->blocks_used++; + buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize); return buf; } diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index d43e14c7471a..0cc314c10d66 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -56,3 +56,19 @@ int btrfs_tree_locked(struct extent_buffer *eb) { return mutex_is_locked(&eb->mutex); } + +int btrfs_path_lock_waiting(struct btrfs_path *path, int level) +{ + int i; + struct extent_buffer *eb; + for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { + eb = path->nodes[i]; + if (!eb) + break; + smp_mb(); + if (!list_empty(&eb->mutex.wait_list)) + return 1; + } + return 0; +} + diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 2dab96d8280e..bc1faef12519 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -23,4 +23,5 @@ int btrfs_tree_lock(struct extent_buffer *eb); int btrfs_tree_unlock(struct extent_buffer *eb); int btrfs_tree_locked(struct extent_buffer *eb); int btrfs_try_tree_lock(struct extent_buffer *eb); +int btrfs_path_lock_waiting(struct btrfs_path *path, int level); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a68779499302..9d84daf10008 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -622,7 +622,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); old = btrfs_lock_root_node(root); - btrfs_cow_block(trans, root, old, NULL, 0, &old); + btrfs_cow_block(trans, root, old, NULL, 0, &old, 0); btrfs_copy_root(trans, root, old, &tmp, objectid); btrfs_tree_unlock(old); From 2dd3e67b1eaec8504da7e12b8afee77323a49f38 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Aug 2008 08:20:15 -0400 Subject: [PATCH 0642/2985] Btrfs: More throttle tuning * Make walk_down_tree wake up throttled tasks more often * Make walk_down_tree call cond_resched during long loops * As the size of the ref cache grows, wait longer in throttle * Get rid of the reada code in walk_down_tree, the leaves don't get read anymore, thanks to the ref cache. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 13 ++-------- fs/btrfs/extent-tree.c | 59 +++++++++--------------------------------- fs/btrfs/transaction.c | 15 +++++++++++ 3 files changed, 29 insertions(+), 58 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d2d1cc87e8ad..da9dda4338a3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -188,13 +188,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, btrfs_csum_final(crc, result); if (verify) { - int from_this_trans = 0; - - if (root->fs_info->running_transaction && - btrfs_header_generation(buf) == - root->fs_info->running_transaction->transid) - from_this_trans = 1; - /* FIXME, this is not good */ if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { u32 val; @@ -203,11 +196,9 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE); printk("btrfs: %s checksum verify failed on %llu " - "wanted %X found %X from_this_trans %d " - "level %d\n", + "wanted %X found %X level %d\n", root->fs_info->sb->s_id, - buf->start, val, found, from_this_trans, - btrfs_header_level(buf)); + buf->start, val, found, btrfs_header_level(buf)); return 1; } } else { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 98a1c0faedae..1aeb695078b9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2369,6 +2369,11 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, leaf_owner, leaf_generation, key.objectid, key.offset, 0); mutex_unlock(&root->fs_info->alloc_mutex); + + atomic_inc(&root->fs_info->throttle_gen); + wake_up(&root->fs_info->transaction_throttle); + cond_resched(); + BUG_ON(ret); } return 0; @@ -2389,6 +2394,11 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, ref->owner, ref->generation, info->objectid, info->offset, 0); mutex_unlock(&root->fs_info->alloc_mutex); + + atomic_inc(&root->fs_info->throttle_gen); + wake_up(&root->fs_info->transaction_throttle); + cond_resched(); + BUG_ON(ret); info++; } @@ -2396,51 +2406,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -static void noinline reada_walk_down(struct btrfs_root *root, - struct extent_buffer *node, - int slot) -{ - u64 bytenr; - u64 last = 0; - u32 nritems; - u32 refs; - u32 blocksize; - int ret; - int i; - int level; - int skipped = 0; - - nritems = btrfs_header_nritems(node); - level = btrfs_header_level(node); - if (level) - return; - - for (i = slot; i < nritems && skipped < 32; i++) { - bytenr = btrfs_node_blockptr(node, i); - if (last && ((bytenr > last && bytenr - last > 32 * 1024) || - (last > bytenr && last - bytenr > 32 * 1024))) { - skipped++; - continue; - } - blocksize = btrfs_level_size(root, level - 1); - if (i != slot) { - ret = lookup_extent_ref(NULL, root, bytenr, - blocksize, &refs); - BUG_ON(ret); - if (refs != 1) { - skipped++; - continue; - } - } - ret = readahead_tree_block(root, bytenr, blocksize, - btrfs_node_ptr_generation(node, i)); - last = bytenr + blocksize; - cond_resched(); - if (ret) - break; - } -} - int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, u32 *refs) { @@ -2549,6 +2514,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, atomic_inc(&root->fs_info->throttle_gen); wake_up(&root->fs_info->transaction_throttle); + cond_resched(); continue; } @@ -2578,8 +2544,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); - if (path->slots[*level] == 0) - reada_walk_down(root, cur, path->slots[*level]); next = read_tree_block(root, bytenr, blocksize, ptr_gen); cond_resched(); @@ -2601,6 +2565,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, path->nodes[*level-1] = next; *level = btrfs_header_level(next); path->slots[*level] = 0; + cond_resched(); } out: WARN_ON(*level < 0); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9d84daf10008..cf73342e8215 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -210,7 +210,9 @@ static noinline int wait_for_commit(struct btrfs_root *root, static void throttle_on_drops(struct btrfs_root *root) { struct btrfs_fs_info *info = root->fs_info; + int harder_count = 0; +harder: if (atomic_read(&info->throttles)) { DEFINE_WAIT(wait); int thr; @@ -226,6 +228,19 @@ static void throttle_on_drops(struct btrfs_root *root) schedule(); finish_wait(&info->transaction_throttle, &wait); } while (thr == atomic_read(&info->throttle_gen)); + harder_count++; + + if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 && + harder_count < 2) + goto harder; + + if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 && + harder_count < 10) + goto harder; + + if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 && + harder_count < 20) + goto harder; } } From 6dab81574346c831ded96ae3ab0e8f9ca72c37ae Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Aug 2008 08:35:53 -0400 Subject: [PATCH 0643/2985] Btrfs: Hold csum mutex while reading in sums during readpages Signed-off-by: Chris Mason --- fs/btrfs/file-item.c | 2 +- fs/btrfs/inode.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a5ff19b34b21..70af24aa582b 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -150,7 +150,6 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; path = btrfs_alloc_path(); - path->reada = 2; WARN_ON(bio->bi_vcnt <= 0); @@ -177,6 +176,7 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, printk("no csum found for inode %lu start " "%llu\n", inode->i_ino, (unsigned long long)offset); + item = NULL; goto found; } btrfs_item_key_to_cpu(path->nodes[0], &found_key, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 76365ad9bfbf..640648c66b22 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -376,7 +376,9 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, if (!(rw & (1 << BIO_RW))) { if (!btrfs_test_opt(root, NODATASUM) && !btrfs_test_flag(inode, NODATASUM)) { + mutex_lock(&BTRFS_I(inode)->csum_mutex); btrfs_lookup_bio_sums(root, inode, bio); + mutex_unlock(&BTRFS_I(inode)->csum_mutex); } goto mapit; } From 3117a77370b6cb902191568e4e647cdcba083d0a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Aug 2008 11:10:20 -0400 Subject: [PATCH 0644/2985] Btrfs: Add support for HW assisted crc32c Intel doesn't yet ship hardware to the public with this enabled, but when they do, they will be ready. Original code from: Austin Zhang It is currently disabled, but edit crc32c.h to turn it on. Signed-off-by: Chris Mason --- fs/btrfs/crc32c.h | 95 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h index a93255b4ee27..bf6c12e85730 100644 --- a/fs/btrfs/crc32c.h +++ b/fs/btrfs/crc32c.h @@ -1,7 +1,95 @@ +#ifndef __BTRFS_CRC32C__ +#define __BTRFS_CRC32C__ #include #include #include +/* #define CONFIG_BTRFS_HW_SUM 1 */ + +#ifdef CONFIG_BTRFS_HW_SUM +#ifdef CONFIG_X86 +/* + * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal. + * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE) + * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2A: Instruction Set Reference, A-M + */ + +#include +#include + +#define X86_FEATURE_XMM4_2 (4*32+20) /* Streaming SIMD Extensions-4.2 */ +#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) + +#ifdef CONFIG_X86_64 +#define REX_PRE "0x48, " +#define SCALE_F 8 +#else +#define REX_PRE +#define SCALE_F 4 +#endif + +static inline u32 btrfs_crc32c_le_hw_byte(u32 crc, unsigned char const *data, + size_t length) +{ + while (length--) { + __asm__ __volatile__( + ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" + :"=S"(crc) + :"0"(crc), "c"(*data) + ); + data++; + } + + return crc; +} + +static inline u32 __pure btrfs_crc32c_le_hw(u32 crc, unsigned char const *p, + size_t len) +{ + unsigned int iquotient = len / SCALE_F; + unsigned int iremainder = len % SCALE_F; +#ifdef CONFIG_X86_64 + u64 *ptmp = (u64 *)p; +#else + u32 *ptmp = (u32 *)p; +#endif + + while (iquotient--) { + __asm__ __volatile__( + ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;" + :"=S"(crc) + :"0"(crc), "c"(*ptmp) + ); + ptmp++; + } + + if (iremainder) + crc = btrfs_crc32c_le_hw_byte(crc, (unsigned char *)ptmp, + iremainder); + + return crc; +} +#endif /* CONFIG_BTRFS_HW_SUM */ + +static inline u32 __btrfs_crc32c(u32 crc, unsigned char const *address, + size_t len) +{ +#ifdef CONFIG_BTRFS_HW_SUM + if (cpu_has_xmm4_2) + return btrfs_crc32c_le_hw(crc, address, len); +#endif + return crc32c_le(crc, address, len); +} + +#else + +#define __btrfs_crc32c(seed, data, length) crc32c(seed, data, length) + +#endif /* CONFIG_X86 */ + /** * implementation of crc32c_le() changed in linux-2.6.23, * has of v0.13 btrfs-progs is using the latest version. @@ -10,8 +98,11 @@ */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) #define btrfs_crc32c(seed, data, length) \ - __cpu_to_le32( crc32c( __le32_to_cpu(seed), data, length) ) + __cpu_to_le32( __btrfs_crc32c( __le32_to_cpu(seed), \ + (unsigned char const *)data, length) ) #else #define btrfs_crc32c(seed, data, length) \ - crc32c(seed, data, length) + __btrfs_crc32c(seed, (unsigned char const *)data, length) #endif +#endif + From 9ca9ee09c176a814189063c8b88f75c8f8e4ad19 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 4 Aug 2008 10:41:27 -0400 Subject: [PATCH 0645/2985] Btrfs: fix ioctl-initiated transactions vs wait_current_trans() Commit 597:466b27332893 (btrfs_start_transaction: wait for commits in progress) breaks the transaction start/stop ioctls by making btrfs_start_transaction conditionally wait for the next transaction to start. If an application artificially is holding a transaction open, things deadlock. This workaround maintains a count of open ioctl-initiated transactions in fs_info, and avoids wait_current_trans() if any are currently open (in start_transaction() and btrfs_throttle()). The start transaction ioctl uses a new btrfs_start_ioctl_transaction() that _does_ call wait_current_trans(), effectively pushing the join/wait decision to the outer ioctl-initiated transaction. This more or less neuters btrfs_throttle() when ioctl-initiated transactions are in use, but that seems like a pretty fundamental consequence of wrapping lots of write()'s in a transaction. Btrfs has no way to tell if the application considers a given operation as part of it's transaction. Obviously, if the transaction start/stop ioctls aren't being used, there is no effect on current behavior. Signed-off-by: Sage Weil --- ctree.h | 1 + ioctl.c | 12 +++++++++++- transaction.c | 18 +++++++++++++----- transaction.h | 2 ++ 4 files changed, 27 insertions(+), 6 deletions(-) Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/ioctl.c | 12 +++++++++++- fs/btrfs/transaction.c | 18 +++++++++++++----- fs/btrfs/transaction.h | 2 ++ 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9b025960bbde..62499dd761b7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -518,6 +518,7 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; + u64 open_ioctl_trans; unsigned long mount_opt; u64 max_extent; u64 max_inline; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 224da287b3ed..0b63c3c77cfd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -715,7 +715,12 @@ long btrfs_ioctl_trans_start(struct file *file) ret = -EINPROGRESS; goto out; } - trans = btrfs_start_transaction(root, 0); + + mutex_lock(&root->fs_info->trans_mutex); + root->fs_info->open_ioctl_trans++; + mutex_unlock(&root->fs_info->trans_mutex); + + trans = btrfs_start_ioctl_transaction(root, 0); if (trans) file->private_data = trans; else @@ -745,6 +750,11 @@ long btrfs_ioctl_trans_end(struct file *file) } btrfs_end_transaction(trans, root); file->private_data = 0; + + mutex_lock(&root->fs_info->trans_mutex); + root->fs_info->open_ioctl_trans--; + mutex_unlock(&root->fs_info->trans_mutex); + out: return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cf73342e8215..a2c821e3c3a7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -152,14 +152,14 @@ static void wait_current_trans(struct btrfs_root *root) } struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, - int num_blocks, int join) + int num_blocks, int wait) { struct btrfs_trans_handle *h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; mutex_lock(&root->fs_info->trans_mutex); - if (!join) + if ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2) wait_current_trans(root); ret = join_transaction(root); BUG_ON(ret); @@ -180,14 +180,21 @@ struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { - return start_transaction(root, num_blocks, 0); + return start_transaction(root, num_blocks, 1); } struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, int num_blocks) { - return start_transaction(root, num_blocks, 1); + return start_transaction(root, num_blocks, 0); } +struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, + int num_blocks) +{ + return start_transaction(r, num_blocks, 2); +} + + static noinline int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { @@ -247,7 +254,8 @@ harder: void btrfs_throttle(struct btrfs_root *root) { mutex_lock(&root->fs_info->trans_mutex); - wait_current_trans(root); + if (!root->fs_info->open_ioctl_trans) + wait_current_trans(root); mutex_unlock(&root->fs_info->trans_mutex); throttle_on_drops(root); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f5adb23151fb..2c73caeebb2c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -83,6 +83,8 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks); struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, int num_blocks); +struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, + int num_blocks); int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, From d7a029a89ef370e74b63f18b81498d90d1ee3cc1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Aug 2008 23:17:26 -0400 Subject: [PATCH 0646/2985] Btrfs: Don't corrupt ram in shrink_extent_tree, leak it instead Far from the perfect fix, but these structs are small. TODO for the next release. The block group cache structs are referenced in many different places, and it isn't safe to just free them while resizing. A real fix will be a larger change to the allocator so that it doesn't have to carry about the block group cache structs to find good places to search for free blocks. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1aeb695078b9..dbde12881244 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3437,8 +3437,10 @@ next: key.objectid, key.objectid + key.offset - 1, (unsigned int)-1, GFP_NOFS); + /* memset(shrink_block_group, 0, sizeof(*shrink_block_group)); kfree(shrink_block_group); + */ btrfs_del_item(trans, root, path); btrfs_release_path(root, path); From ea8c281947950fac5f78818b767821d696c9512a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Aug 2008 23:17:27 -0400 Subject: [PATCH 0647/2985] Btrfs: Maintain a list of inodes that are delalloc and a way to wait on them Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 2 ++ fs/btrfs/ctree.h | 4 ++++ fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 12 ++++++++--- fs/btrfs/file.c | 3 +-- fs/btrfs/inode.c | 47 ++++++++++++++++++++++++++++++++++++------ fs/btrfs/ioctl.c | 4 ++-- 7 files changed, 60 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index df624fd735c1..534ac4153ddd 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -42,6 +42,8 @@ struct btrfs_inode { /* for keeping track of orphaned inodes */ struct list_head i_orphan; + struct list_head delalloc_inodes; + /* * transid of the trans_handle that last modified this inode */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 62499dd761b7..116aee21bf77 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -551,6 +551,7 @@ struct btrfs_fs_info { */ spinlock_t ordered_extent_lock; struct list_head ordered_extents; + struct list_head delalloc_inodes; /* * there is a pool of worker threads for checksumming during writes @@ -637,6 +638,7 @@ struct btrfs_root { struct kobject root_kobj; struct completion kobj_unregister; struct mutex objectid_mutex; + u64 objectid; u64 last_trans; @@ -1651,6 +1653,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, #define PageChecked PageFsMisc #endif +int btrfs_start_delalloc_inodes(struct btrfs_root *root); +int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); int btrfs_create_subvol_root(struct btrfs_root *new_root, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index da9dda4338a3..76543683f3b9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1234,6 +1234,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); + INIT_LIST_HEAD(&fs_info->delalloc_inodes); spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index dbde12881244..33cb2ac4cb28 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1230,7 +1230,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->total_bytes += total_bytes; found->bytes_used += bytes_used; found->full = 0; - WARN_ON(found->total_bytes < found->bytes_used); *space_info = found; return 0; } @@ -2841,8 +2840,7 @@ again: */ clear_page_dirty_for_io(page); - set_extent_delalloc(io_tree, page_start, - page_end, GFP_NOFS); + btrfs_set_extent_delalloc(inode, page_start, page_end); set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); @@ -3319,6 +3317,13 @@ again: key.type = 0; cur_byte = key.objectid; + mutex_unlock(&root->fs_info->alloc_mutex); + + btrfs_start_delalloc_inodes(root); + btrfs_wait_ordered_extents(tree_root); + + mutex_lock(&root->fs_info->alloc_mutex); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; @@ -3401,6 +3406,7 @@ next: btrfs_clean_old_snapshots(tree_root); + btrfs_start_delalloc_inodes(root); btrfs_wait_ordered_extents(tree_root); trans = btrfs_start_transaction(tree_root, 1); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8915f2dc1bce..eb8e4556fa71 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -312,8 +312,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, * to reset the delalloc bit on things that already have * extents reserved. */ - set_extent_delalloc(io_tree, start_pos, - end_of_last_block, GFP_NOFS); + btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 640648c66b22..8a405a5fa6a3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -303,6 +303,10 @@ int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); BTRFS_I(inode)->delalloc_bytes += end - start + 1; root->fs_info->delalloc_bytes += end - start + 1; + if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { + list_add_tail(&BTRFS_I(inode)->delalloc_inodes, + &root->fs_info->delalloc_inodes); + } spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); } return 0; @@ -325,6 +329,10 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, root->fs_info->delalloc_bytes -= end - start + 1; BTRFS_I(inode)->delalloc_bytes -= end - start + 1; } + if (BTRFS_I(inode)->delalloc_bytes == 0 && + !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { + list_del_init(&BTRFS_I(inode)->delalloc_inodes); + } spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); } return 0; @@ -408,6 +416,12 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, return 0; } +int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) +{ + return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, + GFP_NOFS); +} + struct btrfs_writepage_fixup { struct page *page; struct btrfs_work work; @@ -453,8 +467,7 @@ again: goto again; } - set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, - GFP_NOFS); + btrfs_set_extent_delalloc(inode, page_start, page_end); ClearPageChecked(page); out: unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); @@ -1530,8 +1543,7 @@ again: goto again; } - set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, - page_end, GFP_NOFS); + btrfs_set_extent_delalloc(inode, page_start, page_end); ret = 0; if (offset != PAGE_CACHE_SIZE) { kaddr = kmap(page); @@ -1766,6 +1778,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); mutex_init(&BTRFS_I(inode)->csum_mutex); mutex_init(&BTRFS_I(inode)->extent_mutex); @@ -2158,6 +2171,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); + INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); mutex_init(&BTRFS_I(inode)->csum_mutex); mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; @@ -2400,6 +2414,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); mutex_init(&BTRFS_I(inode)->csum_mutex); mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; @@ -3049,8 +3064,7 @@ again: goto again; } - set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, - page_end, GFP_NOFS); + btrfs_set_extent_delalloc(inode, page_start, page_end); ret = 0; /* page is wholly or partially inside EOF */ @@ -3373,6 +3387,26 @@ out_unlock: return ret; } +int btrfs_start_delalloc_inodes(struct btrfs_root *root) +{ + struct list_head *head = &root->fs_info->delalloc_inodes; + struct btrfs_inode *binode; + unsigned long flags; + + spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); + while(!list_empty(head)) { + binode = list_entry(head->next, struct btrfs_inode, + delalloc_inodes); + atomic_inc(&binode->vfs_inode.i_count); + spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); + filemap_write_and_wait(binode->vfs_inode.i_mapping); + iput(&binode->vfs_inode); + spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); + } + spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); + return 0; +} + static int btrfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { @@ -3436,6 +3470,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); + INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); mutex_init(&BTRFS_I(inode)->csum_mutex); mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0b63c3c77cfd..e1046a54b1c5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -274,8 +274,7 @@ again: */ clear_page_dirty_for_io(page); - set_extent_delalloc(io_tree, page_start, - page_end, GFP_NOFS); + btrfs_set_extent_delalloc(inode, page_start, page_end); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); @@ -784,6 +783,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_TRANS_END: return btrfs_ioctl_trans_end(file); case BTRFS_IOC_SYNC: + btrfs_start_delalloc_inodes(root); btrfs_sync_fs(file->f_dentry->d_sb, 1); return 0; } From 3de9d6b649b4cc60687be92e71cef36d7d4e8f2f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Aug 2008 23:17:27 -0400 Subject: [PATCH 0648/2985] btrfs_lookup_bio_sums seems broken, go back to the readpage_io_hook for now Signed-off-by: Chris Mason --- fs/btrfs/file-item.c | 4 ++- fs/btrfs/inode.c | 59 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 70af24aa582b..51aba8cee7ce 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -134,6 +134,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } +#if 0 /* broken */ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio) { @@ -200,7 +201,7 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, diff = diff * BTRFS_CRC32_SIZE; read_extent_buffer(path->nodes[0], &sum, - (unsigned long)item + diff, + ((unsigned long)item) + diff, BTRFS_CRC32_SIZE); found: set_state_private(io_tree, offset, sum); @@ -210,6 +211,7 @@ found: btrfs_free_path(path); return 0; } +#endif int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8a405a5fa6a3..99121a55ffbe 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -382,12 +382,6 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, BUG_ON(ret); if (!(rw & (1 << BIO_RW))) { - if (!btrfs_test_opt(root, NODATASUM) && - !btrfs_test_flag(inode, NODATASUM)) { - mutex_lock(&BTRFS_I(inode)->csum_mutex); - btrfs_lookup_bio_sums(root, inode, bio); - mutex_unlock(&BTRFS_I(inode)->csum_mutex); - } goto mapit; } @@ -595,6 +589,58 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, return btrfs_finish_ordered_io(page->mapping->host, start, end); } +int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) +{ + int ret = 0; + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_csum_item *item; + struct btrfs_path *path = NULL; + u32 csum; + + if (btrfs_test_opt(root, NODATASUM) || + btrfs_test_flag(inode, NODATASUM)) + return 0; + + /* + * It is possible there is an ordered extent that has + * not yet finished for this range in the file. If so, + * that extent will have a csum cached, and it will insert + * the sum after all the blocks in the extent are fully + * on disk. So, look for an ordered extent and use the + * sum if found. We have to do this before looking in the + * btree because csum items are pre-inserted based on + * the file size. btrfs_lookup_csum might find an item + * that still hasn't been fully filled. + */ + ret = btrfs_find_ordered_sum(inode, start, &csum); + if (ret == 0) + goto found; + + ret = 0; + path = btrfs_alloc_path(); + item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); + if (IS_ERR(item)) { + ret = PTR_ERR(item); + /* a csum that isn't present is a preallocated region. */ + if (ret == -ENOENT || ret == -EFBIG) + ret = 0; + csum = 0; + printk("no csum found for inode %lu start %Lu\n", inode->i_ino, + start); + goto out; + } + read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, + BTRFS_CRC32_SIZE); +found: + set_state_private(io_tree, start, csum); +out: + if (path) + btrfs_free_path(path); + return ret; +} + struct io_failure_record { struct page *page; u64 start; @@ -3580,6 +3626,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, .submit_bio_hook = btrfs_submit_bio_hook, .merge_bio_hook = btrfs_merge_bio_hook, + .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, .writepage_end_io_hook = btrfs_writepage_end_io_hook, .writepage_start_hook = btrfs_writepage_start_hook, From b48652c101cce7a54379a49cc0cf854cec2c94e2 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 4 Aug 2008 23:23:47 -0400 Subject: [PATCH 0649/2985] Btrfs: Various small fixes. This trivial patch contains two locking fixes and a off by one fix. --- Signed-off-by: Chris Mason --- fs/btrfs/ordered-data.c | 4 ++-- fs/btrfs/root-tree.c | 3 +-- fs/btrfs/super.c | 2 ++ fs/btrfs/transaction.c | 9 +++++---- fs/btrfs/transaction.h | 3 +-- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 676e4bd65c52..db200e6baf7e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -548,7 +548,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, */ test = rb_entry(node, struct btrfs_ordered_extent, rb_node); if (test->file_offset > entry_end(ordered)) { - i_size_test = test->file_offset - 1; + i_size_test = test->file_offset; } } else { i_size_test = i_size_read(inode); @@ -561,7 +561,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, * disk_i_size to the end of the region. */ if (i_size_test > entry_end(ordered) && - !test_range_bit(io_tree, entry_end(ordered), i_size_test, + !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, EXTENT_DELALLOC, 0)) { new_i_size = min_t(u64, i_size_test, i_size_read(inode)); } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a5c0e98b5aeb..36726696e58b 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -209,8 +209,7 @@ again: goto err; } - ret = btrfs_add_dead_root(dead_root, latest, - &root->fs_info->dead_roots); + ret = btrfs_add_dead_root(dead_root, latest); if (ret) goto err; goto again; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a6a418b6894b..eb4b357d05e1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -449,7 +449,9 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, s->s_flags |= MS_ACTIVE; } + mutex_lock(&s->s_root->d_inode->i_mutex); root = lookup_one_len(subvol_name, s->s_root, strlen(subvol_name)); + mutex_unlock(&s->s_root->d_inode->i_mutex); if (IS_ERR(root)) { up_write(&s->s_umount); deactivate_super(s); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a2c821e3c3a7..ebf5362da1d2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -389,9 +389,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, return 0; } -int btrfs_add_dead_root(struct btrfs_root *root, - struct btrfs_root *latest, - struct list_head *dead_list) +int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) { struct btrfs_dirty_root *dirty; @@ -400,7 +398,10 @@ int btrfs_add_dead_root(struct btrfs_root *root, return -ENOMEM; dirty->root = root; dirty->latest_root = latest; - list_add(&dirty->list, dead_list); + + mutex_lock(&root->fs_info->trans_mutex); + list_add(&dirty->list, &latest->fs_info->dead_roots); + mutex_unlock(&root->fs_info->trans_mutex); return 0; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 2c73caeebb2c..598baa312417 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -90,8 +90,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, - struct list_head *dead_list); +int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, From ae01a0abf343aefe923ace5c1a8c634adfbe29c5 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 4 Aug 2008 23:23:47 -0400 Subject: [PATCH 0650/2985] Btrfs: Update clone file ioctl This patch updates the file clone ioctl for the tree locking and new data ordered code. --- Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 152 ++++++++++++++++++++++------------------------- 1 file changed, 71 insertions(+), 81 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e1046a54b1c5..3932c7cd0fae 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -517,32 +517,6 @@ out: return ret; } -int dup_item_to_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct extent_buffer *leaf, - int slot, - struct btrfs_key *key, - u64 destino) -{ - char *dup; - int len = btrfs_item_size_nr(leaf, slot); - struct btrfs_key ckey = *key; - int ret = 0; - - dup = kmalloc(len, GFP_NOFS); - if (!dup) - return -ENOMEM; - - read_extent_buffer(leaf, dup, btrfs_item_ptr_offset(leaf, slot), len); - btrfs_release_path(root, path); - - ckey.objectid = destino; - ret = btrfs_insert_item(trans, root, &ckey, dup, len); - kfree(dup); - return ret; -} - long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) { struct inode *inode = fdentry(file)->d_inode; @@ -550,23 +524,42 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) struct file *src_file; struct inode *src; struct btrfs_trans_handle *trans; - int ret; - u64 pos; + struct btrfs_ordered_extent *ordered; struct btrfs_path *path; - struct btrfs_key key; struct extent_buffer *leaf; + char *buf; + struct btrfs_key key; + struct btrfs_key new_key; + u32 size; u32 nritems; int slot; + int ret; src_file = fget(src_fd); if (!src_file) return -EBADF; src = src_file->f_dentry->d_inode; - ret = -EXDEV; - if (src->i_sb != inode->i_sb) + ret = -EISDIR; + if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) goto out_fput; + ret = -EXDEV; + if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root) + goto out_fput; + + ret = -ENOMEM; + buf = vmalloc(btrfs_level_size(root, 0)); + if (!buf) + goto out_fput; + + path = btrfs_alloc_path(); + if (!path) { + vfree(buf); + goto out_fput; + } + path->reada = 2; + if (inode < src) { mutex_lock(&inode->i_mutex); mutex_lock(&src->i_mutex); @@ -582,24 +575,22 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) /* do any pending delalloc/csum calc on src, one way or another, and lock file content */ while (1) { - filemap_write_and_wait(src->i_mapping); lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); - if (BTRFS_I(src)->delalloc_bytes == 0) + ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); + if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) break; unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); + if (ordered) + btrfs_put_ordered_extent(ordered); + btrfs_wait_ordered_range(src, 0, (u64)-1); } - trans = btrfs_start_transaction(root, 0); - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - key.offset = 0; - key.type = BTRFS_EXTENT_DATA_KEY; + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + key.objectid = src->i_ino; - pos = 0; - path->reada = 2; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; while (1) { /* @@ -610,18 +601,19 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) if (ret < 0) goto out; - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + nritems = btrfs_header_nritems(path->nodes[0]); + if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(root, path); if (ret < 0) goto out; if (ret > 0) break; + nritems = btrfs_header_nritems(path->nodes[0]); } leaf = path->nodes[0]; slot = path->slots[0]; - btrfs_item_key_to_cpu(leaf, &key, slot); - nritems = btrfs_header_nritems(leaf); + btrfs_item_key_to_cpu(leaf, &key, slot); if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY || key.objectid != src->i_ino) break; @@ -629,66 +621,64 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { struct btrfs_file_extent_item *extent; int found_type; - pos = key.offset; + extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { - u64 len = btrfs_file_extent_num_bytes(leaf, - extent); u64 ds = btrfs_file_extent_disk_bytenr(leaf, extent); u64 dl = btrfs_file_extent_disk_num_bytes(leaf, extent); - u64 off = btrfs_file_extent_offset(leaf, - extent); - btrfs_insert_file_extent(trans, root, - inode->i_ino, pos, - ds, dl, len, off); /* ds == 0 means there's a hole */ if (ds != 0) { - btrfs_inc_extent_ref(trans, root, + ret = btrfs_inc_extent_ref(trans, root, ds, dl, root->root_key.objectid, trans->transid, - inode->i_ino, pos); + inode->i_ino, key.offset); + if (ret) + goto out; } - pos = key.offset + len; - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - ret = dup_item_to_inode(trans, root, path, - leaf, slot, &key, - inode->i_ino); - if (ret) - goto out; - pos = key.offset + btrfs_item_size_nr(leaf, - slot); } - } else if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { - ret = dup_item_to_inode(trans, root, path, leaf, - slot, &key, inode->i_ino); + } - if (ret) - goto out; + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY || + btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { + size = btrfs_item_size_nr(leaf, slot); + read_extent_buffer(leaf, buf, + btrfs_item_ptr_offset(leaf, slot), + size); + btrfs_release_path(root, path); + memcpy(&new_key, &key, sizeof(new_key)); + new_key.objectid = inode->i_ino; + ret = btrfs_insert_item(trans, root, &new_key, + buf, size); + BUG_ON(ret); + } else { + btrfs_release_path(root, path); } key.offset++; - btrfs_release_path(root, path); } - ret = 0; out: - btrfs_free_path(path); - - inode->i_blocks = src->i_blocks; - i_size_write(inode, src->i_size); - btrfs_update_inode(trans, root, inode); - - unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); - + btrfs_release_path(root, path); + if (ret == 0) { + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_blocks = src->i_blocks; + btrfs_i_size_write(inode, src->i_size); + BTRFS_I(inode)->flags = BTRFS_I(src)->flags; + ret = btrfs_update_inode(trans, root, inode); + } btrfs_end_transaction(trans, root); - + unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); + if (ret) + vmtruncate(inode, 0); out_unlock: mutex_unlock(&src->i_mutex); mutex_unlock(&inode->i_mutex); + vfree(buf); + btrfs_free_path(path); out_fput: fput(src_file); return ret; From 7d2b4daa67379960477568abda62b8ba9ee3a8aa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 5 Aug 2008 10:13:57 -0400 Subject: [PATCH 0651/2985] Btrfs: Fix the multi-bio code to save the original bio for completion The multi-bio code is responsible for duplicating blocks in raid1 and single spindle duplication. It has counters to make sure all of the locations for a given extent are properly written before io completion is returned to the higher layers. But, it didn't always complete the same bio it was given, sometimes a clone was completed instead. This lead to problems with the async work queues because they saved a pointer to the bio in a struct off bi_private. The fix is to remember the original bio and only complete that one. Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 11 ++++++++++- fs/btrfs/volumes.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0de51e37e789..09311b3066df 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2070,6 +2070,7 @@ static int end_bio_multi_stripe(struct bio *bio, #endif { struct btrfs_multi_bio *multi = bio->bi_private; + int is_orig_bio = 0; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) if (bio->bi_size) @@ -2078,7 +2079,14 @@ static int end_bio_multi_stripe(struct bio *bio, if (err) atomic_inc(&multi->error); + if (bio == multi->orig_bio) + is_orig_bio = 1; + if (atomic_dec_and_test(&multi->stripes_pending)) { + if (!is_orig_bio) { + bio_put(bio); + bio = multi->orig_bio; + } bio->bi_private = multi->private; bio->bi_end_io = multi->end_io; /* only send an error to the higher layers if it is @@ -2101,7 +2109,7 @@ static int end_bio_multi_stripe(struct bio *bio, #else bio_endio(bio, err); #endif - } else { + } else if (!is_orig_bio) { bio_put(bio); } #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) @@ -2196,6 +2204,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } multi->end_io = first_bio->bi_end_io; multi->private = first_bio->bi_private; + multi->orig_bio = first_bio; atomic_set(&multi->stripes_pending, multi->num_stripes); while(dev_nr < total_devs) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 48a44f7a9385..c50e50580b51 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -95,6 +95,7 @@ struct btrfs_bio_stripe { struct btrfs_multi_bio { atomic_t stripes_pending; bio_end_io_t *end_io; + struct bio *orig_bio; void *private; atomic_t error; int max_errors; From 00e4e6b33a0f78aab4b788d6d31c884fd8bf88da Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 5 Aug 2008 11:18:09 -0400 Subject: [PATCH 0652/2985] Get rid of BTRFS_I(inode)->index and use local vars instead rename and link don't always have a lock on the source inode, and our use of a per-inode index variable was racy. This changes things to store the index in a local variable instead. Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 9 ------- fs/btrfs/inode.c | 59 ++++++++++++++++++++++++------------------ 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 534ac4153ddd..111f90524ae1 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -57,15 +57,6 @@ struct btrfs_inode { * number for new files that are created */ u64 index_cnt; - - /* - * index holds the directory index for this inode on creation, so - * add_link can do what its supposed to. This isn't populated when the - * inode is read because there isn't really a reason to know this unless - * we are creating the directory index or deleting it, and deletion - * reads the index off of the inode reference at unlink time. - */ - u64 index; }; static inline struct btrfs_inode *BTRFS_I(struct inode *inode) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 99121a55ffbe..4d8ffc01931e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2153,7 +2153,8 @@ out: return ret; } -static int btrfs_set_inode_index(struct inode *dir, struct inode *inode) +static int btrfs_set_inode_index(struct inode *dir, struct inode *inode, + u64 *index) { int ret = 0; @@ -2163,7 +2164,7 @@ static int btrfs_set_inode_index(struct inode *dir, struct inode *inode) return ret; } - BTRFS_I(inode)->index = BTRFS_I(dir)->index_cnt; + *index = BTRFS_I(dir)->index_cnt; BTRFS_I(dir)->index_cnt++; return ret; @@ -2176,7 +2177,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, u64 ref_objectid, u64 objectid, struct btrfs_block_group_cache *group, - int mode) + int mode, u64 *index) { struct inode *inode; struct btrfs_inode_item *inode_item; @@ -2198,11 +2199,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); if (dir) { - ret = btrfs_set_inode_index(dir, inode); + ret = btrfs_set_inode_index(dir, inode, index); if (ret) return ERR_PTR(ret); - } else { - BTRFS_I(inode)->index = 0; } /* * index_cnt is ignored for everything but a dir, @@ -2268,7 +2267,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, struct btrfs_inode_ref); btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); - btrfs_set_inode_ref_index(path->nodes[0], ref, BTRFS_I(inode)->index); + btrfs_set_inode_ref_index(path->nodes[0], ref, *index); ptr = (unsigned long)(ref + 1); write_extent_buffer(path->nodes[0], name, ptr, name_len); @@ -2296,7 +2295,7 @@ static inline u8 btrfs_inode_type(struct inode *inode) static int btrfs_add_link(struct btrfs_trans_handle *trans, struct dentry *dentry, struct inode *inode, - int add_backref) + int add_backref, u64 index) { int ret; struct btrfs_key key; @@ -2311,7 +2310,7 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, &key, btrfs_inode_type(inode), - BTRFS_I(inode)->index); + index); if (ret == 0) { if (add_backref) { ret = btrfs_insert_inode_ref(trans, root, @@ -2319,7 +2318,7 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_name.len, inode->i_ino, parent_inode->i_ino, - BTRFS_I(inode)->index); + index); } btrfs_i_size_write(parent_inode, parent_inode->i_size + dentry->d_name.len * 2); @@ -2332,9 +2331,9 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, static int btrfs_add_nondir(struct btrfs_trans_handle *trans, struct dentry *dentry, struct inode *inode, - int backref) + int backref, u64 index) { - int err = btrfs_add_link(trans, dentry, inode, backref); + int err = btrfs_add_link(trans, dentry, inode, backref, index); if (!err) { d_instantiate(dentry, inode); return 0; @@ -2354,6 +2353,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, int drop_inode = 0; u64 objectid; unsigned long nr = 0; + u64 index = 0; if (!new_valid_dev(rdev)) return -EINVAL; @@ -2374,7 +2374,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, objectid, - BTRFS_I(dir)->block_group, mode); + BTRFS_I(dir)->block_group, mode, &index); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; @@ -2386,7 +2386,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, } btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode, 0); + err = btrfs_add_nondir(trans, dentry, inode, 0, index); if (err) drop_inode = 1; else { @@ -2419,6 +2419,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, int drop_inode = 0; unsigned long nr = 0; u64 objectid; + u64 index = 0; err = btrfs_check_free_space(root, 1, 0); if (err) @@ -2435,7 +2436,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, - objectid, BTRFS_I(dir)->block_group, mode); + objectid, BTRFS_I(dir)->block_group, mode, + &index); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; @@ -2447,7 +2449,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, } btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode, 0); + err = btrfs_add_nondir(trans, dentry, inode, 0, index); if (err) drop_inode = 1; else { @@ -2489,6 +2491,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = old_dentry->d_inode; + u64 index; unsigned long nr = 0; int err; int drop_inode = 0; @@ -2504,7 +2507,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; - err = btrfs_set_inode_index(dir, inode); + err = btrfs_set_inode_index(dir, inode, &index); if (err) goto fail; @@ -2513,7 +2516,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, btrfs_set_trans_block_group(trans, dir); atomic_inc(&inode->i_count); - err = btrfs_add_nondir(trans, dentry, inode, 1); + err = btrfs_add_nondir(trans, dentry, inode, 1, index); if (err) drop_inode = 1; @@ -2544,6 +2547,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) int err = 0; int drop_on_err = 0; u64 objectid = 0; + u64 index = 0; unsigned long nr = 1; err = btrfs_check_free_space(root, 1, 0); @@ -2567,7 +2571,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, objectid, - BTRFS_I(dir)->block_group, S_IFDIR | mode); + BTRFS_I(dir)->block_group, S_IFDIR | mode, + &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fail; @@ -2588,7 +2593,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; - err = btrfs_add_link(trans, dentry, inode, 0); + err = btrfs_add_link(trans, dentry, inode, 0, index); if (err) goto out_fail; @@ -3203,9 +3208,10 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, struct btrfs_block_group_cache *block_group) { struct inode *inode; + u64 index = 0; inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, - new_dirid, block_group, S_IFDIR | 0700); + new_dirid, block_group, S_IFDIR | 0700, &index); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = &btrfs_dir_inode_operations; @@ -3384,6 +3390,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode *new_inode = new_dentry->d_inode; struct inode *old_inode = old_dentry->d_inode; struct timespec ctime = CURRENT_TIME; + u64 index = 0; int ret; if (S_ISDIR(old_inode->i_mode) && new_inode && @@ -3419,11 +3426,11 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, goto out_fail; } } - ret = btrfs_set_inode_index(new_dir, old_inode); + ret = btrfs_set_inode_index(new_dir, old_inode, &index); if (ret) goto out_fail; - ret = btrfs_add_link(trans, new_dentry, old_inode, 1); + ret = btrfs_add_link(trans, new_dentry, old_inode, 1, index); if (ret) goto out_fail; @@ -3464,6 +3471,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, int err; int drop_inode = 0; u64 objectid; + u64 index = 0 ; int name_len; int datasize; unsigned long ptr; @@ -3491,7 +3499,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, objectid, - BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO); + BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, + &index); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; @@ -3503,7 +3512,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, } btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode, 0); + err = btrfs_add_nondir(trans, dentry, inode, 0, index); if (err) drop_inode = 1; else { From 7ea394f1192bee1af67ea4762c88ef4b7b0487a8 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Tue, 5 Aug 2008 13:05:02 -0400 Subject: [PATCH 0653/2985] Btrfs: Fix nodatacow for the new data=ordered mode Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +- fs/btrfs/extent-tree.c | 11 ++++--- fs/btrfs/inode.c | 64 +++++++++++++++++++++++++---------------- fs/btrfs/ioctl.c | 1 + fs/btrfs/ordered-data.c | 16 +++++++++-- fs/btrfs/ordered-data.h | 6 ++-- fs/btrfs/transaction.c | 11 +++++++ 7 files changed, 76 insertions(+), 36 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 116aee21bf77..f90e5a7ac161 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1403,7 +1403,8 @@ static inline struct dentry *fdentry(struct file *file) { } /* extent-tree.c */ -int btrfs_cross_ref_exists(struct btrfs_root *root, +int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_key *key, u64 bytenr); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 33cb2ac4cb28..fff219ed61d9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -893,10 +893,10 @@ out: return ret; } -int btrfs_cross_ref_exists(struct btrfs_root *root, +int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_key *key, u64 bytenr) { - struct btrfs_trans_handle *trans; struct btrfs_root *old_root; struct btrfs_path *path = NULL; struct extent_buffer *eb; @@ -908,6 +908,7 @@ int btrfs_cross_ref_exists(struct btrfs_root *root, int level; int ret; + BUG_ON(trans == NULL); BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY); ret = get_reference_status(root, bytenr, 0, key->objectid, &min_generation, &ref_count); @@ -917,7 +918,6 @@ int btrfs_cross_ref_exists(struct btrfs_root *root, if (ref_count != 1) return 1; - trans = btrfs_start_transaction(root, 0); old_root = root->dirty_root->root; ref_generation = old_root->root_key.offset; @@ -973,7 +973,6 @@ int btrfs_cross_ref_exists(struct btrfs_root *root, out: if (path) btrfs_free_path(path); - btrfs_end_transaction(trans, root); return ret; } @@ -3320,7 +3319,7 @@ again: mutex_unlock(&root->fs_info->alloc_mutex); btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(tree_root); + btrfs_wait_ordered_extents(tree_root, 0); mutex_lock(&root->fs_info->alloc_mutex); @@ -3407,7 +3406,7 @@ next: btrfs_clean_old_snapshots(tree_root); btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(tree_root); + btrfs_wait_ordered_extents(tree_root, 0); trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4d8ffc01931e..c33053ba3816 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -166,7 +166,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) cur_alloc_size = ins.offset; ret = btrfs_add_ordered_extent(inode, start, ins.objectid, - ins.offset); + ins.offset, 0); BUG_ON(ret); if (num_bytes < cur_alloc_size) { printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, @@ -187,31 +187,32 @@ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) u64 extent_start; u64 extent_end; u64 bytenr; - u64 cow_end; u64 loops = 0; u64 total_fs_bytes; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_block_group_cache *block_group; + struct btrfs_trans_handle *trans; struct extent_buffer *leaf; int found_type; struct btrfs_path *path; struct btrfs_file_extent_item *item; int ret; - int err; + int err = 0; struct btrfs_key found_key; total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); path = btrfs_alloc_path(); BUG_ON(!path); + trans = btrfs_join_transaction(root, 1); + BUG_ON(!trans); again: ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, start, 0); if (ret < 0) { - btrfs_free_path(path); - return ret; + err = ret; + goto out; } - cow_end = end; if (ret != 0) { if (path->slots[0] == 0) goto not_found; @@ -244,12 +245,11 @@ again: if (start < extent_start || start >= extent_end) goto not_found; - cow_end = min(end, extent_end - 1); bytenr = btrfs_file_extent_disk_bytenr(leaf, item); if (bytenr == 0) goto not_found; - if (btrfs_cross_ref_exists(root, &found_key, bytenr)) + if (btrfs_cross_ref_exists(trans, root, &found_key, bytenr)) goto not_found; /* * we may be called by the resizer, make sure we're inside @@ -260,24 +260,32 @@ again: if (!block_group || block_group->ro) goto not_found; - start = extent_end; - } else { - goto not_found; - } -loop: - if (start > end) { - btrfs_free_path(path); - return 0; - } - btrfs_release_path(root, path); - loops++; - goto again; + bytenr += btrfs_file_extent_offset(leaf, item); + extent_num_bytes = min(end + 1, extent_end) - start; + ret = btrfs_add_ordered_extent(inode, start, bytenr, + extent_num_bytes, 1); + if (ret) { + err = ret; + goto out; + } + btrfs_release_path(root, path); + start = extent_end; + if (start <= end) { + loops++; + goto again; + } + } else { not_found: - btrfs_release_path(root, path); - cow_file_range(inode, start, end); - start = end + 1; - goto loop; + btrfs_end_transaction(trans, root); + btrfs_free_path(path); + return cow_file_range(inode, start, end); + } +out: + WARN_ON(err); + btrfs_end_transaction(trans, root); + btrfs_free_path(path); + return err; } static int run_delalloc_range(struct inode *inode, u64 start, u64 end) @@ -385,6 +393,11 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, goto mapit; } + if (btrfs_test_opt(root, NODATASUM) || + btrfs_test_flag(inode, NODATASUM)) { + goto mapit; + } + return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, inode, rw, bio, mirror_num, __btrfs_submit_bio_hook); @@ -527,6 +540,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent = btrfs_lookup_ordered_extent(inode, start); BUG_ON(!ordered_extent); + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) + goto nocow; lock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, @@ -567,6 +582,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) unlock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); +nocow: add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3932c7cd0fae..59b64c738fd1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index db200e6baf7e..da6d43eb41db 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -152,7 +152,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, * inserted. */ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, - u64 start, u64 len) + u64 start, u64 len, int nocow) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; @@ -168,6 +168,8 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->start = start; entry->len = len; entry->inode = inode; + if (nocow) + set_bit(BTRFS_ORDERED_NOCOW, &entry->flags); /* one ref for the tree */ atomic_set(&entry->refs, 1); @@ -303,10 +305,11 @@ int btrfs_remove_ordered_extent(struct inode *inode, return 0; } -int btrfs_wait_ordered_extents(struct btrfs_root *root) +int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) { struct list_head splice; struct list_head *cur; + struct list_head *tmp; struct btrfs_ordered_extent *ordered; struct inode *inode; @@ -314,10 +317,16 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root) spin_lock(&root->fs_info->ordered_extent_lock); list_splice_init(&root->fs_info->ordered_extents, &splice); - while(!list_empty(&splice)) { + list_for_each_safe(cur, tmp, &splice) { cur = splice.next; ordered = list_entry(cur, struct btrfs_ordered_extent, root_extent_list); + if (nocow_only && + !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { + cond_resched_lock(&root->fs_info->ordered_extent_lock); + continue; + } + list_del_init(&ordered->root_extent_list); atomic_inc(&ordered->refs); inode = ordered->inode; @@ -338,6 +347,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root) spin_lock(&root->fs_info->ordered_extent_lock); } + list_splice_init(&splice, &root->fs_info->ordered_extents); spin_unlock(&root->fs_info->ordered_extent_lock); return 0; } diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 5efe6b63c74c..fd45519f30a8 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -64,6 +64,8 @@ struct btrfs_ordered_sum { #define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */ +#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ + struct btrfs_ordered_extent { /* logical offset in the file */ u64 file_offset; @@ -125,7 +127,7 @@ int btrfs_remove_ordered_extent(struct inode *inode, int btrfs_dec_test_ordered_pending(struct inode *inode, u64 file_offset, u64 io_size); int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, - u64 start, u64 len); + u64 start, u64 len, int nocow); int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_extent *entry, struct btrfs_ordered_sum *sum); @@ -143,5 +145,5 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, pgoff_t start, pgoff_t end); int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); -int btrfs_wait_ordered_extents(struct btrfs_root *root); +int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index ebf5362da1d2..9d3d08e9f8d1 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -438,6 +438,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, free_extent_buffer(root->commit_root); root->commit_root = NULL; + root->dirty_root = NULL; spin_lock(&root->list_lock); list_del_init(&dirty->root->dead_list); @@ -461,6 +462,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, sizeof(struct btrfs_disk_key)); root->root_item.drop_level = 0; root->commit_root = NULL; + root->dirty_root = NULL; root->root_key.offset = root->fs_info->generation; btrfs_set_root_bytenr(&root->root_item, root->node->start); @@ -762,7 +764,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } do { + int snap_pending = 0; joined = cur_trans->num_joined; + if (!list_empty(&trans->transaction->pending_snapshots)) + snap_pending = 1; + WARN_ON(cur_trans != trans->transaction); prepare_to_wait(&cur_trans->writer_wait, &wait, TASK_UNINTERRUPTIBLE); @@ -774,6 +780,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); + if (snap_pending) { + ret = btrfs_wait_ordered_extents(root, 1); + BUG_ON(ret); + } + schedule_timeout(timeout); mutex_lock(&root->fs_info->trans_mutex); From 4ca8b41e3fe76cc5774fbcc6cba64259d69e0f96 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 5 Aug 2008 13:30:48 -0400 Subject: [PATCH 0654/2985] Btrfs: Avoid calling into the FS for the final iput on fake root inodes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + fs/btrfs/inode.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 76543683f3b9..56c54a41dbbb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1739,6 +1739,7 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(root->fs_info->dev_root->node); btrfs_free_block_groups(root->fs_info); + fs_info->closing = 2; del_fs_roots(fs_info); filemap_write_and_wait(fs_info->btree_inode->i_mapping); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c33053ba3816..a26d365111e5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2092,6 +2092,9 @@ int btrfs_write_inode(struct inode *inode, int wait) struct btrfs_trans_handle *trans; int ret = 0; + if (root->fs_info->closing > 1) + return 0; + if (wait) { trans = btrfs_join_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); From 5707e3b6f77e37bb8ecc7ba9d74c8bfc38011b6d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Aug 2008 15:45:30 -0400 Subject: [PATCH 0655/2985] Btrfs: Update version.sh to v0.16 Signed-off-by: Chris Mason --- fs/btrfs/version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/version.sh b/fs/btrfs/version.sh index 4d06497d2ec5..0f57f24404d9 100644 --- a/fs/btrfs/version.sh +++ b/fs/btrfs/version.sh @@ -6,7 +6,7 @@ # Copyright 2008, Oracle # Released under the GNU GPLv2 -v="v0.15" +v="v0.16" which hg > /dev/null if [ -d .hg ] && [ $? == 0 ]; then From 5036f53868ae943704ae69a192d21225dc914c35 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 7 Aug 2008 11:19:42 -0400 Subject: [PATCH 0656/2985] Btrfs: fix RHEL test for ClearPageFsMisc Newer RHEL5 kernels define both ClearPageFSMisc and ClearPageChecked, so test for both before redefining. Signed-off-by: Eric Sandeen --- Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f90e5a7ac161..c88f1e16ce2d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1648,7 +1648,7 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, /* inode.c */ /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ -#ifdef ClearPageFsMisc +#if defined(ClearPageFsMisc) && !defined(ClearPageChecked) #define ClearPageChecked ClearPageFsMisc #define SetPageChecked SetPageFsMisc #define PageChecked PageFsMisc From 2db04966ae9a3eeb57f28df9aac4e77d5b287cb1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 7 Aug 2008 11:19:43 -0400 Subject: [PATCH 0657/2985] Btrfs: Change TestSetPageLocked() to trylock_page() Add backwards compatibility in compat.h Signed-off-by: David Woodhouse --- compat.h | 3 +++ extent_io.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletions(-) Signed-off-by: Chris Mason --- fs/btrfs/compat.h | 3 +++ fs/btrfs/extent_io.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h index d39a768e65ff..b3349a66999d 100644 --- a/fs/btrfs/compat.h +++ b/fs/btrfs/compat.h @@ -1,6 +1,9 @@ #ifndef _COMPAT_H_ #define _COMPAT_H_ +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,26) +#define trylock_page(page) (!TestSetPageLocked(page)) +#endif /* * Even if AppArmor isn't enabled, it still has different prototypes. diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1cf4baba4342..f46f88620c70 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -14,6 +14,7 @@ #include #include "extent_io.h" #include "extent_map.h" +#include "compat.h" /* temporary define until extent_map moves out of btrfs */ struct kmem_cache *btrfs_cache_create(const char *name, size_t size, @@ -3055,7 +3056,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (!wait) { - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto unlock_exit; } else { lock_page(page); From db69e0ebae944690de89851315404f483e6464e0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:14 -0400 Subject: [PATCH 0658/2985] Btrfs: Init address_space->writeback_index properly The writeback_index field is used by write_cache_pages to pick up where writeback on a given inode left off. But, it is never set to a sane value, so writeback can often start at a random offset in the file. Kernels 2.6.28 and higher will have this fixed, but for everyone else, we also fill in the value in btrfs. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a26d365111e5..33b990878d84 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1833,6 +1833,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; BTRFS_I(inode)->delalloc_bytes = 0; + inode->i_mapping->writeback_index = 0; BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->index_cnt = (u64)-1; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); @@ -2239,6 +2240,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, mutex_init(&BTRFS_I(inode)->csum_mutex); mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; + inode->i_mapping->writeback_index = 0; BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->root = root; @@ -2486,6 +2488,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->disk_i_size = 0; + inode->i_mapping->writeback_index = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); } @@ -3549,6 +3552,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, mutex_init(&BTRFS_I(inode)->extent_mutex); BTRFS_I(inode)->delalloc_bytes = 0; BTRFS_I(inode)->disk_i_size = 0; + inode->i_mapping->writeback_index = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); } From b720d209524fccdd33a499351f2ca880b8c0ffb4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:14 -0400 Subject: [PATCH 0659/2985] Btrfs: Limit the number of async bio submission kthreads to the number of devices Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 56c54a41dbbb..99bd9f9b9eed 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1348,7 +1348,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, * cannot dynamically grow. */ btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); - btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->submit_workers, + min_t(u64, fs_devices->num_devices, + fs_info->thread_pool_size)); /* a higher idle thresh on the submit workers makes it much more * likely that bios will be send down in a sane order to the From 0986fe9eac24fd186927c3b87af51d62f8ab92cd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:15 -0400 Subject: [PATCH 0660/2985] Btrfs: Count async bios separately from async checksum work items Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 25 ++++++++++++++++++++++--- fs/btrfs/volumes.c | 6 +++--- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c88f1e16ce2d..bb4a8d2200d0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -544,6 +544,7 @@ struct btrfs_fs_info { struct list_head hashers; struct list_head dead_roots; atomic_t nr_async_submits; + atomic_t nr_async_bios; /* * this is used by the balancing code to wait for all the pending diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 99bd9f9b9eed..9902d29abd06 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -429,6 +429,21 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } +static int congested_async(struct btrfs_fs_info *info, int iodone) +{ + int limit = 256 * info->fs_devices->open_devices; + + if (iodone) + limit = (limit * 3) / 2; + if (atomic_read(&info->nr_async_submits) > limit) + return 1; + + limit = 8192 * info->fs_devices->open_devices; + if (iodone) + limit = (limit * 3) / 2; + return atomic_read(&info->nr_async_bios) > limit; +} + static void run_one_async_submit(struct btrfs_work *work) { struct btrfs_fs_info *fs_info; @@ -437,6 +452,11 @@ static void run_one_async_submit(struct btrfs_work *work) async = container_of(work, struct async_submit_bio, work); fs_info = BTRFS_I(async->inode)->root->fs_info; atomic_dec(&fs_info->nr_async_submits); + + if ((async->bio->bi_rw & (1 << BIO_RW)) && + !congested_async(fs_info, 1)) { + clear_bdi_congested(&fs_info->bdi, WRITE); + } async->submit_bio_hook(async->inode, async->rw, async->bio, async->mirror_num); kfree(async); @@ -938,15 +958,13 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) { struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; int ret = 0; - int limit = 256 * info->fs_devices->open_devices; struct list_head *cur; struct btrfs_device *device; struct backing_dev_info *bdi; if ((bdi_bits & (1 << BDI_write_congested)) && - atomic_read(&info->nr_async_submits) > limit) { + congested_async(info, 0)) return 1; - } list_for_each(cur, &info->fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); @@ -1250,6 +1268,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); + atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->throttles, 0); atomic_set(&fs_info->throttle_gen, 0); fs_info->sb = sb; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 09311b3066df..23a5b0aba00a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -179,7 +179,7 @@ loop: cur = pending; pending = pending->bi_next; cur->bi_next = NULL; - atomic_dec(&device->dev_root->fs_info->nr_async_submits); + atomic_dec(&device->dev_root->fs_info->nr_async_bios); BUG_ON(atomic_read(&cur->bi_cnt) == 0); bio_get(cur); @@ -2145,12 +2145,12 @@ int schedule_bio(struct btrfs_root *root, struct btrfs_device *device, } /* - * nr_async_sumbits allows us to reliably return congestion to the + * nr_async_bios allows us to reliably return congestion to the * higher layers. Otherwise, the async bio makes it appear we have * made progress against dirty pages when we've really just put it * on a queue for later */ - atomic_inc(&root->fs_info->nr_async_submits); + atomic_inc(&root->fs_info->nr_async_bios); WARN_ON(bio->bi_next); bio->bi_next = NULL; bio->bi_rw |= rw; From 777e6bd706ee40897545463871de5b456fbc46dc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:15 -0400 Subject: [PATCH 0661/2985] Btrfs: Transaction commit: don't use filemap_fdatawait After writing out all the remaining btree blocks in the transaction, the commit code would use filemap_fdatawait to make sure it was all on disk. This means it would wait for blocks written by other procs as well. The new code walks the list of blocks for this transaction again and waits only for those required by this transaction. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 ++++------- fs/btrfs/disk-io.h | 1 + fs/btrfs/transaction.c | 36 +++++++++++++++++++++++++++++++----- 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9902d29abd06..9601b13c7d7a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -429,7 +429,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } -static int congested_async(struct btrfs_fs_info *info, int iodone) +int btrfs_congested_async(struct btrfs_fs_info *info, int iodone) { int limit = 256 * info->fs_devices->open_devices; @@ -438,9 +438,6 @@ static int congested_async(struct btrfs_fs_info *info, int iodone) if (atomic_read(&info->nr_async_submits) > limit) return 1; - limit = 8192 * info->fs_devices->open_devices; - if (iodone) - limit = (limit * 3) / 2; return atomic_read(&info->nr_async_bios) > limit; } @@ -454,7 +451,7 @@ static void run_one_async_submit(struct btrfs_work *work) atomic_dec(&fs_info->nr_async_submits); if ((async->bio->bi_rw & (1 << BIO_RW)) && - !congested_async(fs_info, 1)) { + !btrfs_congested_async(fs_info, 1)) { clear_bdi_congested(&fs_info->bdi, WRITE); } async->submit_bio_hook(async->inode, async->rw, async->bio, @@ -963,7 +960,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) struct backing_dev_info *bdi; if ((bdi_bits & (1 << BDI_write_congested)) && - congested_async(info, 0)) + btrfs_congested_async(info, 0)) return 1; list_for_each(cur, &info->fs_devices->devices) { @@ -1844,7 +1841,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) struct extent_io_tree *tree; u64 num_dirty; u64 start = 0; - unsigned long thresh = 16 * 1024 * 1024; + unsigned long thresh = 2 * 1024 * 1024; tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; if (current_is_pdflush()) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 353c3c50c957..e904a69347a4 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -72,4 +72,5 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, int rw, struct bio *bio, int mirror_num, extent_submit_bio_hook_t *submit_bio_hook); +int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9d3d08e9f8d1..6bcb0876f9bb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -303,12 +303,12 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; - int err; + int err = 0; int werr = 0; struct extent_io_tree *dirty_pages; struct page *page; struct inode *btree_inode = root->fs_info->btree_inode; - u64 start; + u64 start = 0; u64 end; unsigned long index; @@ -317,12 +317,15 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, } dirty_pages = &trans->transaction->dirty_pages; while(1) { - ret = find_first_extent_bit(dirty_pages, 0, &start, &end, + ret = find_first_extent_bit(dirty_pages, start, &start, &end, EXTENT_DIRTY); if (ret) break; - clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); while(start <= end) { + if (btrfs_congested_async(root->fs_info, 0)) + congestion_wait(WRITE, HZ/10); + cond_resched(); + index = start >> PAGE_CACHE_SHIFT; start = (u64)(index + 1) << PAGE_CACHE_SHIFT; page = find_lock_page(btree_inode->i_mapping, index); @@ -343,7 +346,30 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, page_cache_release(page); } } - err = filemap_fdatawait(btree_inode->i_mapping); + while(1) { + ret = find_first_extent_bit(dirty_pages, 0, &start, &end, + EXTENT_DIRTY); + if (ret) + break; + + clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); + while(start <= end) { + index = start >> PAGE_CACHE_SHIFT; + start = (u64)(index + 1) << PAGE_CACHE_SHIFT; + page = find_get_page(btree_inode->i_mapping, index); + if (!page) + continue; + if (PageDirty(page)) { + lock_page(page); + err = write_one_page(page, 0); + if (err) + werr = err; + } + wait_on_page_writeback(page); + page_cache_release(page); + cond_resched(); + } + } if (err) werr = err; return werr; From 5443be45f5cb57d02fd895a0bcaf7e7d9890b1df Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:16 -0400 Subject: [PATCH 0662/2985] Btrfs: Give all the worker threads descriptive names Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 7 +++++-- fs/btrfs/async-thread.h | 5 ++++- fs/btrfs/disk-io.c | 22 +++++++++++++++------- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index bc2980c433ef..5f2f5a8c2289 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -153,7 +153,7 @@ int btrfs_stop_workers(struct btrfs_workers *workers) /* * simple init on struct btrfs_workers */ -void btrfs_init_workers(struct btrfs_workers *workers, int max) +void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) { workers->num_workers = 0; INIT_LIST_HEAD(&workers->worker_list); @@ -161,6 +161,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, int max) spin_lock_init(&workers->lock); workers->max_workers = max; workers->idle_thresh = 32; + workers->name = name; } /* @@ -184,7 +185,9 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) INIT_LIST_HEAD(&worker->worker_list); spin_lock_init(&worker->lock); atomic_set(&worker->num_pending, 0); - worker->task = kthread_run(worker_loop, worker, "btrfs"); + worker->task = kthread_run(worker_loop, worker, + "btrfs-%s-%d", workers->name, + workers->num_workers + i); worker->workers = workers; if (IS_ERR(worker->task)) { kfree(worker); diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 3436ff897597..43e44d115dd1 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -69,11 +69,14 @@ struct btrfs_workers { /* lock for finding the next worker thread to queue on */ spinlock_t lock; + + /* extra name for this worker */ + char *name; }; int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); int btrfs_stop_workers(struct btrfs_workers *workers); -void btrfs_init_workers(struct btrfs_workers *workers, int max); +void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); int btrfs_requeue_work(struct btrfs_work *work); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9601b13c7d7a..1bf210dadef6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -492,11 +492,11 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, /* * when we're called for a write, we're already in the async - * submission context. Just jump ingo btrfs_map_bio + * submission context. Just jump into btrfs_map_bio */ if (rw & (1 << BIO_RW)) { return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, - mirror_num, 0); + mirror_num, 1); } /* @@ -528,6 +528,12 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_io_tree *tree; tree = &BTRFS_I(page->mapping->host)->io_tree; + + if (current->flags & PF_MEMALLOC) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } return extent_write_full_page(tree, page, btree_get_extent, wbc); } @@ -1363,8 +1369,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, * queue work function gets called at interrupt time, and so it * cannot dynamically grow. */ - btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); - btrfs_init_workers(&fs_info->submit_workers, + btrfs_init_workers(&fs_info->workers, "worker", + fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->submit_workers, "submit", min_t(u64, fs_devices->num_devices, fs_info->thread_pool_size)); @@ -1374,9 +1381,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, */ fs_info->submit_workers.idle_thresh = 64; - btrfs_init_workers(&fs_info->fixup_workers, 1); - btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); - btrfs_init_workers(&fs_info->endio_write_workers, + btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); + btrfs_init_workers(&fs_info->endio_workers, "endio", + fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", fs_info->thread_pool_size); /* From 4854ddd0ed0a687fc2d7c45a529c406232e31e7b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:17 -0400 Subject: [PATCH 0663/2985] Btrfs: Wait for kernel threads to make progress during async submission Before this change, btrfs would use a bdi congestion function to make sure there weren't too many pending async checksum work items. This change makes the process creating async work items wait instead, leading to fewer congestion returns from the bdi. This improves pdflush background_writeout scanning. Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 7 ++++++- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 45 ++++++++++++++++++++++++----------------- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 5f2f5a8c2289..958cd8b5f0d7 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -48,6 +48,7 @@ struct btrfs_worker_thread { /* number of things on the pending list */ atomic_t num_pending; + unsigned long sequence; /* protects the pending list. */ spinlock_t lock; @@ -197,6 +198,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) spin_lock_irq(&workers->lock); list_add_tail(&worker->worker_list, &workers->idle_list); + worker->idle = 1; workers->num_workers++; spin_unlock_irq(&workers->lock); } @@ -238,7 +240,10 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) */ next = workers->worker_list.next; worker = list_entry(next, struct btrfs_worker_thread, worker_list); - list_move_tail(next, &workers->worker_list); + atomic_inc(&worker->num_pending); + worker->sequence++; + if (worker->sequence % 4 == 0) + list_move_tail(next, &workers->worker_list); return worker; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index bb4a8d2200d0..040213359393 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -526,6 +526,7 @@ struct btrfs_fs_info { struct btrfs_transaction *running_transaction; wait_queue_head_t transaction_throttle; wait_queue_head_t transaction_wait; + wait_queue_head_t async_submit_wait; struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; struct block_device *__bdev; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1bf210dadef6..1aed1f4616b6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -429,31 +429,36 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } +static unsigned long async_submit_limit(struct btrfs_fs_info *info) +{ + unsigned long limit = min_t(unsigned long, + info->workers.max_workers, + info->fs_devices->open_devices); + return 256 * limit; +} + int btrfs_congested_async(struct btrfs_fs_info *info, int iodone) { - int limit = 256 * info->fs_devices->open_devices; - - if (iodone) - limit = (limit * 3) / 2; - if (atomic_read(&info->nr_async_submits) > limit) - return 1; - - return atomic_read(&info->nr_async_bios) > limit; + return atomic_read(&info->nr_async_bios) > async_submit_limit(info); } static void run_one_async_submit(struct btrfs_work *work) { struct btrfs_fs_info *fs_info; struct async_submit_bio *async; + int limit; async = container_of(work, struct async_submit_bio, work); fs_info = BTRFS_I(async->inode)->root->fs_info; + + limit = async_submit_limit(fs_info); + limit = limit * 2 / 3; + atomic_dec(&fs_info->nr_async_submits); - if ((async->bio->bi_rw & (1 << BIO_RW)) && - !btrfs_congested_async(fs_info, 1)) { - clear_bdi_congested(&fs_info->bdi, WRITE); - } + if (atomic_read(&fs_info->nr_async_submits) < limit) + wake_up(&fs_info->async_submit_wait); + async->submit_bio_hook(async->inode, async->rw, async->bio, async->mirror_num); kfree(async); @@ -464,6 +469,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, extent_submit_bio_hook_t *submit_bio_hook) { struct async_submit_bio *async; + int limit = async_submit_limit(fs_info); async = kmalloc(sizeof(*async), GFP_NOFS); if (!async) @@ -478,6 +484,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->work.flags = 0; atomic_inc(&fs_info->nr_async_submits); btrfs_queue_worker(&fs_info->workers, &async->work); + + wait_event_timeout(fs_info->async_submit_wait, + (atomic_read(&fs_info->nr_async_submits) < limit), + HZ/10); return 0; } @@ -545,16 +555,11 @@ static int btree_writepages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; - unsigned long thresh = 96 * 1024 * 1024; + unsigned long thresh = 8 * 1024 * 1024; if (wbc->for_kupdate) return 0; - if (current_is_pdflush()) { - thresh = 96 * 1024 * 1024; - } else { - thresh = 8 * 1024 * 1024; - } num_dirty = count_range_bits(tree, &start, (u64)-1, thresh, EXTENT_DIRTY); if (num_dirty < thresh) { @@ -1333,6 +1338,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->volume_mutex); init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); + init_waitqueue_head(&fs_info->async_submit_wait); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1380,6 +1386,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, * devices */ fs_info->submit_workers.idle_thresh = 64; + fs_info->workers.idle_thresh = 32; btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); btrfs_init_workers(&fs_info->endio_workers, "endio", @@ -1849,7 +1856,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) struct extent_io_tree *tree; u64 num_dirty; u64 start = 0; - unsigned long thresh = 2 * 1024 * 1024; + unsigned long thresh = 12 * 1024 * 1024; tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; if (current_is_pdflush()) From 53863232ef961778aa414b700ed88a48e8e871e6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:18 -0400 Subject: [PATCH 0664/2985] Btrfs: Lower contention on the csum mutex This takes the csum mutex deeper in the call chain and releases it more often. Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 3 ++- fs/btrfs/disk-io.c | 9 ++++++++- fs/btrfs/file-item.c | 16 ++++++++++++---- fs/btrfs/inode.c | 2 -- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 958cd8b5f0d7..2ee301740195 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -48,6 +48,7 @@ struct btrfs_worker_thread { /* number of things on the pending list */ atomic_t num_pending; + unsigned long sequence; /* protects the pending list. */ @@ -242,7 +243,7 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) worker = list_entry(next, struct btrfs_worker_thread, worker_list); atomic_inc(&worker->num_pending); worker->sequence++; - if (worker->sequence % 4 == 0) + if (worker->sequence % workers->idle_thresh == 0) list_move_tail(next, &workers->worker_list); return worker; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1aed1f4616b6..92e14dd9bddb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1386,7 +1386,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, * devices */ fs_info->submit_workers.idle_thresh = 64; - fs_info->workers.idle_thresh = 32; + + /* fs_info->workers is responsible for checksumming file data + * blocks and metadata. Using a larger idle thresh allows each + * worker thread to operate on things in roughly the order they + * were sent by the writeback daemons, improving overall locality + * of the IO going down the pipe. + */ + fs_info->workers.idle_thresh = 128; btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); btrfs_init_workers(&fs_info->endio_workers, "endio", diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 51aba8cee7ce..9454e0a07c89 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -321,6 +321,7 @@ again: file_key.offset = offset; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + mutex_lock(&BTRFS_I(inode)->csum_mutex); item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1); if (!IS_ERR(item)) { leaf = path->nodes[0]; @@ -367,7 +368,7 @@ again: ret = btrfs_search_slot(trans, root, &file_key, path, BTRFS_CRC32_SIZE, 1); if (ret < 0) - goto fail; + goto fail_unlock; if (ret == 0) { BUG(); } @@ -411,10 +412,10 @@ insert: ret = btrfs_insert_empty_item(trans, root, path, &file_key, ins_size); if (ret < 0) - goto fail; + goto fail_unlock; if (ret != 0) { WARN_ON(1); - goto fail; + goto fail_unlock; } csum: leaf = path->nodes[0]; @@ -427,6 +428,8 @@ found: item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + btrfs_item_size_nr(leaf, path->slots[0])); eb_token = NULL; + mutex_unlock(&BTRFS_I(inode)->csum_mutex); + cond_resched(); next_sector: if (!eb_token || @@ -467,13 +470,18 @@ next_sector: eb_token = NULL; } btrfs_mark_buffer_dirty(path->nodes[0]); + cond_resched(); if (total_bytes < sums->len) { btrfs_release_path(root, path); goto again; } -fail: +out: btrfs_free_path(path); return ret; + +fail_unlock: + mutex_unlock(&BTRFS_I(inode)->csum_mutex); + goto out; } int btrfs_csum_truncate(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 33b990878d84..65107894a5bb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -415,10 +415,8 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, btrfs_set_trans_block_group(trans, inode); list_for_each(cur, list) { sum = list_entry(cur, struct btrfs_ordered_sum, list); - mutex_lock(&BTRFS_I(inode)->csum_mutex); btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root, inode, sum); - mutex_unlock(&BTRFS_I(inode)->csum_mutex); } return 0; } From 902b22f341efa00be802418a0a8c57bddcd269a6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 20 Aug 2008 08:51:49 -0400 Subject: [PATCH 0665/2985] Btrfs: Remove broken optimisations in end_bio functions. These ended up freeing objects while they were still using them. Under guidance from Chris, just rip out the 'clever' bits and do things the simple way. Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 159 ++++++------------------------------------- 1 file changed, 21 insertions(+), 138 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f46f88620c70..83ba0c328722 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -15,6 +15,8 @@ #include "extent_io.h" #include "extent_map.h" #include "compat.h" +#include "ctree.h" +#include "btrfs_inode.h" /* temporary define until extent_map moves out of btrfs */ struct kmem_cache *btrfs_cache_create(const char *name, size_t size, @@ -1394,15 +1396,11 @@ static int end_bio_extent_writepage(struct bio *bio, { int uptodate = err == 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_state *state = bio->bi_private; - struct extent_io_tree *tree = state->tree; - struct rb_node *node; + struct extent_io_tree *tree; u64 start; u64 end; - u64 cur; int whole_page; int ret; - unsigned long flags; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) if (bio->bi_size) @@ -1410,6 +1408,8 @@ static int end_bio_extent_writepage(struct bio *bio, #endif do { struct page *page = bvec->bv_page; + tree = &BTRFS_I(page->mapping->host)->io_tree; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; end = start + bvec->bv_len - 1; @@ -1423,7 +1423,7 @@ static int end_bio_extent_writepage(struct bio *bio, prefetchw(&bvec->bv_page->flags); if (tree->ops && tree->ops->writepage_end_io_hook) { ret = tree->ops->writepage_end_io_hook(page, start, - end, state, uptodate); + end, NULL, uptodate); if (ret) uptodate = 0; } @@ -1431,9 +1431,8 @@ static int end_bio_extent_writepage(struct bio *bio, if (!uptodate && tree->ops && tree->ops->writepage_io_failed_hook) { ret = tree->ops->writepage_io_failed_hook(bio, page, - start, end, state); + start, end, NULL); if (ret == 0) { - state = NULL; uptodate = (err == 0); continue; } @@ -1445,68 +1444,7 @@ static int end_bio_extent_writepage(struct bio *bio, SetPageError(page); } - /* - * bios can get merged in funny ways, and so we need to - * be careful with the state variable. We know the - * state won't be merged with others because it has - * WRITEBACK set, but we can't be sure each biovec is - * sequential in the file. So, if our cached state - * doesn't match the expected end, search the tree - * for the correct one. - */ - - spin_lock_irqsave(&tree->lock, flags); - if (!state || state->end != end) { - state = NULL; - node = __etree_search(tree, start, NULL, NULL); - if (node) { - state = rb_entry(node, struct extent_state, - rb_node); - if (state->end != end || - !(state->state & EXTENT_WRITEBACK)) - state = NULL; - } - if (!state) { - spin_unlock_irqrestore(&tree->lock, flags); - clear_extent_writeback(tree, start, - end, GFP_ATOMIC); - goto next_io; - } - } - cur = end; - while(1) { - struct extent_state *clear = state; - cur = state->start; - node = rb_prev(&state->rb_node); - if (node) { - state = rb_entry(node, - struct extent_state, - rb_node); - } else { - state = NULL; - } - - clear_state_bit(tree, clear, EXTENT_WRITEBACK, - 1, 0); - if (cur == start) - break; - if (cur < start) { - WARN_ON(1); - break; - } - if (!node) - break; - } - /* before releasing the lock, make sure the next state - * variable has the expected bits set and corresponds - * to the correct offsets in the file - */ - if (state && (state->end + 1 != start || - !(state->state & EXTENT_WRITEBACK))) { - state = NULL; - } - spin_unlock_irqrestore(&tree->lock, flags); -next_io: + clear_extent_writeback(tree, start, end, GFP_ATOMIC); if (whole_page) end_page_writeback(page); @@ -1539,13 +1477,9 @@ static int end_bio_extent_readpage(struct bio *bio, { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_state *state = bio->bi_private; - struct extent_io_tree *tree = state->tree; - struct rb_node *node; + struct extent_io_tree *tree; u64 start; u64 end; - u64 cur; - unsigned long flags; int whole_page; int ret; @@ -1556,6 +1490,8 @@ static int end_bio_extent_readpage(struct bio *bio, do { struct page *page = bvec->bv_page; + tree = &BTRFS_I(page->mapping->host)->io_tree; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; end = start + bvec->bv_len - 1; @@ -1570,80 +1506,26 @@ static int end_bio_extent_readpage(struct bio *bio, if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { ret = tree->ops->readpage_end_io_hook(page, start, end, - state); + NULL); if (ret) uptodate = 0; } if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { ret = tree->ops->readpage_io_failed_hook(bio, page, - start, end, state); + start, end, NULL); if (ret == 0) { - state = NULL; uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); continue; } } - spin_lock_irqsave(&tree->lock, flags); - if (!state || state->end != end) { - state = NULL; - node = __etree_search(tree, start, NULL, NULL); - if (node) { - state = rb_entry(node, struct extent_state, - rb_node); - if (state->end != end || - !(state->state & EXTENT_LOCKED)) - state = NULL; - } - if (!state) { - spin_unlock_irqrestore(&tree->lock, flags); - if (uptodate) - set_extent_uptodate(tree, start, end, - GFP_ATOMIC); - unlock_extent(tree, start, end, GFP_ATOMIC); - goto next_io; - } - } + if (uptodate) + set_extent_uptodate(tree, start, end, + GFP_ATOMIC); + unlock_extent(tree, start, end, GFP_ATOMIC); - cur = end; - while(1) { - struct extent_state *clear = state; - cur = state->start; - node = rb_prev(&state->rb_node); - if (node) { - state = rb_entry(node, - struct extent_state, - rb_node); - } else { - state = NULL; - } - if (uptodate) { - set_state_cb(tree, clear, EXTENT_UPTODATE); - clear->state |= EXTENT_UPTODATE; - } - clear_state_bit(tree, clear, EXTENT_LOCKED, - 1, 0); - if (cur == start) - break; - if (cur < start) { - WARN_ON(1); - break; - } - if (!node) - break; - } - /* before releasing the lock, make sure the next state - * variable has the expected bits set and corresponds - * to the correct offsets in the file - */ - if (state && (state->end + 1 != start || - !(state->state & EXTENT_LOCKED))) { - state = NULL; - } - spin_unlock_irqrestore(&tree->lock, flags); -next_io: if (whole_page) { if (uptodate) { SetPageUptodate(page); @@ -1683,8 +1565,7 @@ static int end_bio_extent_preparewrite(struct bio *bio, { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_state *state = bio->bi_private; - struct extent_io_tree *tree = state->tree; + struct extent_io_tree *tree; u64 start; u64 end; @@ -1695,6 +1576,8 @@ static int end_bio_extent_preparewrite(struct bio *bio, do { struct page *page = bvec->bv_page; + tree = &BTRFS_I(page->mapping->host)->io_tree; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; end = start + bvec->bv_len - 1; @@ -1765,7 +1648,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num) BUG_ON(state->end != end); spin_unlock_irq(&tree->lock); - bio->bi_private = state; + bio->bi_private = NULL; bio_get(bio); From 7c2fe32a238eb12422beca5cbd5194a594baa559 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 20 Aug 2008 08:51:50 -0400 Subject: [PATCH 0666/2985] Btrfs: Fix add_extent_mapping to check for duplicates across the whole range add_extent_mapping was allowing the insertion of overlapping extents. This never used to happen because it only inserted the extents from disk and those were never overlapping. But, with the data=ordered code, the disk and memory representations of the file are not the same. add_extent_mapping needs to ensure a new extent does not overlap before it inserts. Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 7 +++++++ fs/btrfs/inode.c | 21 +++++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 954b047639ab..78ced11d18c7 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -207,7 +207,14 @@ int add_extent_mapping(struct extent_map_tree *tree, int ret = 0; struct extent_map *merge = NULL; struct rb_node *rb; + struct extent_map *exist; + exist = lookup_extent_mapping(tree, em->start, em->len); + if (exist) { + free_extent_map(exist); + ret = -EEXIST; + goto out; + } assert_spin_locked(&tree->lock); rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 65107894a5bb..6c778043207d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -641,8 +641,9 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) if (ret == -ENOENT || ret == -EFBIG) ret = 0; csum = 0; - printk("no csum found for inode %lu start %Lu\n", inode->i_ino, - start); + if (printk_ratelimit()) + printk("no csum found for inode %lu start %Lu\n", inode->i_ino, + start); goto out; } read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, @@ -1653,8 +1654,20 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); hole_size = block_end - hole_start; - btrfs_wait_ordered_range(inode, hole_start, hole_size); - lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); + while(1) { + struct btrfs_ordered_extent *ordered; + btrfs_wait_ordered_range(inode, hole_start, hole_size); + + lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(inode, hole_start); + if (ordered) { + unlock_extent(io_tree, hole_start, + block_end - 1, GFP_NOFS); + btrfs_put_ordered_extent(ordered); + } else { + break; + } + } trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); From 4d1b5fb4d7075f862848dbff8873e22382abd482 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 20 Aug 2008 09:44:52 -0400 Subject: [PATCH 0667/2985] Btrfs: Lookup readpage checksums on bio submission again This optimization had been removed because I thought it was triggering csum errors. The real cause of the errors was elsewhere, and so this optimization is back. Signed-off-by: Chris Mason --- fs/btrfs/file-item.c | 4 +-- fs/btrfs/inode.c | 62 +++----------------------------------------- 2 files changed, 6 insertions(+), 60 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 9454e0a07c89..f861bd506928 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -134,7 +134,6 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } -#if 0 /* broken */ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio) { @@ -151,6 +150,8 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; path = btrfs_alloc_path(); + if (bio->bi_size > PAGE_CACHE_SIZE * 8) + path->reada = 2; WARN_ON(bio->bi_vcnt <= 0); @@ -211,7 +212,6 @@ found: btrfs_free_path(path); return 0; } -#endif int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6c778043207d..38d7a81763cd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -389,15 +389,15 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); - if (!(rw & (1 << BIO_RW))) { - goto mapit; - } - if (btrfs_test_opt(root, NODATASUM) || btrfs_test_flag(inode, NODATASUM)) { goto mapit; } + if (!(rw & (1 << BIO_RW))) { + btrfs_lookup_bio_sums(root, inode, bio); + goto mapit; + } return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, inode, rw, bio, mirror_num, __btrfs_submit_bio_hook); @@ -603,59 +603,6 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, return btrfs_finish_ordered_io(page->mapping->host, start, end); } -int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) -{ - int ret = 0; - struct inode *inode = page->mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct btrfs_csum_item *item; - struct btrfs_path *path = NULL; - u32 csum; - - if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) - return 0; - - /* - * It is possible there is an ordered extent that has - * not yet finished for this range in the file. If so, - * that extent will have a csum cached, and it will insert - * the sum after all the blocks in the extent are fully - * on disk. So, look for an ordered extent and use the - * sum if found. We have to do this before looking in the - * btree because csum items are pre-inserted based on - * the file size. btrfs_lookup_csum might find an item - * that still hasn't been fully filled. - */ - ret = btrfs_find_ordered_sum(inode, start, &csum); - if (ret == 0) - goto found; - - ret = 0; - path = btrfs_alloc_path(); - item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); - if (IS_ERR(item)) { - ret = PTR_ERR(item); - /* a csum that isn't present is a preallocated region. */ - if (ret == -ENOENT || ret == -EFBIG) - ret = 0; - csum = 0; - if (printk_ratelimit()) - printk("no csum found for inode %lu start %Lu\n", inode->i_ino, - start); - goto out; - } - read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, - BTRFS_CRC32_SIZE); -found: - set_state_private(io_tree, start, csum); -out: - if (path) - btrfs_free_path(path); - return ret; -} - struct io_failure_record { struct page *page; u64 start; @@ -3669,7 +3616,6 @@ static struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, .submit_bio_hook = btrfs_submit_bio_hook, .merge_bio_hook = btrfs_merge_bio_hook, - .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, .writepage_end_io_hook = btrfs_writepage_end_io_hook, .writepage_start_hook = btrfs_writepage_start_hook, From b64a2851ba25b3410a87d3d1b751155612105c8e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 20 Aug 2008 13:39:41 -0400 Subject: [PATCH 0668/2985] Btrfs: Wait for async bio submissions to make some progress at queue time Before, the btrfs bdi congestion function was used to test for too many async bios. This keeps that check to throttle pdflush, but also adds a check while queuing bios. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 16 +++++++++------- fs/btrfs/disk-io.h | 1 + fs/btrfs/transaction.c | 2 -- fs/btrfs/volumes.c | 18 +++++++++++++++++- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 92e14dd9bddb..bbba14b629d2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -429,7 +429,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } -static unsigned long async_submit_limit(struct btrfs_fs_info *info) +unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) { unsigned long limit = min_t(unsigned long, info->workers.max_workers, @@ -439,7 +439,8 @@ static unsigned long async_submit_limit(struct btrfs_fs_info *info) int btrfs_congested_async(struct btrfs_fs_info *info, int iodone) { - return atomic_read(&info->nr_async_bios) > async_submit_limit(info); + return atomic_read(&info->nr_async_bios) > + btrfs_async_submit_limit(info); } static void run_one_async_submit(struct btrfs_work *work) @@ -451,12 +452,13 @@ static void run_one_async_submit(struct btrfs_work *work) async = container_of(work, struct async_submit_bio, work); fs_info = BTRFS_I(async->inode)->root->fs_info; - limit = async_submit_limit(fs_info); + limit = btrfs_async_submit_limit(fs_info); limit = limit * 2 / 3; atomic_dec(&fs_info->nr_async_submits); - if (atomic_read(&fs_info->nr_async_submits) < limit) + if (atomic_read(&fs_info->nr_async_submits) < limit && + waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); async->submit_bio_hook(async->inode, async->rw, async->bio, @@ -469,7 +471,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, extent_submit_bio_hook_t *submit_bio_hook) { struct async_submit_bio *async; - int limit = async_submit_limit(fs_info); + int limit = btrfs_async_submit_limit(fs_info); async = kmalloc(sizeof(*async), GFP_NOFS); if (!async) @@ -1863,10 +1865,10 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) struct extent_io_tree *tree; u64 num_dirty; u64 start = 0; - unsigned long thresh = 12 * 1024 * 1024; + unsigned long thresh = 96 * 1024 * 1024; tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; - if (current_is_pdflush()) + if (current_is_pdflush() || current->flags & PF_MEMALLOC) return; num_dirty = count_range_bits(tree, &start, (u64)-1, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index e904a69347a4..2562a273ae18 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -73,4 +73,5 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, int rw, struct bio *bio, int mirror_num, extent_submit_bio_hook_t *submit_bio_hook); int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); +unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 6bcb0876f9bb..eff3ad72991b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -322,8 +322,6 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, if (ret) break; while(start <= end) { - if (btrfs_congested_async(root->fs_info, 0)) - congestion_wait(WRITE, HZ/10); cond_resched(); index = start >> PAGE_CACHE_SHIFT; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 23a5b0aba00a..2652660e6079 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -138,12 +138,18 @@ int run_scheduled_bios(struct btrfs_device *device) { struct bio *pending; struct backing_dev_info *bdi; + struct btrfs_fs_info *fs_info; struct bio *tail; struct bio *cur; int again = 0; unsigned long num_run = 0; + unsigned long limit; bdi = device->bdev->bd_inode->i_mapping->backing_dev_info; + fs_info = device->dev_root->fs_info; + limit = btrfs_async_submit_limit(fs_info); + limit = limit * 2 / 3; + loop: spin_lock(&device->io_lock); @@ -179,7 +185,11 @@ loop: cur = pending; pending = pending->bi_next; cur->bi_next = NULL; - atomic_dec(&device->dev_root->fs_info->nr_async_bios); + atomic_dec(&fs_info->nr_async_bios); + + if (atomic_read(&fs_info->nr_async_bios) < limit && + waitqueue_active(&fs_info->async_submit_wait)) + wake_up(&fs_info->async_submit_wait); BUG_ON(atomic_read(&cur->bi_cnt) == 0); bio_get(cur); @@ -2135,6 +2145,7 @@ int schedule_bio(struct btrfs_root *root, struct btrfs_device *device, int rw, struct bio *bio) { int should_queue = 1; + unsigned long limit; /* don't bother with additional async steps for reads, right now */ if (!(rw & (1 << BIO_RW))) { @@ -2171,6 +2182,11 @@ int schedule_bio(struct btrfs_root *root, struct btrfs_device *device, if (should_queue) btrfs_queue_worker(&root->fs_info->submit_workers, &device->work); + + limit = btrfs_async_submit_limit(root->fs_info); + wait_event_timeout(root->fs_info->async_submit_wait, + (atomic_read(&root->fs_info->nr_async_bios) < limit), + HZ/10); return 0; } From 1a54ef8c11a0eaef59ff418089f109c27f8bd48d Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Mon, 21 Jul 2008 02:01:04 +0530 Subject: [PATCH 0669/2985] Introduce btrfs_iget helper Date: Mon, 21 Jul 2008 02:01:04 +0530 This patch introduces a btrfs_iget helper to be used in NFS support. Signed-off-by: Balaji Rao Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/inode.c | 53 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 040213359393..cecf29e03630 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1700,6 +1700,8 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, u64 root_objectid); +struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, + struct btrfs_root *root, int *is_new); int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to); struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 38d7a81763cd..266ca5514b33 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1840,6 +1840,33 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, return inode; } +/* Get an inode object given its location and corresponding root. + * Returns in *is_new if the inode was read from disk + */ +struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, + struct btrfs_root *root, int *is_new) +{ + struct inode *inode; + + inode = btrfs_iget_locked(s, location->objectid, root); + if (!inode) + return ERR_PTR(-EACCES); + + if (inode->i_state & I_NEW) { + BTRFS_I(inode)->root = root; + memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + if (is_new) + *is_new = 1; + } else { + if (is_new) + *is_new = 0; + } + + return inode; +} + static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { @@ -1848,7 +1875,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct btrfs_root *root = bi->root; struct btrfs_root *sub_root = root; struct btrfs_key location; - int ret, do_orphan = 0; + int ret, new, do_orphan = 0; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); @@ -1866,23 +1893,15 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(ret); if (ret > 0) return ERR_PTR(-ENOENT); + inode = btrfs_iget(dir->i_sb, &location, sub_root, &new); + if (IS_ERR(inode)) + return ERR_CAST(inode); - inode = btrfs_iget_locked(dir->i_sb, location.objectid, - sub_root); - if (!inode) - return ERR_PTR(-EACCES); - if (inode->i_state & I_NEW) { - /* the inode and parent dir are two different roots */ - if (sub_root != root) { - igrab(inode); - sub_root->inode = inode; - do_orphan = 1; - } - BTRFS_I(inode)->root = sub_root; - memcpy(&BTRFS_I(inode)->location, &location, - sizeof(location)); - btrfs_read_locked_inode(inode); - unlock_new_inode(inode); + /* the inode and parent dir are two different roots */ + if (new && root != sub_root) { + igrab(inode); + sub_root->inode = inode; + do_orphan = 1; } } From be6e8dc0ba84029997075a1ec77b4ddb863cbe15 Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Mon, 21 Jul 2008 02:01:56 +0530 Subject: [PATCH 0670/2985] NFS support for btrfs - v3 Date: Mon, 21 Jul 2008 02:01:56 +0530 Here's an implementation of NFS support for btrfs. It relies on the fixes which are going in to 2.6.28 for the NFS readdir/lookup deadlock. This uses the btrfs_iget helper introduced previously. [dwmw2: Tidy up a little, switch to d_obtain_alias() w/compat routine, change fh_type, store parent's root object ID where needed, fix some get_parent() and fs_to_dentry() bugs] Signed-off-by: Balaji Rao Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/compat.h | 17 ++++ fs/btrfs/export.c | 208 ++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/export.h | 19 +++++ fs/btrfs/super.c | 2 + 5 files changed, 247 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/export.c create mode 100644 fs/btrfs/export.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a4b38177abdb..75f8818cbfee 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -7,7 +7,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - ref-cache.o + ref-cache.o export.o btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o else diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h index b3349a66999d..d45fb37887bc 100644 --- a/fs/btrfs/compat.h +++ b/fs/btrfs/compat.h @@ -5,6 +5,23 @@ #define trylock_page(page) (!TestSetPageLocked(page)) #endif +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27) +static inline struct dentry *d_obtain_alias(struct inode *inode) +{ + struct dentry *d; + + if (!inode) + return NULL; + if (IS_ERR(inode)) + return ERR_CAST(inode); + + d = d_alloc_anon(inode); + if (!d) + iput(inode); + return d; +} +#endif + /* * Even if AppArmor isn't enabled, it still has different prototypes. * Add more distro/version pairs here to declare which has AppArmor applied. diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c new file mode 100644 index 000000000000..797b4cbc3786 --- /dev/null +++ b/fs/btrfs/export.c @@ -0,0 +1,208 @@ +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "btrfs_inode.h" +#include "print-tree.h" +#include "export.h" +#include "compat.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) +#define FILEID_BTRFS_WITHOUT_PARENT 0x4d +#define FILEID_BTRFS_WITH_PARENT 0x4e +#define FILEID_BTRFS_WITH_PARENT_ROOT 0x4f +#endif + +#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, parent_objectid)/4) +#define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, parent_root_objectid)/4) +#define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid)/4) + +static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, + int connectable) +{ + struct btrfs_fid *fid = (struct btrfs_fid *)fh; + struct inode *inode = dentry->d_inode; + int len = *max_len; + int type; + + if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || + (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) + return 255; + + len = BTRFS_FID_SIZE_NON_CONNECTABLE; + type = FILEID_BTRFS_WITHOUT_PARENT; + + fid->objectid = BTRFS_I(inode)->location.objectid; + fid->root_objectid = BTRFS_I(inode)->root->objectid; + fid->gen = inode->i_generation; + + if (connectable && !S_ISDIR(inode->i_mode)) { + struct inode *parent; + u64 parent_root_id; + + spin_lock(&dentry->d_lock); + + parent = dentry->d_parent->d_inode; + fid->parent_objectid = BTRFS_I(parent)->location.objectid; + fid->parent_gen = parent->i_generation; + parent_root_id = BTRFS_I(parent)->root->objectid; + + spin_unlock(&dentry->d_lock); + + if (parent_root_id != fid->root_objectid) { + fid->parent_root_objectid = parent_root_id; + len = BTRFS_FID_SIZE_CONNECTABLE_ROOT; + type = FILEID_BTRFS_WITH_PARENT_ROOT; + } else { + len = BTRFS_FID_SIZE_CONNECTABLE; + type = FILEID_BTRFS_WITH_PARENT; + } + } + + *max_len = len; + return type; +} + +static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, + u64 root_objectid, u32 generation) +{ + struct btrfs_root *root; + struct inode *inode; + struct dentry *result; + struct btrfs_key key; + + key.objectid = objectid; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + + root = btrfs_lookup_fs_root(btrfs_sb(sb)->fs_info, root_objectid); + inode = btrfs_iget(sb, &key, root, NULL); + if (IS_ERR(inode)) + return (void *)inode; + + if (generation != inode->i_generation) { + iput(inode); + return ERR_PTR(-ESTALE); + } + + result = d_obtain_alias(inode); + if (!result) + return ERR_PTR(-ENOMEM); + + return result; +} + +static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, + int fh_len, int fh_type) +{ + struct btrfs_fid *fid = (struct btrfs_fid *) fh; + u64 objectid, root_objectid; + u32 generation; + + if (fh_type == FILEID_BTRFS_WITH_PARENT) { + if (fh_len != BTRFS_FID_SIZE_CONNECTABLE) + return NULL; + root_objectid = fid->root_objectid; + } else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) { + if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) + return NULL; + root_objectid = fid->parent_root_objectid; + } else + return NULL; + + objectid = fid->parent_objectid; + generation = fid->parent_gen; + + return btrfs_get_dentry(sb, objectid, root_objectid, generation); +} + +static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, + int fh_len, int fh_type) +{ + struct btrfs_fid *fid = (struct btrfs_fid *) fh; + u64 objectid, root_objectid; + u32 generation; + + if ((fh_type != FILEID_BTRFS_WITH_PARENT || + fh_len != BTRFS_FID_SIZE_CONNECTABLE) && + (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT || + fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) && + (fh_type != FILEID_BTRFS_WITHOUT_PARENT || + fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE)) + return NULL; + + objectid = fid->objectid; + root_objectid = fid->root_objectid; + generation = fid->gen; + + return btrfs_get_dentry(sb, objectid, root_objectid, generation); +} + +static struct dentry *btrfs_get_parent(struct dentry *child) +{ + struct inode *dir = child->d_inode; + struct inode *inode; + struct dentry *parent; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_key key; + struct btrfs_path *path; + struct extent_buffer *leaf; + u32 nritems; + int slot; + u64 objectid; + int ret; + + path = btrfs_alloc_path(); + + key.objectid = dir->i_ino; + btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + BUG_ON(ret == 0); + ret = 0; + + leaf = path->nodes[0]; + slot = path->slots[0]; + nritems = btrfs_header_nritems(leaf); + if (slot >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) { + btrfs_free_path(path); + goto out; + } + leaf = path->nodes[0]; + slot = path->slots[0]; + } + + btrfs_free_path(path); + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY) + goto out; + + objectid = key.offset; + + /* Build a new key for the inode item */ + key.objectid = objectid; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + + inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); + + parent = d_obtain_alias(inode); + if (!parent) + parent = ERR_PTR(-ENOMEM); + + return parent; + +out: + btrfs_free_path(path); + return ERR_PTR(-EINVAL); +} + +const struct export_operations btrfs_export_ops = { + .encode_fh = btrfs_encode_fh, + .fh_to_dentry = btrfs_fh_to_dentry, + .fh_to_parent = btrfs_fh_to_parent, + .get_parent = btrfs_get_parent, +}; diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h new file mode 100644 index 000000000000..074348a95841 --- /dev/null +++ b/fs/btrfs/export.h @@ -0,0 +1,19 @@ +#ifndef BTRFS_EXPORT_H +#define BTRFS_EXPORT_H + +#include + +extern const struct export_operations btrfs_export_ops; + +struct btrfs_fid { + u64 objectid; + u64 root_objectid; + u32 gen; + + u64 parent_objectid; + u32 parent_gen; + + u64 parent_root_objectid; +} __attribute__ ((packed)); + +#endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index eb4b357d05e1..e830e0ed409a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -46,6 +46,7 @@ #include "xattr.h" #include "volumes.h" #include "version.h" +#include "export.h" #define BTRFS_SUPER_MAGIC 0x9123683E @@ -303,6 +304,7 @@ static int btrfs_fill_super(struct super_block * sb, sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = BTRFS_SUPER_MAGIC; sb->s_op = &btrfs_super_ops; + sb->s_export_op = &btrfs_export_ops; sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; sb->s_flags |= MS_POSIXACL; From cbdf5a2442330102c08f5a2ad3058e29e90a43a9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 6 Aug 2008 19:42:33 +0100 Subject: [PATCH 0671/2985] Btrfs: Implement our own copy of the nfsd readdir hack, for older kernels Date: Wed, 6 Aug 2008 19:42:33 +0100 Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 266ca5514b33..0da1ae4ee407 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1915,7 +1915,8 @@ static unsigned char btrfs_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK }; -static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +static int btrfs_real_readdir(struct file *filp, void *dirent, + filldir_t filldir) { struct inode *inode = filp->f_dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -2064,6 +2065,101 @@ err: return ret; } +/* Kernels earlier than 2.6.28 still have the NFS deadlock where nfsd + will call the file system's ->lookup() method from within its + filldir callback, which in turn was called from the file system's + ->readdir() method. And will deadlock for many file systems. */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + +struct nfshack_dirent { + u64 ino; + loff_t offset; + int namlen; + unsigned int d_type; + char name[]; +}; + +struct nfshack_readdir { + char *dirent; + size_t used; +}; + + + +static int btrfs_nfshack_filldir(void *__buf, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct nfshack_readdir *buf = __buf; + struct nfshack_dirent *de = (void *)(buf->dirent + buf->used); + unsigned int reclen; + + reclen = ALIGN(sizeof(struct nfshack_dirent) + namlen, sizeof(u64)); + if (buf->used + reclen > PAGE_SIZE) + return -EINVAL; + + de->namlen = namlen; + de->offset = offset; + de->ino = ino; + de->d_type = d_type; + memcpy(de->name, name, namlen); + buf->used += reclen; + + return 0; +} + +static int btrfs_nfshack_readdir(struct file *file, void *dirent, + filldir_t filldir) +{ + struct nfshack_readdir buf; + struct nfshack_dirent *de; + int err; + int size; + loff_t offset; + + buf.dirent = (void *)__get_free_page(GFP_KERNEL); + if (!buf.dirent) + return -ENOMEM; + + offset = file->f_pos; + + while (1) { + unsigned int reclen; + + buf.used = 0; + + err = btrfs_real_readdir(file, &buf, btrfs_nfshack_filldir); + if (err) + break; + + size = buf.used; + + if (!size) + break; + + de = (struct nfshack_dirent *)buf.dirent; + while (size > 0) { + offset = de->offset; + + if (filldir(dirent, de->name, de->namlen, de->offset, + de->ino, de->d_type)) + goto done; + offset = file->f_pos; + + reclen = ALIGN(sizeof(*de) + de->namlen, + sizeof(u64)); + size -= reclen; + de = (struct nfshack_dirent *)((char *)de + reclen); + } + } + + done: + free_page((unsigned long)buf.dirent); + file->f_pos = offset; + + return err; +} +#endif + int btrfs_write_inode(struct inode *inode, int wait) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -3623,7 +3719,11 @@ static struct inode_operations btrfs_dir_ro_inode_operations = { static struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .readdir = btrfs_readdir, +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + .readdir = btrfs_nfshack_readdir, +#else /* NFSd readdir/lookup deadlock is fixed */ + .readdir = btrfs_real_readdir, +#endif .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_ioctl, From 21af804c07141c035085f99798efaabbc7836a97 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 12 Aug 2008 14:13:26 +0100 Subject: [PATCH 0672/2985] Btrfs: Discard sector data in __free_extent() Date: Tue, 12 Aug 2008 14:13:26 +0100 Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fff219ed61d9..e63b3b4bed7c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "hash.h" #include "crc32c.h" #include "ctree.h" @@ -1716,6 +1717,10 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (refs == 0) { u64 super_used; u64 root_used; +#ifdef BIO_RW_DISCARD + u64 map_length = num_bytes; + struct btrfs_multi_bio *multi = NULL; +#endif if (pin) { ret = pin_down_bytes(root, bytenr, num_bytes, 0); @@ -1743,6 +1748,26 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ret = update_block_group(trans, root, bytenr, num_bytes, 0, mark_free); BUG_ON(ret); + +#ifdef BIO_RW_DISCARD + /* Tell the block device(s) that the sectors can be discarded */ + ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, + bytenr, &map_length, &multi, 0); + if (!ret) { + struct btrfs_bio_stripe *stripe = multi->stripes; + int i; + + if (map_length > num_bytes) + map_length = num_bytes; + + for (i = 0; i < multi->num_stripes; i++, stripe++) { + blkdev_issue_discard(stripe->dev->bdev, + stripe->physical >> 9, + map_length >> 9); + } + kfree(multi); + } +#endif } btrfs_free_path(path); finish_current_insert(trans, extent_root); From 5ecc7e5d1d58731b3a6a69303d69aca7fb0fe1f5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 17 Aug 2008 15:14:48 +0100 Subject: [PATCH 0673/2985] Btrfs: Remove special cases for "." and ".." Date: Sun, 17 Aug 2008 15:14:48 +0100 We never get asked by the VFS to lookup either of them, and we can handle the readdir() case a lot more simply, too. Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 52 ++---------------------------------------------- 1 file changed, 2 insertions(+), 50 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0da1ae4ee407..4520a0e86e70 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1702,42 +1702,9 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, struct btrfs_root *root = BTRFS_I(dir)->root; int ret = 0; - if (namelen == 1 && strcmp(name, ".") == 0) { - location->objectid = dir->i_ino; - location->type = BTRFS_INODE_ITEM_KEY; - location->offset = 0; - return 0; - } path = btrfs_alloc_path(); BUG_ON(!path); - if (namelen == 2 && strcmp(name, "..") == 0) { - struct btrfs_key key; - struct extent_buffer *leaf; - int slot; - - key.objectid = dir->i_ino; - key.offset = (u64)-1; - btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); - if (ret < 0 || path->slots[0] == 0) - goto out_err; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - BUG_ON(ret == 0); - ret = 0; - leaf = path->nodes[0]; - slot = path->slots[0] - 1; - - btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid != dir->i_ino || - key.type != BTRFS_INODE_REF_KEY) { - goto out_err; - } - location->objectid = key.offset; - location->type = BTRFS_INODE_ITEM_KEY; - location->offset = 0; - goto out; - } - di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, namelen, 0); if (IS_ERR(di)) @@ -1960,29 +1927,14 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, /* special case for .., just use the back ref */ if (filp->f_pos == 1) { - btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); - key.offset = (u64)-1; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0 || path->slots[0] == 0) { - btrfs_release_path(root, path); - goto read_dir_items; - } - BUG_ON(ret == 0); - leaf = path->nodes[0]; - slot = path->slots[0] - 1; - btrfs_item_key_to_cpu(leaf, &found_key, slot); - btrfs_release_path(root, path); - if (found_key.objectid != key.objectid || - found_key.type != BTRFS_INODE_REF_KEY) - goto read_dir_items; + u64 pino = parent_ino(filp->f_path.dentry); over = filldir(dirent, "..", 2, - 2, found_key.offset, DT_DIR); + 2, pino, DT_DIR); if (over) goto nopos; filp->f_pos = 2; } -read_dir_items: btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; From 49593bfa575b7e3fda073b6d1033ee273bdaf97c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 17 Aug 2008 17:08:36 +0100 Subject: [PATCH 0674/2985] Minor cleanup of btrfs_real_readdir() Date: Sun, 17 Aug 2008 17:08:36 +0100 Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4520a0e86e70..c7b7095634da 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1920,34 +1920,34 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, return 0; filp->f_pos = 1; } - - key.objectid = inode->i_ino; - path = btrfs_alloc_path(); - path->reada = 2; - /* special case for .., just use the back ref */ if (filp->f_pos == 1) { u64 pino = parent_ino(filp->f_path.dentry); over = filldir(dirent, "..", 2, 2, pino, DT_DIR); if (over) - goto nopos; + return 0; filp->f_pos = 2; } + path = btrfs_alloc_path(); + path->reada = 2; + btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; + key.objectid = inode->i_ino; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; advance = 0; - while(1) { + + while (1) { leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; if (advance || slot >= nritems) { - if (slot >= nritems -1) { + if (slot >= nritems - 1) { ret = btrfs_next_leaf(root, path); if (ret) break; @@ -1971,19 +1971,23 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, continue; filp->f_pos = found_key.offset; - advance = 1; + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); di_cur = 0; di_total = btrfs_item_size(leaf, item); - while(di_cur < di_total) { + + while (di_cur < di_total) { struct btrfs_key location; name_len = btrfs_dir_name_len(leaf, di); - if (name_len < 32) { + if (name_len <= sizeof(tmp_name)) { name_ptr = tmp_name; } else { name_ptr = kmalloc(name_len, GFP_NOFS); - BUG_ON(!name_ptr); + if (!name_ptr) { + ret = -ENOMEM; + goto err; + } } read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), name_len); @@ -1991,8 +1995,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; btrfs_dir_item_key_to_cpu(leaf, di, &location); over = filldir(dirent, name_ptr, name_len, - found_key.offset, - location.objectid, + found_key.offset, location.objectid, d_type); if (name_ptr != tmp_name) @@ -2000,12 +2003,15 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, if (over) goto nopos; + di_len = btrfs_dir_name_len(leaf, di) + - btrfs_dir_data_len(leaf, di) +sizeof(*di); + btrfs_dir_data_len(leaf, di) + sizeof(*di); di_cur += di_len; di = (struct btrfs_dir_item *)((char *)di + di_len); } } + + /* Reached end of directory/root. Bump pos past the last item. */ if (key_type == BTRFS_DIR_INDEX_KEY) filp->f_pos = INT_LIMIT(typeof(filp->f_pos)); else From f2322b1c652add8bcd64b10843d76b0211ab1fc6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 17 Aug 2008 17:12:56 +0100 Subject: [PATCH 0675/2985] Btrfs: Optimise NFS readdir hack slightly; don't call readdir() again when done Date: Sun, 17 Aug 2008 17:12:56 +0100 Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c7b7095634da..10f26f445328 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2040,6 +2040,7 @@ struct nfshack_dirent { struct nfshack_readdir { char *dirent; size_t used; + int full; }; @@ -2052,8 +2053,10 @@ static int btrfs_nfshack_filldir(void *__buf, const char *name, int namlen, unsigned int reclen; reclen = ALIGN(sizeof(struct nfshack_dirent) + namlen, sizeof(u64)); - if (buf->used + reclen > PAGE_SIZE) + if (buf->used + reclen > PAGE_SIZE) { + buf->full = 1; return -EINVAL; + } de->namlen = namlen; de->offset = offset; @@ -2080,11 +2083,11 @@ static int btrfs_nfshack_readdir(struct file *file, void *dirent, offset = file->f_pos; - while (1) { + do { unsigned int reclen; buf.used = 0; - + buf.full = 0; err = btrfs_real_readdir(file, &buf, btrfs_nfshack_filldir); if (err) break; @@ -2108,7 +2111,7 @@ static int btrfs_nfshack_readdir(struct file *file, void *dirent, size -= reclen; de = (struct nfshack_dirent *)((char *)de + reclen); } - } + } while (buf.full); done: free_page((unsigned long)buf.dirent); From 9d03632e26e1a0a9e4a632cf426a7c0566768a7d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 18 Aug 2008 12:01:52 +0100 Subject: [PATCH 0676/2985] Fill f_fsid field in btrfs_statfs() Date: Mon, 18 Aug 2008 12:01:52 +0100 Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e830e0ed409a..6446ab73f35a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -489,6 +489,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) struct btrfs_root *root = btrfs_sb(dentry->d_sb); struct btrfs_super_block *disk_super = &root->fs_info->super_copy; int bits = dentry->d_sb->s_blocksize_bits; + __be32 *fsid = (__be32 *)root->fs_info->fsid; buf->f_namelen = BTRFS_NAME_LEN; buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; @@ -497,6 +498,11 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bavail = buf->f_bfree; buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_type = BTRFS_SUPER_MAGIC; + /* We treat it as constant endianness (it doesn't matter _which_) + because we want the fsid to come out the same whether mounted + on a big-endian or little-endian host */ + buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); + buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); return 0; } From 32d48fa1af1fe066a6a4798e6f5a50ac6a3ce4a3 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 18 Aug 2008 13:10:20 +0100 Subject: [PATCH 0677/2985] Mask root object ID into f_fsid in btrfs_statfs() Date: Mon, 18 Aug 2008 13:10:20 +0100 This means that subvolumes get a different fsid, and NFS exporting them works properly. Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/super.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6446ab73f35a..55f4d00fda3a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -503,6 +503,10 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) on a big-endian or little-endian host */ buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); + /* Mask in the root object ID too, to disambiguate subvols */ + buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32; + buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid; + return 0; } From 87acb4ef9b2991e1c453b78d71bce2ef994ef1ff Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 18 Aug 2008 22:50:22 +0100 Subject: [PATCH 0678/2985] Simplify btrfs_get_parent(), fix use-after-free bug Date: Mon, 18 Aug 2008 22:50:22 +0100 Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/export.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 797b4cbc3786..a913b9befe68 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -147,7 +147,6 @@ static struct dentry *btrfs_get_parent(struct dentry *child) struct btrfs_key key; struct btrfs_path *path; struct extent_buffer *leaf; - u32 nritems; int slot; u64 objectid; int ret; @@ -156,27 +155,24 @@ static struct dentry *btrfs_get_parent(struct dentry *child) key.objectid = dir->i_ino; btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); - key.offset = 0; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - BUG_ON(ret == 0); - ret = 0; + key.offset = (u64)-1; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); leaf = path->nodes[0]; slot = path->slots[0]; - nritems = btrfs_header_nritems(leaf); - if (slot >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret) { - btrfs_free_path(path); - goto out; - } - leaf = path->nodes[0]; - slot = path->slots[0]; + if (ret < 0 || slot == 0) { + btrfs_free_path(path); + goto out; } - - btrfs_free_path(path); + /* btrfs_search_slot() returns the slot where we'd want to insert + an INODE_REF_KEY for parent inode #0xFFFFFFFFFFFFFFFF. The _real_ + one, telling us what the parent inode _actually_ is, will be in + the slot _before_ the one that btrfs_search_slot() returns. */ + slot--; btrfs_item_key_to_cpu(leaf, &key, slot); + btrfs_free_path(path); + if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY) goto out; From 76fcef19c40328499a2f6d59d76b72fd03d2cc82 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 19 Aug 2008 16:49:35 +0100 Subject: [PATCH 0679/2985] Btrfs: Reinstate '-osubvol=.' option to mount entire tree Date: Tue, 19 Aug 2008 16:49:35 +0100 This disappeared when I removed the special case for '.' in btrfs_lookup() Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/super.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 55f4d00fda3a..f7b3eac7ac6d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -451,21 +451,25 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, s->s_flags |= MS_ACTIVE; } - mutex_lock(&s->s_root->d_inode->i_mutex); - root = lookup_one_len(subvol_name, s->s_root, strlen(subvol_name)); - mutex_unlock(&s->s_root->d_inode->i_mutex); - if (IS_ERR(root)) { - up_write(&s->s_umount); - deactivate_super(s); - error = PTR_ERR(root); - goto error; - } - if (!root->d_inode) { - dput(root); - up_write(&s->s_umount); - deactivate_super(s); - error = -ENXIO; - goto error; + if (!strcmp(subvol_name, ".")) + root = dget(s->s_root); + else { + mutex_lock(&s->s_root->d_inode->i_mutex); + root = lookup_one_len(subvol_name, s->s_root, strlen(subvol_name)); + mutex_unlock(&s->s_root->d_inode->i_mutex); + if (IS_ERR(root)) { + up_write(&s->s_umount); + deactivate_super(s); + error = PTR_ERR(root); + goto error; + } + if (!root->d_inode) { + dput(root); + up_write(&s->s_umount); + deactivate_super(s); + error = -ENXIO; + goto error; + } } mnt->mnt_sb = s; From 615f996fb8185a0bc02812ebd72cb77ded5645f1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 19 Aug 2008 19:21:57 +0100 Subject: [PATCH 0680/2985] Switch btrfs_name_hash() to crc32c Date: Tue, 19 Aug 2008 19:21:57 +0100 Using a 64-bit hash as the readdir cookie is just asking for trouble. And gets it, when we try to export the file system by NFS. Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/hash.c | 112 ---------------------------------------------- fs/btrfs/hash.h | 7 ++- 3 files changed, 7 insertions(+), 114 deletions(-) delete mode 100644 fs/btrfs/hash.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 75f8818cbfee..8213bba1de9f 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -3,7 +3,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ - hash.o file-item.o inode-item.o inode-map.o disk-io.o \ + file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c deleted file mode 100644 index 21037cfca9bd..000000000000 --- a/fs/btrfs/hash.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -/* - * Original copy from: - * linux/fs/ext3/hash.c - * - * Copyright (C) 2002 by Theodore Ts'o - * - * This file is released under the GPL v2. - * - * This file may be redistributed under the terms of the GNU Public - * License. - */ - -#include -#include "hash.h" -#define DELTA 0x9E3779B9 - -static void TEA_transform(__u32 buf[2], __u32 const in[]) -{ - __u32 sum = 0; - __u32 b0 = buf[0], b1 = buf[1]; - __u32 a = in[0], b = in[1], c = in[2], d = in[3]; - int n = 16; - - do { - sum += DELTA; - b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); - b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); - } while(--n); - - buf[0] += b0; - buf[1] += b1; -} - -static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) -{ - __u32 pad, val; - int i; - - pad = (__u32)len | ((__u32)len << 8); - pad |= pad << 16; - - val = pad; - if (len > num*4) - len = num * 4; - for (i=0; i < len; i++) { - if ((i % 4) == 0) - val = pad; - val = msg[i] + (val << 8); - if ((i % 4) == 3) { - *buf++ = val; - val = pad; - num--; - } - } - if (--num >= 0) - *buf++ = val; - while (--num >= 0) - *buf++ = pad; -} - -u64 btrfs_name_hash(const char *name, int len) -{ - __u32 hash; - __u32 minor_hash = 0; - const char *p; - __u32 in[8], buf[4]; - u64 hash_result; - - if (len == 1 && *name == '.') { - return 1; - } else if (len == 2 && name[0] == '.' && name[1] == '.') { - return 2; - } - - /* Initialize the default seed for the hash checksum functions */ - buf[0] = 0x67452301; - buf[1] = 0xefcdab89; - buf[2] = 0x98badcfe; - buf[3] = 0x10325476; - - p = name; - while (len > 0) { - str2hashbuf(p, len, in, 4); - TEA_transform(buf, in); - len -= 16; - p += 16; - } - hash = buf[0]; - minor_hash = buf[1]; - hash_result = buf[0]; - hash_result <<= 32; - hash_result |= buf[1]; - return hash_result; -} diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index 868ee17ca77a..2a020b276768 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h @@ -18,5 +18,10 @@ #ifndef __HASH__ #define __HASH__ -u64 btrfs_name_hash(const char *name, int len); + +#include "crc32c.h" +static inline u64 btrfs_name_hash(const char *name, int len) +{ + return btrfs_crc32c((u32)~1, name, len); +} #endif From 2d4d9fbd6efa858dfa009518fca1ab85a73fd848 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 19 Aug 2008 22:20:17 +0100 Subject: [PATCH 0681/2985] Btrfs: Fix NFS exporting of subvol roots. Date: Tue, 19 Aug 2008 22:20:17 +0100 btrfs_lookup_fs_root() only finds subvol roots which have already been seen and put into the cache. For btrfs_get_dentry() we actually have to go to the medium -- so use btrfs_read_fs_root_no_name() instead. In btrfs_get_parent(), notice when we've hit the root of the subvolume and return the real root instead. Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/export.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index a913b9befe68..36cbc6872fd0 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -71,11 +71,18 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, struct dentry *result; struct btrfs_key key; + key.objectid = root_objectid; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + key.offset = (u64)-1; + + root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key); + if (IS_ERR(root)) + return ERR_CAST(root); + key.objectid = objectid; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - root = btrfs_lookup_fs_root(btrfs_sb(sb)->fs_info, root_objectid); inode = btrfs_iget(sb, &key, root, NULL); if (IS_ERR(inode)) return (void *)inode; @@ -178,6 +185,10 @@ static struct dentry *btrfs_get_parent(struct dentry *child) objectid = key.offset; + /* If we are already at the root of a subvol, return the real root */ + if (objectid == dir->i_ino) + return dget(dir->i_sb->s_root); + /* Build a new key for the inode item */ key.objectid = objectid; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); From d54a83901055bb0bffca64fa09fce4d897274870 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 19 Aug 2008 22:33:04 +0100 Subject: [PATCH 0682/2985] Clean up btrfs_get_parent() a little more, fix a free-after-free bug Date: Tue, 19 Aug 2008 22:33:04 +0100 Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/export.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 36cbc6872fd0..292b0b24c302 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -165,23 +165,32 @@ static struct dentry *btrfs_get_parent(struct dentry *child) key.offset = (u64)-1; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + /* Error */ + btrfs_free_path(path); + return ERR_PTR(ret); + } leaf = path->nodes[0]; slot = path->slots[0]; - if (ret < 0 || slot == 0) { - btrfs_free_path(path); - goto out; + if (ret) { + /* btrfs_search_slot() returns the slot where we'd want to + insert a backref for parent inode #0xFFFFFFFFFFFFFFFF. + The _real_ backref, telling us what the parent inode + _actually_ is, will be in the slot _before_ the one + that btrfs_search_slot() returns. */ + if (!slot) { + /* Unless there is _no_ key in the tree before... */ + btrfs_free_path(path); + return ERR_PTR(-EIO); + } + slot--; } - /* btrfs_search_slot() returns the slot where we'd want to insert - an INODE_REF_KEY for parent inode #0xFFFFFFFFFFFFFFFF. The _real_ - one, telling us what the parent inode _actually_ is, will be in - the slot _before_ the one that btrfs_search_slot() returns. */ - slot--; btrfs_item_key_to_cpu(leaf, &key, slot); btrfs_free_path(path); if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY) - goto out; + return ERR_PTR(-EINVAL); objectid = key.offset; @@ -201,10 +210,6 @@ static struct dentry *btrfs_get_parent(struct dentry *child) parent = ERR_PTR(-ENOMEM); return parent; - -out: - btrfs_free_path(path); - return ERR_PTR(-EINVAL); } const struct export_operations btrfs_export_ops = { From f3f9931e3d0836509cfccdf473b34e34543a3272 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 21 Aug 2008 15:49:09 -0400 Subject: [PATCH 0683/2985] Btrfs: Rev the disk format Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cecf29e03630..b305ae7e10b0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -40,7 +40,7 @@ extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; struct btrfs_ordered_sum; -#define BTRFS_MAGIC "_B6RfS_M" +#define BTRFS_MAGIC "_B7RfS_M" #define BTRFS_ACL_NOT_CACHED ((void *)-1) From 9473f16c75606fe6b2e5000525fe9766114505f3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 28 Aug 2008 06:15:24 -0400 Subject: [PATCH 0684/2985] Btrfs: Throttle for async bio submits higher up the chain The current code waits for the count of async bio submits to get below a given threshold if it is too high right after adding the latest bio to the work queue. This isn't optimal because the caller may have sequential adjacent bios pending they are waiting to send down the pipe. This changeset requires the caller to wait on the async bio count, and changes the async checksumming submits to wait for async bios any time they self throttle. The end result is much higher sequential throughput. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 +++++++- fs/btrfs/volumes.c | 6 ------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bbba14b629d2..6a218f792e59 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -487,9 +487,15 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, atomic_inc(&fs_info->nr_async_submits); btrfs_queue_worker(&fs_info->workers, &async->work); - wait_event_timeout(fs_info->async_submit_wait, + if (atomic_read(&fs_info->nr_async_submits) > limit) { + wait_event_timeout(fs_info->async_submit_wait, (atomic_read(&fs_info->nr_async_submits) < limit), HZ/10); + + wait_event_timeout(fs_info->async_submit_wait, + (atomic_read(&fs_info->nr_async_bios) < limit), + HZ/10); + } return 0; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2652660e6079..5b1b60839d21 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2145,7 +2145,6 @@ int schedule_bio(struct btrfs_root *root, struct btrfs_device *device, int rw, struct bio *bio) { int should_queue = 1; - unsigned long limit; /* don't bother with additional async steps for reads, right now */ if (!(rw & (1 << BIO_RW))) { @@ -2182,11 +2181,6 @@ int schedule_bio(struct btrfs_root *root, struct btrfs_device *device, if (should_queue) btrfs_queue_worker(&root->fs_info->submit_workers, &device->work); - - limit = btrfs_async_submit_limit(root->fs_info); - wait_event_timeout(root->fs_info->async_submit_wait, - (atomic_read(&root->fs_info->nr_async_bios) < limit), - HZ/10); return 0; } From 639cb58675ce9b507eed9c3d6b3335488079b21a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 28 Aug 2008 06:15:25 -0400 Subject: [PATCH 0685/2985] Btrfs: Fix variable init during csum creation Signed-off-by: Chris Mason --- fs/btrfs/file-item.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f861bd506928..6dbe88b9d7d4 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -325,6 +325,7 @@ again: item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1); if (!IS_ERR(item)) { leaf = path->nodes[0]; + ret = 0; goto found; } ret = PTR_ERR(item); From eab922ec8907b8c506e799785e7e2d16eabe50e4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 28 Aug 2008 06:21:15 -0400 Subject: [PATCH 0686/2985] Btrfs: compile when posix acl's are disabled This patch makes btrfs so it will compile properly when acls are disabled. I tested this and it worked with CONFIG_FS_POSIX_ACL off and on. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +-- fs/btrfs/acl.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 8213bba1de9f..3a01065d4ef1 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -7,9 +7,8 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - ref-cache.o export.o + ref-cache.o acl.o export.o -btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o else # Normal Makefile diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index b192659b4f5f..b95147ef1c77 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -27,6 +27,8 @@ #include "btrfs_inode.h" #include "xattr.h" +#ifdef CONFIG_FS_POSIX_ACL + static void btrfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl, struct posix_acl *acl) @@ -329,3 +331,22 @@ struct xattr_handler btrfs_xattr_acl_access_handler = { .get = btrfs_xattr_acl_access_get, .set = btrfs_xattr_acl_access_set, }; + +#else /* CONFIG_FS_POSIX_ACL */ + +int btrfs_acl_chmod(struct inode *inode) +{ + return 0; +} + +int btrfs_init_acl(struct inode *inode, struct inode *dir) +{ + return 0; +} + +int btrfs_check_acl(struct inode *inode, int mask) +{ + return 0; +} + +#endif /* CONFIG_FS_POSIX_ACL */ From eaa47d8612783807ef9703ebc9bf0d0f0455bf62 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 Aug 2008 06:21:16 -0400 Subject: [PATCH 0687/2985] btrfs: optmize listxattr The ->list handler is really not useful at all, because we always call btrfs_xattr_generic_list anyway. After this is done find_btrfs_xattr_handler becomes unused, and it becomes obvious that the temporary name buffer allocation isn't needed but we can directly copy into the supplied buffer. Tested with various getfattr -d calls on varying xattr lists. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 2 -- fs/btrfs/xattr.c | 82 +++++++----------------------------------------- fs/btrfs/xattr.h | 8 ----- 3 files changed, 11 insertions(+), 81 deletions(-) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index b95147ef1c77..2f865311460c 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -320,14 +320,12 @@ int btrfs_acl_chmod(struct inode *inode) struct xattr_handler btrfs_xattr_acl_default_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, - .list = btrfs_xattr_generic_list, .get = btrfs_xattr_acl_default_get, .set = btrfs_xattr_acl_default_set, }; struct xattr_handler btrfs_xattr_acl_access_handler = { .prefix = POSIX_ACL_XATTR_ACCESS, - .list = btrfs_xattr_generic_list, .get = btrfs_xattr_acl_access_get, .set = btrfs_xattr_acl_access_set, }; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 121c9550314f..fdfece41dd16 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -50,35 +50,6 @@ struct xattr_handler *btrfs_xattr_handlers[] = { NULL, }; -/* - * @param name - the xattr name - * @return - the xattr_handler for the xattr, NULL if its not found - * - * use this with listxattr where we don't already know the type of xattr we - * have - */ -static struct xattr_handler *find_btrfs_xattr_handler(struct extent_buffer *l, - unsigned long name_ptr, - u16 name_len) -{ - struct xattr_handler *handler = NULL; - int i = 0; - - for (handler = btrfs_xattr_handlers[i]; handler != NULL; i++, - handler = btrfs_xattr_handlers[i]) { - u16 prefix_len = strlen(handler->prefix); - - if (name_len < prefix_len) - continue; - - if (memcmp_extent_buffer(l, handler->prefix, name_ptr, - prefix_len) == 0) - break; - } - - return handler; -} - /* * @param name_index - the index for the xattr handler * @return the xattr_handler if we found it, NULL otherwise @@ -118,19 +89,6 @@ static inline char *get_name(const char *name, int name_index) return ret; } -size_t btrfs_xattr_generic_list(struct inode *inode, char *list, - size_t list_size, const char *name, - size_t name_len) -{ - if (list && (name_len+1) <= list_size) { - memcpy(list, name, name_len); - list[name_len] = '\0'; - } else - return -ERANGE; - - return name_len+1; -} - ssize_t btrfs_xattr_get(struct inode *inode, int name_index, const char *attr_name, void *buffer, size_t size) { @@ -278,11 +236,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) struct btrfs_item *item; struct extent_buffer *leaf; struct btrfs_dir_item *di; - struct xattr_handler *handler; int ret = 0, slot, advance; - size_t total_size = 0, size_left = size, written; + size_t total_size = 0, size_left = size; unsigned long name_ptr; - char *name; + size_t name_len; u32 nritems; /* @@ -344,37 +301,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); - total_size += btrfs_dir_name_len(leaf, di)+1; + name_len = btrfs_dir_name_len(leaf, di); + total_size += name_len + 1; /* we are just looking for how big our buffer needs to be */ if (!size) continue; - /* find our handler for this xattr */ - name_ptr = (unsigned long)(di + 1); - handler = find_btrfs_xattr_handler(leaf, name_ptr, - btrfs_dir_name_len(leaf, di)); - if (!handler) { - printk(KERN_ERR "btrfs: unsupported xattr found\n"); - continue; - } - - name = kmalloc(btrfs_dir_name_len(leaf, di), GFP_KERNEL); - read_extent_buffer(leaf, name, name_ptr, - btrfs_dir_name_len(leaf, di)); - - /* call the list function associated with this xattr */ - written = handler->list(inode, buffer, size_left, name, - btrfs_dir_name_len(leaf, di)); - kfree(name); - - if (written < 0) { + if (!buffer || (name_len + 1) > size_left) { ret = -ERANGE; break; } - size_left -= written; - buffer += written; + name_ptr = (unsigned long)(di + 1); + read_extent_buffer(leaf, buffer, name_ptr, name_len); + buffer[name_len] = '\0'; + + size_left -= name_len + 1; + buffer += name_len + 1; } ret = total_size; @@ -412,28 +356,24 @@ BTRFS_XATTR_SETGET_FUNCS(trusted, BTRFS_XATTR_INDEX_TRUSTED); struct xattr_handler btrfs_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, - .list = btrfs_xattr_generic_list, .get = btrfs_xattr_security_get, .set = btrfs_xattr_security_set, }; struct xattr_handler btrfs_xattr_system_handler = { .prefix = XATTR_SYSTEM_PREFIX, - .list = btrfs_xattr_generic_list, .get = btrfs_xattr_system_get, .set = btrfs_xattr_system_set, }; struct xattr_handler btrfs_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, - .list = btrfs_xattr_generic_list, .get = btrfs_xattr_user_get, .set = btrfs_xattr_user_set, }; struct xattr_handler btrfs_xattr_trusted_handler = { .prefix = XATTR_TRUSTED_PREFIX, - .list = btrfs_xattr_generic_list, .get = btrfs_xattr_trusted_get, .set = btrfs_xattr_trusted_set, }; diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index b2e47e3f2442..825e55bd4960 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h @@ -47,12 +47,4 @@ ssize_t btrfs_xattr_get(struct inode *inode, int name_index, const char *name, int btrfs_xattr_set(struct inode *inode, int name_index, const char *name, const void *value, size_t size, int flags); -/* - * the only reason this is public is for acl.c. There may be a point where - * acl.c doesn't need it, and if thats the case we need to remove it and make - * it static in xattr.c - */ -size_t btrfs_xattr_generic_list(struct inode *inode, char *list, - size_t list_size, const char *name, - size_t name_len); #endif /* __XATTR__ */ From 95819c05732c511338b43c115ffbcee978c02888 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 Aug 2008 06:21:17 -0400 Subject: [PATCH 0688/2985] Btrfs: optimize btrget/set/removexattr btrfs actually stores the whole xattr name, including the prefix ondisk, so using the generic resolver that strips off the prefix is not very helpful. Instead do the real ondisk xattrs manually and only use the generic resolver for synthetic xattrs like ACLs. (Sorry Josef for guiding you towards the wrong direction here intially) Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 20 ++--- fs/btrfs/inode.c | 19 ++--- fs/btrfs/xattr.c | 204 +++++++++++++++++------------------------------ fs/btrfs/xattr.h | 31 +++---- 4 files changed, 104 insertions(+), 170 deletions(-) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 2f865311460c..867eaf1f8efb 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -42,17 +42,18 @@ static void btrfs_update_cached_acl(struct inode *inode, static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) { - int size, name_index; + int size; + const char *name; char *value = NULL; struct posix_acl *acl = NULL, **p_acl; switch (type) { case ACL_TYPE_ACCESS: - name_index = BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS; + name = POSIX_ACL_XATTR_ACCESS; p_acl = &BTRFS_I(inode)->i_acl; break; case ACL_TYPE_DEFAULT: - name_index = BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT; + name = POSIX_ACL_XATTR_DEFAULT; p_acl = &BTRFS_I(inode)->i_default_acl; break; default: @@ -68,12 +69,12 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) return acl; - size = btrfs_xattr_get(inode, name_index, "", NULL, 0); + size = __btrfs_getxattr(inode, name, "", 0); if (size > 0) { value = kzalloc(size, GFP_NOFS); if (!value) return ERR_PTR(-ENOMEM); - size = btrfs_xattr_get(inode, name_index, "", value, size); + size = __btrfs_getxattr(inode, name, value, size); if (size > 0) { acl = posix_acl_from_xattr(value, size); btrfs_update_cached_acl(inode, p_acl, acl); @@ -110,7 +111,8 @@ static int btrfs_xattr_get_acl(struct inode *inode, int type, */ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { - int ret, name_index = 0, size = 0; + int ret, size = 0; + const char *name; struct posix_acl **p_acl; char *value = NULL; mode_t mode; @@ -130,13 +132,13 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) return ret; ret = 0; inode->i_mode = mode; - name_index = BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS; + name = POSIX_ACL_XATTR_ACCESS; p_acl = &BTRFS_I(inode)->i_acl; break; case ACL_TYPE_DEFAULT: if (!S_ISDIR(inode->i_mode)) return acl ? -EINVAL : 0; - name_index = BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT; + name = POSIX_ACL_XATTR_DEFAULT; p_acl = &BTRFS_I(inode)->i_default_acl; break; default: @@ -156,7 +158,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) goto out; } - ret = btrfs_xattr_set(inode, name_index, "", value, size, 0); + ret = __btrfs_setxattr(inode, name, value, size, 0); out: if (value) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 10f26f445328..43d3f2649ca3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -45,6 +45,7 @@ #include "print-tree.h" #include "volumes.h" #include "ordered-data.h" +#include "xattr.h" struct btrfs_iget_args { u64 ino; @@ -3667,10 +3668,10 @@ static struct inode_operations btrfs_dir_inode_operations = { .symlink = btrfs_symlink, .setattr = btrfs_setattr, .mknod = btrfs_mknod, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, + .setxattr = btrfs_setxattr, + .getxattr = btrfs_getxattr, .listxattr = btrfs_listxattr, - .removexattr = generic_removexattr, + .removexattr = btrfs_removexattr, .permission = btrfs_permission, }; static struct inode_operations btrfs_dir_ro_inode_operations = { @@ -3728,20 +3729,20 @@ static struct inode_operations btrfs_file_inode_operations = { .truncate = btrfs_truncate, .getattr = btrfs_getattr, .setattr = btrfs_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, + .setxattr = btrfs_setxattr, + .getxattr = btrfs_getxattr, .listxattr = btrfs_listxattr, - .removexattr = generic_removexattr, + .removexattr = btrfs_removexattr, .permission = btrfs_permission, }; static struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, .setattr = btrfs_setattr, .permission = btrfs_permission, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, + .setxattr = btrfs_setxattr, + .getxattr = btrfs_getxattr, .listxattr = btrfs_listxattr, - .removexattr = generic_removexattr, + .removexattr = btrfs_removexattr, }; static struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index fdfece41dd16..adb4b32a9d51 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -27,91 +27,20 @@ #include "xattr.h" #include "disk-io.h" -static struct xattr_handler *btrfs_xattr_handler_map[] = { - [BTRFS_XATTR_INDEX_USER] = &btrfs_xattr_user_handler, -#ifdef CONFIG_FS_POSIX_ACL - [BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &btrfs_xattr_acl_access_handler, - [BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &btrfs_xattr_acl_default_handler, -#endif - [BTRFS_XATTR_INDEX_TRUSTED] = &btrfs_xattr_trusted_handler, - [BTRFS_XATTR_INDEX_SECURITY] = &btrfs_xattr_security_handler, - [BTRFS_XATTR_INDEX_SYSTEM] = &btrfs_xattr_system_handler, -}; -struct xattr_handler *btrfs_xattr_handlers[] = { - &btrfs_xattr_user_handler, -#ifdef CONFIG_FS_POSIX_ACL - &btrfs_xattr_acl_access_handler, - &btrfs_xattr_acl_default_handler, -#endif - &btrfs_xattr_trusted_handler, - &btrfs_xattr_security_handler, - &btrfs_xattr_system_handler, - NULL, -}; - -/* - * @param name_index - the index for the xattr handler - * @return the xattr_handler if we found it, NULL otherwise - * - * use this if we know the type of the xattr already - */ -static struct xattr_handler *btrfs_xattr_handler(int name_index) -{ - struct xattr_handler *handler = NULL; - - if (name_index >= 0 && - name_index < ARRAY_SIZE(btrfs_xattr_handler_map)) - handler = btrfs_xattr_handler_map[name_index]; - - return handler; -} - -static inline char *get_name(const char *name, int name_index) -{ - char *ret = NULL; - struct xattr_handler *handler = btrfs_xattr_handler(name_index); - int prefix_len; - - if (!handler) - return ret; - - prefix_len = strlen(handler->prefix); - - ret = kmalloc(strlen(name) + prefix_len + 1, GFP_KERNEL); - if (!ret) - return ret; - - memcpy(ret, handler->prefix, prefix_len); - memcpy(ret+prefix_len, name, strlen(name)); - ret[prefix_len + strlen(name)] = '\0'; - - return ret; -} - -ssize_t btrfs_xattr_get(struct inode *inode, int name_index, - const char *attr_name, void *buffer, size_t size) +ssize_t __btrfs_getxattr(struct inode *inode, const char *name, + void *buffer, size_t size) { struct btrfs_dir_item *di; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_path *path; struct extent_buffer *leaf; - struct xattr_handler *handler = btrfs_xattr_handler(name_index); int ret = 0; unsigned long data_ptr; - char *name; - - if (!handler) - return -EOPNOTSUPP; - name = get_name(attr_name, name_index); - if (!name) - return -ENOMEM; path = btrfs_alloc_path(); - if (!path) { - kfree(name); + if (!path) return -ENOMEM; - } /* lookup the xattr by name */ di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, @@ -140,33 +69,22 @@ ssize_t btrfs_xattr_get(struct inode *inode, int name_index, ret = btrfs_dir_data_len(leaf, di); out: - kfree(name); btrfs_free_path(path); return ret; } -int btrfs_xattr_set(struct inode *inode, int name_index, - const char *attr_name, const void *value, size_t size, - int flags) +int __btrfs_setxattr(struct inode *inode, const char *name, + const void *value, size_t size, int flags) { struct btrfs_dir_item *di; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; struct btrfs_path *path; - struct xattr_handler *handler = btrfs_xattr_handler(name_index); - char *name; int ret = 0, mod = 0; - if (!handler) - return -EOPNOTSUPP; - name = get_name(attr_name, name_index); - if (!name) - return -ENOMEM; path = btrfs_alloc_path(); - if (!path) { - kfree(name); + if (!path) return -ENOMEM; - } trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -221,9 +139,7 @@ out: } btrfs_end_transaction(trans, root); - kfree(name); btrfs_free_path(path); - return ret; } @@ -329,51 +245,77 @@ err: } /* - * Handler functions + * List of handlers for synthetic system.* attributes. All real ondisk + * attributes are handled directly. */ -#define BTRFS_XATTR_SETGET_FUNCS(name, index) \ -static int btrfs_xattr_##name##_get(struct inode *inode, \ - const char *name, void *value, \ - size_t size) \ -{ \ - if (*name == '\0') \ - return -EINVAL; \ - return btrfs_xattr_get(inode, index, name, value, size); \ -} \ -static int btrfs_xattr_##name##_set(struct inode *inode, \ - const char *name, const void *value,\ - size_t size, int flags) \ -{ \ - if (*name == '\0') \ - return -EINVAL; \ - return btrfs_xattr_set(inode, index, name, value, size, flags); \ +struct xattr_handler *btrfs_xattr_handlers[] = { +#ifdef CONFIG_FS_POSIX_ACL + &btrfs_xattr_acl_access_handler, + &btrfs_xattr_acl_default_handler, +#endif + NULL, +}; + +/* + * Check if the attribute is in a supported namespace. + * + * This applied after the check for the synthetic attributes in the system + * namespace. + */ +static bool btrfs_is_valid_xattr(const char *name) +{ + return !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) || + !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) || + !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || + !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); } -BTRFS_XATTR_SETGET_FUNCS(security, BTRFS_XATTR_INDEX_SECURITY); -BTRFS_XATTR_SETGET_FUNCS(system, BTRFS_XATTR_INDEX_SYSTEM); -BTRFS_XATTR_SETGET_FUNCS(user, BTRFS_XATTR_INDEX_USER); -BTRFS_XATTR_SETGET_FUNCS(trusted, BTRFS_XATTR_INDEX_TRUSTED); +ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler + * for it via sb->s_xattr. + */ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_getxattr(dentry, name, buffer, size); -struct xattr_handler btrfs_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .get = btrfs_xattr_security_get, - .set = btrfs_xattr_security_set, -}; + if (!btrfs_is_valid_xattr(name)) + return -EOPNOTSUPP; + return __btrfs_getxattr(dentry->d_inode, name, buffer, size); +} -struct xattr_handler btrfs_xattr_system_handler = { - .prefix = XATTR_SYSTEM_PREFIX, - .get = btrfs_xattr_system_get, - .set = btrfs_xattr_system_set, -}; +int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) +{ + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler + * for it via sb->s_xattr. + */ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_setxattr(dentry, name, value, size, flags); -struct xattr_handler btrfs_xattr_user_handler = { - .prefix = XATTR_USER_PREFIX, - .get = btrfs_xattr_user_get, - .set = btrfs_xattr_user_set, -}; + if (!btrfs_is_valid_xattr(name)) + return -EOPNOTSUPP; -struct xattr_handler btrfs_xattr_trusted_handler = { - .prefix = XATTR_TRUSTED_PREFIX, - .get = btrfs_xattr_trusted_get, - .set = btrfs_xattr_trusted_set, -}; + if (size == 0) + value = ""; /* empty EA, do not remove */ + return __btrfs_setxattr(dentry->d_inode, name, value, size, flags); +} + +int btrfs_removexattr(struct dentry *dentry, const char *name) +{ + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler + * for it via sb->s_xattr. + */ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_removexattr(dentry, name); + + if (!btrfs_is_valid_xattr(name)) + return -EOPNOTSUPP; + return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); +} diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index 825e55bd4960..5b1d08f8e68d 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h @@ -20,31 +20,20 @@ #define __XATTR__ #include -#include "ctree.h" -/* Name indexes */ -enum { - BTRFS_XATTR_INDEX_USER, - BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, - BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, - BTRFS_XATTR_INDEX_TRUSTED, - BTRFS_XATTR_INDEX_SECURITY, - BTRFS_XATTR_INDEX_SYSTEM, - BTRFS_XATTR_INDEX_END, -}; - -extern struct xattr_handler btrfs_xattr_user_handler; -extern struct xattr_handler btrfs_xattr_trusted_handler; extern struct xattr_handler btrfs_xattr_acl_access_handler; extern struct xattr_handler btrfs_xattr_acl_default_handler; -extern struct xattr_handler btrfs_xattr_security_handler; -extern struct xattr_handler btrfs_xattr_system_handler; - extern struct xattr_handler *btrfs_xattr_handlers[]; -ssize_t btrfs_xattr_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t size); -int btrfs_xattr_set(struct inode *inode, int name_index, const char *name, - const void *value, size_t size, int flags); +extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, + void *buffer, size_t size); +extern int __btrfs_setxattr(struct inode *inode, const char *name, + const void *value, size_t size, int flags); + +extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size); +extern int btrfs_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +extern int btrfs_removexattr(struct dentry *dentry, const char *name); #endif /* __XATTR__ */ From a1b32a5932cfac7c38b442582285f3da2a09dfd8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 5 Sep 2008 16:09:51 -0400 Subject: [PATCH 0689/2985] Btrfs: Add debugging checks to track down corrupted metadata Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ++++++- fs/btrfs/extent_io.c | 21 ++++++++++++++------- fs/btrfs/file.c | 15 ++++++++------- fs/btrfs/volumes.c | 40 +++++++++++++++++++++------------------- 4 files changed, 49 insertions(+), 34 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6a218f792e59..8e7a938bfbc7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -250,7 +250,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, if (!ret && !verify_parent_transid(io_tree, eb, parent_transid)) return ret; - +printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror_num); num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, eb->start, eb->len); if (num_copies == 1) @@ -348,6 +348,9 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, found_start = btrfs_header_bytenr(eb); if (found_start != start) { + printk("bad tree block start %llu %llu\n", + (unsigned long long)found_start, + (unsigned long long)eb->start); ret = -EIO; goto err; } @@ -709,6 +712,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, if (ret == 0) { buf->flags |= EXTENT_UPTODATE; + } else { + WARN_ON(1); } return buf; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 83ba0c328722..7ca89c45d401 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1811,6 +1811,7 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur); } /* the get_extent function already copied into the page */ if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { + check_page_uptodate(tree, page); unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); cur = cur + iosize; page_offset += iosize; @@ -2785,21 +2786,20 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, * properly set. releasepage may drop page->private * on us if the page isn't already dirty. */ + lock_page(page); if (i == 0) { - lock_page(page); set_page_extent_head(page, eb->len); } else if (PagePrivate(page) && page->private != EXTENT_PAGE_PRIVATE) { - lock_page(page); set_page_extent_mapped(page); - unlock_page(page); } __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); - if (i == 0) - unlock_page(page); + set_extent_dirty(tree, page_offset(page), + page_offset(page) + PAGE_CACHE_SIZE -1, + GFP_NOFS); + unlock_page(page); } - return set_extent_dirty(tree, eb->start, - eb->start + eb->len - 1, GFP_NOFS); + return 0; } EXPORT_SYMBOL(set_extent_buffer_dirty); @@ -2952,6 +2952,9 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (all_uptodate) { if (start_i == 0) eb->flags |= EXTENT_UPTODATE; + if (ret) { + printk("all up to date but ret is %d\n", ret); + } goto unlock_exit; } @@ -2968,6 +2971,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, mirror_num); if (err) { ret = err; + printk("err %d from __extent_read_full_page\n", ret); } } else { unlock_page(page); @@ -2978,12 +2982,15 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, submit_one_bio(READ, bio, mirror_num); if (ret || !wait) { + if (ret) + printk("ret %d wait %d returning\n", ret, wait); return ret; } for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); wait_on_page_locked(page); if (!PageUptodate(page)) { + printk("page not uptodate after wait_on_page_locked\n"); ret = -EIO; } } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index eb8e4556fa71..e9e86fbaa243 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -39,9 +39,10 @@ #include "compat.h" -static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, - struct page **prepared_pages, - const char __user * buf) +static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, + int write_bytes, + struct page **prepared_pages, + const char __user * buf) { long page_fault = 0; int i; @@ -69,7 +70,7 @@ static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, return page_fault ? -EFAULT : 0; } -static void btrfs_drop_pages(struct page **pages, size_t num_pages) +static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) { size_t i; for (i = 0; i < num_pages; i++) { @@ -359,7 +360,7 @@ out_unlock: return err; } -int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) +int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) { struct extent_map *em; struct extent_map *split = NULL; @@ -515,7 +516,7 @@ out: * it is either truncated or split. Anything entirely inside the range * is deleted from the tree. */ -int btrfs_drop_extents(struct btrfs_trans_handle *trans, +int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 inline_limit, u64 *hint_byte) { @@ -785,7 +786,7 @@ out: /* * this gets pages into the page cache and locks them down */ -static int prepare_pages(struct btrfs_root *root, struct file *file, +static int noinline prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, loff_t pos, unsigned long first_index, unsigned long last_index, size_t write_bytes) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5b1b60839d21..37a8ea23e81d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -94,8 +94,8 @@ int btrfs_cleanup_fs_uuids(void) return 0; } -static struct btrfs_device *__find_device(struct list_head *head, u64 devid, - u8 *uuid) +static noinline struct btrfs_device *__find_device(struct list_head *head, + u64 devid, u8 *uuid) { struct btrfs_device *dev; struct list_head *cur; @@ -110,7 +110,7 @@ static struct btrfs_device *__find_device(struct list_head *head, u64 devid, return NULL; } -static struct btrfs_fs_devices *find_fsid(u8 *fsid) +static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) { struct list_head *cur; struct btrfs_fs_devices *fs_devices; @@ -134,7 +134,7 @@ static struct btrfs_fs_devices *find_fsid(u8 *fsid) * the list if the block device is congested. This way, multiple devices * can make progress from a single worker thread. */ -int run_scheduled_bios(struct btrfs_device *device) +static int noinline run_scheduled_bios(struct btrfs_device *device) { struct bio *pending; struct backing_dev_info *bdi; @@ -233,7 +233,7 @@ void pending_bios_fn(struct btrfs_work *work) run_scheduled_bios(device); } -static int device_list_add(const char *path, +static noinline int device_list_add(const char *path, struct btrfs_super_block *disk_super, u64 devid, struct btrfs_fs_devices **fs_devices_ret) { @@ -480,10 +480,10 @@ error: * called very infrequently and that a given device has a small number * of extents */ -static int find_free_dev_extent(struct btrfs_trans_handle *trans, - struct btrfs_device *device, - struct btrfs_path *path, - u64 num_bytes, u64 *start) +static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, + struct btrfs_path *path, + u64 num_bytes, u64 *start) { struct btrfs_key key; struct btrfs_root *root = device->dev_root; @@ -645,7 +645,7 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, return ret; } -int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, +int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, @@ -693,7 +693,8 @@ err: return ret; } -static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset) +static noinline int find_next_chunk(struct btrfs_root *root, + u64 objectid, u64 *offset) { struct btrfs_path *path; int ret; @@ -735,8 +736,8 @@ error: return ret; } -static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path, - u64 *objectid) +static noinline int find_next_devid(struct btrfs_root *root, + struct btrfs_path *path, u64 *objectid) { int ret; struct btrfs_key key; @@ -1103,8 +1104,8 @@ out_close_bdev: goto out; } -int btrfs_update_device(struct btrfs_trans_handle *trans, - struct btrfs_device *device) +int noinline btrfs_update_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device) { int ret; struct btrfs_path *path; @@ -1544,8 +1545,8 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, return 0; } -static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes, - int sub_stripes) +static u64 noinline chunk_bytes_by_type(u64 type, u64 calc_size, + int num_stripes, int sub_stripes) { if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) return calc_size; @@ -2141,8 +2142,9 @@ struct async_sched { * This will add one bio to the pending list for a device and make sure * the work struct is scheduled. */ -int schedule_bio(struct btrfs_root *root, struct btrfs_device *device, - int rw, struct bio *bio) +static int noinline schedule_bio(struct btrfs_root *root, + struct btrfs_device *device, + int rw, struct bio *bio) { int should_queue = 1; From e02119d5a7b4396c5a872582fddc8bd6d305a70a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 5 Sep 2008 16:13:11 -0400 Subject: [PATCH 0690/2985] Btrfs: Add a write ahead tree log to optimize synchronous operations File syncs and directory syncs are optimized by copying their items into a special (copy-on-write) log tree. There is one log tree per subvolume and the btrfs super block points to a tree of log tree roots. After a crash, items are copied out of the log tree and back into the subvolume. See tree-log.c for all the details. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/btrfs_inode.h | 8 + fs/btrfs/compat.h | 15 + fs/btrfs/ctree.c | 73 +- fs/btrfs/ctree.h | 72 +- fs/btrfs/dir-item.c | 3 + fs/btrfs/disk-io.c | 138 +- fs/btrfs/disk-io.h | 8 +- fs/btrfs/extent-tree.c | 93 +- fs/btrfs/file.c | 39 +- fs/btrfs/inode.c | 261 ++-- fs/btrfs/root-tree.c | 5 +- fs/btrfs/transaction.c | 45 +- fs/btrfs/transaction.h | 1 + fs/btrfs/tree-defrag.c | 4 +- fs/btrfs/tree-log.c | 2804 ++++++++++++++++++++++++++++++++++++++++ fs/btrfs/tree-log.h | 41 + 17 files changed, 3408 insertions(+), 205 deletions(-) create mode 100644 fs/btrfs/tree-log.c create mode 100644 fs/btrfs/tree-log.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 3a01065d4ef1..b7addbfd8c22 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -7,8 +7,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - ref-cache.o acl.o export.o - + ref-cache.o export.o tree-log.o acl.o else # Normal Makefile diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 111f90524ae1..fcc8cf27e906 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -33,6 +33,7 @@ struct btrfs_inode { struct extent_io_tree io_failure_tree; struct mutex csum_mutex; struct mutex extent_mutex; + struct mutex log_mutex; struct inode vfs_inode; struct btrfs_ordered_inode_tree ordered_tree; @@ -44,10 +45,17 @@ struct btrfs_inode { struct list_head delalloc_inodes; + /* full 64 bit generation number */ + u64 generation; + /* * transid of the trans_handle that last modified this inode */ u64 last_trans; + /* + * transid that last logged this inode + */ + u64 logged_trans; u64 delalloc_bytes; u64 disk_i_size; u32 flags; diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h index d45fb37887bc..b0ed1887d9b1 100644 --- a/fs/btrfs/compat.h +++ b/fs/btrfs/compat.h @@ -22,6 +22,21 @@ static inline struct dentry *d_obtain_alias(struct inode *inode) } #endif +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +static inline void btrfs_drop_nlink(struct inode *inode) +{ + inode->i_nlink--; +} + +static inline void btrfs_inc_nlink(struct inode *inode) +{ + inode->i_nlink++; +} +#else +# define btrfs_drop_nlink(inode) drop_nlink(inode) +# define btrfs_inc_nlink(inode) inc_nlink(inode) +#endif + /* * Even if AppArmor isn't enabled, it still has different prototypes. * Add more distro/version pairs here to declare which has AppArmor applied. diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7114faafa9d4..579124043d9b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -60,7 +60,7 @@ void btrfs_free_path(struct btrfs_path *p) kmem_cache_free(btrfs_path_cachep, p); } -void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) +void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; @@ -176,7 +176,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, return 0; } -int __btrfs_cow_block(struct btrfs_trans_handle *trans, +int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, @@ -294,7 +294,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } -int btrfs_cow_block(struct btrfs_trans_handle *trans, +int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret, u64 prealloc_dest) @@ -677,9 +677,10 @@ static int noinline check_block(struct btrfs_root *root, * * slot may point to max if the key is bigger than all of the keys */ -static int generic_bin_search(struct extent_buffer *eb, unsigned long p, - int item_size, struct btrfs_key *key, - int max, int *slot) +static noinline int generic_bin_search(struct extent_buffer *eb, + unsigned long p, + int item_size, struct btrfs_key *key, + int max, int *slot) { int low = 0; int high = max; @@ -765,7 +766,7 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, return -1; } -static struct extent_buffer *read_node_slot(struct btrfs_root *root, +static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, struct extent_buffer *parent, int slot) { int level = btrfs_header_level(parent); @@ -781,7 +782,7 @@ static struct extent_buffer *read_node_slot(struct btrfs_root *root, btrfs_node_ptr_generation(parent, slot)); } -static int balance_level(struct btrfs_trans_handle *trans, +static noinline int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { @@ -1128,8 +1129,9 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, /* * readahead one full node of leaves */ -static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, - int level, int slot, u64 objectid) +static noinline void reada_for_search(struct btrfs_root *root, + struct btrfs_path *path, + int level, int slot, u64 objectid) { struct extent_buffer *node; struct btrfs_disk_key disk_key; @@ -1201,7 +1203,8 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, } } -static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) +static noinline void unlock_up(struct btrfs_path *path, int level, + int lowest_unlock) { int i; int skip_level = level; @@ -1759,8 +1762,9 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root * * returns 0 on success and < 0 on failure */ -static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int level) +static noinline int split_node(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) { u64 root_gen; struct extent_buffer *c; @@ -1874,7 +1878,8 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) +int noinline btrfs_leaf_free_space(struct btrfs_root *root, + struct extent_buffer *leaf) { int nritems = btrfs_header_nritems(leaf); int ret; @@ -2283,9 +2288,11 @@ out: * * returns 0 if all went well and < 0 on failure. */ -static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_key *ins_key, - struct btrfs_path *path, int data_size, int extend) +static noinline int split_leaf(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *ins_key, + struct btrfs_path *path, int data_size, + int extend) { u64 root_gen; struct extent_buffer *l; @@ -3079,6 +3086,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) * was nothing in the tree that matched the search criteria. */ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_key *max_key, struct btrfs_path *path, int cache_only, u64 min_trans) { @@ -3093,6 +3101,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, again: cur = btrfs_lock_root_node(root); level = btrfs_header_level(cur); + WARN_ON(path->nodes[level]); path->nodes[level] = cur; path->locks[level] = 1; @@ -3107,6 +3116,8 @@ again: /* at level = 0, we're done, setup the path and exit */ if (level == 0) { + if (slot >= nritems) + goto find_next_key; ret = 0; path->slots[level] = slot; btrfs_item_key_to_cpu(cur, &found_key, slot); @@ -3123,6 +3134,8 @@ again: u64 blockptr; u64 gen; struct extent_buffer *tmp; + struct btrfs_disk_key disk_key; + blockptr = btrfs_node_blockptr(cur, slot); gen = btrfs_node_ptr_generation(cur, slot); if (gen < min_trans) { @@ -3132,6 +3145,14 @@ again: if (!cache_only) break; + if (max_key) { + btrfs_node_key(cur, &disk_key, slot); + if (comp_keys(&disk_key, max_key) >= 0) { + ret = 1; + goto out; + } + } + tmp = btrfs_find_tree_block(root, blockptr, btrfs_level_size(root, level - 1)); @@ -3143,14 +3164,16 @@ again: free_extent_buffer(tmp); slot++; } +find_next_key: /* * we didn't find a candidate key in this node, walk forward * and find another one */ if (slot >= nritems) { - ret = btrfs_find_next_key(root, path, min_key, level, + path->slots[level] = slot; + sret = btrfs_find_next_key(root, path, min_key, level, cache_only, min_trans); - if (ret == 0) { + if (sret == 0) { btrfs_release_path(root, path); goto again; } else { @@ -3351,6 +3374,7 @@ int btrfs_previous_item(struct btrfs_root *root, { struct btrfs_key found_key; struct extent_buffer *leaf; + u32 nritems; int ret; while(1) { @@ -3362,9 +3386,20 @@ int btrfs_previous_item(struct btrfs_root *root, path->slots[0]--; } leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (nritems == 0) + return 1; + if (path->slots[0] == nritems) + path->slots[0]--; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.type == type) return 0; + if (found_key.objectid < min_objectid) + break; + if (found_key.objectid == min_objectid && + found_key.type < type) + break; } return 1; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b305ae7e10b0..6532b60683ef 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -77,6 +77,10 @@ struct btrfs_ordered_sum; /* orhpan objectid for tracking unlinked/truncated files */ #define BTRFS_ORPHAN_OBJECTID -5ULL +/* does write ahead logging to speed up fsyncs */ +#define BTRFS_TREE_LOG_OBJECTID -6ULL +#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL + /* * All files have objectids higher than this. */ @@ -276,6 +280,7 @@ struct btrfs_super_block { __le64 generation; __le64 root; __le64 chunk_root; + __le64 log_root; __le64 total_bytes; __le64 bytes_used; __le64 root_dir_objectid; @@ -287,6 +292,7 @@ struct btrfs_super_block { __le32 sys_chunk_array_size; u8 root_level; u8 chunk_root_level; + u8 log_root_level; struct btrfs_dev_item dev_item; char label[BTRFS_LABEL_SIZE]; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; @@ -392,7 +398,10 @@ struct btrfs_timespec { * make a new item type */ struct btrfs_inode_item { + /* nfs style generation number */ __le64 generation; + /* transid that last touched this inode */ + __le64 transid; __le64 size; __le64 nblocks; __le64 block_group; @@ -409,8 +418,13 @@ struct btrfs_inode_item { struct btrfs_timespec otime; } __attribute__ ((__packed__)); +struct btrfs_dir_log_item { + __le64 end; +} __attribute__ ((__packed__)); + struct btrfs_dir_item { struct btrfs_disk_key location; + __le64 transid; __le16 data_len; __le16 name_len; u8 type; @@ -505,6 +519,9 @@ struct btrfs_fs_info { struct btrfs_root *tree_root; struct btrfs_root *chunk_root; struct btrfs_root *dev_root; + + /* the log root tree is a directory of all the other log roots */ + struct btrfs_root *log_root_tree; struct radix_tree_root fs_roots_radix; struct extent_io_tree free_space_cache; @@ -518,6 +535,7 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; + u64 last_trans_new_blockgroup; u64 open_ioctl_trans; unsigned long mount_opt; u64 max_extent; @@ -527,6 +545,9 @@ struct btrfs_fs_info { wait_queue_head_t transaction_throttle; wait_queue_head_t transaction_wait; wait_queue_head_t async_submit_wait; + + wait_queue_head_t tree_log_wait; + struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; struct block_device *__bdev; @@ -535,6 +556,7 @@ struct btrfs_fs_info { struct backing_dev_info bdi; spinlock_t hash_lock; struct mutex trans_mutex; + struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; struct mutex alloc_mutex; @@ -544,8 +566,13 @@ struct btrfs_fs_info { struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; + atomic_t nr_async_submits; atomic_t nr_async_bios; + atomic_t tree_log_writers; + atomic_t tree_log_commit; + unsigned long tree_log_batch; + u64 tree_log_transid; /* * this is used by the balancing code to wait for all the pending @@ -583,6 +610,7 @@ struct btrfs_fs_info { struct completion kobj_unregister; int do_barriers; int closing; + int log_root_recovering; atomic_t throttles; atomic_t throttle_gen; @@ -596,6 +624,7 @@ struct btrfs_fs_info { u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; + u64 last_log_alloc; spinlock_t ref_cache_lock; u64 total_ref_cache_size; @@ -632,6 +661,7 @@ struct btrfs_root { struct btrfs_leaf_ref_tree *ref_tree; struct btrfs_leaf_ref_tree ref_tree_struct; struct btrfs_dirty_root *dirty_root; + struct btrfs_root *log_root; struct btrfs_root_item root_item; struct btrfs_key root_key; @@ -640,6 +670,7 @@ struct btrfs_root { struct kobject root_kobj; struct completion kobj_unregister; struct mutex objectid_mutex; + struct mutex log_mutex; u64 objectid; u64 last_trans; @@ -692,6 +723,8 @@ struct btrfs_root { * dir items are the name -> inode pointers in a directory. There is one * for every name in a directory. */ +#define BTRFS_DIR_LOG_ITEM_KEY 14 +#define BTRFS_DIR_LOG_INDEX_KEY 15 #define BTRFS_DIR_ITEM_KEY 16 #define BTRFS_DIR_INDEX_KEY 17 /* @@ -703,7 +736,8 @@ struct btrfs_root { */ #define BTRFS_CSUM_ITEM_KEY 19 -/* reserve 20-31 for other file stuff */ + +/* reserve 21-31 for other file/dir stuff */ /* * root items point to tree roots. There are typically in the root @@ -938,6 +972,7 @@ BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); /* struct btrfs_inode_item */ BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); +BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64); BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); BTRFS_SETGET_FUNCS(inode_nblocks, struct btrfs_inode_item, nblocks, 64); BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); @@ -1126,10 +1161,13 @@ static inline void btrfs_set_item_key(struct extent_buffer *eb, write_eb_member(eb, item, struct btrfs_item, key, disk_key); } +BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64); + /* struct btrfs_dir_item */ BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); +BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); static inline void btrfs_dir_item_key(struct extent_buffer *eb, struct btrfs_dir_item *item, @@ -1301,7 +1339,11 @@ BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, chunk_root, 64); BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, - chunk_root_level, 64); + chunk_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block, + log_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block, + log_root_level, 8); BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, total_bytes, 64); BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, @@ -1405,6 +1447,12 @@ static inline struct dentry *fdentry(struct file *file) { } /* extent-tree.c */ +int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path, + u64 start, u64 len); +int btrfs_update_pinned_extents(struct btrfs_root *root, + u64 bytenr, u64 num, int pin); +int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, u64 bytenr); @@ -1448,6 +1496,11 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, struct btrfs_key *ins); +int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins); int btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, @@ -1488,9 +1541,9 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, int lowest_level, int cache_only, u64 min_trans); int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_key *max_key, struct btrfs_path *path, int cache_only, u64 min_trans); - int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, @@ -1656,6 +1709,18 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, #define PageChecked PageFsMisc #endif +int btrfs_unlink_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, struct inode *inode, + const char *name, int name_len); +int btrfs_add_link(struct btrfs_trans_handle *trans, + struct inode *parent_inode, struct inode *inode, + const char *name, int name_len, int add_backref, u64 index); +int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, u64 new_size, + u32 min_type); + int btrfs_start_delalloc_inodes(struct btrfs_root *root); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); int btrfs_writepages(struct address_space *mapping, @@ -1715,6 +1780,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* file.c */ +int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end); int btrfs_check_file(struct btrfs_root *root, struct inode *inode); extern struct file_operations btrfs_file_operations; diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 125094617fe8..e4f30090d640 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -96,6 +96,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, btrfs_set_dir_item_key(leaf, dir_item, &disk_key); btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR); btrfs_set_dir_name_len(leaf, dir_item, name_len); + btrfs_set_dir_transid(leaf, dir_item, trans->transid); btrfs_set_dir_data_len(leaf, dir_item, data_len); name_ptr = (unsigned long)(dir_item + 1); data_ptr = (unsigned long)((char *)name_ptr + name_len); @@ -142,6 +143,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_type(leaf, dir_item, type); btrfs_set_dir_data_len(leaf, dir_item, 0); btrfs_set_dir_name_len(leaf, dir_item, name_len); + btrfs_set_dir_transid(leaf, dir_item, trans->transid); name_ptr = (unsigned long)(dir_item + 1); write_extent_buffer(leaf, name, name_ptr, name_len); @@ -169,6 +171,7 @@ second_insert: btrfs_set_dir_type(leaf, dir_item, type); btrfs_set_dir_data_len(leaf, dir_item, 0); btrfs_set_dir_name_len(leaf, dir_item, name_len); + btrfs_set_dir_transid(leaf, dir_item, trans->transid); name_ptr = (unsigned long)(dir_item + 1); write_extent_buffer(leaf, name, name_ptr, name_len); btrfs_mark_buffer_dirty(leaf); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e7a938bfbc7..a4373db5967a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -41,6 +41,7 @@ #include "async-thread.h" #include "locking.h" #include "ref-cache.h" +#include "tree-log.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -694,6 +695,18 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, } +int btrfs_write_tree_block(struct extent_buffer *buf) +{ + return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, + buf->start + buf->len - 1, WB_SYNC_NONE); +} + +int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) +{ + return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, + buf->start, buf->start + buf->len -1); +} + struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid) { @@ -732,15 +745,6 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } -int wait_on_tree_block_writeback(struct btrfs_root *root, - struct extent_buffer *buf) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree, - buf); - return 0; -} - static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, u32 stripesize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, @@ -771,6 +775,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->node_lock); spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); + mutex_init(&root->log_mutex); btrfs_leaf_ref_tree_init(&root->ref_tree_struct); root->ref_tree = &root->ref_tree_struct; @@ -809,11 +814,74 @@ static int find_and_setup_root(struct btrfs_root *tree_root, return 0; } -struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) +int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct extent_buffer *eb; + int ret; + + if (!fs_info->log_root_tree) + return 0; + + eb = fs_info->log_root_tree->node; + + WARN_ON(btrfs_header_level(eb) != 0); + WARN_ON(btrfs_header_nritems(eb) != 0); + + ret = btrfs_free_extent(trans, fs_info->tree_root, + eb->start, eb->len, + BTRFS_TREE_LOG_OBJECTID, 0, 0, 0, 1); + BUG_ON(ret); + + free_extent_buffer(eb); + kfree(fs_info->log_root_tree); + fs_info->log_root_tree = NULL; + return 0; +} + +int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; + + root = kzalloc(sizeof(*root), GFP_NOFS); + if (!root) + return -ENOMEM; + + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, BTRFS_TREE_LOG_OBJECTID); + + root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; + root->root_key.type = BTRFS_ROOT_ITEM_KEY; + root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; + root->ref_cows = 0; + + root->node = btrfs_alloc_free_block(trans, root, root->leafsize, + BTRFS_TREE_LOG_OBJECTID, + 0, 0, 0, 0, 0); + + btrfs_set_header_nritems(root->node, 0); + btrfs_set_header_level(root->node, 0); + btrfs_set_header_bytenr(root->node, root->node->start); + btrfs_set_header_generation(root->node, trans->transid); + btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID); + + write_extent_buffer(root->node, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(root->node), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(root->node); + btrfs_tree_unlock(root->node); + fs_info->log_root_tree = root; + return 0; +} + +struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, + struct btrfs_key *location) +{ + struct btrfs_root *root; + struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_path *path; struct extent_buffer *l; u64 highest_inode; @@ -863,11 +931,13 @@ out: blocksize, 0); BUG_ON(!root->node); insert: - root->ref_cows = 1; - ret = btrfs_find_highest_inode(root, &highest_inode); - if (ret == 0) { - root->highest_inode = highest_inode; - root->last_inode_alloc = highest_inode; + if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { + root->ref_cows = 1; + ret = btrfs_find_highest_inode(root, &highest_inode); + if (ret == 0) { + root->highest_inode = highest_inode; + root->last_inode_alloc = highest_inode; + } } return root; } @@ -907,7 +977,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, if (root) return root; - root = btrfs_read_fs_root_no_radix(fs_info, location); + root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); if (IS_ERR(root)) return root; ret = radix_tree_insert(&fs_info->fs_roots_radix, @@ -1250,16 +1320,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 blocksize; u32 stripesize; struct buffer_head *bh; - struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), GFP_NOFS); - struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + struct btrfs_root *log_tree_root; + int ret; int err = -EINVAL; @@ -1343,6 +1415,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); mutex_init(&fs_info->trans_mutex); + mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->alloc_mutex); mutex_init(&fs_info->chunk_mutex); @@ -1352,6 +1425,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->async_submit_wait); + init_waitqueue_head(&fs_info->tree_log_wait); + atomic_set(&fs_info->tree_log_commit, 0); + atomic_set(&fs_info->tree_log_writers, 0); + fs_info->tree_log_transid = 0; #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1532,7 +1609,26 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!fs_info->transaction_kthread) goto fail_cleaner; + if (btrfs_super_log_root(disk_super) != 0) { + u32 blocksize; + u64 bytenr = btrfs_super_log_root(disk_super); + blocksize = + btrfs_level_size(tree_root, + btrfs_super_log_root_level(disk_super)); + + log_tree_root = kzalloc(sizeof(struct btrfs_root), + GFP_NOFS); + + __setup_root(nodesize, leafsize, sectorsize, stripesize, + log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); + + log_tree_root->node = read_tree_block(tree_root, bytenr, + blocksize, 0); + ret = btrfs_recover_log_trees(log_tree_root); + BUG_ON(ret); + } + fs_info->last_trans_committed = btrfs_super_generation(disk_super); return tree_root; fail_cleaner: diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 2562a273ae18..6b6fdc697f31 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -45,7 +45,7 @@ struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); -struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, +struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, struct btrfs_key *location); struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, struct btrfs_key *location); @@ -74,4 +74,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, extent_submit_bio_hook_t *submit_bio_hook); int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); +int btrfs_write_tree_block(struct extent_buffer *buf); +int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); +int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info); +int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e63b3b4bed7c..646b9148ca21 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -496,6 +496,23 @@ static int match_extent_ref(struct extent_buffer *leaf, return ret == 0; } +/* simple helper to search for an existing extent at a given offset */ +int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path, + u64 start, u64 len) +{ + int ret; + struct btrfs_key key; + + maybe_lock_mutex(root); + key.objectid = start; + key.offset = len; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, + 0, 0); + maybe_unlock_mutex(root); + return ret; +} + static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, @@ -1409,7 +1426,7 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) } -static int update_pinned_extents(struct btrfs_root *root, +int btrfs_update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin) { u64 len; @@ -1492,7 +1509,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, EXTENT_DIRTY); if (ret) break; - update_pinned_extents(root, start, end + 1 - start, 0); + btrfs_update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); set_extent_dirty(free_space_cache, start, end, GFP_NOFS); if (need_resched()) { @@ -1538,14 +1555,11 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, GFP_NOFS); - eb = btrfs_find_tree_block(extent_root, ins.objectid, + eb = btrfs_find_create_tree_block(extent_root, ins.objectid, ins.offset); - if (!btrfs_buffer_uptodate(eb, trans->transid)) { - mutex_unlock(&extent_root->fs_info->alloc_mutex); + if (!btrfs_buffer_uptodate(eb, trans->transid)) btrfs_read_buffer(eb, trans->transid); - mutex_lock(&extent_root->fs_info->alloc_mutex); - } btrfs_tree_lock(eb); level = btrfs_header_level(eb); @@ -1585,13 +1599,20 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, struct extent_buffer *buf; buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { + /* we can reuse a block if it hasn't been written + * and it is from this transaction. We can't + * reuse anything from the tree log root because + * it has tiny sub-transactions. + */ if (btrfs_buffer_uptodate(buf, 0) && btrfs_try_tree_lock(buf)) { u64 transid = root->fs_info->running_transaction->transid; u64 header_transid = btrfs_header_generation(buf); - if (header_transid == transid && + if (btrfs_header_owner(buf) != + BTRFS_TREE_LOG_OBJECTID && + header_transid == transid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { clean_tree_block(NULL, root, buf); @@ -1603,7 +1624,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, } free_extent_buffer(buf); } - update_pinned_extents(root, bytenr, num_bytes, 1); + btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); } else { set_extent_bits(&root->fs_info->pending_del, bytenr, bytenr + num_bytes - 1, @@ -1801,7 +1822,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct GFP_NOFS); if (!test_range_bit(&extent_root->fs_info->extent_ins, start, end, EXTENT_LOCKED, 0)) { - update_pinned_extents(extent_root, start, + btrfs_update_pinned_extents(extent_root, start, end + 1 - start, 1); ret = __free_extent(trans, extent_root, start, end + 1 - start, @@ -1919,6 +1940,12 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { last_ptr = &root->fs_info->last_data_alloc; } + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + last_ptr = &root->fs_info->last_log_alloc; + if (!last_ptr == 0 && root->fs_info->last_alloc) { + *last_ptr = root->fs_info->last_alloc + empty_cluster; + } + } if (last_ptr) { if (*last_ptr) @@ -2268,6 +2295,35 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, maybe_unlock_mutex(root); return ret; } + +/* + * this is used by the tree logging recovery code. It records that + * an extent has been allocated and makes sure to clear the free + * space cache bits as well + */ +int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset, + struct btrfs_key *ins) +{ + int ret; + struct btrfs_block_group_cache *block_group; + + maybe_lock_mutex(root); + block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); + cache_block_group(root, block_group); + + clear_extent_dirty(&root->fs_info->free_space_cache, + ins->objectid, ins->objectid + ins->offset - 1, + GFP_NOFS); + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, + ref_generation, owner, + owner_offset, ins); + maybe_unlock_mutex(root); + return ret; +} + /* * finds a free extent and does all the dirty work required for allocation * returns the key for the extent through ins, and a tree buffer for @@ -2350,9 +2406,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } -static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *leaf) +int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *leaf) { u64 leaf_owner; u64 leaf_generation; @@ -2402,9 +2457,9 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, return 0; } -static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_leaf_ref *ref) +static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_leaf_ref *ref) { int i; int ret; @@ -2512,7 +2567,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, btrfs_header_nritems(cur)) break; if (*level == 0) { - ret = drop_leaf_ref_no_cache(trans, root, cur); + ret = btrfs_drop_leaf_ref(trans, root, cur); BUG_ON(ret); break; } @@ -2552,7 +2607,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); ref = btrfs_lookup_leaf_ref(root, bytenr); if (ref) { - ret = drop_leaf_ref(trans, root, ref); + ret = cache_drop_leaf_ref(trans, root, ref); BUG_ON(ret); btrfs_remove_leaf_ref(root, ref); btrfs_free_leaf_ref(root, ref); @@ -3628,6 +3683,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, extent_root = root->fs_info->extent_root; block_group_cache = &root->fs_info->block_group_cache; + root->fs_info->last_trans_new_blockgroup = trans->transid; + cache = kzalloc(sizeof(*cache), GFP_NOFS); BUG_ON(!cache); cache->key.objectid = chunk_offset; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e9e86fbaa243..84ecf3ab8511 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -36,6 +36,8 @@ #include "btrfs_inode.h" #include "ioctl.h" #include "print-tree.h" +#include "tree-log.h" +#include "locking.h" #include "compat.h" @@ -988,10 +990,27 @@ out_nolock: *ppos = pos; if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { - err = sync_page_range(inode, inode->i_mapping, - start_pos, num_written); + struct btrfs_trans_handle *trans; + + err = btrfs_fdatawrite_range(inode->i_mapping, start_pos, + start_pos + num_written -1, + WB_SYNC_NONE); if (err < 0) num_written = err; + + err = btrfs_wait_on_page_writeback_range(inode->i_mapping, + start_pos, start_pos + num_written - 1); + if (err < 0) + num_written = err; + + trans = btrfs_start_transaction(root, 1); + ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); + if (ret == 0) { + btrfs_sync_log(trans, root); + btrfs_end_transaction(trans, root); + } else { + btrfs_commit_transaction(trans, root); + } } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) do_sync_file_range(file, start_pos, @@ -1019,8 +1038,7 @@ int btrfs_release_file(struct inode * inode, struct file * filp) return 0; } -static int btrfs_sync_file(struct file *file, - struct dentry *dentry, int datasync) +int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1043,6 +1061,8 @@ static int btrfs_sync_file(struct file *file, } mutex_unlock(&root->fs_info->trans_mutex); + filemap_fdatawait(inode->i_mapping); + /* * ok we haven't committed the transaction yet, lets do a commit */ @@ -1054,7 +1074,16 @@ static int btrfs_sync_file(struct file *file, ret = -ENOMEM; goto out; } - ret = btrfs_commit_transaction(trans, root); + + ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); + if (ret < 0) + goto out; + if (ret > 0) { + ret = btrfs_commit_transaction(trans, root); + } else { + btrfs_sync_log(trans, root); + ret = btrfs_end_transaction(trans, root); + } out: return ret > 0 ? EIO : ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 43d3f2649ca3..65df9d830230 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -46,6 +46,8 @@ #include "volumes.h" #include "ordered-data.h" #include "xattr.h" +#include "compat.h" +#include "tree-log.h" struct btrfs_iget_args { u64 ino; @@ -586,6 +588,7 @@ nocow: &ordered_extent->list); btrfs_ordered_update_i_size(inode, ordered_extent); + btrfs_update_inode(trans, root, inode); btrfs_remove_ordered_extent(inode, ordered_extent); /* once for us */ @@ -593,7 +596,6 @@ nocow: /* once for the tree */ btrfs_put_ordered_extent(ordered_extent); - btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); return 0; } @@ -1007,7 +1009,8 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item); - inode->i_generation = btrfs_inode_generation(leaf, inode_item); + BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); + inode->i_generation = BTRFS_I(inode)->generation; inode->i_rdev = 0; rdev = btrfs_inode_rdev(leaf, inode_item); @@ -1056,7 +1059,8 @@ make_bad: make_bad_inode(inode); } -static void fill_inode_item(struct extent_buffer *leaf, +static void fill_inode_item(struct btrfs_trans_handle *trans, + struct extent_buffer *leaf, struct btrfs_inode_item *item, struct inode *inode) { @@ -1082,7 +1086,8 @@ static void fill_inode_item(struct extent_buffer *leaf, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(leaf, item, inode->i_blocks); - btrfs_set_inode_generation(leaf, item, inode->i_generation); + btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); + btrfs_set_inode_transid(leaf, item, trans->transid); btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); btrfs_set_inode_block_group(leaf, item, @@ -1112,7 +1117,7 @@ int noinline btrfs_update_inode(struct btrfs_trans_handle *trans, inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - fill_inode_item(leaf, inode_item, inode); + fill_inode_item(trans, leaf, inode_item, inode); btrfs_mark_buffer_dirty(leaf); btrfs_set_inode_last_trans(trans, inode); ret = 0; @@ -1122,14 +1127,12 @@ failed: } -static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *dir, - struct dentry *dentry) +int btrfs_unlink_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, struct inode *inode, + const char *name, int name_len) { struct btrfs_path *path; - const char *name = dentry->d_name.name; - int name_len = dentry->d_name.len; int ret = 0; struct extent_buffer *leaf; struct btrfs_dir_item *di; @@ -1160,13 +1163,12 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, btrfs_release_path(root, path); ret = btrfs_del_inode_ref(trans, root, name, name_len, - dentry->d_inode->i_ino, - dentry->d_parent->d_inode->i_ino, &index); + inode->i_ino, + dir->i_ino, &index); if (ret) { printk("failed to delete reference to %.*s, " "inode %lu parent %lu\n", name_len, name, - dentry->d_inode->i_ino, - dentry->d_parent->d_inode->i_ino); + inode->i_ino, dir->i_ino); goto err; } @@ -1183,21 +1185,25 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, ret = btrfs_delete_one_dir_name(trans, root, path, di); btrfs_release_path(root, path); - dentry->d_inode->i_ctime = dir->i_ctime; + ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, + inode, dir->i_ino); + BUG_ON(ret); + + ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, + dir, index); + BUG_ON(ret); err: btrfs_free_path(path); - if (!ret) { - btrfs_i_size_write(dir, dir->i_size - name_len * 2); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - btrfs_update_inode(trans, root, dir); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - dentry->d_inode->i_nlink--; -#else - drop_nlink(dentry->d_inode); -#endif - ret = btrfs_update_inode(trans, root, dentry->d_inode); - dir->i_sb->s_dirt = 1; - } + if (ret) + goto out; + + btrfs_i_size_write(dir, dir->i_size - name_len * 2); + inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; + btrfs_update_inode(trans, root, dir); + btrfs_drop_nlink(inode); + ret = btrfs_update_inode(trans, root, inode); + dir->i_sb->s_dirt = 1; +out: return ret; } @@ -1218,7 +1224,8 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); - ret = btrfs_unlink_trans(trans, root, dir, dentry); + ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, + dentry->d_name.name, dentry->d_name.len); if (inode->i_nlink == 0) ret = btrfs_orphan_add(trans, inode); @@ -1256,7 +1263,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto fail_trans; /* now the directory is empty */ - err = btrfs_unlink_trans(trans, root, dir, dentry); + err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, + dentry->d_name.name, dentry->d_name.len); if (!err) { btrfs_i_size_write(inode, 0); } @@ -1283,10 +1291,10 @@ fail: * min_type is the minimum key type to truncate down to. If set to 0, this * will kill all the items on this inode, including the INODE_ITEM_KEY. */ -static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u32 min_type) +noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 new_size, u32 min_type) { int ret; struct btrfs_path *path; @@ -1307,7 +1315,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int extent_type = -1; u64 mask = root->sectorsize - 1; - btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); + if (root->ref_cows) + btrfs_drop_extent_cache(inode, + new_size & (~mask), (u64)-1); path = btrfs_alloc_path(); path->reada = -1; BUG_ON(!path); @@ -1324,7 +1334,13 @@ search_again: goto error; } if (ret > 0) { - BUG_ON(path->slots[0] == 0); + /* there are no items in the tree for us to truncate, we're + * done + */ + if (path->slots[0] == 0) { + ret = 0; + goto error; + } path->slots[0]--; } @@ -1358,10 +1374,10 @@ search_again: } if (found_type == BTRFS_CSUM_ITEM_KEY) { ret = btrfs_csum_truncate(trans, root, path, - inode->i_size); + new_size); BUG_ON(ret); } - if (item_end < inode->i_size) { + if (item_end < new_size) { if (found_type == BTRFS_DIR_ITEM_KEY) { found_type = BTRFS_INODE_ITEM_KEY; } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { @@ -1378,7 +1394,7 @@ search_again: btrfs_set_key_type(&key, found_type); goto next; } - if (found_key.offset >= inode->i_size) + if (found_key.offset >= new_size) del_item = 1; else del_item = 0; @@ -1394,7 +1410,7 @@ search_again: if (!del_item) { u64 orig_num_bytes = btrfs_file_extent_num_bytes(leaf, fi); - extent_num_bytes = inode->i_size - + extent_num_bytes = new_size - found_key.offset + root->sectorsize - 1; extent_num_bytes = extent_num_bytes & ~((u64)root->sectorsize - 1); @@ -1402,7 +1418,7 @@ search_again: extent_num_bytes); num_dec = (orig_num_bytes - extent_num_bytes); - if (extent_start != 0) + if (root->ref_cows && extent_start != 0) dec_i_blocks(inode, num_dec); btrfs_mark_buffer_dirty(leaf); } else { @@ -1413,22 +1429,29 @@ search_again: num_dec = btrfs_file_extent_num_bytes(leaf, fi); if (extent_start != 0) { found_extent = 1; - dec_i_blocks(inode, num_dec); + if (root->ref_cows) + dec_i_blocks(inode, num_dec); + } + if (root->ref_cows) { + root_gen = + btrfs_header_generation(leaf); } - root_gen = btrfs_header_generation(leaf); root_owner = btrfs_header_owner(leaf); } } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { if (!del_item) { - u32 newsize = inode->i_size - found_key.offset; - dec_i_blocks(inode, item_end + 1 - - found_key.offset - newsize); - newsize = - btrfs_file_extent_calc_inline_size(newsize); + u32 size = new_size - found_key.offset; + + if (root->ref_cows) { + dec_i_blocks(inode, item_end + 1 - + found_key.offset - size); + } + size = + btrfs_file_extent_calc_inline_size(size); ret = btrfs_truncate_item(trans, root, path, - newsize, 1); + size, 1); BUG_ON(ret); - } else { + } else if (root->ref_cows) { dec_i_blocks(inode, item_end + 1 - found_key.offset); } @@ -1666,7 +1689,7 @@ void btrfs_delete_inode(struct inode *inode) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); - ret = btrfs_truncate_in_trans(trans, root, inode, 0); + ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); if (ret) { btrfs_orphan_del(NULL, inode); goto no_delete_lock; @@ -1753,15 +1776,20 @@ static int fixup_tree_root_location(struct btrfs_root *root, return 0; } -static int btrfs_init_locked_inode(struct inode *inode, void *p) +static noinline void init_btrfs_i(struct inode *inode) { - struct btrfs_iget_args *args = p; - inode->i_ino = args->ino; - BTRFS_I(inode)->root = args->root; - BTRFS_I(inode)->delalloc_bytes = 0; - inode->i_mapping->writeback_index = 0; - BTRFS_I(inode)->disk_i_size = 0; - BTRFS_I(inode)->index_cnt = (u64)-1; + struct btrfs_inode *bi = BTRFS_I(inode); + + bi->i_acl = NULL; + bi->i_default_acl = NULL; + + bi->generation = 0; + bi->last_trans = 0; + bi->logged_trans = 0; + bi->delalloc_bytes = 0; + bi->disk_i_size = 0; + bi->flags = 0; + bi->index_cnt = (u64)-1; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); @@ -1771,6 +1799,15 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); mutex_init(&BTRFS_I(inode)->csum_mutex); mutex_init(&BTRFS_I(inode)->extent_mutex); + mutex_init(&BTRFS_I(inode)->log_mutex); +} + +static int btrfs_init_locked_inode(struct inode *inode, void *p) +{ + struct btrfs_iget_args *args = p; + inode->i_ino = args->ino; + init_btrfs_i(inode); + BTRFS_I(inode)->root = args->root; return 0; } @@ -2263,21 +2300,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, * btrfs_get_inode_index_count has an explanation for the magic * number */ + init_btrfs_i(inode); BTRFS_I(inode)->index_cnt = 2; - - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_tree, - inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, - inode->i_mapping, GFP_NOFS); - btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); - INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); - mutex_init(&BTRFS_I(inode)->csum_mutex); - mutex_init(&BTRFS_I(inode)->extent_mutex); - BTRFS_I(inode)->delalloc_bytes = 0; - inode->i_mapping->writeback_index = 0; - BTRFS_I(inode)->disk_i_size = 0; BTRFS_I(inode)->root = root; + BTRFS_I(inode)->generation = trans->transid; if (mode & S_IFDIR) owner = 0; @@ -2290,7 +2316,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, new_inode_group = group; } BTRFS_I(inode)->block_group = new_inode_group; - BTRFS_I(inode)->flags = 0; key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -2318,7 +2343,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); - fill_inode_item(path->nodes[0], inode_item, inode); + fill_inode_item(trans, path->nodes[0], inode_item, inode); ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, struct btrfs_inode_ref); @@ -2349,38 +2374,34 @@ static inline u8 btrfs_inode_type(struct inode *inode) return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; } -static int btrfs_add_link(struct btrfs_trans_handle *trans, - struct dentry *dentry, struct inode *inode, - int add_backref, u64 index) +int btrfs_add_link(struct btrfs_trans_handle *trans, + struct inode *parent_inode, struct inode *inode, + const char *name, int name_len, int add_backref, u64 index) { int ret; struct btrfs_key key; - struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; - struct inode *parent_inode = dentry->d_parent->d_inode; + struct btrfs_root *root = BTRFS_I(parent_inode)->root; key.objectid = inode->i_ino; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - ret = btrfs_insert_dir_item(trans, root, - dentry->d_name.name, dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, + ret = btrfs_insert_dir_item(trans, root, name, name_len, + parent_inode->i_ino, &key, btrfs_inode_type(inode), index); if (ret == 0) { if (add_backref) { ret = btrfs_insert_inode_ref(trans, root, - dentry->d_name.name, - dentry->d_name.len, - inode->i_ino, - parent_inode->i_ino, - index); + name, name_len, + inode->i_ino, + parent_inode->i_ino, + index); } btrfs_i_size_write(parent_inode, parent_inode->i_size + - dentry->d_name.len * 2); + name_len * 2); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; - ret = btrfs_update_inode(trans, root, - dentry->d_parent->d_inode); + ret = btrfs_update_inode(trans, root, parent_inode); } return ret; } @@ -2389,7 +2410,9 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, struct dentry *dentry, struct inode *inode, int backref, u64 index) { - int err = btrfs_add_link(trans, dentry, inode, backref, index); + int err = btrfs_add_link(trans, dentry->d_parent->d_inode, + inode, dentry->d_name.name, + dentry->d_name.len, backref, index); if (!err) { d_instantiate(dentry, inode); return 0; @@ -2513,19 +2536,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping->backing_dev_info = &root->fs_info->bdi; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_tree, - inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, - inode->i_mapping, GFP_NOFS); - INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); - mutex_init(&BTRFS_I(inode)->csum_mutex); - mutex_init(&BTRFS_I(inode)->extent_mutex); - BTRFS_I(inode)->delalloc_bytes = 0; - BTRFS_I(inode)->disk_i_size = 0; - inode->i_mapping->writeback_index = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; - btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -2556,11 +2567,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) return -ENOENT; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - inode->i_nlink++; -#else - inc_nlink(inode); -#endif + btrfs_inc_nlink(inode); err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; @@ -2650,7 +2657,9 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; - err = btrfs_add_link(trans, dentry, inode, 0, index); + err = btrfs_add_link(trans, dentry->d_parent->d_inode, + inode, dentry->d_name.name, + dentry->d_name.len, 0, index); if (err) goto out_fail; @@ -3221,7 +3230,7 @@ static void btrfs_truncate(struct inode *inode) if (ret) goto out; /* FIXME, add redo link to tree so we don't leak on crash */ - ret = btrfs_truncate_in_trans(trans, root, inode, + ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, BTRFS_EXTENT_DATA_KEY); btrfs_update_inode(trans, root, inode); @@ -3304,6 +3313,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->last_trans = 0; + ei->logged_trans = 0; btrfs_ordered_inode_tree_init(&ei->ordered_tree); ei->i_acl = BTRFS_ACL_NOT_CACHED; ei->i_default_acl = BTRFS_ACL_NOT_CACHED; @@ -3463,31 +3473,39 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, btrfs_set_trans_block_group(trans, new_dir); - old_dentry->d_inode->i_nlink++; + btrfs_inc_nlink(old_dentry->d_inode); old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; - ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry); + ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, + old_dentry->d_name.name, + old_dentry->d_name.len); if (ret) goto out_fail; if (new_inode) { new_inode->i_ctime = CURRENT_TIME; - ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry); + ret = btrfs_unlink_inode(trans, root, new_dir, + new_dentry->d_inode, + new_dentry->d_name.name, + new_dentry->d_name.len); if (ret) goto out_fail; if (new_inode->i_nlink == 0) { - ret = btrfs_orphan_add(trans, new_inode); + ret = btrfs_orphan_add(trans, new_dentry->d_inode); if (ret) goto out_fail; } + } ret = btrfs_set_inode_index(new_dir, old_inode, &index); if (ret) goto out_fail; - ret = btrfs_add_link(trans, new_dentry, old_inode, 1, index); + ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode, + old_inode, new_dentry->d_name.name, + new_dentry->d_name.len, 1, index); if (ret) goto out_fail; @@ -3577,19 +3595,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping->backing_dev_info = &root->fs_info->bdi; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_tree, - inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, - inode->i_mapping, GFP_NOFS); - INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); - mutex_init(&BTRFS_I(inode)->csum_mutex); - mutex_init(&BTRFS_I(inode)->extent_mutex); - BTRFS_I(inode)->delalloc_bytes = 0; - BTRFS_I(inode)->disk_i_size = 0; - inode->i_mapping->writeback_index = 0; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; - btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -3691,6 +3697,7 @@ static struct file_operations btrfs_dir_file_operations = { .compat_ioctl = btrfs_ioctl, #endif .release = btrfs_release_file, + .fsync = btrfs_sync_file, }; static struct extent_io_ops btrfs_extent_io_ops = { diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 36726696e58b..e3984f902e71 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -202,8 +202,9 @@ again: memcpy(&found_key, &key, sizeof(key)); key.offset++; btrfs_release_path(root, path); - dead_root = btrfs_read_fs_root_no_radix(root->fs_info, - &found_key); + dead_root = + btrfs_read_fs_root_no_radix(root->fs_info->tree_root, + &found_key); if (IS_ERR(dead_root)) { ret = PTR_ERR(dead_root); goto err; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index eff3ad72991b..49c4f5b40ed6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -25,6 +25,7 @@ #include "transaction.h" #include "locking.h" #include "ref-cache.h" +#include "tree-log.h" static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; @@ -57,6 +58,7 @@ static noinline int join_transaction(struct btrfs_root *root) root->fs_info->generation++; root->fs_info->last_alloc = 0; root->fs_info->last_data_alloc = 0; + root->fs_info->last_log_alloc = 0; cur_trans->num_writers = 1; cur_trans->num_joined = 0; cur_trans->transid = root->fs_info->generation; @@ -83,7 +85,7 @@ static noinline int join_transaction(struct btrfs_root *root) return 0; } -static noinline int record_root_in_trans(struct btrfs_root *root) +noinline int btrfs_record_root_in_trans(struct btrfs_root *root) { struct btrfs_dirty_root *dirty; u64 running_trans_id = root->fs_info->running_transaction->transid; @@ -151,7 +153,7 @@ static void wait_current_trans(struct btrfs_root *root) } } -struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, +static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, int num_blocks, int wait) { struct btrfs_trans_handle *h = @@ -164,7 +166,7 @@ struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, ret = join_transaction(root); BUG_ON(ret); - record_root_in_trans(root); + btrfs_record_root_in_trans(root); h->transid = root->fs_info->running_transaction->transid; h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; @@ -456,6 +458,8 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, BUG_ON(!root->ref_tree); dirty = root->dirty_root; + btrfs_free_log(trans, root); + if (root->commit_root == root->node) { WARN_ON(root->node->start != btrfs_root_bytenr(&root->root_item)); @@ -600,7 +604,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, num_bytes -= btrfs_root_used(&dirty->root->root_item); bytes_used = btrfs_root_used(&root->root_item); if (num_bytes) { - record_root_in_trans(root); + btrfs_record_root_in_trans(root); btrfs_set_root_used(&root->root_item, bytes_used - num_bytes); } @@ -745,7 +749,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, int ret; INIT_LIST_HEAD(&dirty_fs_roots); - mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { cur_trans = trans->transaction; @@ -821,10 +824,30 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, WARN_ON(cur_trans != trans->transaction); + /* btrfs_commit_tree_roots is responsible for getting the + * various roots consistent with each other. Every pointer + * in the tree of tree roots has to point to the most up to date + * root for every subvolume and other tree. So, we have to keep + * the tree logging code from jumping in and changing any + * of the trees. + * + * At this point in the commit, there can't be any tree-log + * writers, but a little lower down we drop the trans mutex + * and let new people in. By holding the tree_log_mutex + * from now until after the super is written, we avoid races + * with the tree-log code. + */ + mutex_lock(&root->fs_info->tree_log_mutex); + ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); BUG_ON(ret); + /* add_dirty_roots gets rid of all the tree log roots, it is now + * safe to free the root of tree log roots + */ + btrfs_free_log_root_tree(trans, root->fs_info); + ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); @@ -843,6 +866,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, chunk_root->node->start); btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, btrfs_header_level(chunk_root->node)); + + if (!root->fs_info->log_root_recovering) { + btrfs_set_super_log_root(&root->fs_info->super_copy, 0); + btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0); + } + memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, sizeof(root->fs_info->super_copy)); @@ -857,6 +886,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, BUG_ON(ret); write_ctree_super(trans, root); + /* + * the super is written, we can safely allow the tree-loggers + * to go about their business + */ + mutex_unlock(&root->fs_info->tree_log_mutex); + btrfs_finish_extent_commit(trans, root, pinned_copy); mutex_lock(&root->fs_info->trans_mutex); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 598baa312417..cc63650d60d6 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -98,4 +98,5 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root); void btrfs_throttle(struct btrfs_root *root); +int btrfs_record_root_in_trans(struct btrfs_root *root); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index cc2650b06952..b3bb5bbad76e 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -81,12 +81,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, memcpy(&key, &root->defrag_progress, sizeof(key)); } - path->lowest_level = 1; path->keep_locks = 1; if (cache_only) min_trans = root->defrag_trans_start; - ret = btrfs_search_forward(root, &key, path, cache_only, min_trans); + ret = btrfs_search_forward(root, &key, NULL, path, + cache_only, min_trans); if (ret < 0) goto out; if (ret > 0) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c new file mode 100644 index 000000000000..d1ce8314b948 --- /dev/null +++ b/fs/btrfs/tree-log.c @@ -0,0 +1,2804 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include "ctree.h" +#include "transaction.h" +#include "disk-io.h" +#include "locking.h" +#include "print-tree.h" +#include "compat.h" + +/* magic values for the inode_only field in btrfs_log_inode: + * + * LOG_INODE_ALL means to log everything + * LOG_INODE_EXISTS means to log just enough to recreate the inode + * during log replay + */ +#define LOG_INODE_ALL 0 +#define LOG_INODE_EXISTS 1 + +/* + * stages for the tree walking. The first + * stage (0) is to only pin down the blocks we find + * the second stage (1) is to make sure that all the inodes + * we find in the log are created in the subvolume. + * + * The last stage is to deal with directories and links and extents + * and all the other fun semantics + */ +#define LOG_WALK_PIN_ONLY 0 +#define LOG_WALK_REPLAY_INODES 1 +#define LOG_WALK_REPLAY_ALL 2 + +static int __btrfs_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + int inode_only); + +/* + * tree logging is a special write ahead log used to make sure that + * fsyncs and O_SYNCs can happen without doing full tree commits. + * + * Full tree commits are expensive because they require commonly + * modified blocks to be recowed, creating many dirty pages in the + * extent tree an 4x-6x higher write load than ext3. + * + * Instead of doing a tree commit on every fsync, we use the + * key ranges and transaction ids to find items for a given file or directory + * that have changed in this transaction. Those items are copied into + * a special tree (one per subvolume root), that tree is written to disk + * and then the fsync is considered complete. + * + * After a crash, items are copied out of the log-tree back into the + * subvolume tree. Any file data extents found are recorded in the extent + * allocation tree, and the log-tree freed. + * + * The log tree is read three times, once to pin down all the extents it is + * using in ram and once, once to create all the inodes logged in the tree + * and once to do all the other items. + */ + +/* + * btrfs_add_log_tree adds a new per-subvolume log tree into the + * tree of log tree roots. This must be called with a tree log transaction + * running (see start_log_trans). + */ +int btrfs_add_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_key key; + struct btrfs_root_item root_item; + struct btrfs_inode_item *inode_item; + struct extent_buffer *leaf; + struct btrfs_root *new_root = root; + int ret; + u64 objectid = root->root_key.objectid; + + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + BTRFS_TREE_LOG_OBJECTID, + 0, 0, 0, 0, 0); + if (IS_ERR(leaf)) { + ret = PTR_ERR(leaf); + return ret; + } + + btrfs_set_header_nritems(leaf, 0); + btrfs_set_header_level(leaf, 0); + btrfs_set_header_bytenr(leaf, leaf->start); + btrfs_set_header_generation(leaf, trans->transid); + btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); + + write_extent_buffer(leaf, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(leaf), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(leaf); + + inode_item = &root_item.inode; + memset(inode_item, 0, sizeof(*inode_item)); + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nblocks = cpu_to_le64(1); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + + btrfs_set_root_bytenr(&root_item, leaf->start); + btrfs_set_root_level(&root_item, 0); + btrfs_set_root_refs(&root_item, 0); + btrfs_set_root_used(&root_item, 0); + + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); + root_item.drop_level = 0; + + btrfs_tree_unlock(leaf); + free_extent_buffer(leaf); + leaf = NULL; + + btrfs_set_root_dirid(&root_item, 0); + + key.objectid = BTRFS_TREE_LOG_OBJECTID; + key.offset = objectid; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key, + &root_item); + if (ret) + goto fail; + + new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree, + &key); + BUG_ON(!new_root); + + WARN_ON(root->log_root); + root->log_root = new_root; + + /* + * log trees do not get reference counted because they go away + * before a real commit is actually done. They do store pointers + * to file data extents, and those reference counts still get + * updated (along with back refs to the log tree). + */ + new_root->ref_cows = 0; + new_root->last_trans = trans->transid; +fail: + return ret; +} + +/* + * start a sub transaction and setup the log tree + * this increments the log tree writer count to make the people + * syncing the tree wait for us to finish + */ +static int start_log_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + mutex_lock(&root->fs_info->tree_log_mutex); + if (!root->fs_info->log_root_tree) { + ret = btrfs_init_log_root_tree(trans, root->fs_info); + BUG_ON(ret); + } + if (!root->log_root) { + ret = btrfs_add_log_tree(trans, root); + BUG_ON(ret); + } + atomic_inc(&root->fs_info->tree_log_writers); + root->fs_info->tree_log_batch++; + mutex_unlock(&root->fs_info->tree_log_mutex); + return 0; +} + +/* + * returns 0 if there was a log transaction running and we were able + * to join, or returns -ENOENT if there were not transactions + * in progress + */ +static int join_running_log_trans(struct btrfs_root *root) +{ + int ret = -ENOENT; + + smp_mb(); + if (!root->log_root) + return -ENOENT; + + mutex_lock(&root->fs_info->tree_log_mutex); + if (root->log_root) { + ret = 0; + atomic_inc(&root->fs_info->tree_log_writers); + root->fs_info->tree_log_batch++; + } + mutex_unlock(&root->fs_info->tree_log_mutex); + return ret; +} + +/* + * indicate we're done making changes to the log tree + * and wake up anyone waiting to do a sync + */ +static int end_log_trans(struct btrfs_root *root) +{ + atomic_dec(&root->fs_info->tree_log_writers); + smp_mb(); + if (waitqueue_active(&root->fs_info->tree_log_wait)) + wake_up(&root->fs_info->tree_log_wait); + return 0; +} + + +/* + * the walk control struct is used to pass state down the chain when + * processing the log tree. The stage field tells us which part + * of the log tree processing we are currently doing. The others + * are state fields used for that specific part + */ +struct walk_control { + /* should we free the extent on disk when done? This is used + * at transaction commit time while freeing a log tree + */ + int free; + + /* should we write out the extent buffer? This is used + * while flushing the log tree to disk during a sync + */ + int write; + + /* should we wait for the extent buffer io to finish? Also used + * while flushing the log tree to disk for a sync + */ + int wait; + + /* pin only walk, we record which extents on disk belong to the + * log trees + */ + int pin; + + /* what stage of the replay code we're currently in */ + int stage; + + /* the root we are currently replaying */ + struct btrfs_root *replay_dest; + + /* the trans handle for the current replay */ + struct btrfs_trans_handle *trans; + + /* the function that gets used to process blocks we find in the + * tree. Note the extent_buffer might not be up to date when it is + * passed in, and it must be checked or read if you need the data + * inside it + */ + int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb, + struct walk_control *wc, u64 gen); +}; + +/* + * process_func used to pin down extents, write them or wait on them + */ +static int process_one_buffer(struct btrfs_root *log, + struct extent_buffer *eb, + struct walk_control *wc, u64 gen) +{ + if (wc->pin) { + mutex_lock(&log->fs_info->alloc_mutex); + btrfs_update_pinned_extents(log->fs_info->extent_root, + eb->start, eb->len, 1); + mutex_unlock(&log->fs_info->alloc_mutex); + } + + if (btrfs_buffer_uptodate(eb, gen)) { + if (wc->write) + btrfs_write_tree_block(eb); + if (wc->wait) + btrfs_wait_tree_block_writeback(eb); + } + return 0; +} + +/* + * Item overwrite used by replay and tree logging. eb, slot and key all refer + * to the src data we are copying out. + * + * root is the tree we are copying into, and path is a scratch + * path for use in this function (it should be released on entry and + * will be released on exit). + * + * If the key is already in the destination tree the existing item is + * overwritten. If the existing item isn't big enough, it is extended. + * If it is too large, it is truncated. + * + * If the key isn't in the destination yet, a new item is inserted. + */ +static noinline int overwrite_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + int ret; + u32 item_size; + u64 saved_i_size = 0; + int save_old_i_size = 0; + unsigned long src_ptr; + unsigned long dst_ptr; + int overwrite_root = 0; + + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) + overwrite_root = 1; + + item_size = btrfs_item_size_nr(eb, slot); + src_ptr = btrfs_item_ptr_offset(eb, slot); + + /* look for the key in the destination tree */ + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret == 0) { + char *src_copy; + char *dst_copy; + u32 dst_size = btrfs_item_size_nr(path->nodes[0], + path->slots[0]); + if (dst_size != item_size) + goto insert; + + if (item_size == 0) { + btrfs_release_path(root, path); + return 0; + } + dst_copy = kmalloc(item_size, GFP_NOFS); + src_copy = kmalloc(item_size, GFP_NOFS); + + read_extent_buffer(eb, src_copy, src_ptr, item_size); + + dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); + read_extent_buffer(path->nodes[0], dst_copy, dst_ptr, + item_size); + ret = memcmp(dst_copy, src_copy, item_size); + + kfree(dst_copy); + kfree(src_copy); + /* + * they have the same contents, just return, this saves + * us from cowing blocks in the destination tree and doing + * extra writes that may not have been done by a previous + * sync + */ + if (ret == 0) { + btrfs_release_path(root, path); + return 0; + } + + } +insert: + btrfs_release_path(root, path); + /* try to insert the key into the destination tree */ + ret = btrfs_insert_empty_item(trans, root, path, + key, item_size); + + /* make sure any existing item is the correct size */ + if (ret == -EEXIST) { + u32 found_size; + found_size = btrfs_item_size_nr(path->nodes[0], + path->slots[0]); + if (found_size > item_size) { + btrfs_truncate_item(trans, root, path, item_size, 1); + } else if (found_size < item_size) { + ret = btrfs_del_item(trans, root, + path); + BUG_ON(ret); + + btrfs_release_path(root, path); + ret = btrfs_insert_empty_item(trans, + root, path, key, item_size); + BUG_ON(ret); + } + } else if (ret) { + BUG(); + } + dst_ptr = btrfs_item_ptr_offset(path->nodes[0], + path->slots[0]); + + /* don't overwrite an existing inode if the generation number + * was logged as zero. This is done when the tree logging code + * is just logging an inode to make sure it exists after recovery. + * + * Also, don't overwrite i_size on directories during replay. + * log replay inserts and removes directory items based on the + * state of the tree found in the subvolume, and i_size is modified + * as it goes + */ + if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) { + struct btrfs_inode_item *src_item; + struct btrfs_inode_item *dst_item; + + src_item = (struct btrfs_inode_item *)src_ptr; + dst_item = (struct btrfs_inode_item *)dst_ptr; + + if (btrfs_inode_generation(eb, src_item) == 0) + goto no_copy; + + if (overwrite_root && + S_ISDIR(btrfs_inode_mode(eb, src_item)) && + S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) { + save_old_i_size = 1; + saved_i_size = btrfs_inode_size(path->nodes[0], + dst_item); + } + } + + copy_extent_buffer(path->nodes[0], eb, dst_ptr, + src_ptr, item_size); + + if (save_old_i_size) { + struct btrfs_inode_item *dst_item; + dst_item = (struct btrfs_inode_item *)dst_ptr; + btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size); + } + + /* make sure the generation is filled in */ + if (key->type == BTRFS_INODE_ITEM_KEY) { + struct btrfs_inode_item *dst_item; + dst_item = (struct btrfs_inode_item *)dst_ptr; + if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) { + btrfs_set_inode_generation(path->nodes[0], dst_item, + trans->transid); + } + } +no_copy: + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(root, path); + return 0; +} + +/* + * simple helper to read an inode off the disk from a given root + * This can only be called for subvolume roots and not for the log + */ +static noinline struct inode *read_one_inode(struct btrfs_root *root, + u64 objectid) +{ + struct inode *inode; + inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); + if (inode->i_state & I_NEW) { + BTRFS_I(inode)->root = root; + BTRFS_I(inode)->location.objectid = objectid; + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.offset = 0; + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + + } + if (is_bad_inode(inode)) { + iput(inode); + inode = NULL; + } + return inode; +} + +/* replays a single extent in 'eb' at 'slot' with 'key' into the + * subvolume 'root'. path is released on entry and should be released + * on exit. + * + * extents in the log tree have not been allocated out of the extent + * tree yet. So, this completes the allocation, taking a reference + * as required if the extent already exists or creating a new extent + * if it isn't in the extent allocation tree yet. + * + * The extent is inserted into the file, dropping any existing extents + * from the file that overlap the new one. + */ +static noinline int replay_one_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + int found_type; + u64 mask = root->sectorsize - 1; + u64 extent_end; + u64 alloc_hint; + u64 start = key->offset; + struct btrfs_file_extent_item *item; + struct inode *inode = NULL; + unsigned long size; + int ret = 0; + + item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(eb, item); + + if (found_type == BTRFS_FILE_EXTENT_REG) + extent_end = start + btrfs_file_extent_num_bytes(eb, item); + else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + size = btrfs_file_extent_inline_len(eb, + btrfs_item_nr(eb, slot)); + extent_end = (start + size + mask) & ~mask; + } else { + ret = 0; + goto out; + } + + inode = read_one_inode(root, key->objectid); + if (!inode) { + ret = -EIO; + goto out; + } + + /* + * first check to see if we already have this extent in the + * file. This must be done before the btrfs_drop_extents run + * so we don't try to drop this extent. + */ + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + start, 0); + + if (ret == 0 && found_type == BTRFS_FILE_EXTENT_REG) { + struct btrfs_file_extent_item cmp1; + struct btrfs_file_extent_item cmp2; + struct btrfs_file_extent_item *existing; + struct extent_buffer *leaf; + + leaf = path->nodes[0]; + existing = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + read_extent_buffer(eb, &cmp1, (unsigned long)item, + sizeof(cmp1)); + read_extent_buffer(leaf, &cmp2, (unsigned long)existing, + sizeof(cmp2)); + + /* + * we already have a pointer to this exact extent, + * we don't have to do anything + */ + if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { + btrfs_release_path(root, path); + goto out; + } + } + btrfs_release_path(root, path); + + /* drop any overlapping extents */ + ret = btrfs_drop_extents(trans, root, inode, + start, extent_end, start, &alloc_hint); + BUG_ON(ret); + + BUG_ON(ret); + if (found_type == BTRFS_FILE_EXTENT_REG) { + struct btrfs_key ins; + + ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); + ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); + ins.type = BTRFS_EXTENT_ITEM_KEY; + + /* insert the extent pointer in the file */ + ret = overwrite_item(trans, root, path, eb, slot, key); + BUG_ON(ret); + + /* + * is this extent already allocated in the extent + * allocation tree? If so, just add a reference + */ + ret = btrfs_lookup_extent(root, path, ins.objectid, ins.offset); + btrfs_release_path(root, path); + if (ret == 0) { + ret = btrfs_inc_extent_ref(trans, root, + ins.objectid, ins.offset, + root->root_key.objectid, + trans->transid, key->objectid, start); + } else { + /* + * insert the extent pointer in the extent + * allocation tree + */ + ret = btrfs_alloc_logged_extent(trans, root, + root->root_key.objectid, + trans->transid, key->objectid, + start, &ins); + BUG_ON(ret); + } + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + /* inline extents are easy, we just overwrite them */ + ret = overwrite_item(trans, root, path, eb, slot, key); + BUG_ON(ret); + } + /* btrfs_drop_extents changes i_blocks, update it here */ + inode->i_blocks += (extent_end - start) >> 9; + btrfs_update_inode(trans, root, inode); +out: + if (inode) + iput(inode); + return ret; +} + +/* + * when cleaning up conflicts between the directory names in the + * subvolume, directory names in the log and directory names in the + * inode back references, we may have to unlink inodes from directories. + * + * This is a helper function to do the unlink of a specific directory + * item + */ +static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct inode *dir, + struct btrfs_dir_item *di) +{ + struct inode *inode; + char *name; + int name_len; + struct extent_buffer *leaf; + struct btrfs_key location; + int ret; + + leaf = path->nodes[0]; + + btrfs_dir_item_key_to_cpu(leaf, di, &location); + name_len = btrfs_dir_name_len(leaf, di); + name = kmalloc(name_len, GFP_NOFS); + read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); + btrfs_release_path(root, path); + + inode = read_one_inode(root, location.objectid); + BUG_ON(!inode); + + btrfs_inc_nlink(inode); + ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); + kfree(name); + + iput(inode); + return ret; +} + +/* + * helper function to see if a given name and sequence number found + * in an inode back reference are already in a directory and correctly + * point to this inode + */ +static noinline int inode_in_dir(struct btrfs_root *root, + struct btrfs_path *path, + u64 dirid, u64 objectid, u64 index, + const char *name, int name_len) +{ + struct btrfs_dir_item *di; + struct btrfs_key location; + int match = 0; + + di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, + index, name, name_len, 0); + if (di && !IS_ERR(di)) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); + if (location.objectid != objectid) + goto out; + } else + goto out; + btrfs_release_path(root, path); + + di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); + if (di && !IS_ERR(di)) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); + if (location.objectid != objectid) + goto out; + } else + goto out; + match = 1; +out: + btrfs_release_path(root, path); + return match; +} + +/* + * helper function to check a log tree for a named back reference in + * an inode. This is used to decide if a back reference that is + * found in the subvolume conflicts with what we find in the log. + * + * inode backreferences may have multiple refs in a single item, + * during replay we process one reference at a time, and we don't + * want to delete valid links to a file from the subvolume if that + * link is also in the log. + */ +static noinline int backref_in_log(struct btrfs_root *log, + struct btrfs_key *key, + char *name, int namelen) +{ + struct btrfs_path *path; + struct btrfs_inode_ref *ref; + unsigned long ptr; + unsigned long ptr_end; + unsigned long name_ptr; + int found_name_len; + int item_size; + int ret; + int match = 0; + + path = btrfs_alloc_path(); + ret = btrfs_search_slot(NULL, log, key, path, 0, 0); + if (ret != 0) + goto out; + + item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); + ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); + ptr_end = ptr + item_size; + while (ptr < ptr_end) { + ref = (struct btrfs_inode_ref *)ptr; + found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref); + if (found_name_len == namelen) { + name_ptr = (unsigned long)(ref + 1); + ret = memcmp_extent_buffer(path->nodes[0], name, + name_ptr, namelen); + if (ret == 0) { + match = 1; + goto out; + } + } + ptr = (unsigned long)(ref + 1) + found_name_len; + } +out: + btrfs_free_path(path); + return match; +} + + +/* + * replay one inode back reference item found in the log tree. + * eb, slot and key refer to the buffer and key found in the log tree. + * root is the destination we are replaying into, and path is for temp + * use by this function. (it should be released on return). + */ +static noinline int add_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_root *log, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + struct inode *dir; + int ret; + struct btrfs_key location; + struct btrfs_inode_ref *ref; + struct btrfs_dir_item *di; + struct inode *inode; + char *name; + int namelen; + unsigned long ref_ptr; + unsigned long ref_end; + + location.objectid = key->objectid; + location.type = BTRFS_INODE_ITEM_KEY; + location.offset = 0; + + /* + * it is possible that we didn't log all the parent directories + * for a given inode. If we don't find the dir, just don't + * copy the back ref in. The link count fixup code will take + * care of the rest + */ + dir = read_one_inode(root, key->offset); + if (!dir) + return -ENOENT; + + inode = read_one_inode(root, key->objectid); + BUG_ON(!dir); + + ref_ptr = btrfs_item_ptr_offset(eb, slot); + ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); + +again: + ref = (struct btrfs_inode_ref *)ref_ptr; + + namelen = btrfs_inode_ref_name_len(eb, ref); + name = kmalloc(namelen, GFP_NOFS); + BUG_ON(!name); + + read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); + + /* if we already have a perfect match, we're done */ + if (inode_in_dir(root, path, dir->i_ino, inode->i_ino, + btrfs_inode_ref_index(eb, ref), + name, namelen)) { + goto out; + } + + /* + * look for a conflicting back reference in the metadata. + * if we find one we have to unlink that name of the file + * before we add our new link. Later on, we overwrite any + * existing back reference, and we don't want to create + * dangling pointers in the directory. + */ +conflict_again: + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret == 0) { + char *victim_name; + int victim_name_len; + struct btrfs_inode_ref *victim_ref; + unsigned long ptr; + unsigned long ptr_end; + struct extent_buffer *leaf = path->nodes[0]; + + /* are we trying to overwrite a back ref for the root directory + * if so, just jump out, we're done + */ + if (key->objectid == key->offset) + goto out_nowrite; + + /* check all the names in this back reference to see + * if they are in the log. if so, we allow them to stay + * otherwise they must be unlinked as a conflict + */ + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]); + while(ptr < ptr_end) { + victim_ref = (struct btrfs_inode_ref *)ptr; + victim_name_len = btrfs_inode_ref_name_len(leaf, + victim_ref); + victim_name = kmalloc(victim_name_len, GFP_NOFS); + BUG_ON(!victim_name); + + read_extent_buffer(leaf, victim_name, + (unsigned long)(victim_ref + 1), + victim_name_len); + + if (!backref_in_log(log, key, victim_name, + victim_name_len)) { + btrfs_inc_nlink(inode); + btrfs_release_path(root, path); + ret = btrfs_unlink_inode(trans, root, dir, + inode, victim_name, + victim_name_len); + kfree(victim_name); + btrfs_release_path(root, path); + goto conflict_again; + } + kfree(victim_name); + ptr = (unsigned long)(victim_ref + 1) + victim_name_len; + } + BUG_ON(ret); + } + btrfs_release_path(root, path); + + /* look for a conflicting sequence number */ + di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + btrfs_inode_ref_index(eb, ref), + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(root, path); + + + /* look for a conflicting name */ + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(root, path); + + /* insert our name */ + ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, + btrfs_inode_ref_index(eb, ref)); + BUG_ON(ret); + + btrfs_update_inode(trans, root, inode); + +out: + ref_ptr = (unsigned long)(ref + 1) + namelen; + kfree(name); + if (ref_ptr < ref_end) + goto again; + + /* finally write the back reference in the inode */ + ret = overwrite_item(trans, root, path, eb, slot, key); + BUG_ON(ret); + +out_nowrite: + btrfs_release_path(root, path); + iput(dir); + iput(inode); + return 0; +} + +/* + * replay one csum item from the log tree into the subvolume 'root' + * eb, slot and key all refer to the log tree + * path is for temp use by this function and should be released on return + * + * This copies the checksums out of the log tree and inserts them into + * the subvolume. Any existing checksums for this range in the file + * are overwritten, and new items are added where required. + * + * We keep this simple by reusing the btrfs_ordered_sum code from + * the data=ordered mode. This basically means making a copy + * of all the checksums in ram, which we have to do anyway for kmap + * rules. + * + * The copy is then sent down to btrfs_csum_file_blocks, which + * does all the hard work of finding existing items in the file + * or adding new ones. + */ +static noinline int replay_one_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + int ret; + u32 item_size = btrfs_item_size_nr(eb, slot); + u64 cur_offset; + unsigned long file_bytes; + struct btrfs_ordered_sum *sums; + struct btrfs_sector_sum *sector_sum; + struct inode *inode; + unsigned long ptr; + + file_bytes = (item_size / BTRFS_CRC32_SIZE) * root->sectorsize; + inode = read_one_inode(root, key->objectid); + if (!inode) { + return -EIO; + } + + sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS); + if (!sums) { + iput(inode); + return -ENOMEM; + } + + INIT_LIST_HEAD(&sums->list); + sums->len = file_bytes; + sums->file_offset = key->offset; + + /* + * copy all the sums into the ordered sum struct + */ + sector_sum = sums->sums; + cur_offset = key->offset; + ptr = btrfs_item_ptr_offset(eb, slot); + while(item_size > 0) { + sector_sum->offset = cur_offset; + read_extent_buffer(eb, §or_sum->sum, ptr, BTRFS_CRC32_SIZE); + sector_sum++; + item_size -= BTRFS_CRC32_SIZE; + ptr += BTRFS_CRC32_SIZE; + cur_offset += root->sectorsize; + } + + /* let btrfs_csum_file_blocks add them into the file */ + ret = btrfs_csum_file_blocks(trans, root, inode, sums); + BUG_ON(ret); + kfree(sums); + iput(inode); + + return 0; +} +/* + * There are a few corners where the link count of the file can't + * be properly maintained during replay. So, instead of adding + * lots of complexity to the log code, we just scan the backrefs + * for any file that has been through replay. + * + * The scan will update the link count on the inode to reflect the + * number of back refs found. If it goes down to zero, the iput + * will free the inode. + */ +static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + u64 nlink = 0; + unsigned long ptr; + unsigned long ptr_end; + int name_len; + + key.objectid = inode->i_ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = (u64)-1; + + path = btrfs_alloc_path(); + + while(1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + break; + if (ret > 0) { + if (path->slots[0] == 0) + break; + path->slots[0]--; + } + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid != inode->i_ino || + key.type != BTRFS_INODE_REF_KEY) + break; + ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); + ptr_end = ptr + btrfs_item_size_nr(path->nodes[0], + path->slots[0]); + while(ptr < ptr_end) { + struct btrfs_inode_ref *ref; + + ref = (struct btrfs_inode_ref *)ptr; + name_len = btrfs_inode_ref_name_len(path->nodes[0], + ref); + ptr = (unsigned long)(ref + 1) + name_len; + nlink++; + } + + if (key.offset == 0) + break; + key.offset--; + btrfs_release_path(root, path); + } + btrfs_free_path(path); + if (nlink != inode->i_nlink) { + inode->i_nlink = nlink; + btrfs_update_inode(trans, root, inode); + } + + return 0; +} + +static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path) +{ + int ret; + struct btrfs_key key; + struct inode *inode; + + key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; + key.type = BTRFS_ORPHAN_ITEM_KEY; + key.offset = (u64)-1; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + break; + + if (ret == 1) { + if (path->slots[0] == 0) + break; + path->slots[0]--; + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID || + key.type != BTRFS_ORPHAN_ITEM_KEY) + break; + + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + + btrfs_release_path(root, path); + inode = read_one_inode(root, key.offset); + BUG_ON(!inode); + + ret = fixup_inode_link_count(trans, root, inode); + BUG_ON(ret); + + iput(inode); + + if (key.offset == 0) + break; + key.offset--; + } + btrfs_release_path(root, path); + return 0; +} + + +/* + * record a given inode in the fixup dir so we can check its link + * count when replay is done. The link count is incremented here + * so the inode won't go away until we check it + */ +static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 objectid) +{ + struct btrfs_key key; + int ret = 0; + struct inode *inode; + + inode = read_one_inode(root, objectid); + BUG_ON(!inode); + + key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; + btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); + key.offset = objectid; + + ret = btrfs_insert_empty_item(trans, root, path, &key, 0); + + btrfs_release_path(root, path); + if (ret == 0) { + btrfs_inc_nlink(inode); + btrfs_update_inode(trans, root, inode); + } else if (ret == -EEXIST) { + ret = 0; + } else { + BUG(); + } + iput(inode); + + return ret; +} + +/* + * when replaying the log for a directory, we only insert names + * for inodes that actually exist. This means an fsync on a directory + * does not implicitly fsync all the new files in it + */ +static noinline int insert_one_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 dirid, u64 index, + char *name, int name_len, u8 type, + struct btrfs_key *location) +{ + struct inode *inode; + struct inode *dir; + int ret; + + inode = read_one_inode(root, location->objectid); + if (!inode) + return -ENOENT; + + dir = read_one_inode(root, dirid); + if (!dir) { + iput(inode); + return -EIO; + } + ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index); + + /* FIXME, put inode into FIXUP list */ + + iput(inode); + iput(dir); + return ret; +} + +/* + * take a single entry in a log directory item and replay it into + * the subvolume. + * + * if a conflicting item exists in the subdirectory already, + * the inode it points to is unlinked and put into the link count + * fix up tree. + * + * If a name from the log points to a file or directory that does + * not exist in the FS, it is skipped. fsyncs on directories + * do not force down inodes inside that directory, just changes to the + * names or unlinks in a directory. + */ +static noinline int replay_one_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, + struct btrfs_dir_item *di, + struct btrfs_key *key) +{ + char *name; + int name_len; + struct btrfs_dir_item *dst_di; + struct btrfs_key found_key; + struct btrfs_key log_key; + struct inode *dir; + struct inode *inode; + u8 log_type; + int ret; + + dir = read_one_inode(root, key->objectid); + BUG_ON(!dir); + + name_len = btrfs_dir_name_len(eb, di); + name = kmalloc(name_len, GFP_NOFS); + log_type = btrfs_dir_type(eb, di); + read_extent_buffer(eb, name, (unsigned long)(di + 1), + name_len); + + btrfs_dir_item_key_to_cpu(eb, di, &log_key); + if (key->type == BTRFS_DIR_ITEM_KEY) { + dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, + name, name_len, 1); + } + else if (key->type == BTRFS_DIR_INDEX_KEY) { + dst_di = btrfs_lookup_dir_index_item(trans, root, path, + key->objectid, + key->offset, name, + name_len, 1); + } else { + BUG(); + } + if (!dst_di || IS_ERR(dst_di)) { + /* we need a sequence number to insert, so we only + * do inserts for the BTRFS_DIR_INDEX_KEY types + */ + if (key->type != BTRFS_DIR_INDEX_KEY) + goto out; + goto insert; + } + + btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key); + /* the existing item matches the logged item */ + if (found_key.objectid == log_key.objectid && + found_key.type == log_key.type && + found_key.offset == log_key.offset && + btrfs_dir_type(path->nodes[0], dst_di) == log_type) { + goto out; + } + + /* + * don't drop the conflicting directory entry if the inode + * for the new entry doesn't exist + */ + inode = read_one_inode(root, log_key.objectid); + if (!inode) + goto out; + + iput(inode); + ret = drop_one_dir_item(trans, root, path, dir, dst_di); + BUG_ON(ret); + + if (key->type == BTRFS_DIR_INDEX_KEY) + goto insert; +out: + btrfs_release_path(root, path); + kfree(name); + iput(dir); + return 0; + +insert: + btrfs_release_path(root, path); + ret = insert_one_name(trans, root, path, key->objectid, key->offset, + name, name_len, log_type, &log_key); + + if (ret && ret != -ENOENT) + BUG(); + goto out; +} + +/* + * find all the names in a directory item and reconcile them into + * the subvolume. Only BTRFS_DIR_ITEM_KEY types will have more than + * one name in a directory item, but the same code gets used for + * both directory index types + */ +static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + int ret; + u32 item_size = btrfs_item_size_nr(eb, slot); + struct btrfs_dir_item *di; + int name_len; + unsigned long ptr; + unsigned long ptr_end; + + ptr = btrfs_item_ptr_offset(eb, slot); + ptr_end = ptr + item_size; + while(ptr < ptr_end) { + di = (struct btrfs_dir_item *)ptr; + name_len = btrfs_dir_name_len(eb, di); + ret = replay_one_name(trans, root, path, eb, di, key); + BUG_ON(ret); + ptr = (unsigned long)(di + 1); + ptr += name_len; + } + return 0; +} + +/* + * directory replay has two parts. There are the standard directory + * items in the log copied from the subvolume, and range items + * created in the log while the subvolume was logged. + * + * The range items tell us which parts of the key space the log + * is authoritative for. During replay, if a key in the subvolume + * directory is in a logged range item, but not actually in the log + * that means it was deleted from the directory before the fsync + * and should be removed. + */ +static noinline int find_dir_range(struct btrfs_root *root, + struct btrfs_path *path, + u64 dirid, int key_type, + u64 *start_ret, u64 *end_ret) +{ + struct btrfs_key key; + u64 found_end; + struct btrfs_dir_log_item *item; + int ret; + int nritems; + + if (*start_ret == (u64)-1) + return 1; + + key.objectid = dirid; + key.type = key_type; + key.offset = *start_ret; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + if (path->slots[0] == 0) + goto out; + path->slots[0]--; + } + if (ret != 0) + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type != key_type || key.objectid != dirid) { + ret = 1; + goto next; + } + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_dir_log_item); + found_end = btrfs_dir_log_end(path->nodes[0], item); + + if (*start_ret >= key.offset && *start_ret <= found_end) { + ret = 0; + *start_ret = key.offset; + *end_ret = found_end; + goto out; + } + ret = 1; +next: + /* check the next slot in the tree to see if it is a valid item */ + nritems = btrfs_header_nritems(path->nodes[0]); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + goto out; + } else { + path->slots[0]++; + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type != key_type || key.objectid != dirid) { + ret = 1; + goto out; + } + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_dir_log_item); + found_end = btrfs_dir_log_end(path->nodes[0], item); + *start_ret = key.offset; + *end_ret = found_end; + ret = 0; +out: + btrfs_release_path(root, path); + return ret; +} + +/* + * this looks for a given directory item in the log. If the directory + * item is not in the log, the item is removed and the inode it points + * to is unlinked + */ +static noinline int check_item_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_root *log, + struct btrfs_path *path, + struct btrfs_path *log_path, + struct inode *dir, + struct btrfs_key *dir_key) +{ + int ret; + struct extent_buffer *eb; + int slot; + u32 item_size; + struct btrfs_dir_item *di; + struct btrfs_dir_item *log_di; + int name_len; + unsigned long ptr; + unsigned long ptr_end; + char *name; + struct inode *inode; + struct btrfs_key location; + +again: + eb = path->nodes[0]; + slot = path->slots[0]; + item_size = btrfs_item_size_nr(eb, slot); + ptr = btrfs_item_ptr_offset(eb, slot); + ptr_end = ptr + item_size; + while(ptr < ptr_end) { + di = (struct btrfs_dir_item *)ptr; + name_len = btrfs_dir_name_len(eb, di); + name = kmalloc(name_len, GFP_NOFS); + if (!name) { + ret = -ENOMEM; + goto out; + } + read_extent_buffer(eb, name, (unsigned long)(di + 1), + name_len); + log_di = NULL; + if (dir_key->type == BTRFS_DIR_ITEM_KEY) { + log_di = btrfs_lookup_dir_item(trans, log, log_path, + dir_key->objectid, + name, name_len, 0); + } else if (dir_key->type == BTRFS_DIR_INDEX_KEY) { + log_di = btrfs_lookup_dir_index_item(trans, log, + log_path, + dir_key->objectid, + dir_key->offset, + name, name_len, 0); + } + if (!log_di || IS_ERR(log_di)) { + btrfs_dir_item_key_to_cpu(eb, di, &location); + btrfs_release_path(root, path); + btrfs_release_path(log, log_path); + inode = read_one_inode(root, location.objectid); + BUG_ON(!inode); + + ret = link_to_fixup_dir(trans, root, + path, location.objectid); + BUG_ON(ret); + btrfs_inc_nlink(inode); + ret = btrfs_unlink_inode(trans, root, dir, inode, + name, name_len); + BUG_ON(ret); + kfree(name); + iput(inode); + + /* there might still be more names under this key + * check and repeat if required + */ + ret = btrfs_search_slot(NULL, root, dir_key, path, + 0, 0); + if (ret == 0) + goto again; + ret = 0; + goto out; + } + btrfs_release_path(log, log_path); + kfree(name); + + ptr = (unsigned long)(di + 1); + ptr += name_len; + } + ret = 0; +out: + btrfs_release_path(root, path); + btrfs_release_path(log, log_path); + return ret; +} + +/* + * deletion replay happens before we copy any new directory items + * out of the log or out of backreferences from inodes. It + * scans the log to find ranges of keys that log is authoritative for, + * and then scans the directory to find items in those ranges that are + * not present in the log. + * + * Anything we don't find in the log is unlinked and removed from the + * directory. + */ +static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_root *log, + struct btrfs_path *path, + u64 dirid) +{ + u64 range_start; + u64 range_end; + int key_type = BTRFS_DIR_LOG_ITEM_KEY; + int ret = 0; + struct btrfs_key dir_key; + struct btrfs_key found_key; + struct btrfs_path *log_path; + struct inode *dir; + + dir_key.objectid = dirid; + dir_key.type = BTRFS_DIR_ITEM_KEY; + log_path = btrfs_alloc_path(); + if (!log_path) + return -ENOMEM; + + dir = read_one_inode(root, dirid); + /* it isn't an error if the inode isn't there, that can happen + * because we replay the deletes before we copy in the inode item + * from the log + */ + if (!dir) { + btrfs_free_path(log_path); + return 0; + } +again: + range_start = 0; + range_end = 0; + while(1) { + ret = find_dir_range(log, path, dirid, key_type, + &range_start, &range_end); + if (ret != 0) + break; + + dir_key.offset = range_start; + while(1) { + int nritems; + ret = btrfs_search_slot(NULL, root, &dir_key, path, + 0, 0); + if (ret < 0) + goto out; + + nritems = btrfs_header_nritems(path->nodes[0]); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + break; + } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + if (found_key.objectid != dirid || + found_key.type != dir_key.type) + goto next_type; + + if (found_key.offset > range_end) + break; + + ret = check_item_in_log(trans, root, log, path, + log_path, dir, &found_key); + BUG_ON(ret); + if (found_key.offset == (u64)-1) + break; + dir_key.offset = found_key.offset + 1; + } + btrfs_release_path(root, path); + if (range_end == (u64)-1) + break; + range_start = range_end + 1; + } + +next_type: + ret = 0; + if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { + key_type = BTRFS_DIR_LOG_INDEX_KEY; + dir_key.type = BTRFS_DIR_INDEX_KEY; + btrfs_release_path(root, path); + goto again; + } +out: + btrfs_release_path(root, path); + btrfs_free_path(log_path); + iput(dir); + return ret; +} + +/* + * the process_func used to replay items from the log tree. This + * gets called in two different stages. The first stage just looks + * for inodes and makes sure they are all copied into the subvolume. + * + * The second stage copies all the other item types from the log into + * the subvolume. The two stage approach is slower, but gets rid of + * lots of complexity around inodes referencing other inodes that exist + * only in the log (references come from either directory items or inode + * back refs). + */ +static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, + struct walk_control *wc, u64 gen) +{ + int nritems; + struct btrfs_path *path; + struct btrfs_root *root = wc->replay_dest; + struct btrfs_key key; + u32 item_size; + int level; + int i; + int ret; + + btrfs_read_buffer(eb, gen); + + level = btrfs_header_level(eb); + + if (level != 0) + return 0; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + nritems = btrfs_header_nritems(eb); + for (i = 0; i < nritems; i++) { + btrfs_item_key_to_cpu(eb, &key, i); + item_size = btrfs_item_size_nr(eb, i); + + /* inode keys are done during the first stage */ + if (key.type == BTRFS_INODE_ITEM_KEY && + wc->stage == LOG_WALK_REPLAY_INODES) { + struct inode *inode; + struct btrfs_inode_item *inode_item; + u32 mode; + + inode_item = btrfs_item_ptr(eb, i, + struct btrfs_inode_item); + mode = btrfs_inode_mode(eb, inode_item); + if (S_ISDIR(mode)) { + ret = replay_dir_deletes(wc->trans, + root, log, path, key.objectid); + BUG_ON(ret); + } + ret = overwrite_item(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + + /* for regular files, truncate away + * extents past the new EOF + */ + if (S_ISREG(mode)) { + inode = read_one_inode(root, + key.objectid); + BUG_ON(!inode); + + ret = btrfs_truncate_inode_items(wc->trans, + root, inode, inode->i_size, + BTRFS_EXTENT_DATA_KEY); + BUG_ON(ret); + iput(inode); + } + ret = link_to_fixup_dir(wc->trans, root, + path, key.objectid); + BUG_ON(ret); + } + if (wc->stage < LOG_WALK_REPLAY_ALL) + continue; + + /* these keys are simply copied */ + if (key.type == BTRFS_XATTR_ITEM_KEY) { + ret = overwrite_item(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + } else if (key.type == BTRFS_INODE_REF_KEY) { + ret = add_inode_ref(wc->trans, root, log, path, + eb, i, &key); + BUG_ON(ret && ret != -ENOENT); + } else if (key.type == BTRFS_EXTENT_DATA_KEY) { + ret = replay_one_extent(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + } else if (key.type == BTRFS_CSUM_ITEM_KEY) { + ret = replay_one_csum(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + } else if (key.type == BTRFS_DIR_ITEM_KEY || + key.type == BTRFS_DIR_INDEX_KEY) { + ret = replay_one_dir_item(wc->trans, root, path, + eb, i, &key); + BUG_ON(ret); + } + } + btrfs_free_path(path); + return 0; +} + +static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + struct walk_control *wc) +{ + u64 root_owner; + u64 root_gen; + u64 bytenr; + u64 ptr_gen; + struct extent_buffer *next; + struct extent_buffer *cur; + struct extent_buffer *parent; + u32 blocksize; + int ret = 0; + + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + + while(*level > 0) { + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + cur = path->nodes[*level]; + + if (btrfs_header_level(cur) != *level) + WARN_ON(1); + + if (path->slots[*level] >= + btrfs_header_nritems(cur)) + break; + + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); + blocksize = btrfs_level_size(root, *level - 1); + + parent = path->nodes[*level]; + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); + + next = btrfs_find_create_tree_block(root, bytenr, blocksize); + + wc->process_func(root, next, wc, ptr_gen); + + if (*level == 1) { + path->slots[*level]++; + if (wc->free) { + btrfs_read_buffer(next, ptr_gen); + + btrfs_tree_lock(next); + clean_tree_block(trans, root, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + + ret = btrfs_drop_leaf_ref(trans, root, next); + BUG_ON(ret); + + WARN_ON(root_owner != + BTRFS_TREE_LOG_OBJECTID); + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, root_owner, + root_gen, 0, 0, 1); + BUG_ON(ret); + } + free_extent_buffer(next); + continue; + } + btrfs_read_buffer(next, ptr_gen); + + WARN_ON(*level <= 0); + if (path->nodes[*level-1]) + free_extent_buffer(path->nodes[*level-1]); + path->nodes[*level-1] = next; + *level = btrfs_header_level(next); + path->slots[*level] = 0; + cond_resched(); + } + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); + + if (path->nodes[*level] == root->node) { + parent = path->nodes[*level]; + } else { + parent = path->nodes[*level + 1]; + } + bytenr = path->nodes[*level]->start; + + blocksize = btrfs_level_size(root, *level); + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); + + wc->process_func(root, path->nodes[*level], wc, + btrfs_header_generation(path->nodes[*level])); + + if (wc->free) { + next = path->nodes[*level]; + btrfs_tree_lock(next); + clean_tree_block(trans, root, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + + if (*level == 0) { + ret = btrfs_drop_leaf_ref(trans, root, next); + BUG_ON(ret); + } + WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); + ret = btrfs_free_extent(trans, root, bytenr, blocksize, + root_owner, root_gen, 0, 0, 1); + BUG_ON(ret); + } + free_extent_buffer(path->nodes[*level]); + path->nodes[*level] = NULL; + *level += 1; + + cond_resched(); + return 0; +} + +static int noinline walk_up_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int *level, + struct walk_control *wc) +{ + u64 root_owner; + u64 root_gen; + int i; + int slot; + int ret; + + for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { + slot = path->slots[i]; + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { + struct extent_buffer *node; + node = path->nodes[i]; + path->slots[i]++; + *level = i; + WARN_ON(*level == 0); + return 0; + } else { + if (path->nodes[*level] == root->node) { + root_owner = root->root_key.objectid; + root_gen = + btrfs_header_generation(path->nodes[*level]); + } else { + struct extent_buffer *node; + node = path->nodes[*level + 1]; + root_owner = btrfs_header_owner(node); + root_gen = btrfs_header_generation(node); + } + wc->process_func(root, path->nodes[*level], wc, + btrfs_header_generation(path->nodes[*level])); + if (wc->free) { + struct extent_buffer *next; + + next = path->nodes[*level]; + + btrfs_tree_lock(next); + clean_tree_block(trans, root, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + + if (*level == 0) { + ret = btrfs_drop_leaf_ref(trans, root, + next); + BUG_ON(ret); + } + + WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); + ret = btrfs_free_extent(trans, root, + path->nodes[*level]->start, + path->nodes[*level]->len, + root_owner, root_gen, 0, 0, 1); + BUG_ON(ret); + } + free_extent_buffer(path->nodes[*level]); + path->nodes[*level] = NULL; + *level = i + 1; + } + } + return 1; +} + +/* + * drop the reference count on the tree rooted at 'snap'. This traverses + * the tree freeing any blocks that have a ref count of zero after being + * decremented. + */ +static int walk_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *log, struct walk_control *wc) +{ + int ret = 0; + int wret; + int level; + struct btrfs_path *path; + int i; + int orig_level; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + level = btrfs_header_level(log->node); + orig_level = level; + path->nodes[level] = log->node; + extent_buffer_get(log->node); + path->slots[level] = 0; + + while(1) { + wret = walk_down_log_tree(trans, log, path, &level, wc); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + + wret = walk_up_log_tree(trans, log, path, &level, wc); + if (wret > 0) + break; + if (wret < 0) + ret = wret; + } + + /* was the root node processed? if not, catch it here */ + if (path->nodes[orig_level]) { + wc->process_func(log, path->nodes[orig_level], wc, + btrfs_header_generation(path->nodes[orig_level])); + if (wc->free) { + struct extent_buffer *next; + + next = path->nodes[orig_level]; + + btrfs_tree_lock(next); + clean_tree_block(trans, log, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); + + if (orig_level == 0) { + ret = btrfs_drop_leaf_ref(trans, log, + next); + BUG_ON(ret); + } + WARN_ON(log->root_key.objectid != + BTRFS_TREE_LOG_OBJECTID); + ret = btrfs_free_extent(trans, log, + next->start, next->len, + log->root_key.objectid, + btrfs_header_generation(next), + 0, 0, 1); + BUG_ON(ret); + } + } + + for (i = 0; i <= orig_level; i++) { + if (path->nodes[i]) { + free_extent_buffer(path->nodes[i]); + path->nodes[i] = NULL; + } + } + btrfs_free_path(path); + if (wc->free) + free_extent_buffer(log->node); + return ret; +} + +int wait_log_commit(struct btrfs_root *log) +{ + DEFINE_WAIT(wait); + u64 transid = log->fs_info->tree_log_transid; + + do { + prepare_to_wait(&log->fs_info->tree_log_wait, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_commit)) + schedule(); + finish_wait(&log->fs_info->tree_log_wait, &wait); + mutex_lock(&log->fs_info->tree_log_mutex); + } while(transid == log->fs_info->tree_log_transid && + atomic_read(&log->fs_info->tree_log_commit)); + return 0; +} + +/* + * btrfs_sync_log does sends a given tree log down to the disk and + * updates the super blocks to record it. When this call is done, + * you know that any inodes previously logged are safely on disk + */ +int btrfs_sync_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + unsigned long batch; + struct btrfs_root *log = root->log_root; + struct walk_control wc = { + .write = 1, + .process_func = process_one_buffer + }; + + mutex_lock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_commit)) { + wait_log_commit(log); + goto out; + } + atomic_set(&log->fs_info->tree_log_commit, 1); + + while(1) { + mutex_unlock(&log->fs_info->tree_log_mutex); + schedule_timeout_uninterruptible(1); + mutex_lock(&log->fs_info->tree_log_mutex); + batch = log->fs_info->tree_log_batch; + + while(atomic_read(&log->fs_info->tree_log_writers)) { + DEFINE_WAIT(wait); + prepare_to_wait(&log->fs_info->tree_log_wait, &wait, + TASK_UNINTERRUPTIBLE); + batch = log->fs_info->tree_log_batch; + mutex_unlock(&log->fs_info->tree_log_mutex); + if (atomic_read(&log->fs_info->tree_log_writers)) + schedule(); + mutex_lock(&log->fs_info->tree_log_mutex); + finish_wait(&log->fs_info->tree_log_wait, &wait); + } + if (batch == log->fs_info->tree_log_batch) + break; + } + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + + ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc); + BUG_ON(ret); + + wc.wait = 1; + + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + + ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc); + BUG_ON(ret); + + btrfs_set_super_log_root(&root->fs_info->super_for_commit, + log->fs_info->log_root_tree->node->start); + btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, + btrfs_header_level(log->fs_info->log_root_tree->node)); + + write_ctree_super(trans, log->fs_info->tree_root); + log->fs_info->tree_log_transid++; + log->fs_info->tree_log_batch = 0; + atomic_set(&log->fs_info->tree_log_commit, 0); + smp_mb(); + if (waitqueue_active(&log->fs_info->tree_log_wait)) + wake_up(&log->fs_info->tree_log_wait); +out: + mutex_unlock(&log->fs_info->tree_log_mutex); + return 0; + +} + +/* + * free all the extents used by the tree log. This should be called + * at commit time of the full transaction + */ +int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) +{ + int ret; + struct btrfs_root *log; + struct key; + struct walk_control wc = { + .free = 1, + .process_func = process_one_buffer + }; + + if (!root->log_root) + return 0; + + log = root->log_root; + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + + log = root->log_root; + ret = btrfs_del_root(trans, root->fs_info->log_root_tree, + &log->root_key); + BUG_ON(ret); + root->log_root = NULL; + kfree(root->log_root); + return 0; +} + +/* + * helper function to update the item for a given subvolumes log root + * in the tree of log roots + */ +static int update_log_root(struct btrfs_trans_handle *trans, + struct btrfs_root *log) +{ + u64 bytenr = btrfs_root_bytenr(&log->root_item); + int ret; + + if (log->node->start == bytenr) + return 0; + + btrfs_set_root_bytenr(&log->root_item, log->node->start); + btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); + ret = btrfs_update_root(trans, log->fs_info->log_root_tree, + &log->root_key, &log->root_item); + BUG_ON(ret); + return ret; +} + +/* + * If both a file and directory are logged, and unlinks or renames are + * mixed in, we have a few interesting corners: + * + * create file X in dir Y + * link file X to X.link in dir Y + * fsync file X + * unlink file X but leave X.link + * fsync dir Y + * + * After a crash we would expect only X.link to exist. But file X + * didn't get fsync'd again so the log has back refs for X and X.link. + * + * We solve this by removing directory entries and inode backrefs from the + * log when a file that was logged in the current transaction is + * unlinked. Any later fsync will include the updated log entries, and + * we'll be able to reconstruct the proper directory items from backrefs. + * + * This optimizations allows us to avoid relogging the entire inode + * or the entire directory. + */ +int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct inode *dir, u64 index) +{ + struct btrfs_root *log; + struct btrfs_dir_item *di; + struct btrfs_path *path; + int ret; + int bytes_del = 0; + + ret = join_running_log_trans(root); + if (ret) + return 0; + + mutex_lock(&BTRFS_I(dir)->log_mutex); + + log = root->log_root; + path = btrfs_alloc_path(); + di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, + name, name_len, -1); + if (di && !IS_ERR(di)) { + ret = btrfs_delete_one_dir_name(trans, log, path, di); + bytes_del += name_len; + BUG_ON(ret); + } + btrfs_release_path(log, path); + di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, + index, name, name_len, -1); + if (di && !IS_ERR(di)) { + ret = btrfs_delete_one_dir_name(trans, log, path, di); + bytes_del += name_len; + BUG_ON(ret); + } + + /* update the directory size in the log to reflect the names + * we have removed + */ + if (bytes_del) { + struct btrfs_key key; + + key.objectid = dir->i_ino; + key.offset = 0; + key.type = BTRFS_INODE_ITEM_KEY; + btrfs_release_path(log, path); + + ret = btrfs_search_slot(trans, log, &key, path, 0, 1); + if (ret == 0) { + struct btrfs_inode_item *item; + u64 i_size; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + i_size = btrfs_inode_size(path->nodes[0], item); + if (i_size > bytes_del) + i_size -= bytes_del; + else + i_size = 0; + btrfs_set_inode_size(path->nodes[0], item, i_size); + btrfs_mark_buffer_dirty(path->nodes[0]); + } else + ret = 0; + btrfs_release_path(log, path); + } + + btrfs_free_path(path); + mutex_unlock(&BTRFS_I(dir)->log_mutex); + end_log_trans(root); + + return 0; +} + +/* see comments for btrfs_del_dir_entries_in_log */ +int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct inode *inode, u64 dirid) +{ + struct btrfs_root *log; + u64 index; + int ret; + + ret = join_running_log_trans(root); + if (ret) + return 0; + log = root->log_root; + mutex_lock(&BTRFS_I(inode)->log_mutex); + + ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, + dirid, &index); + mutex_unlock(&BTRFS_I(inode)->log_mutex); + end_log_trans(root); + + if (ret == 0 || ret == -ENOENT) + return 0; + return ret; +} + +/* + * creates a range item in the log for 'dirid'. first_offset and + * last_offset tell us which parts of the key space the log should + * be considered authoritative for. + */ +static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, + struct btrfs_root *log, + struct btrfs_path *path, + int key_type, u64 dirid, + u64 first_offset, u64 last_offset) +{ + int ret; + struct btrfs_key key; + struct btrfs_dir_log_item *item; + + key.objectid = dirid; + key.offset = first_offset; + if (key_type == BTRFS_DIR_ITEM_KEY) + key.type = BTRFS_DIR_LOG_ITEM_KEY; + else + key.type = BTRFS_DIR_LOG_INDEX_KEY; + ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); + BUG_ON(ret); + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_dir_log_item); + btrfs_set_dir_log_end(path->nodes[0], item, last_offset); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(log, path); + return 0; +} + +/* + * log all the items included in the current transaction for a given + * directory. This also creates the range items in the log tree required + * to replay anything deleted before the fsync + */ +static noinline int log_dir_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + struct btrfs_path *path, + struct btrfs_path *dst_path, int key_type, + u64 min_offset, u64 *last_offset_ret) +{ + struct btrfs_key min_key; + struct btrfs_key max_key; + struct btrfs_root *log = root->log_root; + struct extent_buffer *src; + int ret; + int i; + int nritems; + u64 first_offset = min_offset; + u64 last_offset = (u64)-1; + + log = root->log_root; + max_key.objectid = inode->i_ino; + max_key.offset = (u64)-1; + max_key.type = key_type; + + min_key.objectid = inode->i_ino; + min_key.type = key_type; + min_key.offset = min_offset; + + path->keep_locks = 1; + + ret = btrfs_search_forward(root, &min_key, &max_key, + path, 0, trans->transid); + + /* + * we didn't find anything from this transaction, see if there + * is anything at all + */ + if (ret != 0 || min_key.objectid != inode->i_ino || + min_key.type != key_type) { + min_key.objectid = inode->i_ino; + min_key.type = key_type; + min_key.offset = (u64)-1; + btrfs_release_path(root, path); + ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); + if (ret < 0) { + btrfs_release_path(root, path); + return ret; + } + ret = btrfs_previous_item(root, path, inode->i_ino, key_type); + + /* if ret == 0 there are items for this type, + * create a range to tell us the last key of this type. + * otherwise, there are no items in this directory after + * *min_offset, and we create a range to indicate that. + */ + if (ret == 0) { + struct btrfs_key tmp; + btrfs_item_key_to_cpu(path->nodes[0], &tmp, + path->slots[0]); + if (key_type == tmp.type) { + first_offset = max(min_offset, tmp.offset) + 1; + } + } + goto done; + } + + /* go backward to find any previous key */ + ret = btrfs_previous_item(root, path, inode->i_ino, key_type); + if (ret == 0) { + struct btrfs_key tmp; + btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); + if (key_type == tmp.type) { + first_offset = tmp.offset; + ret = overwrite_item(trans, log, dst_path, + path->nodes[0], path->slots[0], + &tmp); + } + } + btrfs_release_path(root, path); + + /* find the first key from this transaction again */ + ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); + if (ret != 0) { + WARN_ON(1); + goto done; + } + + /* + * we have a block from this transaction, log every item in it + * from our directory + */ + while(1) { + struct btrfs_key tmp; + src = path->nodes[0]; + nritems = btrfs_header_nritems(src); + for (i = path->slots[0]; i < nritems; i++) { + btrfs_item_key_to_cpu(src, &min_key, i); + + if (min_key.objectid != inode->i_ino || + min_key.type != key_type) + goto done; + ret = overwrite_item(trans, log, dst_path, src, i, + &min_key); + BUG_ON(ret); + } + path->slots[0] = nritems; + + /* + * look ahead to the next item and see if it is also + * from this directory and from this transaction + */ + ret = btrfs_next_leaf(root, path); + if (ret == 1) { + last_offset = (u64)-1; + goto done; + } + btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); + if (tmp.objectid != inode->i_ino || tmp.type != key_type) { + last_offset = (u64)-1; + goto done; + } + if (btrfs_header_generation(path->nodes[0]) != trans->transid) { + ret = overwrite_item(trans, log, dst_path, + path->nodes[0], path->slots[0], + &tmp); + + BUG_ON(ret); + last_offset = tmp.offset; + goto done; + } + } +done: + *last_offset_ret = last_offset; + btrfs_release_path(root, path); + btrfs_release_path(log, dst_path); + + /* insert the log range keys to indicate where the log is valid */ + ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino, + first_offset, last_offset); + BUG_ON(ret); + return 0; +} + +/* + * logging directories is very similar to logging inodes, We find all the items + * from the current transaction and write them to the log. + * + * The recovery code scans the directory in the subvolume, and if it finds a + * key in the range logged that is not present in the log tree, then it means + * that dir entry was unlinked during the transaction. + * + * In order for that scan to work, we must include one key smaller than + * the smallest logged by this transaction and one key larger than the largest + * key logged by this transaction. + */ +static noinline int log_directory_changes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + struct btrfs_path *path, + struct btrfs_path *dst_path) +{ + u64 min_key; + u64 max_key; + int ret; + int key_type = BTRFS_DIR_ITEM_KEY; + +again: + min_key = 0; + max_key = 0; + while(1) { + ret = log_dir_items(trans, root, inode, path, + dst_path, key_type, min_key, + &max_key); + BUG_ON(ret); + if (max_key == (u64)-1) + break; + min_key = max_key + 1; + } + + if (key_type == BTRFS_DIR_ITEM_KEY) { + key_type = BTRFS_DIR_INDEX_KEY; + goto again; + } + return 0; +} + +/* + * a helper function to drop items from the log before we relog an + * inode. max_key_type indicates the highest item type to remove. + * This cannot be run for file data extents because it does not + * free the extents they point to. + */ +static int drop_objectid_items(struct btrfs_trans_handle *trans, + struct btrfs_root *log, + struct btrfs_path *path, + u64 objectid, int max_key_type) +{ + int ret; + struct btrfs_key key; + struct btrfs_key found_key; + + key.objectid = objectid; + key.type = max_key_type; + key.offset = (u64)-1; + + while(1) { + ret = btrfs_search_slot(trans, log, &key, path, -1, 1); + + if (ret != 1) + break; + + if (path->slots[0] == 0) + break; + + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + + if (found_key.objectid != objectid) + break; + + ret = btrfs_del_item(trans, log, path); + BUG_ON(ret); + btrfs_release_path(log, path); + } + btrfs_release_path(log, path); + return 0; +} + +/* log a single inode in the tree log. + * At least one parent directory for this inode must exist in the tree + * or be logged already. + * + * Any items from this inode changed by the current transaction are copied + * to the log tree. An extra reference is taken on any extents in this + * file, allowing us to avoid a whole pile of corner cases around logging + * blocks that have been removed from the tree. + * + * See LOG_INODE_ALL and related defines for a description of what inode_only + * does. + * + * This handles both files and directories. + */ +static int __btrfs_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + int inode_only) +{ + struct btrfs_path *path; + struct btrfs_path *dst_path; + struct btrfs_key min_key; + struct btrfs_key max_key; + struct btrfs_root *log = root->log_root; + unsigned long src_offset; + unsigned long dst_offset; + struct extent_buffer *src; + struct btrfs_file_extent_item *extent; + struct btrfs_inode_item *inode_item; + u32 size; + int ret; + + log = root->log_root; + + path = btrfs_alloc_path(); + dst_path = btrfs_alloc_path(); + + min_key.objectid = inode->i_ino; + min_key.type = BTRFS_INODE_ITEM_KEY; + min_key.offset = 0; + + max_key.objectid = inode->i_ino; + if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) + max_key.type = BTRFS_XATTR_ITEM_KEY; + else + max_key.type = (u8)-1; + max_key.offset = (u64)-1; + + /* + * if this inode has already been logged and we're in inode_only + * mode, we don't want to delete the things that have already + * been written to the log. + * + * But, if the inode has been through an inode_only log, + * the logged_trans field is not set. This allows us to catch + * any new names for this inode in the backrefs by logging it + * again + */ + if (inode_only == LOG_INODE_EXISTS && + BTRFS_I(inode)->logged_trans == trans->transid) { + btrfs_free_path(path); + btrfs_free_path(dst_path); + goto out; + } + mutex_lock(&BTRFS_I(inode)->log_mutex); + + /* + * a brute force approach to making sure we get the most uptodate + * copies of everything. + */ + if (S_ISDIR(inode->i_mode)) { + int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; + + if (inode_only == LOG_INODE_EXISTS) + max_key_type = BTRFS_XATTR_ITEM_KEY; + ret = drop_objectid_items(trans, log, path, + inode->i_ino, max_key_type); + } else { + ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); + } + BUG_ON(ret); + path->keep_locks = 1; + + while(1) { + ret = btrfs_search_forward(root, &min_key, &max_key, + path, 0, trans->transid); + if (ret != 0) + break; + + if (min_key.objectid != inode->i_ino) + break; + if (min_key.type > max_key.type) + break; + + src = path->nodes[0]; + size = btrfs_item_size_nr(src, path->slots[0]); + ret = btrfs_insert_empty_item(trans, log, dst_path, &min_key, + size); + if (ret) + BUG(); + + dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], + dst_path->slots[0]); + + src_offset = btrfs_item_ptr_offset(src, path->slots[0]); + + copy_extent_buffer(dst_path->nodes[0], src, dst_offset, + src_offset, size); + + if (inode_only == LOG_INODE_EXISTS && + min_key.type == BTRFS_INODE_ITEM_KEY) { + inode_item = btrfs_item_ptr(dst_path->nodes[0], + dst_path->slots[0], + struct btrfs_inode_item); + btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0); + + /* set the generation to zero so the recover code + * can tell the difference between an logging + * just to say 'this inode exists' and a logging + * to say 'update this inode with these values' + */ + btrfs_set_inode_generation(dst_path->nodes[0], + inode_item, 0); + } + /* take a reference on file data extents so that truncates + * or deletes of this inode don't have to relog the inode + * again + */ + if (btrfs_key_type(&min_key) == BTRFS_EXTENT_DATA_KEY) { + int found_type; + extent = btrfs_item_ptr(src, path->slots[0], + struct btrfs_file_extent_item); + + found_type = btrfs_file_extent_type(src, extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + u64 ds = btrfs_file_extent_disk_bytenr(src, + extent); + u64 dl = btrfs_file_extent_disk_num_bytes(src, + extent); + /* ds == 0 is a hole */ + if (ds != 0) { + ret = btrfs_inc_extent_ref(trans, log, + ds, dl, + log->root_key.objectid, + 0, + inode->i_ino, + min_key.offset); + BUG_ON(ret); + } + } + } + + btrfs_mark_buffer_dirty(dst_path->nodes[0]); + btrfs_release_path(root, path); + btrfs_release_path(log, dst_path); + + if (min_key.offset < (u64)-1) + min_key.offset++; + else if (min_key.type < (u8)-1) + min_key.type++; + else if (min_key.objectid < (u64)-1) + min_key.objectid++; + else + break; + } + if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { + btrfs_release_path(root, path); + btrfs_release_path(log, dst_path); + ret = log_directory_changes(trans, root, inode, path, dst_path); + BUG_ON(ret); + } + mutex_unlock(&BTRFS_I(inode)->log_mutex); + + btrfs_free_path(path); + btrfs_free_path(dst_path); + + mutex_lock(&root->fs_info->tree_log_mutex); + ret = update_log_root(trans, log); + BUG_ON(ret); + mutex_unlock(&root->fs_info->tree_log_mutex); +out: + return 0; +} + +int btrfs_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + int inode_only) +{ + int ret; + + start_log_trans(trans, root); + ret = __btrfs_log_inode(trans, root, inode, inode_only); + end_log_trans(root); + return ret; +} + +/* + * helper function around btrfs_log_inode to make sure newly created + * parent directories also end up in the log. A minimal inode and backref + * only logging is done of any parent directories that are older than + * the last committed transaction + */ +int btrfs_log_dentry(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct dentry *dentry) +{ + int inode_only = LOG_INODE_ALL; + struct super_block *sb; + int ret; + + start_log_trans(trans, root); + sb = dentry->d_inode->i_sb; + while(1) { + ret = __btrfs_log_inode(trans, root, dentry->d_inode, + inode_only); + BUG_ON(ret); + inode_only = LOG_INODE_EXISTS; + + dentry = dentry->d_parent; + if (!dentry || !dentry->d_inode || sb != dentry->d_inode->i_sb) + break; + + if (BTRFS_I(dentry->d_inode)->generation <= + root->fs_info->last_trans_committed) + break; + } + end_log_trans(root); + return 0; +} + +/* + * it is not safe to log dentry if the chunk root has added new + * chunks. This returns 0 if the dentry was logged, and 1 otherwise. + * If this returns 1, you must commit the transaction to safely get your + * data on disk. + */ +int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct dentry *dentry) +{ + u64 gen; + gen = root->fs_info->last_trans_new_blockgroup; + if (gen > root->fs_info->last_trans_committed) + return 1; + else + return btrfs_log_dentry(trans, root, dentry); +} + +/* + * should be called during mount to recover any replay any log trees + * from the FS + */ +int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) +{ + int ret; + struct btrfs_path *path; + struct btrfs_trans_handle *trans; + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_key tmp_key; + struct btrfs_root *log; + struct btrfs_fs_info *fs_info = log_root_tree->fs_info; + struct walk_control wc = { + .process_func = process_one_buffer, + .stage = 0, + }; + + fs_info->log_root_recovering = 1; + path = btrfs_alloc_path(); + BUG_ON(!path); + + trans = btrfs_start_transaction(fs_info->tree_root, 1); + + wc.trans = trans; + wc.pin = 1; + + walk_log_tree(trans, log_root_tree, &wc); + +again: + key.objectid = BTRFS_TREE_LOG_OBJECTID; + key.offset = (u64)-1; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + + while(1) { + ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); + if (ret < 0) + break; + if (ret > 0) { + if (path->slots[0] == 0) + break; + path->slots[0]--; + } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + btrfs_release_path(log_root_tree, path); + if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) + break; + + log = btrfs_read_fs_root_no_radix(log_root_tree, + &found_key); + BUG_ON(!log); + + + tmp_key.objectid = found_key.offset; + tmp_key.type = BTRFS_ROOT_ITEM_KEY; + tmp_key.offset = (u64)-1; + + wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); + + BUG_ON(!wc.replay_dest); + + btrfs_record_root_in_trans(wc.replay_dest); + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + + if (wc.stage == LOG_WALK_REPLAY_ALL) { + ret = fixup_inode_link_counts(trans, wc.replay_dest, + path); + BUG_ON(ret); + } + + key.offset = found_key.offset - 1; + free_extent_buffer(log->node); + kfree(log); + + if (found_key.offset == 0) + break; + } + btrfs_release_path(log_root_tree, path); + + /* step one is to pin it all, step two is to replay just inodes */ + if (wc.pin) { + wc.pin = 0; + wc.process_func = replay_one_buffer; + wc.stage = LOG_WALK_REPLAY_INODES; + goto again; + } + /* step three is to replay everything */ + if (wc.stage < LOG_WALK_REPLAY_ALL) { + wc.stage++; + goto again; + } + + btrfs_free_path(path); + + free_extent_buffer(log_root_tree->node); + log_root_tree->log_root = NULL; + fs_info->log_root_recovering = 0; + + /* step 4: commit the transaction, which also unpins the blocks */ + btrfs_commit_transaction(trans, fs_info->tree_root); + + kfree(log_root_tree); + return 0; +} diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h new file mode 100644 index 000000000000..b9409b32ed02 --- /dev/null +++ b/fs/btrfs/tree-log.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __TREE_LOG_ +#define __TREE_LOG_ + +int btrfs_sync_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_log_dentry(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct dentry *dentry); +int btrfs_recover_log_trees(struct btrfs_root *tree_root); +int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct dentry *dentry); +int btrfs_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + int inode_only); +int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct inode *dir, u64 index); +int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct inode *inode, u64 dirid); +#endif From 91c0827de247303c6dfc40a8ec9d8305e1d07986 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 5 Sep 2008 16:43:06 -0400 Subject: [PATCH 0691/2985] Btrfs: Rev the disk format Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6532b60683ef..f720b582fd5b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -40,7 +40,7 @@ extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; struct btrfs_ordered_sum; -#define BTRFS_MAGIC "_B7RfS_M" +#define BTRFS_MAGIC "_B8RfS_M" #define BTRFS_ACL_NOT_CACHED ((void *)-1) From 50ec891cc54fb865c4c40c45845e60123e0e66ca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Sep 2008 16:43:20 -0400 Subject: [PATCH 0692/2985] Btrfs: cleanup d_obtain_alias useage d_obtain_alias is intended as a tailcall that can pass in errors encoded in the inode pointer if needed, so use it that way instead of duplicating the error handling. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/export.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 292b0b24c302..2b357a6d2407 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -68,7 +68,6 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, { struct btrfs_root *root; struct inode *inode; - struct dentry *result; struct btrfs_key key; key.objectid = root_objectid; @@ -92,11 +91,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, return ERR_PTR(-ESTALE); } - result = d_obtain_alias(inode); - if (!result) - return ERR_PTR(-ENOMEM); - - return result; + return d_obtain_alias(inode); } static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, @@ -148,8 +143,6 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, static struct dentry *btrfs_get_parent(struct dentry *child) { struct inode *dir = child->d_inode; - struct inode *inode; - struct dentry *parent; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_key key; struct btrfs_path *path; @@ -203,13 +196,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child) btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); - - parent = d_obtain_alias(inode); - if (!parent) - parent = ERR_PTR(-ENOMEM); - - return parent; + return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); } const struct export_operations btrfs_export_ops = { From ad3d81ba8f2880dfa3ee518c779a7ea09a024520 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Sep 2008 16:43:28 -0400 Subject: [PATCH 0693/2985] Btrfs: missing endianess conversion in insert_new_root Add two missing endianess conversions in this function, found by sparse. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 579124043d9b..6cd4e929bd4d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1656,8 +1656,8 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, c = btrfs_alloc_free_block(trans, root, root->nodesize, root->root_key.objectid, - root_gen, lower_key.objectid, level, - root->node->start, 0); + root_gen, le64_to_cpu(lower_key.objectid), + level, root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); @@ -2352,8 +2352,9 @@ again: right = btrfs_alloc_free_block(trans, root, root->leafsize, root->root_key.objectid, - root_gen, disk_key.objectid, 0, - l->start, 0); + root_gen, + le64_to_cpu(disk_key.objectid), + 0, l->start, 0); if (IS_ERR(right)) { BUG_ON(1); return PTR_ERR(right); From a237d2a2bd67eb2b57b87c1e1d3d802d078d4ba7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Sep 2008 16:43:29 -0400 Subject: [PATCH 0694/2985] remove unused function btrfs_ilookup btrfs_ilookup is unused, which is good because a normal filesystem should never have to use ilookup anyway. Remove it. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 -- fs/btrfs/inode.c | 13 ------------- 2 files changed, 15 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f720b582fd5b..754c738ec2e8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1763,8 +1763,6 @@ void btrfs_destroy_cachep(void); long btrfs_ioctl_trans_end(struct file *file); struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); -struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, - u64 root_objectid); struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, struct btrfs_root *root, int *is_new); int btrfs_commit_write(struct file *file, struct page *page, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 65df9d830230..b4dbb5c97646 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1818,19 +1818,6 @@ static int btrfs_find_actor(struct inode *inode, void *opaque) args->root == BTRFS_I(inode)->root); } -struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, - u64 root_objectid) -{ - struct btrfs_iget_args args; - args.ino = objectid; - args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid); - - if (!args.root) - return NULL; - - return ilookup5(s, objectid, btrfs_find_actor, (void *)&args); -} - struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root) { From b214107eda845f9a5851ae198f5b972e0dc30c45 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Sep 2008 16:43:31 -0400 Subject: [PATCH 0695/2985] Btrfs: trivial sparse fixes Fix a bunch of trivial sparse complaints. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 -- fs/btrfs/extent_io.c | 6 ++---- fs/btrfs/ioctl.c | 2 +- fs/btrfs/sysfs.c | 8 ++++---- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6cd4e929bd4d..ab07557bac07 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2749,7 +2749,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, BUG_ON(slot < 0); if (slot != nritems) { - int i; unsigned int old_data = btrfs_item_end_nr(leaf, slot); if (old_data < data_end) { @@ -2913,7 +2912,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, nritems = btrfs_header_nritems(leaf); if (slot + nr != nritems) { - int i; int data_end = leaf_data_end(root, leaf); memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7ca89c45d401..239e7c908abf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3400,10 +3400,8 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) } /* at this point we can safely release the extent buffer */ num_pages = num_extent_pages(eb->start, eb->len); - for (i = 0; i < num_pages; i++) { - struct page *page = extent_buffer_page(eb, i); - page_cache_release(page); - } + for (i = 0; i < num_pages; i++) + page_cache_release(extent_buffer_page(eb, i)); rb_erase(&eb->rb_node, &tree->buffer); __free_extent_buffer(eb); out: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 59b64c738fd1..f84b5f6991cc 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -739,7 +739,7 @@ long btrfs_ioctl_trans_end(struct file *file) goto out; } btrfs_end_transaction(trans, root); - file->private_data = 0; + file->private_data = NULL; mutex_lock(&root->fs_info->trans_mutex); root->fs_info->open_ioctl_trans--; diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 1cf4f4f8d2ce..61af5d8446e3 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -254,7 +254,7 @@ void btrfs_sysfs_del_super(struct btrfs_fs_info *fs) wait_for_completion(&fs->kobj_unregister); } -int btrfs_init_sysfs() +int btrfs_init_sysfs(void) { btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); if (!btrfs_kset) @@ -262,7 +262,7 @@ int btrfs_init_sysfs() return 0; } -void btrfs_exit_sysfs() +void btrfs_exit_sysfs(void) { kset_unregister(btrfs_kset); } @@ -289,12 +289,12 @@ void btrfs_sysfs_del_super(struct btrfs_fs_info *fs) return; } -int btrfs_init_sysfs() +int btrfs_init_sysfs(void) { return 0; } -void btrfs_exit_sysfs() +void btrfs_exit_sysfs(void) { return; } From 6527cdbe68a27a2ee745f36d001aa32d0f46f370 Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 5 Sep 2008 16:43:53 -0400 Subject: [PATCH 0696/2985] Btrfs: Update find free objectid function for orphan cleanup code Orphan items use BTRFS_ORPHAN_OBJECTID (-5UUL) as key objectid. This affects the find free objectid functions, inode objectid can easily overflow after orphan file cleanup. --- Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 ++- fs/btrfs/inode-map.c | 15 ++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 754c738ec2e8..2ed6918f32e5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -82,9 +82,10 @@ struct btrfs_ordered_sum; #define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL /* - * All files have objectids higher than this. + * All files have objectids in this range. */ #define BTRFS_FIRST_FREE_OBJECTID 256ULL +#define BTRFS_LAST_FREE_OBJECTID -256ULL #define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 298346ae1481..cd6171c2da42 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -32,7 +32,8 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) path = btrfs_alloc_path(); BUG_ON(!path); - search_key.objectid = (u64)-1; + search_key.objectid = BTRFS_LAST_FREE_OBJECTID; + search_key.type = -1; search_key.offset = (u64)-1; ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) @@ -70,16 +71,17 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, u64 search_start = dirid; mutex_lock(&root->objectid_mutex); - if (root->last_inode_alloc) { + if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID && + root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) { *objectid = ++root->last_inode_alloc; mutex_unlock(&root->objectid_mutex); return 0; } path = btrfs_alloc_path(); BUG_ON(!path); - search_start = root->last_inode_alloc; search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; + search_key.type = 0; search_key.offset = 0; btrfs_init_path(path); @@ -88,9 +90,6 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, if (ret < 0) goto error; - if (path->slots[0] > 0) - path->slots[0]--; - while (1) { l = path->nodes[0]; slot = path->slots[0]; @@ -120,13 +119,15 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, } } } + if (key.objectid >= BTRFS_LAST_FREE_OBJECTID) + break; start_found = 1; last_ino = key.objectid + 1; path->slots[0]++; } // FIXME -ENOSPC + BUG_ON(1); found: - root->last_inode_alloc = *objectid; btrfs_release_path(root, path); btrfs_free_path(path); BUG_ON(*objectid < search_start); From 325cd4bafeb6cfb44addd6e807a9b74170d1be31 Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 5 Sep 2008 16:43:54 -0400 Subject: [PATCH 0697/2985] Btrfs: properly set blocksize when adding new device. --- Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 37a8ea23e81d..1546fa6f4f7d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1079,6 +1079,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (ret) goto out_close_bdev; + set_blocksize(device->bdev, 4096); + total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); btrfs_set_super_total_bytes(&root->fs_info->super_copy, total_bytes + device->total_bytes); From 4bef084857ab8fe71cf49eae349c25e440a49150 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 8 Sep 2008 11:18:08 -0400 Subject: [PATCH 0698/2985] Btrfs: Tree logging fixes * Pin down data blocks to prevent them from being reallocated like so: trans 1: allocate file extent trans 2: free file extent trans 3: free file extent during old snapshot deletion trans 3: allocate file extent to new file trans 3: fsync new file Before the tree logging code, this was legal because the fsync would commit the transation that did the final data extent free and the transaction that allocated the extent to the new file at the same time. With the tree logging code, the tree log subtransaction can commit before the transaction that freed the extent. If we crash, we're left with two different files using the extent. * Don't wait in start_transaction if log replay is going on. This avoids deadlocks from iput while we're cleaning up link counts in the replay code. * Don't deadlock in replay_one_name by trying to read an inode off the disk while holding paths for the directory * Hold the buffer lock while we mark a buffer as written. This closes a race where someone is changing a buffer while we write it. They are supposed to mark it dirty again after they change it, but this violates the cow rules. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 33 ++++++++++++++++++++++++++--- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent-tree.c | 20 +++++++++++++++--- fs/btrfs/extent_io.c | 47 ++++++++++++++++++++++++++++++++---------- fs/btrfs/extent_io.h | 1 + fs/btrfs/transaction.c | 16 +++++++++++--- fs/btrfs/tree-log.c | 13 ++++++++---- 7 files changed, 107 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a4373db5967a..42bf99168056 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -307,9 +307,7 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) goto err; } found_level = btrfs_header_level(eb); - spin_lock(&root->fs_info->hash_lock); - btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); - spin_unlock(&root->fs_info->hash_lock); + csum_tree_block(root, eb, 0); err: free_extent_buffer(eb); @@ -1998,7 +1996,36 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) return ret; } +int btree_lock_page_hook(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_buffer *eb; + unsigned long len; + u64 bytenr = page_offset(page); + + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + + len = page->private >> 2; + eb = find_extent_buffer(io_tree, bytenr, len, GFP_NOFS); + if (!eb) + goto out; + + btrfs_tree_lock(eb); + spin_lock(&root->fs_info->hash_lock); + btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + spin_unlock(&root->fs_info->hash_lock); + btrfs_tree_unlock(eb); + free_extent_buffer(eb); +out: + lock_page(page); + return 0; +} + static struct extent_io_ops btree_extent_io_ops = { + .write_cache_pages_lock_hook = btree_lock_page_hook, .writepage_io_hook = btree_writepage_io_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, .submit_bio_hook = btree_submit_bio_hook, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 6b6fdc697f31..f84f5058dbbb 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -80,4 +80,5 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); +int btree_lock_page_hook(struct page *page); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 646b9148ca21..3181759da1cf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1590,13 +1590,17 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, } static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, - int pending) + int is_data, int pending) { int err = 0; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); if (!pending) { struct extent_buffer *buf; + + if (is_data) + goto pinit; + buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { /* we can reuse a block if it hasn't been written @@ -1624,6 +1628,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, } free_extent_buffer(buf); } +pinit: btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); } else { set_extent_bits(&root->fs_info->pending_del, @@ -1744,7 +1749,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root #endif if (pin) { - ret = pin_down_bytes(root, bytenr, num_bytes, 0); + ret = pin_down_bytes(root, bytenr, num_bytes, + owner_objectid >= BTRFS_FIRST_FREE_OBJECTID, 0); if (ret > 0) mark_free = 1; BUG_ON(ret < 0); @@ -1862,9 +1868,17 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, ref_generation = 0; if (root == extent_root) { - pin_down_bytes(root, bytenr, num_bytes, 1); + pin_down_bytes(root, bytenr, num_bytes, 0, 1); return 0; } + /* if metadata always pin */ + if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) + pin = 1; + + /* if data pin when any transaction has committed this */ + if (ref_generation != trans->transid) + pin = 1; + ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid, ref_generation, owner_objectid, owner_offset, pin, pin == 0); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 239e7c908abf..319a0c7a4a58 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -29,7 +29,10 @@ static struct kmem_cache *extent_buffer_cache; static LIST_HEAD(buffers); static LIST_HEAD(states); + +#ifdef LEAK_DEBUG static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED; +#endif #define BUFFER_LRU_MAX 64 @@ -106,7 +109,9 @@ EXPORT_SYMBOL(extent_io_tree_init); struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; +#ifdef LEAK_DEBUG unsigned long flags; +#endif state = kmem_cache_alloc(extent_state_cache, mask); if (!state) @@ -114,10 +119,11 @@ struct extent_state *alloc_extent_state(gfp_t mask) state->state = 0; state->private = 0; state->tree = NULL; +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_add(&state->leak_list, &states); spin_unlock_irqrestore(&leak_lock, flags); - +#endif atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); return state; @@ -129,11 +135,15 @@ void free_extent_state(struct extent_state *state) if (!state) return; if (atomic_dec_and_test(&state->refs)) { +#ifdef LEAK_DEBUG unsigned long flags; +#endif WARN_ON(state->tree); +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_del(&state->leak_list); spin_unlock_irqrestore(&leak_lock, flags); +#endif kmem_cache_free(extent_state_cache, state); } } @@ -2070,13 +2080,13 @@ done: } #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) -/* Taken directly from 2.6.23 for 2.6.18 back port */ +/* Taken directly from 2.6.23 with a mod for a lockpage hook */ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, void *data); +#endif /** - * write_cache_pages - walk the list of dirty pages of the given address space - * and write all of them. + * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write * @writepage: function called for each page @@ -2090,9 +2100,10 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete. */ -static int write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, writepage_t writepage, - void *data) +int extent_write_cache_pages(struct extent_io_tree *tree, + struct address_space *mapping, + struct writeback_control *wbc, + writepage_t writepage, void *data) { struct backing_dev_info *bdi = mapping->backing_dev_info; int ret = 0; @@ -2138,7 +2149,10 @@ retry: * swizzled back from swapper_space to tmpfs file * mapping */ - lock_page(page); + if (tree->ops && tree->ops->write_cache_pages_lock_hook) + tree->ops->write_cache_pages_lock_hook(page); + else + lock_page(page); if (unlikely(page->mapping != mapping)) { unlock_page(page); @@ -2187,9 +2201,12 @@ retry: } if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; + + if (wbc->range_cont) + wbc->range_start = index << PAGE_CACHE_SHIFT; return ret; } -#endif +EXPORT_SYMBOL(extent_write_cache_pages); int extent_write_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, @@ -2214,7 +2231,8 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, ret = __extent_writepage(page, wbc, &epd); - write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); + extent_write_cache_pages(tree, mapping, &wbc_writepages, + __extent_writepage, &epd); if (epd.bio) { submit_one_bio(WRITE, epd.bio, 0); } @@ -2235,7 +2253,8 @@ int extent_writepages(struct extent_io_tree *tree, .get_extent = get_extent, }; - ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); + ret = extent_write_cache_pages(tree, mapping, wbc, + __extent_writepage, &epd); if (epd.bio) { submit_one_bio(WRITE, epd.bio, 0); } @@ -2567,15 +2586,19 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, gfp_t mask) { struct extent_buffer *eb = NULL; +#ifdef LEAK_DEBUG unsigned long flags; +#endif eb = kmem_cache_zalloc(extent_buffer_cache, mask); eb->start = start; eb->len = len; mutex_init(&eb->mutex); +#ifdef LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_add(&eb->leak_list, &buffers); spin_unlock_irqrestore(&leak_lock, flags); +#endif atomic_set(&eb->refs, 1); return eb; @@ -2583,10 +2606,12 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, static void __free_extent_buffer(struct extent_buffer *eb) { +#ifdef LEAK_DEBUG unsigned long flags; spin_lock_irqsave(&leak_lock, flags); list_del(&eb->leak_list); spin_unlock_irqrestore(&leak_lock, flags); +#endif kmem_cache_free(extent_buffer_cache, eb); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 315cfceae312..3cb411a5f4d3 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -50,6 +50,7 @@ struct extent_io_ops { unsigned long old, unsigned long bits); int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits); + int (*write_cache_pages_lock_hook)(struct page *page); }; struct extent_io_tree { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 49c4f5b40ed6..61a377bcb2fb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -161,7 +161,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, int ret; mutex_lock(&root->fs_info->trans_mutex); - if ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2) + if (!root->fs_info->log_root_recovering && + ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2)) wait_current_trans(root); ret = join_transaction(root); BUG_ON(ret); @@ -328,9 +329,17 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, index = start >> PAGE_CACHE_SHIFT; start = (u64)(index + 1) << PAGE_CACHE_SHIFT; - page = find_lock_page(btree_inode->i_mapping, index); + page = find_get_page(btree_inode->i_mapping, index); if (!page) continue; + + btree_lock_page_hook(page); + if (!page->mapping) { + unlock_page(page); + page_cache_release(page); + continue; + } + if (PageWriteback(page)) { if (PageDirty(page)) wait_on_page_writeback(page); @@ -360,7 +369,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, if (!page) continue; if (PageDirty(page)) { - lock_page(page); + btree_lock_page_hook(page); + wait_on_page_writeback(page); err = write_one_page(page, 0); if (err) werr = err; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d1ce8314b948..13d7ee8e0c52 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1176,8 +1176,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, struct btrfs_key found_key; struct btrfs_key log_key; struct inode *dir; - struct inode *inode; u8 log_type; + int exists; int ret; dir = read_one_inode(root, key->objectid); @@ -1190,6 +1190,13 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, name_len); btrfs_dir_item_key_to_cpu(eb, di, &log_key); + exists = btrfs_lookup_inode(trans, root, path, &log_key, 0); + if (exists == 0) + exists = 1; + else + exists = 0; + btrfs_release_path(root, path); + if (key->type == BTRFS_DIR_ITEM_KEY) { dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, name, name_len, 1); @@ -1224,11 +1231,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, * don't drop the conflicting directory entry if the inode * for the new entry doesn't exist */ - inode = read_one_inode(root, log_key.objectid); - if (!inode) + if (!exists) goto out; - iput(inode); ret = drop_one_dir_item(trans, root, path, dir, dst_di); BUG_ON(ret); From 8d5bf1cb35ea29795862ff0ea2f4c4d7e22727f3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 15:51:21 -0400 Subject: [PATCH 0699/2985] Btrfs: Update the highest objectid in a root after log replay is done Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 3 ++- fs/btrfs/tree-log.c | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b4dbb5c97646..073fdd77c784 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2239,8 +2239,9 @@ static int btrfs_set_inode_index(struct inode *dir, struct inode *inode, if (BTRFS_I(dir)->index_cnt == (u64)-1) { ret = btrfs_set_inode_index_count(dir); - if (ret) + if (ret) { return ret; + } } *index = BTRFS_I(dir)->index_cnt; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 13d7ee8e0c52..3f4b139b27ed 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1027,6 +1027,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, inode->i_nlink = nlink; btrfs_update_inode(trans, root, inode); } + BTRFS_I(inode)->index_cnt = (u64)-1; return 0; } @@ -2714,6 +2715,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) struct btrfs_key tmp_key; struct btrfs_root *log; struct btrfs_fs_info *fs_info = log_root_tree->fs_info; + u64 highest_inode; struct walk_control wc = { .process_func = process_one_buffer, .stage = 0, @@ -2772,6 +2774,11 @@ again: path); BUG_ON(ret); } + ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode); + if (ret == 0) { + wc.replay_dest->highest_inode = highest_inode; + wc.replay_dest->last_inode_alloc = highest_inode; + } key.offset = found_key.offset - 1; free_extent_buffer(log->node); From 98509cfc5a6857bddcfe4b19a9539726655ec9bd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 15:51:43 -0400 Subject: [PATCH 0700/2985] Btrfs: Fix releasepage to properly keep dirty and writeback pages Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++++- fs/btrfs/inode.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 42bf99168056..ecb74b720262 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -346,7 +346,7 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); found_start = btrfs_header_bytenr(eb); - if (found_start != start) { + if (0 && found_start != start) { printk("bad tree block start %llu %llu\n", (unsigned long long)found_start, (unsigned long long)eb->start); @@ -592,6 +592,9 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) struct extent_map_tree *map; int ret; + if (PageWriteback(page) || PageDirty(page)) + return 0; + tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 073fdd77c784..24b7e97fccb9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3060,6 +3060,8 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) { + if (PageWriteback(page) || PageDirty(page)) + return 0; return __btrfs_releasepage(page, gfp_flags); } From 49eb7e46d47ea72a9bd2a5f8cedb04f5159cc277 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 15:53:12 -0400 Subject: [PATCH 0701/2985] Btrfs: Dir fsync optimizations Drop i_mutex during the commit Don't bother doing the fsync at all unless the dir is marked as dirtied and needing fsync in this transaction. For directories, this means that someone has unlinked a file from the dir without fsyncing the file. Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 3 +++ fs/btrfs/file.c | 19 ++++++++++++++++++- fs/btrfs/inode.c | 5 ++++- fs/btrfs/tree-log.c | 8 ++++---- 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index fcc8cf27e906..0577fda2168a 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -56,6 +56,9 @@ struct btrfs_inode { * transid that last logged this inode */ u64 logged_trans; + + /* trans that last made a change that should be fully fsync'd */ + u64 log_dirty_trans; u64 delalloc_bytes; u64 disk_i_size; u32 flags; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 84ecf3ab8511..58b329ddb426 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1061,7 +1061,9 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } mutex_unlock(&root->fs_info->trans_mutex); + root->fs_info->tree_log_batch++; filemap_fdatawait(inode->i_mapping); + root->fs_info->tree_log_batch++; /* * ok we haven't committed the transaction yet, lets do a commit @@ -1076,14 +1078,29 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); - if (ret < 0) + if (ret < 0) { goto out; + } + + /* we've logged all the items and now have a consistent + * version of the file in the log. It is possible that + * someone will come in and modify the file, but that's + * fine because the log is consistent on disk, and we + * have references to all of the file's extents + * + * It is possible that someone will come in and log the + * file again, but that will end up using the synchronization + * inside btrfs_sync_log to keep things safe. + */ + mutex_unlock(&file->f_dentry->d_inode->i_mutex); + if (ret > 0) { ret = btrfs_commit_transaction(trans, root); } else { btrfs_sync_log(trans, root); ret = btrfs_end_transaction(trans, root); } + mutex_lock(&file->f_dentry->d_inode->i_mutex); out: return ret > 0 ? EIO : ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 24b7e97fccb9..12c1c0530f3d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1187,7 +1187,9 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, dir->i_ino); - BUG_ON(ret); + BUG_ON(ret != 0 && ret != -ENOENT); + if (ret != -ENOENT) + BTRFS_I(dir)->log_dirty_trans = trans->transid; ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, index); @@ -1790,6 +1792,7 @@ static noinline void init_btrfs_i(struct inode *inode) bi->disk_i_size = 0; bi->flags = 0; bi->index_cnt = (u64)-1; + bi->log_dirty_trans = 0; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3f4b139b27ed..5d49a701bdcd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1973,10 +1973,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, atomic_set(&log->fs_info->tree_log_commit, 1); while(1) { + batch = log->fs_info->tree_log_batch; mutex_unlock(&log->fs_info->tree_log_mutex); schedule_timeout_uninterruptible(1); mutex_lock(&log->fs_info->tree_log_mutex); - batch = log->fs_info->tree_log_batch; while(atomic_read(&log->fs_info->tree_log_writers)) { DEFINE_WAIT(wait); @@ -2189,8 +2189,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, mutex_unlock(&BTRFS_I(inode)->log_mutex); end_log_trans(root); - if (ret == 0 || ret == -ENOENT) - return 0; return ret; } @@ -2620,9 +2618,11 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, else break; } - if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { + if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode) && + BTRFS_I(inode)->log_dirty_trans >= trans->transid) { btrfs_release_path(root, path); btrfs_release_path(log, dst_path); + BTRFS_I(inode)->log_dirty_trans = 0; ret = log_directory_changes(trans, root, inode, path, dst_path); BUG_ON(ret); } From 3a5f1d458ad1610a06e38f0be2fbc6ac215439c0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 15:53:37 -0400 Subject: [PATCH 0702/2985] Btrfs: Optimize btree walking while logging inodes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/tree-log.c | 25 +++++++++++++++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ecb74b720262..7c06eb4ecfdd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1497,7 +1497,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, * low idle thresh */ fs_info->endio_workers.idle_thresh = 4; - fs_info->endio_write_workers.idle_thresh = 4; + fs_info->endio_write_workers.idle_thresh = 64; btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 5d49a701bdcd..f43ee33ec2dc 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1982,7 +1982,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, DEFINE_WAIT(wait); prepare_to_wait(&log->fs_info->tree_log_wait, &wait, TASK_UNINTERRUPTIBLE); - batch = log->fs_info->tree_log_batch; mutex_unlock(&log->fs_info->tree_log_mutex); if (atomic_read(&log->fs_info->tree_log_writers)) schedule(); @@ -2024,8 +2023,7 @@ out: } -/* - * free all the extents used by the tree log. This should be called +/* * free all the extents used by the tree log. This should be called * at commit time of the full transaction */ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) @@ -2107,6 +2105,9 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, int ret; int bytes_del = 0; + if (BTRFS_I(dir)->logged_trans < trans->transid) + return 0; + ret = join_running_log_trans(root); if (ret) return 0; @@ -2178,6 +2179,9 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, u64 index; int ret; + if (BTRFS_I(inode)->logged_trans < trans->transid) + return 0; + ret = join_running_log_trans(root); if (ret) return 0; @@ -2484,6 +2488,7 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_inode_item *inode_item; u32 size; int ret; + int nritems; log = root->log_root; @@ -2541,12 +2546,11 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, path, 0, trans->transid); if (ret != 0) break; - +again: if (min_key.objectid != inode->i_ino) break; if (min_key.type > max_key.type) break; - src = path->nodes[0]; size = btrfs_item_size_nr(src, path->slots[0]); ret = btrfs_insert_empty_item(trans, log, dst_path, &min_key, @@ -2606,9 +2610,17 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, } btrfs_mark_buffer_dirty(dst_path->nodes[0]); - btrfs_release_path(root, path); btrfs_release_path(log, dst_path); + nritems = btrfs_header_nritems(path->nodes[0]); + path->slots[0]++; + if (path->slots[0] < nritems) { + btrfs_item_key_to_cpu(path->nodes[0], &min_key, + path->slots[0]); + goto again; + } + btrfs_release_path(root, path); + if (min_key.offset < (u64)-1) min_key.offset++; else if (min_key.type < (u8)-1) @@ -2626,6 +2638,7 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, ret = log_directory_changes(trans, root, inode, path, dst_path); BUG_ON(ret); } + BTRFS_I(inode)->logged_trans = trans->transid; mutex_unlock(&BTRFS_I(inode)->log_mutex); btrfs_free_path(path); From d00aff00139b40f2e9c60299d76aac29d72e48ba Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 15:54:42 -0400 Subject: [PATCH 0703/2985] Btrfs: Optimize tree log block allocations Since tree log blocks get freed every transaction, they never really need to be written to disk. This skips the step where we update metadata to record they were allocated. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++--- fs/btrfs/extent-tree.c | 19 ++++++++++++++----- fs/btrfs/tree-log.c | 20 +++++++------------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7c06eb4ecfdd..5edb7f885799 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -829,9 +829,8 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_level(eb) != 0); WARN_ON(btrfs_header_nritems(eb) != 0); - ret = btrfs_free_extent(trans, fs_info->tree_root, - eb->start, eb->len, - BTRFS_TREE_LOG_OBJECTID, 0, 0, 0, 1); + ret = btrfs_free_reserved_extent(fs_info->tree_root, + eb->start, eb->len); BUG_ON(ret); free_extent_buffer(eb); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3181759da1cf..c479d71e2869 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1872,8 +1872,15 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, return 0; } /* if metadata always pin */ - if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) + if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + /* btrfs_free_reserved_extent */ + set_extent_dirty(&root->fs_info->free_space_cache, + bytenr, bytenr + num_bytes - 1, GFP_NOFS); + return 0; + } pin = 1; + } /* if data pin when any transaction has committed this */ if (ref_generation != trans->transid) @@ -2361,11 +2368,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, min_alloc_size, empty_size, hint_byte, search_end, ins, data); BUG_ON(ret); - ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, - ref_generation, owner, - owner_offset, ins); - BUG_ON(ret); + if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, + ref_generation, owner, + owner_offset, ins); + BUG_ON(ret); + } maybe_unlock_mutex(root); return ret; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f43ee33ec2dc..5f77bee0f846 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1728,9 +1728,8 @@ static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, root_owner, - root_gen, 0, 0, 1); + ret = btrfs_free_reserved_extent(root, + bytenr, blocksize); BUG_ON(ret); } free_extent_buffer(next); @@ -1775,8 +1774,7 @@ static int noinline walk_down_log_tree(struct btrfs_trans_handle *trans, BUG_ON(ret); } WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_extent(trans, root, bytenr, blocksize, - root_owner, root_gen, 0, 0, 1); + ret = btrfs_free_reserved_extent(root, bytenr, blocksize); BUG_ON(ret); } free_extent_buffer(path->nodes[*level]); @@ -1837,10 +1835,9 @@ static int noinline walk_up_log_tree(struct btrfs_trans_handle *trans, } WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_extent(trans, root, + ret = btrfs_free_reserved_extent(root, path->nodes[*level]->start, - path->nodes[*level]->len, - root_owner, root_gen, 0, 0, 1); + path->nodes[*level]->len); BUG_ON(ret); } free_extent_buffer(path->nodes[*level]); @@ -1910,11 +1907,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, } WARN_ON(log->root_key.objectid != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_extent(trans, log, - next->start, next->len, - log->root_key.objectid, - btrfs_header_generation(next), - 0, 0, 1); + ret = btrfs_free_reserved_extent(log, next->start, + next->len); BUG_ON(ret); } } From 31ff1cd25d376e8f499d450de177dffadc9e1c56 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 16:17:57 -0400 Subject: [PATCH 0704/2985] Btrfs: Copy into the log tree in big batches This changes the log tree copy code to use btrfs_insert_items and to work in larger batches where possible. Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 183 +++++++++++++++++++++++++++++--------------- 1 file changed, 122 insertions(+), 61 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 5f77bee0f846..ae96451bc223 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2452,6 +2452,94 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, return 0; } +static noinline int copy_items(struct btrfs_trans_handle *trans, + struct btrfs_root *log, + struct btrfs_path *dst_path, + struct extent_buffer *src, + int start_slot, int nr, int inode_only) +{ + unsigned long src_offset; + unsigned long dst_offset; + struct btrfs_file_extent_item *extent; + struct btrfs_inode_item *inode_item; + int ret; + struct btrfs_key *ins_keys; + u32 *ins_sizes; + char *ins_data; + int i; + + ins_data = kmalloc(nr * sizeof(struct btrfs_key) + + nr * sizeof(u32), GFP_NOFS); + ins_sizes = (u32 *)ins_data; + ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); + + for (i = 0; i < nr; i++) { + ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot); + btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot); + } + ret = btrfs_insert_empty_items(trans, log, dst_path, + ins_keys, ins_sizes, nr); + BUG_ON(ret); + + for (i = 0; i < nr; i++) { + dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], + dst_path->slots[0]); + + src_offset = btrfs_item_ptr_offset(src, start_slot + i); + + copy_extent_buffer(dst_path->nodes[0], src, dst_offset, + src_offset, ins_sizes[i]); + + if (inode_only == LOG_INODE_EXISTS && + ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { + inode_item = btrfs_item_ptr(dst_path->nodes[0], + dst_path->slots[0], + struct btrfs_inode_item); + btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0); + + /* set the generation to zero so the recover code + * can tell the difference between an logging + * just to say 'this inode exists' and a logging + * to say 'update this inode with these values' + */ + btrfs_set_inode_generation(dst_path->nodes[0], + inode_item, 0); + } + /* take a reference on file data extents so that truncates + * or deletes of this inode don't have to relog the inode + * again + */ + if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY) { + int found_type; + extent = btrfs_item_ptr(src, start_slot + i, + struct btrfs_file_extent_item); + + found_type = btrfs_file_extent_type(src, extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + u64 ds = btrfs_file_extent_disk_bytenr(src, + extent); + u64 dl = btrfs_file_extent_disk_num_bytes(src, + extent); + /* ds == 0 is a hole */ + if (ds != 0) { + ret = btrfs_inc_extent_ref(trans, log, + ds, dl, + BTRFS_TREE_LOG_OBJECTID, + 0, ins_keys[i].objectid, + ins_keys[i].offset); + BUG_ON(ret); + } + } + } + dst_path->slots[0]++; + } + + btrfs_mark_buffer_dirty(dst_path->nodes[0]); + btrfs_release_path(log, dst_path); + kfree(ins_data); + return 0; +} + /* log a single inode in the tree log. * At least one parent directory for this inode must exist in the tree * or be logged already. @@ -2475,14 +2563,12 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_key min_key; struct btrfs_key max_key; struct btrfs_root *log = root->log_root; - unsigned long src_offset; - unsigned long dst_offset; - struct extent_buffer *src; - struct btrfs_file_extent_item *extent; - struct btrfs_inode_item *inode_item; + struct extent_buffer *src = NULL; u32 size; int ret; int nritems; + int ins_start_slot = 0; + int ins_nr; log = root->log_root; @@ -2536,75 +2622,35 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, path->keep_locks = 1; while(1) { + ins_nr = 0; ret = btrfs_search_forward(root, &min_key, &max_key, path, 0, trans->transid); if (ret != 0) break; again: + /* note, ins_nr might be > 0 here, cleanup outside the loop */ if (min_key.objectid != inode->i_ino) break; if (min_key.type > max_key.type) break; + src = path->nodes[0]; size = btrfs_item_size_nr(src, path->slots[0]); - ret = btrfs_insert_empty_item(trans, log, dst_path, &min_key, - size); - if (ret) - BUG(); - - dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], - dst_path->slots[0]); - - src_offset = btrfs_item_ptr_offset(src, path->slots[0]); - - copy_extent_buffer(dst_path->nodes[0], src, dst_offset, - src_offset, size); - - if (inode_only == LOG_INODE_EXISTS && - min_key.type == BTRFS_INODE_ITEM_KEY) { - inode_item = btrfs_item_ptr(dst_path->nodes[0], - dst_path->slots[0], - struct btrfs_inode_item); - btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0); - - /* set the generation to zero so the recover code - * can tell the difference between an logging - * just to say 'this inode exists' and a logging - * to say 'update this inode with these values' - */ - btrfs_set_inode_generation(dst_path->nodes[0], - inode_item, 0); - } - /* take a reference on file data extents so that truncates - * or deletes of this inode don't have to relog the inode - * again - */ - if (btrfs_key_type(&min_key) == BTRFS_EXTENT_DATA_KEY) { - int found_type; - extent = btrfs_item_ptr(src, path->slots[0], - struct btrfs_file_extent_item); - - found_type = btrfs_file_extent_type(src, extent); - if (found_type == BTRFS_FILE_EXTENT_REG) { - u64 ds = btrfs_file_extent_disk_bytenr(src, - extent); - u64 dl = btrfs_file_extent_disk_num_bytes(src, - extent); - /* ds == 0 is a hole */ - if (ds != 0) { - ret = btrfs_inc_extent_ref(trans, log, - ds, dl, - log->root_key.objectid, - 0, - inode->i_ino, - min_key.offset); - BUG_ON(ret); - } - } + if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { + ins_nr++; + goto next_slot; + } else if (!ins_nr) { + ins_start_slot = path->slots[0]; + ins_nr = 1; + goto next_slot; } - btrfs_mark_buffer_dirty(dst_path->nodes[0]); - btrfs_release_path(log, dst_path); + ret = copy_items(trans, log, dst_path, src, ins_start_slot, + ins_nr, inode_only); + BUG_ON(ret); + ins_nr = 1; + ins_start_slot = path->slots[0]; +next_slot: nritems = btrfs_header_nritems(path->nodes[0]); path->slots[0]++; @@ -2613,6 +2659,13 @@ again: path->slots[0]); goto again; } + if (ins_nr) { + ret = copy_items(trans, log, dst_path, src, + ins_start_slot, + ins_nr, inode_only); + BUG_ON(ret); + ins_nr = 0; + } btrfs_release_path(root, path); if (min_key.offset < (u64)-1) @@ -2624,6 +2677,14 @@ again: else break; } + if (ins_nr) { + ret = copy_items(trans, log, dst_path, src, + ins_start_slot, + ins_nr, inode_only); + BUG_ON(ret); + ins_nr = 0; + } + WARN_ON(ins_nr); if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode) && BTRFS_I(inode)->log_dirty_trans >= trans->transid) { btrfs_release_path(root, path); From d0c803c4049c5ca322d4795d8b74f28768603e0e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 16:17:57 -0400 Subject: [PATCH 0705/2985] Btrfs: Record dirty pages tree-log pages in an extent_io tree This is the same way the transaction code makes sure that all the other tree blocks are safely on disk. There's an extent_io tree for each root, and any blocks allocated to the tree logs are recorded in that tree. At tree-log sync, the extent_io tree is walked to flush down the dirty pages and wait for them. The main benefit is less time spent walking the tree log and skipping clean pages, and getting sequential IO down to the drive. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/disk-io.c | 17 +++++++++++++++-- fs/btrfs/extent-tree.c | 7 ++++++- fs/btrfs/transaction.c | 21 ++++++++++++++------- fs/btrfs/transaction.h | 2 ++ fs/btrfs/tree-log.c | 31 ++++++++++++++++--------------- 6 files changed, 55 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2ed6918f32e5..eb65fd808883 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -668,6 +668,8 @@ struct btrfs_root { struct btrfs_key root_key; struct btrfs_fs_info *fs_info; struct inode *inode; + struct extent_io_tree dirty_log_pages; + struct kobject root_kobj; struct completion kobj_unregister; struct mutex objectid_mutex; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5edb7f885799..57fbf107e59f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -777,6 +777,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); + extent_io_tree_init(&root->dirty_log_pages, + fs_info->btree_inode->i_mapping, GFP_NOFS); btrfs_leaf_ref_tree_init(&root->ref_tree_struct); root->ref_tree = &root->ref_tree_struct; @@ -819,11 +821,23 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { struct extent_buffer *eb; + struct btrfs_root *log_root_tree = fs_info->log_root_tree; + u64 start = 0; + u64 end = 0; int ret; - if (!fs_info->log_root_tree) + if (!log_root_tree) return 0; + while(1) { + ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, + 0, &start, &end, EXTENT_DIRTY); + if (ret) + break; + + clear_extent_dirty(&log_root_tree->dirty_log_pages, + start, end, GFP_NOFS); + } eb = fs_info->log_root_tree->node; WARN_ON(btrfs_header_level(eb) != 0); @@ -1412,7 +1426,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); insert_inode_hash(fs_info->btree_inode); - mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->tree_log_mutex); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c479d71e2869..c0bb6b9ac4c6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2392,8 +2392,13 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, btrfs_tree_lock(buf); clean_tree_block(trans, root, buf); btrfs_set_buffer_uptodate(buf); - set_extent_dirty(&trans->transaction->dirty_pages, buf->start, + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + set_extent_dirty(&root->dirty_log_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); + } else { + set_extent_dirty(&trans->transaction->dirty_pages, buf->start, + buf->start + buf->len - 1, GFP_NOFS); + } trans->blocks_used++; return buf; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 61a377bcb2fb..151b00d52593 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -302,23 +302,18 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, } -int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages) { int ret; int err = 0; int werr = 0; - struct extent_io_tree *dirty_pages; struct page *page; struct inode *btree_inode = root->fs_info->btree_inode; u64 start = 0; u64 end; unsigned long index; - if (!trans || !trans->transaction) { - return filemap_write_and_wait(btree_inode->i_mapping); - } - dirty_pages = &trans->transaction->dirty_pages; while(1) { ret = find_first_extent_bit(dirty_pages, start, &start, &end, EXTENT_DIRTY); @@ -385,6 +380,18 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, return werr; } +int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + if (!trans || !trans->transaction) { + struct inode *btree_inode; + btree_inode = root->fs_info->btree_inode; + return filemap_write_and_wait(btree_inode->i_mapping); + } + return btrfs_write_and_wait_marked_extents(root, + &trans->transaction->dirty_pages); +} + static int update_cowonly_root(struct btrfs_trans_handle *trans, struct btrfs_root *root) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index cc63650d60d6..eef2cb7d7e78 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -99,4 +99,6 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, struct btrfs_root *root); void btrfs_throttle(struct btrfs_root *root); int btrfs_record_root_in_trans(struct btrfs_root *root); +int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages); #endif diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ae96451bc223..bfa710800963 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1954,10 +1954,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, int ret; unsigned long batch; struct btrfs_root *log = root->log_root; - struct walk_control wc = { - .write = 1, - .process_func = process_one_buffer - }; mutex_lock(&log->fs_info->tree_log_mutex); if (atomic_read(&log->fs_info->tree_log_commit)) { @@ -1985,18 +1981,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (batch == log->fs_info->tree_log_batch) break; } - ret = walk_log_tree(trans, log, &wc); + + ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); BUG_ON(ret); - - ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc); - BUG_ON(ret); - - wc.wait = 1; - - ret = walk_log_tree(trans, log, &wc); - BUG_ON(ret); - - ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc); + ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree, + &root->fs_info->log_root_tree->dirty_log_pages); BUG_ON(ret); btrfs_set_super_log_root(&root->fs_info->super_for_commit, @@ -2025,6 +2014,8 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) int ret; struct btrfs_root *log; struct key; + u64 start; + u64 end; struct walk_control wc = { .free = 1, .process_func = process_one_buffer @@ -2037,6 +2028,16 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) ret = walk_log_tree(trans, log, &wc); BUG_ON(ret); + while(1) { + ret = find_first_extent_bit(&log->dirty_log_pages, + 0, &start, &end, EXTENT_DIRTY); + if (ret) + break; + + clear_extent_dirty(&log->dirty_log_pages, + start, end, GFP_NOFS); + } + log = root->log_root; ret = btrfs_del_root(trans, root->fs_info->log_root_tree, &log->root_key); From 9623f9a3894ec95fe140ee5460bb840ac53bc6a0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 17:42:42 -0400 Subject: [PATCH 0706/2985] Btrfs: Disable the dir fsync optimization to skip logging the dir sometimes More testing has turned up a bug, disable this for now. Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index bfa710800963..56de3fb2d8d5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2686,8 +2686,7 @@ next_slot: ins_nr = 0; } WARN_ON(ins_nr); - if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode) && - BTRFS_I(inode)->log_dirty_trans >= trans->transid) { + if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { btrfs_release_path(root, path); btrfs_release_path(log, dst_path); BTRFS_I(inode)->log_dirty_trans = 0; From 23a07867b78ee0f33b01466e52bb608c336b26ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 12 Sep 2008 08:57:47 -0400 Subject: [PATCH 0707/2985] Btrfs: Fix mismerge in block header checks I had incorrectly disabled the check for the block number being correct in the header block. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 57fbf107e59f..f6f7821d43a5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -346,7 +346,7 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); found_start = btrfs_header_bytenr(eb); - if (0 && found_start != start) { + if (found_start != start) { printk("bad tree block start %llu %llu\n", (unsigned long long)found_start, (unsigned long long)eb->start); From f25956cc582f5954033cca3f9488c0a5d5193541 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 12 Sep 2008 15:32:53 -0400 Subject: [PATCH 0708/2985] Fix leaf overflow check in btrfs_insert_empty_items It was incorrectly adding an extra sizeof(struct btrfs_item) and causing false positives (oops) Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ab07557bac07..18e84472abb5 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2737,8 +2737,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); - if (btrfs_leaf_free_space(root, leaf) < - sizeof(struct btrfs_item) + total_size) { + if (btrfs_leaf_free_space(root, leaf) < total_size) { btrfs_print_leaf(root, leaf); printk("not enough freespace need %u have %d\n", total_size, btrfs_leaf_free_space(root, leaf)); From ef8bbdfe7e12dc9b4e80756f6d606c4639c65851 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 23 Sep 2008 13:14:11 -0400 Subject: [PATCH 0709/2985] Btrfs: fix cache_block_group error handling cache block group had a few bugs in the error handling code, this makes sure paths get properly released and the correct return value goes out. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c0bb6b9ac4c6..1c10ffc837c8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -66,7 +66,7 @@ static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) { struct btrfs_path *path; - int ret; + int ret = 0; struct btrfs_key key; struct extent_buffer *leaf; struct extent_io_tree *free_space_cache; @@ -102,10 +102,10 @@ static int cache_block_group(struct btrfs_root *root, btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) - return ret; + goto err; ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); if (ret < 0) - return ret; + goto err; if (ret == 0) { leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); @@ -161,9 +161,10 @@ next: last + hole_size - 1, GFP_NOFS); } block_group->cached = 1; + ret = 0; err: btrfs_free_path(path); - return 0; + return ret; } struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct From 0f9dd46cda36b8de3b9f48bc42bd09d20b9c3b52 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 23 Sep 2008 13:14:11 -0400 Subject: [PATCH 0710/2985] Btrfs: free space accounting redo 1) replace the per fs_info extent_io_tree that tracked free space with two rb-trees per block group to track free space areas via offset and size. The reason to do this is because most allocations come with a hint byte where to start, so we can usually find a chunk of free space at that hint byte to satisfy the allocation and get good space packing. If we cannot find free space at or after the given offset we fall back on looking for a chunk of the given size as close to that given offset as possible. When we fall back on the size search we also try to find a slot as close to the size we want as possible, to avoid breaking small chunks off of huge areas if possible. 2) remove the extent_io_tree that tracked the block group cache from fs_info and replaced it with an rb-tree thats tracks block group cache via offset. also added a per space_info list that tracks the block group cache for the particular space so we can lookup related block groups easily. 3) cleaned up the allocation code to make it a little easier to read and a little less complicated. Basically there are 3 steps, first look from our provided hint. If we couldn't find from that given hint, start back at our original search start and look for space from there. If that fails try to allocate space if we can and start looking again. If not we're screwed and need to start over again. 4) small fixes. there were some issues in volumes.c where we wouldn't allocate the rest of the disk. fixed cow_file_range to actually pass the alloc_hint, which has helped a good bit in making the fs_mark test I run have semi-normal results as we run out of space. Generally with data allocations we don't track where we last allocated from, so everytime we did a data allocation we'd search through every block group that we have looking for free space. Now searching a block group with no free space isn't terribly time consuming, it was causing a slight degradation as we got more data block groups. The alloc_hint has fixed this slight degredation and made things semi-normal. There is still one nagging problem I'm working on where we will get ENOSPC when there is definitely plenty of space. This only happens with metadata allocations, and only when we are almost full. So you generally hit the 85% mark first, but sometimes you'll hit the BUG before you hit the 85% wall. I'm still tracking it down, but until then this seems to be pretty stable and make a significant performance gain. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.c | 3 +- fs/btrfs/ctree.h | 46 +- fs/btrfs/disk-io.c | 7 +- fs/btrfs/extent-tree.c | 871 +++++++++++++++++++----------------- fs/btrfs/extent_io.c | 4 + fs/btrfs/free-space-cache.c | 415 +++++++++++++++++ fs/btrfs/inode.c | 3 +- fs/btrfs/volumes.c | 11 +- 9 files changed, 926 insertions(+), 436 deletions(-) create mode 100644 fs/btrfs/free-space-cache.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index b7addbfd8c22..eb36ae981bdc 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -7,7 +7,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - ref-cache.o export.o tree-log.o acl.o + ref-cache.o export.o tree-log.o acl.o free-space-cache.o else # Normal Makefile diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 18e84472abb5..6f467901246f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2725,9 +2725,8 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, total_size = total_data + (nr * sizeof(struct btrfs_item)); ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); - if (ret == 0) { + if (ret == 0) return -EEXIST; - } if (ret < 0) goto out; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index eb65fd808883..730aae3bc181 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -483,7 +483,6 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_DUP (1 << 5) #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) - struct btrfs_block_group_item { __le64 used; __le64 chunk_objectid; @@ -498,17 +497,40 @@ struct btrfs_space_info { int full; int force_alloc; struct list_head list; + + /* for block groups in our same type */ + struct list_head block_groups; + spinlock_t lock; +}; + +struct btrfs_free_space { + struct rb_node bytes_index; + struct rb_node offset_index; + u64 offset; + u64 bytes; }; struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; - struct btrfs_space_info *space_info; spinlock_t lock; u64 pinned; u64 flags; int cached; int ro; + int dirty; + + struct btrfs_space_info *space_info; + + /* free space cache stuff */ + struct rb_root free_space_bytes; + struct rb_root free_space_offset; + + /* block group cache stuff */ + struct rb_node cache_node; + + /* for block groups in the same raid type */ + struct list_head list; }; struct btrfs_device; @@ -525,8 +547,10 @@ struct btrfs_fs_info { struct btrfs_root *log_root_tree; struct radix_tree_root fs_roots_radix; - struct extent_io_tree free_space_cache; - struct extent_io_tree block_group_cache; + /* block group cache stuff */ + spinlock_t block_group_cache_lock; + struct rb_root block_group_cache_tree; + struct extent_io_tree pinned_extents; struct extent_io_tree pending_del; struct extent_io_tree extent_ins; @@ -1814,4 +1838,18 @@ int btrfs_sync_fs(struct super_block *sb, int wait); int btrfs_check_acl(struct inode *inode, int mask); int btrfs_init_acl(struct inode *inode, struct inode *dir); int btrfs_acl_chmod(struct inode *inode); + +/* free-space-cache.c */ +int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, + u64 bytenr, u64 size); +int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, + u64 bytenr, u64 size); +void btrfs_remove_free_space_cache(struct btrfs_block_group_cache + *block_group); +struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache + *block_group, u64 offset, + u64 bytes); +void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, + u64 bytes); +u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f6f7821d43a5..535bd0fe1a71 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1410,10 +1410,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; - extent_io_tree_init(&fs_info->free_space_cache, - fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&fs_info->block_group_cache, - fs_info->btree_inode->i_mapping, GFP_NOFS); + spin_lock_init(&fs_info->block_group_cache_lock); + fs_info->block_group_cache_tree.rb_node = NULL; + extent_io_tree_init(&fs_info->pinned_extents, fs_info->btree_inode->i_mapping, GFP_NOFS); extent_io_tree_init(&fs_info->pending_del, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1c10ffc837c8..813566acc5d3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -29,12 +29,6 @@ #include "locking.h" #include "ref-cache.h" -#define BLOCK_GROUP_DATA EXTENT_WRITEBACK -#define BLOCK_GROUP_METADATA EXTENT_UPTODATE -#define BLOCK_GROUP_SYSTEM EXTENT_NEW - -#define BLOCK_GROUP_DIRTY EXTENT_DIRTY - static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -62,6 +56,127 @@ void maybe_unlock_mutex(struct btrfs_root *root) } } +static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) +{ + return (cache->flags & bits) == bits; +} + +/* + * this adds the block group to the fs_info rb tree for the block group + * cache + */ +int btrfs_add_block_group_cache(struct btrfs_fs_info *info, + struct btrfs_block_group_cache *block_group) +{ + struct rb_node **p; + struct rb_node *parent = NULL; + struct btrfs_block_group_cache *cache; + + spin_lock(&info->block_group_cache_lock); + p = &info->block_group_cache_tree.rb_node; + + while (*p) { + parent = *p; + cache = rb_entry(parent, struct btrfs_block_group_cache, + cache_node); + if (block_group->key.objectid < cache->key.objectid) { + p = &(*p)->rb_left; + } else if (block_group->key.objectid > cache->key.objectid) { + p = &(*p)->rb_right; + } else { + spin_unlock(&info->block_group_cache_lock); + return -EEXIST; + } + } + + rb_link_node(&block_group->cache_node, parent, p); + rb_insert_color(&block_group->cache_node, + &info->block_group_cache_tree); + spin_unlock(&info->block_group_cache_lock); + + return 0; +} + +/* + * This will return the block group at or after bytenr if contains is 0, else + * it will return the block group that contains the bytenr + */ +static struct btrfs_block_group_cache * +block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, + int contains) +{ + struct btrfs_block_group_cache *cache, *ret = NULL; + struct rb_node *n; + u64 end, start; + + spin_lock(&info->block_group_cache_lock); + n = info->block_group_cache_tree.rb_node; + + while (n) { + cache = rb_entry(n, struct btrfs_block_group_cache, + cache_node); + end = cache->key.objectid + cache->key.offset - 1; + start = cache->key.objectid; + + if (bytenr < start) { + if (!contains && (!ret || start < ret->key.objectid)) + ret = cache; + n = n->rb_left; + } else if (bytenr > start) { + if (contains && bytenr <= end) { + ret = cache; + break; + } + n = n->rb_right; + } else { + ret = cache; + break; + } + } + spin_unlock(&info->block_group_cache_lock); + + return ret; +} + +/* + * this is only called by cache_block_group, since we could have freed extents + * we need to check the pinned_extents for any extents that can't be used yet + * since their free space will be released as soon as the transaction commits. + */ +static int add_new_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_fs_info *info, u64 start, u64 end) +{ + u64 extent_start, extent_end, size; + int ret; + + while (start < end) { + ret = find_first_extent_bit(&info->pinned_extents, start, + &extent_start, &extent_end, + EXTENT_DIRTY); + if (ret) + break; + + if (extent_start == start) { + start = extent_end + 1; + } else if (extent_start > start && extent_start < end) { + size = extent_start - start; + ret = btrfs_add_free_space(block_group, start, size); + BUG_ON(ret); + start = extent_end + 1; + } else { + break; + } + } + + if (start < end) { + size = end - start; + ret = btrfs_add_free_space(block_group, start, size); + BUG_ON(ret); + } + + return 0; +} + static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) { @@ -69,10 +184,8 @@ static int cache_block_group(struct btrfs_root *root, int ret = 0; struct btrfs_key key; struct extent_buffer *leaf; - struct extent_io_tree *free_space_cache; int slot; u64 last = 0; - u64 hole_size; u64 first_free; int found = 0; @@ -80,7 +193,6 @@ static int cache_block_group(struct btrfs_root *root, return 0; root = root->fs_info->extent_root; - free_space_cache = &root->fs_info->free_space_cache; if (block_group->cached) return 0; @@ -96,7 +208,8 @@ static int cache_block_group(struct btrfs_root *root, * skip the locking here */ path->skip_locking = 1; - first_free = block_group->key.objectid; + first_free = max_t(u64, block_group->key.objectid, + BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); key.objectid = block_group->key.objectid; key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -119,32 +232,28 @@ static int cache_block_group(struct btrfs_root *root, ret = btrfs_next_leaf(root, path); if (ret < 0) goto err; - if (ret == 0) { + if (ret == 0) continue; - } else { + else break; - } } btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid < block_group->key.objectid) { + if (key.objectid < block_group->key.objectid) goto next; - } + if (key.objectid >= block_group->key.objectid + - block_group->key.offset) { + block_group->key.offset) break; - } if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { if (!found) { last = first_free; found = 1; } - if (key.objectid > last) { - hole_size = key.objectid - last; - set_extent_dirty(free_space_cache, last, - last + hole_size - 1, - GFP_NOFS); - } + + add_new_free_space(block_group, root->fs_info, last, + key.objectid); + last = key.objectid + key.offset; } next: @@ -153,13 +262,11 @@ next: if (!found) last = first_free; - if (block_group->key.objectid + - block_group->key.offset > last) { - hole_size = block_group->key.objectid + - block_group->key.offset - last; - set_extent_dirty(free_space_cache, last, - last + hole_size - 1, GFP_NOFS); - } + + add_new_free_space(block_group, root->fs_info, last, + block_group->key.objectid + + block_group->key.offset); + block_group->cached = 1; ret = 0; err: @@ -167,166 +274,79 @@ err: return ret; } +/* + * return the block group that starts at or after bytenr + */ struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) { - struct extent_io_tree *block_group_cache; - struct btrfs_block_group_cache *block_group = NULL; - u64 ptr; - u64 start; - u64 end; - int ret; + struct btrfs_block_group_cache *cache; - bytenr = max_t(u64, bytenr, - BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); - block_group_cache = &info->block_group_cache; - ret = find_first_extent_bit(block_group_cache, - bytenr, &start, &end, - BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | - BLOCK_GROUP_SYSTEM); - if (ret) { - return NULL; - } - ret = get_state_private(block_group_cache, start, &ptr); - if (ret) - return NULL; + cache = block_group_cache_tree_search(info, bytenr, 0); - block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; - return block_group; + return cache; } +/* + * return the block group that contains teh given bytenr + */ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr) { - struct extent_io_tree *block_group_cache; - struct btrfs_block_group_cache *block_group = NULL; - u64 ptr; - u64 start; - u64 end; - int ret; + struct btrfs_block_group_cache *cache; - bytenr = max_t(u64, bytenr, - BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); - block_group_cache = &info->block_group_cache; - ret = find_first_extent_bit(block_group_cache, - bytenr, &start, &end, - BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | - BLOCK_GROUP_SYSTEM); - if (ret) { - return NULL; - } - ret = get_state_private(block_group_cache, start, &ptr); - if (ret) - return NULL; + cache = block_group_cache_tree_search(info, bytenr, 1); - block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; - if (block_group->key.objectid <= bytenr && bytenr < - block_group->key.objectid + block_group->key.offset) - return block_group; - return NULL; + return cache; } -static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) -{ - return (cache->flags & bits) == bits; -} - -static int noinline find_search_start(struct btrfs_root *root, - struct btrfs_block_group_cache **cache_ret, - u64 *start_ret, u64 num, int data) +static int noinline find_free_space(struct btrfs_root *root, + struct btrfs_block_group_cache **cache_ret, + u64 *start_ret, u64 num, int data) { int ret; struct btrfs_block_group_cache *cache = *cache_ret; - struct extent_io_tree *free_space_cache; - struct extent_state *state; + struct btrfs_free_space *info = NULL; u64 last; - u64 start = 0; - u64 cache_miss = 0; u64 total_fs_bytes; u64 search_start = *start_ret; - int wrapped = 0; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - free_space_cache = &root->fs_info->free_space_cache; if (!cache) goto out; + last = max(search_start, cache->key.objectid); + again: ret = cache_block_group(root, cache); - if (ret) { + if (ret) goto out; - } - last = max(search_start, cache->key.objectid); - if (!block_group_bits(cache, data) || cache->ro) + if (cache->ro || !block_group_bits(cache, data)) goto new_group; - spin_lock_irq(&free_space_cache->lock); - state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY); - while(1) { - if (!state) { - if (!cache_miss) - cache_miss = last; - spin_unlock_irq(&free_space_cache->lock); - goto new_group; - } - - start = max(last, state->start); - last = state->end + 1; - if (last - start < num) { - do { - state = extent_state_next(state); - } while(state && !(state->state & EXTENT_DIRTY)); - continue; - } - spin_unlock_irq(&free_space_cache->lock); - if (cache->ro) { - goto new_group; - } - if (start + num > cache->key.objectid + cache->key.offset) - goto new_group; - if (!block_group_bits(cache, data)) { - printk("block group bits don't match %Lu %d\n", cache->flags, data); - } - *start_ret = start; + info = btrfs_find_free_space(cache, last, num); + if (info) { + *start_ret = info->offset; return 0; } -out: - cache = btrfs_lookup_block_group(root->fs_info, search_start); - if (!cache) { - printk("Unable to find block group for %Lu\n", search_start); - WARN_ON(1); - } - return -ENOSPC; new_group: last = cache->key.objectid + cache->key.offset; -wrapped: + cache = btrfs_lookup_first_block_group(root->fs_info, last); - if (!cache || cache->key.objectid >= total_fs_bytes) { -no_cache: - if (!wrapped) { - wrapped = 1; - last = search_start; - goto wrapped; - } + if (!cache || cache->key.objectid >= total_fs_bytes) goto out; - } - if (cache_miss && !cache->cached) { - cache_block_group(root, cache); - last = cache_miss; - cache = btrfs_lookup_first_block_group(root->fs_info, last); - } - cache_miss = 0; - cache = btrfs_find_block_group(root, cache, last, data, 0); - if (!cache) - goto no_cache; + *cache_ret = cache; goto again; + +out: + return -ENOSPC; } static u64 div_factor(u64 num, int factor) @@ -338,16 +358,19 @@ static u64 div_factor(u64 num, int factor) return num; } -static int block_group_state_bits(u64 flags) +static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, + u64 flags) { - int bits = 0; - if (flags & BTRFS_BLOCK_GROUP_DATA) - bits |= BLOCK_GROUP_DATA; - if (flags & BTRFS_BLOCK_GROUP_METADATA) - bits |= BLOCK_GROUP_METADATA; - if (flags & BTRFS_BLOCK_GROUP_SYSTEM) - bits |= BLOCK_GROUP_SYSTEM; - return bits; + struct list_head *head = &info->space_info; + struct list_head *cur; + struct btrfs_space_info *found; + list_for_each(cur, head) { + found = list_entry(cur, struct btrfs_space_info, list); + if (found->flags == flags) + return found; + } + return NULL; + } static struct btrfs_block_group_cache * @@ -356,28 +379,19 @@ __btrfs_find_block_group(struct btrfs_root *root, u64 search_start, int data, int owner) { struct btrfs_block_group_cache *cache; - struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *sinfo; u64 used; u64 last = 0; - u64 start; - u64 end; u64 free_check; - u64 ptr; - int bit; - int ret; int full_search = 0; int factor = 10; int wrapped = 0; - block_group_cache = &info->block_group_cache; - if (data & BTRFS_BLOCK_GROUP_METADATA) factor = 9; - bit = block_group_state_bits(data); - if (search_start) { struct btrfs_block_group_cache *shint; shint = btrfs_lookup_first_block_group(info, search_start); @@ -408,20 +422,30 @@ __btrfs_find_block_group(struct btrfs_root *root, else last = search_start; } + sinfo = __find_space_info(root->fs_info, data); + if (!sinfo) + goto found; again: while(1) { - ret = find_first_extent_bit(block_group_cache, last, - &start, &end, bit); - if (ret) + struct list_head *l; + + cache = NULL; + + spin_lock(&sinfo->lock); + list_for_each(l, &sinfo->block_groups) { + struct btrfs_block_group_cache *entry; + entry = list_entry(l, struct btrfs_block_group_cache, + list); + if ((entry->key.objectid >= last) && + (!cache || (entry->key.objectid < + cache->key.objectid))) + cache = entry; + } + spin_unlock(&sinfo->lock); + + if (!cache) break; - ret = get_state_private(block_group_cache, start, &ptr); - if (ret) { - last = end + 1; - continue; - } - - cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; spin_lock(&cache->lock); last = cache->key.objectid + cache->key.offset; used = btrfs_block_group_used(&cache->item); @@ -462,6 +486,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, ret = __btrfs_find_block_group(root, hint, search_start, data, owner); return ret; } + static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset) { @@ -1175,34 +1200,37 @@ fail: int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct extent_io_tree *block_group_cache; - struct btrfs_block_group_cache *cache; - int ret; + struct btrfs_block_group_cache *cache, *entry; + struct rb_node *n; int err = 0; int werr = 0; struct btrfs_path *path; u64 last = 0; - u64 start; - u64 end; - u64 ptr; - block_group_cache = &root->fs_info->block_group_cache; path = btrfs_alloc_path(); if (!path) return -ENOMEM; mutex_lock(&root->fs_info->alloc_mutex); while(1) { - ret = find_first_extent_bit(block_group_cache, last, - &start, &end, BLOCK_GROUP_DIRTY); - if (ret) + cache = NULL; + spin_lock(&root->fs_info->block_group_cache_lock); + for (n = rb_first(&root->fs_info->block_group_cache_tree); + n; n = rb_next(n)) { + entry = rb_entry(n, struct btrfs_block_group_cache, + cache_node); + if (entry->dirty) { + cache = entry; + break; + } + } + spin_unlock(&root->fs_info->block_group_cache_lock); + + if (!cache) break; - last = end + 1; - ret = get_state_private(block_group_cache, start, &ptr); - if (ret) - break; - cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; + last += cache->key.offset; + err = write_one_cache_group(trans, root, path, cache); /* @@ -1214,29 +1242,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, werr = err; continue; } - clear_extent_bits(block_group_cache, start, end, - BLOCK_GROUP_DIRTY, GFP_NOFS); + + cache->dirty = 0; } btrfs_free_path(path); mutex_unlock(&root->fs_info->alloc_mutex); return werr; } -static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, - u64 flags) -{ - struct list_head *head = &info->space_info; - struct list_head *cur; - struct btrfs_space_info *found; - list_for_each(cur, head) { - found = list_entry(cur, struct btrfs_space_info, list); - if (found->flags == flags) - return found; - } - return NULL; - -} - static int update_space_info(struct btrfs_fs_info *info, u64 flags, u64 total_bytes, u64 bytes_used, struct btrfs_space_info **space_info) @@ -1256,6 +1269,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, return -ENOMEM; list_add(&found->list, &info->space_info); + INIT_LIST_HEAD(&found->block_groups); + spin_lock_init(&found->lock); found->flags = flags; found->total_bytes = total_bytes; found->bytes_used = bytes_used; @@ -1318,7 +1333,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 thresh; u64 start; u64 num_bytes; - int ret; + int ret = 0; flags = reduce_alloc_profile(extent_root, flags); @@ -1355,10 +1370,11 @@ printk("space info full %Lu\n", flags); ret = btrfs_make_block_group(trans, extent_root, 0, flags, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); + out_unlock: mutex_unlock(&extent_root->fs_info->chunk_mutex); out: - return 0; + return ret; } static int update_block_group(struct btrfs_trans_handle *trans, @@ -1371,8 +1387,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 total = num_bytes; u64 old_val; u64 byte_in_group; - u64 start; - u64 end; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); while(total) { @@ -1382,12 +1396,9 @@ static int update_block_group(struct btrfs_trans_handle *trans, } byte_in_group = bytenr - cache->key.objectid; WARN_ON(byte_in_group > cache->key.offset); - start = cache->key.objectid; - end = start + cache->key.offset - 1; - set_extent_bits(&info->block_group_cache, start, end, - BLOCK_GROUP_DIRTY, GFP_NOFS); spin_lock(&cache->lock); + cache->dirty = 1; old_val = btrfs_block_group_used(&cache->item); num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { @@ -1401,9 +1412,11 @@ static int update_block_group(struct btrfs_trans_handle *trans, btrfs_set_block_group_used(&cache->item, old_val); spin_unlock(&cache->lock); if (mark_free) { - set_extent_dirty(&info->free_space_cache, - bytenr, bytenr + num_bytes - 1, - GFP_NOFS); + int ret; + ret = btrfs_add_free_space(cache, bytenr, + num_bytes); + if (ret) + return -1; } } total -= num_bytes; @@ -1414,16 +1427,13 @@ static int update_block_group(struct btrfs_trans_handle *trans, static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) { - u64 start; - u64 end; - int ret; - ret = find_first_extent_bit(&root->fs_info->block_group_cache, - search_start, &start, &end, - BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | - BLOCK_GROUP_SYSTEM); - if (ret) + struct btrfs_block_group_cache *cache; + + cache = btrfs_lookup_first_block_group(root->fs_info, search_start); + if (!cache) return 0; - return start; + + return cache->key.objectid; } @@ -1501,8 +1511,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, u64 start; u64 end; int ret; - struct extent_io_tree *free_space_cache; - free_space_cache = &root->fs_info->free_space_cache; + struct btrfs_block_group_cache *cache; mutex_lock(&root->fs_info->alloc_mutex); while(1) { @@ -1512,7 +1521,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, break; btrfs_update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); - set_extent_dirty(free_space_cache, start, end, GFP_NOFS); + cache = btrfs_lookup_block_group(root->fs_info, start); + if (cache->cached) + btrfs_add_free_space(cache, start, end - start + 1); if (need_resched()) { mutex_unlock(&root->fs_info->alloc_mutex); cond_resched(); @@ -1875,9 +1886,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, /* if metadata always pin */ if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + struct btrfs_block_group_cache *cache; + /* btrfs_free_reserved_extent */ - set_extent_dirty(&root->fs_info->free_space_cache, - bytenr, bytenr + num_bytes - 1, GFP_NOFS); + cache = btrfs_lookup_block_group(root->fs_info, bytenr); + BUG_ON(!cache); + btrfs_add_free_space(cache, bytenr, num_bytes); return 0; } pin = 1; @@ -1942,8 +1956,6 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, u64 total_needed = num_bytes; u64 *last_ptr = NULL; struct btrfs_block_group_cache *block_group; - int full_scan = 0; - int wrapped = 0; int chunk_alloc_done = 0; int empty_cluster = 2 * 1024 * 1024; int allowed_chunk_alloc = 0; @@ -1959,9 +1971,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, empty_cluster = 256 * 1024; } - if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { + if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) last_ptr = &root->fs_info->last_data_alloc; - } + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { last_ptr = &root->fs_info->last_log_alloc; if (!last_ptr == 0 && root->fs_info->last_alloc) { @@ -1972,9 +1984,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (last_ptr) { if (*last_ptr) hint_byte = *last_ptr; - else { + else empty_size += empty_cluster; - } } search_start = max(search_start, first_logical_byte(root, 0)); @@ -1983,145 +1994,172 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); - if (hint_byte) { - block_group = btrfs_lookup_first_block_group(info, hint_byte); - if (!block_group) - hint_byte = search_start; - block_group = btrfs_find_block_group(root, block_group, - hint_byte, data, 1); - if (last_ptr && *last_ptr == 0 && block_group) - hint_byte = block_group->key.objectid; - } else { - block_group = btrfs_find_block_group(root, - trans->block_group, - search_start, data, 1); - } search_start = max(search_start, hint_byte); - total_needed += empty_size; -check_failed: - if (!block_group) { - block_group = btrfs_lookup_first_block_group(info, - search_start); - if (!block_group) - block_group = btrfs_lookup_first_block_group(info, - orig_search_start); - } - if (full_scan && !chunk_alloc_done) { - if (allowed_chunk_alloc) { - do_chunk_alloc(trans, root, - num_bytes + 2 * 1024 * 1024, data, 1); - allowed_chunk_alloc = 0; - } else if (block_group && block_group_bits(block_group, data)) { - block_group->space_info->force_alloc = 1; +new_group: + block_group = btrfs_lookup_block_group(info, search_start); + + /* + * Ok this looks a little tricky, buts its really simple. First if we + * didn't find a block group obviously we want to start over. + * Secondly, if the block group we found does not match the type we + * need, and we have a last_ptr and its not 0, chances are the last + * allocation we made was at the end of the block group, so lets go + * ahead and skip the looking through the rest of the block groups and + * start at the beginning. This helps with metadata allocations, + * since you are likely to have a bunch of data block groups to search + * through first before you realize that you need to start over, so go + * ahead and start over and save the time. + */ + if (!block_group || (!block_group_bits(block_group, data) && + last_ptr && *last_ptr)) { + if (search_start != orig_search_start) { + if (last_ptr && *last_ptr) + *last_ptr = 0; + search_start = orig_search_start; + goto new_group; + } else if (!chunk_alloc_done && allowed_chunk_alloc) { + ret = do_chunk_alloc(trans, root, + num_bytes + 2 * 1024 * 1024, + data, 1); + if (ret < 0) { + struct btrfs_space_info *info; + + info = __find_space_info(root->fs_info, data); + goto error; + } + BUG_ON(ret); + chunk_alloc_done = 1; + search_start = orig_search_start; + goto new_group; + } else { + ret = -ENOSPC; + goto error; } + } + + /* + * this is going to seach through all of the existing block groups it + * can find, so if we don't find something we need to see if we can + * allocate what we need. + */ + ret = find_free_space(root, &block_group, &search_start, + total_needed, data); + if (ret == -ENOSPC) { + /* + * instead of allocating, start at the original search start + * and see if there is something to be found, if not then we + * allocate + */ + if (search_start != orig_search_start) { + if (last_ptr && *last_ptr) { + *last_ptr = 0; + total_needed += empty_cluster; + } + search_start = orig_search_start; + goto new_group; + } + + /* + * we've already allocated, we're pretty screwed + */ + if (chunk_alloc_done) { + goto error; + } else if (!allowed_chunk_alloc && block_group && + block_group_bits(block_group, data)) { + block_group->space_info->force_alloc = 1; + goto error; + } else if (!allowed_chunk_alloc) { + goto error; + } + + ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024, + data, 1); + if (ret < 0) + goto error; + + BUG_ON(ret); chunk_alloc_done = 1; + if (block_group) + search_start = block_group->key.objectid + + block_group->key.offset; + else + search_start = orig_search_start; + goto new_group; } - ret = find_search_start(root, &block_group, &search_start, - total_needed, data); - if (ret == -ENOSPC && last_ptr && *last_ptr) { - *last_ptr = 0; - block_group = btrfs_lookup_first_block_group(info, - orig_search_start); - search_start = orig_search_start; - ret = find_search_start(root, &block_group, &search_start, - total_needed, data); - } - if (ret == -ENOSPC) - goto enospc; + if (ret) goto error; - if (last_ptr && *last_ptr && search_start != *last_ptr) { - *last_ptr = 0; - if (!empty_size) { - empty_size += empty_cluster; - total_needed += empty_size; - } - block_group = btrfs_lookup_first_block_group(info, - orig_search_start); - search_start = orig_search_start; - ret = find_search_start(root, &block_group, - &search_start, total_needed, data); - if (ret == -ENOSPC) - goto enospc; - if (ret) - goto error; - } - search_start = stripe_align(root, search_start); ins->objectid = search_start; ins->offset = num_bytes; - if (ins->objectid + num_bytes >= search_end) - goto enospc; + if (ins->objectid + num_bytes >= search_end) { + search_start = orig_search_start; + if (chunk_alloc_done) { + ret = -ENOSPC; + goto error; + } + goto new_group; + } if (ins->objectid + num_bytes > block_group->key.objectid + block_group->key.offset) { + if (search_start == orig_search_start && chunk_alloc_done) { + ret = -ENOSPC; + goto error; + } search_start = block_group->key.objectid + block_group->key.offset; goto new_group; } - if (test_range_bit(&info->extent_ins, ins->objectid, - ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { - search_start = ins->objectid + num_bytes; - goto new_group; - } - - if (test_range_bit(&info->pinned_extents, ins->objectid, - ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { - search_start = ins->objectid + num_bytes; - goto new_group; - } - if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; goto new_group; } - if (!(data & BTRFS_BLOCK_GROUP_DATA)) { - block_group = btrfs_lookup_block_group(info, ins->objectid); - if (block_group) - trans->block_group = block_group; - } + if (!(data & BTRFS_BLOCK_GROUP_DATA)) + trans->block_group = block_group; + ins->offset = num_bytes; if (last_ptr) { *last_ptr = ins->objectid + ins->offset; if (*last_ptr == - btrfs_super_total_bytes(&root->fs_info->super_copy)) { + btrfs_super_total_bytes(&root->fs_info->super_copy)) *last_ptr = 0; - } } - return 0; - -new_group: - if (search_start + num_bytes >= search_end) { -enospc: - search_start = orig_search_start; - if (full_scan) { - ret = -ENOSPC; - goto error; - } - if (wrapped) { - if (!full_scan) - total_needed -= empty_size; - full_scan = 1; - } else - wrapped = 1; - } - block_group = btrfs_lookup_first_block_group(info, search_start); - cond_resched(); - block_group = btrfs_find_block_group(root, block_group, - search_start, data, 0); - goto check_failed; + ret = 0; error: return ret; } +static void dump_space_info(struct btrfs_space_info *info, u64 bytes) +{ + struct btrfs_block_group_cache *cache; + struct list_head *l; + + printk(KERN_INFO "space_info has %Lu free, is %sfull\n", + info->total_bytes - info->bytes_used - info->bytes_pinned, + (info->full) ? "" : "not "); + + spin_lock(&info->lock); + list_for_each(l, &info->block_groups) { + cache = list_entry(l, struct btrfs_block_group_cache, list); + spin_lock(&cache->lock); + printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used " + "%Lu pinned\n", + cache->key.objectid, cache->key.offset, + btrfs_block_group_used(&cache->item), cache->pinned); + btrfs_dump_free_space(cache, bytes); + spin_unlock(&cache->lock); + } + spin_unlock(&info->lock); +} static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, @@ -2133,6 +2171,7 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, u64 search_start = 0; u64 alloc_profile; struct btrfs_fs_info *info = root->fs_info; + struct btrfs_block_group_cache *cache; if (data) { alloc_profile = info->avail_data_alloc_bits & @@ -2160,11 +2199,9 @@ again: BTRFS_BLOCK_GROUP_METADATA | (info->metadata_alloc_profile & info->avail_metadata_alloc_bits), 0); - BUG_ON(ret); } ret = do_chunk_alloc(trans, root->fs_info->extent_root, num_bytes + 2 * 1024 * 1024, data, 0); - BUG_ON(ret); } WARN_ON(num_bytes < root->sectorsize); @@ -2175,26 +2212,44 @@ again: if (ret == -ENOSPC && num_bytes > min_alloc_size) { num_bytes = num_bytes >> 1; + num_bytes = num_bytes & ~(root->sectorsize - 1); num_bytes = max(num_bytes, min_alloc_size); do_chunk_alloc(trans, root->fs_info->extent_root, num_bytes, data, 1); goto again; } if (ret) { - printk("allocation failed flags %Lu\n", data); + struct btrfs_space_info *sinfo; + + sinfo = __find_space_info(root->fs_info, data); + printk("allocation failed flags %Lu, wanted %Lu\n", + data, num_bytes); + dump_space_info(sinfo, num_bytes); BUG(); } - clear_extent_dirty(&root->fs_info->free_space_cache, - ins->objectid, ins->objectid + ins->offset - 1, - GFP_NOFS); - return 0; + cache = btrfs_lookup_block_group(root->fs_info, ins->objectid); + if (!cache) { + printk(KERN_ERR "Unable to find block group for %Lu\n", ins->objectid); + return -ENOSPC; + } + + ret = btrfs_remove_free_space(cache, ins->objectid, ins->offset); + + return ret; } int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) { + struct btrfs_block_group_cache *cache; + maybe_lock_mutex(root); - set_extent_dirty(&root->fs_info->free_space_cache, - start, start + len - 1, GFP_NOFS); + cache = btrfs_lookup_block_group(root->fs_info, start); + if (!cache) { + printk(KERN_ERR "Unable to find block group for %Lu\n", start); + maybe_unlock_mutex(root); + return -ENOSPC; + } + btrfs_add_free_space(cache, start, len); maybe_unlock_mutex(root); return 0; } @@ -2264,8 +2319,8 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_items(trans, extent_root, path, keys, sizes, 2); - BUG_ON(ret); + extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_item); btrfs_set_extent_refs(path->nodes[0], extent_item, 1); @@ -2336,9 +2391,9 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); cache_block_group(root, block_group); - clear_extent_dirty(&root->fs_info->free_space_cache, - ins->objectid, ins->objectid + ins->offset - 1, - GFP_NOFS); + ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset); + BUG_ON(ret); + ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, ref_generation, owner, owner_offset, ins); @@ -2843,31 +2898,24 @@ out: int btrfs_free_block_groups(struct btrfs_fs_info *info) { - u64 start; - u64 end; - u64 ptr; - int ret; + struct btrfs_block_group_cache *block_group; + struct rb_node *n; mutex_lock(&info->alloc_mutex); - while(1) { - ret = find_first_extent_bit(&info->block_group_cache, 0, - &start, &end, (unsigned int)-1); - if (ret) - break; - ret = get_state_private(&info->block_group_cache, start, &ptr); - if (!ret) - kfree((void *)(unsigned long)ptr); - clear_extent_bits(&info->block_group_cache, start, - end, (unsigned int)-1, GFP_NOFS); - } - while(1) { - ret = find_first_extent_bit(&info->free_space_cache, 0, - &start, &end, EXTENT_DIRTY); - if (ret) - break; - clear_extent_dirty(&info->free_space_cache, start, - end, GFP_NOFS); + spin_lock(&info->block_group_cache_lock); + while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { + block_group = rb_entry(n, struct btrfs_block_group_cache, + cache_node); + + btrfs_remove_free_space_cache(block_group); + rb_erase(&block_group->cache_node, + &info->block_group_cache_tree); + spin_lock(&block_group->space_info->lock); + list_del(&block_group->list); + spin_unlock(&block_group->space_info->lock); + kfree(block_group); } + spin_unlock(&info->block_group_cache_lock); mutex_unlock(&info->alloc_mutex); return 0; } @@ -3386,7 +3434,6 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) u64 total_found; u64 shrink_last_byte; struct btrfs_block_group_cache *shrink_block_group; - struct btrfs_fs_info *info = root->fs_info; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; @@ -3542,15 +3589,17 @@ next: goto out; } - clear_extent_bits(&info->block_group_cache, key.objectid, - key.objectid + key.offset - 1, - (unsigned int)-1, GFP_NOFS); - - - clear_extent_bits(&info->free_space_cache, - key.objectid, key.objectid + key.offset - 1, - (unsigned int)-1, GFP_NOFS); + spin_lock(&root->fs_info->block_group_cache_lock); + rb_erase(&shrink_block_group->cache_node, + &root->fs_info->block_group_cache_tree); + spin_unlock(&root->fs_info->block_group_cache_lock); + ret = btrfs_remove_free_space(shrink_block_group, key.objectid, + key.offset); + if (ret) { + btrfs_end_transaction(trans, root); + goto out; + } /* memset(shrink_block_group, 0, sizeof(*shrink_block_group)); kfree(shrink_block_group); @@ -3566,9 +3615,9 @@ next: /* the code to unpin extents might set a few bits in the free * space cache for this range again */ - clear_extent_bits(&info->free_space_cache, - key.objectid, key.objectid + key.offset - 1, - (unsigned int)-1, GFP_NOFS); + /* XXX? */ + ret = btrfs_remove_free_space(shrink_block_group, key.objectid, + key.offset); out: btrfs_free_path(path); mutex_unlock(&root->fs_info->alloc_mutex); @@ -3616,16 +3665,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; int ret; - int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; struct btrfs_space_info *space_info; - struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; - block_group_cache = &info->block_group_cache; root = info->extent_root; key.objectid = 0; key.offset = 0; @@ -3653,6 +3699,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) } spin_lock_init(&cache->lock); + INIT_LIST_HEAD(&cache->list); read_extent_buffer(leaf, &cache->item, btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); @@ -3661,31 +3708,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); cache->flags = btrfs_block_group_flags(&cache->item); - bit = 0; - if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { - bit = BLOCK_GROUP_DATA; - } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { - bit = BLOCK_GROUP_SYSTEM; - } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { - bit = BLOCK_GROUP_METADATA; - } - set_avail_alloc_bits(info, cache->flags); ret = update_space_info(info, cache->flags, found_key.offset, btrfs_block_group_used(&cache->item), &space_info); BUG_ON(ret); cache->space_info = space_info; + spin_lock(&space_info->lock); + list_add(&cache->list, &space_info->block_groups); + spin_unlock(&space_info->lock); + + ret = btrfs_add_block_group_cache(root->fs_info, cache); + BUG_ON(ret); - /* use EXTENT_LOCKED to prevent merging */ - set_extent_bits(block_group_cache, found_key.objectid, - found_key.objectid + found_key.offset - 1, - EXTENT_LOCKED, GFP_NOFS); - set_state_private(block_group_cache, found_key.objectid, - (unsigned long)cache); - set_extent_bits(block_group_cache, found_key.objectid, - found_key.objectid + found_key.offset - 1, - bit | EXTENT_LOCKED, GFP_NOFS); if (key.objectid >= btrfs_super_total_bytes(&info->super_copy)) break; @@ -3703,22 +3738,22 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 size) { int ret; - int bit = 0; struct btrfs_root *extent_root; struct btrfs_block_group_cache *cache; - struct extent_io_tree *block_group_cache; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); extent_root = root->fs_info->extent_root; - block_group_cache = &root->fs_info->block_group_cache; root->fs_info->last_trans_new_blockgroup = trans->transid; cache = kzalloc(sizeof(*cache), GFP_NOFS); - BUG_ON(!cache); + if (!cache) + return -ENOMEM; + cache->key.objectid = chunk_offset; cache->key.offset = size; spin_lock_init(&cache->lock); + INIT_LIST_HEAD(&cache->list); btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); btrfs_set_block_group_used(&cache->item, bytes_used); @@ -3729,16 +3764,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, &cache->space_info); BUG_ON(ret); + spin_lock(&cache->space_info->lock); + list_add(&cache->list, &cache->space_info->block_groups); + spin_unlock(&cache->space_info->lock); - bit = block_group_state_bits(type); - set_extent_bits(block_group_cache, chunk_offset, - chunk_offset + size - 1, - EXTENT_LOCKED, GFP_NOFS); - set_state_private(block_group_cache, chunk_offset, - (unsigned long)cache); - set_extent_bits(block_group_cache, chunk_offset, - chunk_offset + size - 1, - bit | EXTENT_LOCKED, GFP_NOFS); + ret = btrfs_add_block_group_cache(root->fs_info, cache); + BUG_ON(ret); ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, sizeof(cache->item)); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 319a0c7a4a58..8624f3e88036 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2634,6 +2634,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, if (eb) { atomic_inc(&eb->refs); spin_unlock(&tree->buffer_lock); + mark_page_accessed(eb->first_page); return eb; } spin_unlock(&tree->buffer_lock); @@ -2713,6 +2714,9 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, atomic_inc(&eb->refs); spin_unlock(&tree->buffer_lock); + if (eb) + mark_page_accessed(eb->first_page); + return eb; } EXPORT_SYMBOL(find_extent_buffer); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c new file mode 100644 index 000000000000..01c26e8ae555 --- /dev/null +++ b/fs/btrfs/free-space-cache.c @@ -0,0 +1,415 @@ +/* + * Copyright (C) 2008 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include "ctree.h" + +static int tree_insert_offset(struct rb_root *root, u64 offset, + struct rb_node *node) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct btrfs_free_space *info; + + while (*p) { + parent = *p; + info = rb_entry(parent, struct btrfs_free_space, offset_index); + + if (offset < info->offset) + p = &(*p)->rb_left; + else if (offset > info->offset) + p = &(*p)->rb_right; + else + return -EEXIST; + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + + return 0; +} + +static int tree_insert_bytes(struct rb_root *root, u64 bytes, + struct rb_node *node) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct btrfs_free_space *info; + + while (*p) { + parent = *p; + info = rb_entry(parent, struct btrfs_free_space, bytes_index); + + if (bytes < info->bytes) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + + return 0; +} + +/* + * searches the tree for the given offset. If contains is set we will return + * the free space that contains the given offset. If contains is not set we + * will return the free space that starts at or after the given offset and is + * at least bytes long. + */ +static struct btrfs_free_space *tree_search_offset(struct rb_root *root, + u64 offset, u64 bytes, + int contains) +{ + struct rb_node *n = root->rb_node; + struct btrfs_free_space *entry, *ret = NULL; + + while (n) { + entry = rb_entry(n, struct btrfs_free_space, offset_index); + + if (offset < entry->offset) { + if (!contains && + (!ret || entry->offset < ret->offset) && + (bytes <= entry->bytes)) + ret = entry; + n = n->rb_left; + } else if (offset > entry->offset) { + if (contains && + (entry->offset + entry->bytes - 1) >= offset) { + ret = entry; + break; + } + n = n->rb_right; + } else { + if (bytes > entry->bytes) { + n = n->rb_right; + continue; + } + ret = entry; + break; + } + } + + return ret; +} + +/* + * return a chunk at least bytes size, as close to offset that we can get. + */ +static struct btrfs_free_space *tree_search_bytes(struct rb_root *root, + u64 offset, u64 bytes) +{ + struct rb_node *n = root->rb_node; + struct btrfs_free_space *entry, *ret = NULL; + + while (n) { + entry = rb_entry(n, struct btrfs_free_space, bytes_index); + + if (bytes < entry->bytes) { + /* + * We prefer to get a hole size as close to the size we + * are asking for so we don't take small slivers out of + * huge holes, but we also want to get as close to the + * offset as possible so we don't have a whole lot of + * fragmentation. + */ + if (offset <= entry->offset) { + if (!ret) + ret = entry; + else if (entry->bytes < ret->bytes) + ret = entry; + else if (entry->offset < ret->offset) + ret = entry; + } + n = n->rb_left; + } else if (bytes > entry->bytes) { + n = n->rb_right; + } else { + /* + * Ok we may have multiple chunks of the wanted size, + * so we don't want to take the first one we find, we + * want to take the one closest to our given offset, so + * keep searching just in case theres a better match. + */ + n = n->rb_right; + if (offset > entry->offset) + continue; + else if (!ret || entry->offset < ret->offset) + ret = entry; + } + } + + return ret; +} + +static void unlink_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_free_space *info) +{ + rb_erase(&info->offset_index, &block_group->free_space_offset); + rb_erase(&info->bytes_index, &block_group->free_space_bytes); +} + +static int link_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_free_space *info) +{ + int ret = 0; + + + ret = tree_insert_offset(&block_group->free_space_offset, info->offset, + &info->offset_index); + if (ret) + return ret; + + ret = tree_insert_bytes(&block_group->free_space_bytes, info->bytes, + &info->bytes_index); + if (ret) + return ret; + + return ret; +} + +int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, + u64 offset, u64 bytes) +{ + struct btrfs_free_space *right_info; + struct btrfs_free_space *left_info; + struct btrfs_free_space *info = NULL; + struct btrfs_free_space *alloc_info; + int ret = 0; + + alloc_info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); + if (!alloc_info) + return -ENOMEM; + + /* + * first we want to see if there is free space adjacent to the range we + * are adding, if there is remove that struct and add a new one to + * cover the entire range + */ + spin_lock(&block_group->lock); + + right_info = tree_search_offset(&block_group->free_space_offset, + offset+bytes, 0, 1); + left_info = tree_search_offset(&block_group->free_space_offset, + offset-1, 0, 1); + + if (right_info && right_info->offset == offset+bytes) { + unlink_free_space(block_group, right_info); + info = right_info; + info->offset = offset; + info->bytes += bytes; + } else if (right_info && right_info->offset != offset+bytes) { + printk(KERN_ERR "adding space in the middle of an existing " + "free space area. existing: offset=%Lu, bytes=%Lu. " + "new: offset=%Lu, bytes=%Lu\n", right_info->offset, + right_info->bytes, offset, bytes); + BUG(); + } + + if (left_info) { + unlink_free_space(block_group, left_info); + + if (unlikely((left_info->offset + left_info->bytes) != + offset)) { + printk(KERN_ERR "free space to the left of new free " + "space isn't quite right. existing: offset=%Lu," + " bytes=%Lu. new: offset=%Lu, bytes=%Lu\n", + left_info->offset, left_info->bytes, offset, + bytes); + BUG(); + } + + if (info) { + info->offset = left_info->offset; + info->bytes += left_info->bytes; + kfree(left_info); + } else { + info = left_info; + info->bytes += bytes; + } + } + + if (info) { + ret = link_free_space(block_group, info); + if (!ret) + info = NULL; + goto out; + } + + info = alloc_info; + alloc_info = NULL; + info->offset = offset; + info->bytes = bytes; + + ret = link_free_space(block_group, info); + if (ret) + kfree(info); +out: + spin_unlock(&block_group->lock); + if (ret) { + printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); + if (ret == -EEXIST) + BUG(); + } + + if (alloc_info) + kfree(alloc_info); + + return ret; +} + +int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, + u64 offset, u64 bytes) +{ + struct btrfs_free_space *info; + int ret = 0; + + spin_lock(&block_group->lock); + info = tree_search_offset(&block_group->free_space_offset, offset, 0, + 1); + + if (info && info->offset == offset) { + if (info->bytes < bytes) { + printk(KERN_ERR "Found free space at %Lu, size %Lu," + "trying to use %Lu\n", + info->offset, info->bytes, bytes); + WARN_ON(1); + ret = -EINVAL; + goto out; + } + + unlink_free_space(block_group, info); + + if (info->bytes == bytes) { + kfree(info); + goto out; + } + + info->offset += bytes; + info->bytes -= bytes; + + ret = link_free_space(block_group, info); + BUG_ON(ret); + } else { + WARN_ON(1); + } +out: + spin_unlock(&block_group->lock); + return ret; +} + +void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, + u64 bytes) +{ + struct btrfs_free_space *info; + struct rb_node *n; + int count = 0; + + for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) { + info = rb_entry(n, struct btrfs_free_space, offset_index); + if (info->bytes >= bytes) + count++; + //printk(KERN_INFO "offset=%Lu, bytes=%Lu\n", info->offset, + // info->bytes); + } + printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" + "\n", count); +} + +u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group) +{ + struct btrfs_free_space *info; + struct rb_node *n; + u64 ret = 0; + + for (n = rb_first(&block_group->free_space_offset); n; + n = rb_next(n)) { + info = rb_entry(n, struct btrfs_free_space, offset_index); + ret += info->bytes; + } + + return ret; +} + +void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) +{ + struct btrfs_free_space *info; + struct rb_node *node; + + spin_lock(&block_group->lock); + while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { + info = rb_entry(node, struct btrfs_free_space, bytes_index); + unlink_free_space(block_group, info); + kfree(info); + if (need_resched()) { + spin_unlock(&block_group->lock); + cond_resched(); + spin_lock(&block_group->lock); + } + } + spin_unlock(&block_group->lock); +} + +struct btrfs_free_space *btrfs_find_free_space_offset(struct + btrfs_block_group_cache + *block_group, u64 offset, + u64 bytes) +{ + struct btrfs_free_space *ret; + + spin_lock(&block_group->lock); + ret = tree_search_offset(&block_group->free_space_offset, offset, + bytes, 0); + spin_unlock(&block_group->lock); + + return ret; +} + +struct btrfs_free_space *btrfs_find_free_space_bytes(struct + btrfs_block_group_cache + *block_group, u64 offset, + u64 bytes) +{ + struct btrfs_free_space *ret; + + spin_lock(&block_group->lock); + + ret = tree_search_bytes(&block_group->free_space_bytes, offset, bytes); + spin_unlock(&block_group->lock); + + return ret; +} + +struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache + *block_group, u64 offset, + u64 bytes) +{ + struct btrfs_free_space *ret; + + spin_lock(&block_group->lock); + ret = tree_search_offset(&block_group->free_space_offset, offset, + bytes, 0); + if (!ret) + ret = tree_search_bytes(&block_group->free_space_bytes, + offset, bytes); + + spin_unlock(&block_group->lock); + + return ret; +} diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 12c1c0530f3d..65b4f864b0df 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -141,7 +141,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) while(num_bytes > 0) { cur_alloc_size = min(num_bytes, root->fs_info->max_extent); ret = btrfs_reserve_extent(trans, root, cur_alloc_size, - root->sectorsize, 0, 0, + root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); @@ -558,7 +558,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) trans->transid, inode->i_ino, ordered_extent->file_offset, &ins); BUG_ON(ret); - mutex_lock(&BTRFS_I(inode)->extent_mutex); ret = btrfs_drop_extents(trans, root, inode, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1546fa6f4f7d..b9e5c2d82dde 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -64,8 +64,8 @@ static void lock_chunks(struct btrfs_root *root) static void unlock_chunks(struct btrfs_root *root) { - mutex_unlock(&root->fs_info->alloc_mutex); mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); } int btrfs_cleanup_fs_uuids(void) @@ -1668,8 +1668,13 @@ again: else min_free = calc_size; - /* we add 1MB because we never use the first 1MB of the device */ - min_free += 1024 * 1024; + /* + * we add 1MB because we never use the first 1MB of the device, unless + * we've looped, then we are likely allocating the maximum amount of + * space left already + */ + if (!looped) + min_free += 1024 * 1024; /* build a private list of devices we will allocate from */ while(index < num_stripes) { From ce3ed71a58094544da1e1dc4723f1f553bb5b2d5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 23 Sep 2008 13:14:12 -0400 Subject: [PATCH 0711/2985] Btrfs: Checksum tree blocks in the background Tree blocks were using async bio submission, but the sum was still being done directly during writepage. This moves the checksumming into the worker thread. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 535bd0fe1a71..25be96946a2f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -315,14 +315,6 @@ out: return 0; } -static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) -{ - struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; - - csum_dirty_buffer(root, page); - return 0; -} - int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { @@ -501,6 +493,22 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, return 0; } +static int btree_csum_one_bio(struct bio *bio) +{ + struct bio_vec *bvec = bio->bi_io_vec; + int bio_index = 0; + struct btrfs_root *root; + + WARN_ON(bio->bi_vcnt <= 0); + while(bio_index < bio->bi_vcnt) { + root = BTRFS_I(bvec->bv_page->mapping->host)->root; + csum_dirty_buffer(root, bvec->bv_page); + bio_index++; + bvec++; + } + return 0; +} + static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { @@ -515,6 +523,7 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, * submission context. Just jump into btrfs_map_bio */ if (rw & (1 << BIO_RW)) { + btree_csum_one_bio(bio); return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); } @@ -2040,7 +2049,6 @@ out: static struct extent_io_ops btree_extent_io_ops = { .write_cache_pages_lock_hook = btree_lock_page_hook, - .writepage_io_hook = btree_writepage_io_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, .submit_bio_hook = btree_submit_bio_hook, /* note we're sharing with inode.c for the merge bio hook */ From 1c2308f8e7d8491467e0095af2b01500f1b70819 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 23 Sep 2008 13:14:13 -0400 Subject: [PATCH 0712/2985] Add check for tree-log roots in btrfs_alloc_reserved_extents Tree log blocks are only reserved, and should not ever get fully allocated on disk. This check makes sure they stay out of the extent tree. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 813566acc5d3..558fbe407368 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2365,6 +2365,9 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, struct btrfs_key *ins) { int ret; + + if (root_objectid == BTRFS_TREE_LOG_OBJECTID) + return 0; maybe_lock_mutex(root); ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, ref_generation, owner, From 31840ae1a6b433ca0e6a8d341756ff478bbf959e Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Tue, 23 Sep 2008 13:14:14 -0400 Subject: [PATCH 0713/2985] Btrfs: Full back reference support This patch makes the back reference system to explicit record the location of parent node for all types of extents. The location of parent node is placed into the offset field of backref key. Every time a tree block is balanced, the back references for the affected lower level extents are updated. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 228 ++++---- fs/btrfs/ctree.h | 62 ++- fs/btrfs/disk-io.c | 4 +- fs/btrfs/extent-tree.c | 1143 ++++++++++++++++++++++++---------------- fs/btrfs/extent_io.c | 3 +- fs/btrfs/file.c | 121 ++--- fs/btrfs/inode.c | 57 +- fs/btrfs/ioctl.c | 57 +- fs/btrfs/print-tree.c | 5 +- fs/btrfs/tree-log.c | 108 ++-- 10 files changed, 1066 insertions(+), 722 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6f467901246f..50aea8cb653a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -125,7 +125,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, u32 nritems; int ret = 0; int level; - struct btrfs_key first_key; struct btrfs_root *new_root; new_root = kmalloc(sizeof(*new_root), GFP_NOFS); @@ -141,18 +140,10 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); - if (nritems) { - if (level == 0) - btrfs_item_key_to_cpu(buf, &first_key, 0); - else - btrfs_node_key_to_cpu(buf, &first_key, 0); - } else { - first_key.objectid = 0; - } - cow = btrfs_alloc_free_block(trans, new_root, buf->len, - new_root_objectid, - trans->transid, first_key.objectid, - level, buf->start, 0); + + cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0, + new_root_objectid, trans->transid, + level, buf->start, 0); if (IS_ERR(cow)) { kfree(new_root); return PTR_ERR(cow); @@ -165,7 +156,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); WARN_ON(btrfs_header_generation(buf) > trans->transid); - ret = btrfs_inc_ref(trans, new_root, buf, 0); + ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL); kfree(new_root); if (ret) @@ -184,39 +175,31 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, u64 search_start, u64 empty_size, u64 prealloc_dest) { - u64 root_gen; + u64 parent_start; struct extent_buffer *cow; u32 nritems; int ret = 0; int different_trans = 0; int level; int unlock_orig = 0; - struct btrfs_key first_key; if (*cow_ret == buf) unlock_orig = 1; WARN_ON(!btrfs_tree_locked(buf)); - if (root->ref_cows) { - root_gen = trans->transid; - } else { - root_gen = 0; - } + if (parent) + parent_start = parent->start; + else + parent_start = 0; + WARN_ON(root->ref_cows && trans->transid != root->fs_info->running_transaction->transid); WARN_ON(root->ref_cows && trans->transid != root->last_trans); level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); - if (nritems) { - if (level == 0) - btrfs_item_key_to_cpu(buf, &first_key, 0); - else - btrfs_node_key_to_cpu(buf, &first_key, 0); - } else { - first_key.objectid = 0; - } + if (prealloc_dest) { struct btrfs_key ins; @@ -224,19 +207,19 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, ins.offset = buf->len; ins.type = BTRFS_EXTENT_ITEM_KEY; - ret = btrfs_alloc_reserved_extent(trans, root, + ret = btrfs_alloc_reserved_extent(trans, root, parent_start, root->root_key.objectid, - root_gen, level, - first_key.objectid, + trans->transid, level, 0, &ins); BUG_ON(ret); cow = btrfs_init_new_buffer(trans, root, prealloc_dest, buf->len); } else { cow = btrfs_alloc_free_block(trans, root, buf->len, + parent_start, root->root_key.objectid, - root_gen, first_key.objectid, - level, search_start, empty_size); + trans->transid, level, + search_start, empty_size); } if (IS_ERR(cow)) return PTR_ERR(cow); @@ -249,17 +232,23 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(buf) > trans->transid); if (btrfs_header_generation(buf) != trans->transid) { + u32 nr_extents; different_trans = 1; - ret = btrfs_inc_ref(trans, root, buf, 1); + ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents); if (ret) return ret; + + ret = btrfs_cache_ref(trans, root, buf, nr_extents); + WARN_ON(ret); } else { + ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems); + if (ret) + return ret; clean_tree_block(trans, root, buf); } if (buf == root->node) { WARN_ON(parent && parent != buf); - root_gen = btrfs_header_generation(buf); spin_lock(&root->node_lock); root->node = cow; @@ -268,13 +257,14 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, if (buf != root->commit_root) { btrfs_free_extent(trans, root, buf->start, - buf->len, root->root_key.objectid, - root_gen, 0, 0, 1); + buf->len, buf->start, + root->root_key.objectid, + btrfs_header_generation(buf), + 0, 0, 1); } free_extent_buffer(buf); add_root_to_dirty_list(root); } else { - root_gen = btrfs_header_generation(parent); btrfs_set_node_blockptr(parent, parent_slot, cow->start); WARN_ON(trans->transid == 0); @@ -283,8 +273,8 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(parent); WARN_ON(btrfs_header_generation(parent) != trans->transid); btrfs_free_extent(trans, root, buf->start, buf->len, - btrfs_header_owner(parent), root_gen, - 0, 0, 1); + parent_start, btrfs_header_owner(parent), + btrfs_header_generation(parent), 0, 0, 1); } if (unlock_orig) btrfs_tree_unlock(buf); @@ -831,6 +821,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, root->node = child; spin_unlock(&root->node_lock); + ret = btrfs_update_extent_ref(trans, root, child->start, + mid->start, child->start, + root->root_key.objectid, + trans->transid, level - 1, 0); + BUG_ON(ret); + add_root_to_dirty_list(root); btrfs_tree_unlock(child); path->locks[level] = 0; @@ -840,7 +836,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* once for the path */ free_extent_buffer(mid); ret = btrfs_free_extent(trans, root, mid->start, mid->len, - root->root_key.objectid, + mid->start, root->root_key.objectid, btrfs_header_generation(mid), 0, 0, 1); /* once for the root ptr */ free_extent_buffer(mid); @@ -905,7 +901,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (wret) ret = wret; wret = btrfs_free_extent(trans, root, bytenr, - blocksize, + blocksize, parent->start, btrfs_header_owner(parent), generation, 0, 0, 1); if (wret) @@ -954,6 +950,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (wret) ret = wret; wret = btrfs_free_extent(trans, root, bytenr, blocksize, + parent->start, btrfs_header_owner(parent), root_gen, 0, 0, 1); if (wret) @@ -1499,6 +1496,41 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, return ret; } +/* + * update item key. + * + * This function isn't completely safe. It's the caller's responsibility + * that the new key won't break the order + */ +int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *new_key) +{ + struct btrfs_disk_key disk_key; + struct extent_buffer *eb; + int slot; + + eb = path->nodes[0]; + slot = path->slots[0]; + if (slot > 0) { + btrfs_item_key(eb, &disk_key, slot - 1); + if (comp_keys(&disk_key, new_key) >= 0) + return -1; + } + if (slot < btrfs_header_nritems(eb) - 1) { + btrfs_item_key(eb, &disk_key, slot + 1); + if (comp_keys(&disk_key, new_key) <= 0) + return -1; + } + + btrfs_cpu_key_to_disk(&disk_key, new_key); + btrfs_set_item_key(eb, &disk_key, slot); + btrfs_mark_buffer_dirty(eb); + if (slot == 0) + fixup_low_keys(trans, root, path, &disk_key, 1); + return 0; +} + /* * try to push data from one node into the next node left in the * tree. @@ -1558,6 +1590,10 @@ static int push_node_left(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(dst, dst_nritems + push_items); btrfs_mark_buffer_dirty(src); btrfs_mark_buffer_dirty(dst); + + ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items); + BUG_ON(ret); + return ret; } @@ -1619,6 +1655,10 @@ static int balance_node_right(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(src); btrfs_mark_buffer_dirty(dst); + + ret = btrfs_update_ref(trans, root, src, dst, 0, push_items); + BUG_ON(ret); + return ret; } @@ -1633,30 +1673,24 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - u64 root_gen; u64 lower_gen; struct extent_buffer *lower; struct extent_buffer *c; struct extent_buffer *old; struct btrfs_disk_key lower_key; + int ret; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - if (root->ref_cows) - root_gen = trans->transid; - else - root_gen = 0; - lower = path->nodes[level-1]; if (level == 1) btrfs_item_key(lower, &lower_key, 0); else btrfs_node_key(lower, &lower_key, 0); - c = btrfs_alloc_free_block(trans, root, root->nodesize, - root->root_key.objectid, - root_gen, le64_to_cpu(lower_key.objectid), + c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, + root->root_key.objectid, trans->transid, level, root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); @@ -1679,7 +1713,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); lower_gen = btrfs_header_generation(lower); - WARN_ON(lower_gen == 0); + WARN_ON(lower_gen != trans->transid); btrfs_set_node_ptr_generation(c, 0, lower_gen); @@ -1690,6 +1724,12 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, root->node = c; spin_unlock(&root->node_lock); + ret = btrfs_update_extent_ref(trans, root, lower->start, + lower->start, c->start, + root->root_key.objectid, + trans->transid, level - 1, 0); + BUG_ON(ret); + /* the super has an extra ref to root->node */ free_extent_buffer(old); @@ -1698,20 +1738,6 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, path->nodes[level] = c; path->locks[level] = 1; path->slots[level] = 0; - - if (root->ref_cows && lower_gen != trans->transid) { - struct btrfs_path *back_path = btrfs_alloc_path(); - int ret; - mutex_lock(&root->fs_info->alloc_mutex); - ret = btrfs_insert_extent_backref(trans, - root->fs_info->extent_root, - path, lower->start, - root->root_key.objectid, - trans->transid, 0, 0); - BUG_ON(ret); - mutex_unlock(&root->fs_info->alloc_mutex); - btrfs_free_path(back_path); - } return 0; } @@ -1766,7 +1792,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - u64 root_gen; struct extent_buffer *c; struct extent_buffer *split; struct btrfs_disk_key disk_key; @@ -1793,17 +1818,11 @@ static noinline int split_node(struct btrfs_trans_handle *trans, } c_nritems = btrfs_header_nritems(c); - if (root->ref_cows) - root_gen = trans->transid; - else - root_gen = 0; - btrfs_node_key(c, &disk_key, 0); split = btrfs_alloc_free_block(trans, root, root->nodesize, - root->root_key.objectid, - root_gen, - btrfs_disk_key_objectid(&disk_key), - level, c->start, 0); + path->nodes[level + 1]->start, + root->root_key.objectid, + trans->transid, level, c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); @@ -1840,6 +1859,9 @@ static noinline int split_node(struct btrfs_trans_handle *trans, if (wret) ret = wret; + ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid); + BUG_ON(ret); + if (path->slots[level] >= mid) { path->slots[level] -= mid; btrfs_tree_unlock(c); @@ -1955,10 +1977,23 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root else nr = 1; + if (path->slots[0] >= left_nritems) + push_space += data_size + sizeof(*item); + i = left_nritems - 1; while (i >= nr) { item = btrfs_item_nr(left, i); + if (!empty && push_items > 0) { + if (path->slots[0] > i) + break; + if (path->slots[0] == i) { + int space = btrfs_leaf_free_space(root, left); + if (space + push_space * 2 > free_space) + break; + } + } + if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -1973,6 +2008,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root this_item_size = btrfs_item_size(left, item); if (this_item_size + sizeof(*item) + push_space > free_space) break; + push_items++; push_space += this_item_size + sizeof(*item); if (i == 0) @@ -2046,6 +2082,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(left); btrfs_mark_buffer_dirty(right); + ret = btrfs_update_ref(trans, root, left, right, 0, push_items); + BUG_ON(ret); + btrfs_item_key(right, &disk_key, 0); btrfs_set_node_key(upper, &disk_key, slot + 1); btrfs_mark_buffer_dirty(upper); @@ -2147,6 +2186,16 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root KM_USER1); } + if (!empty && push_items > 0) { + if (path->slots[0] < i) + break; + if (path->slots[0] == i) { + int space = btrfs_leaf_free_space(root, right); + if (space + push_space * 2 > free_space) + break; + } + } + if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -2255,6 +2304,10 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root if (right_nritems) btrfs_mark_buffer_dirty(right); + ret = btrfs_update_ref(trans, root, right, left, + old_left_nritems, push_items); + BUG_ON(ret); + btrfs_item_key(right, &disk_key, 0); wret = fixup_low_keys(trans, root, path, &disk_key, 1); if (wret) @@ -2294,7 +2347,6 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_path *path, int data_size, int extend) { - u64 root_gen; struct extent_buffer *l; u32 nritems; int mid; @@ -2313,11 +2365,6 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, if (extend) space_needed = data_size; - if (root->ref_cows) - root_gen = trans->transid; - else - root_gen = 0; - /* first try to make some room by pushing left and right */ if (ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size, 0); @@ -2348,13 +2395,10 @@ again: nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; - btrfs_item_key(l, &disk_key, 0); - right = btrfs_alloc_free_block(trans, root, root->leafsize, - root->root_key.objectid, - root_gen, - le64_to_cpu(disk_key.objectid), - 0, l->start, 0); + path->nodes[1]->start, + root->root_key.objectid, + trans->transid, 0, l->start, 0); if (IS_ERR(right)) { BUG_ON(1); return PTR_ERR(right); @@ -2485,6 +2529,9 @@ again: btrfs_mark_buffer_dirty(l); BUG_ON(path->slots[0] != slot); + ret = btrfs_update_ref(trans, root, l, right, 0, nritems); + BUG_ON(ret); + if (mid <= slot) { btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); @@ -2956,6 +3003,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = wret; wret = btrfs_free_extent(trans, root, leaf->start, leaf->len, + path->nodes[1]->start, btrfs_header_owner(path->nodes[1]), root_gen, 0, 0, 1); if (wret) @@ -3007,7 +3055,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, free_extent_buffer(leaf); wret = btrfs_free_extent(trans, root, bytenr, - blocksize, + blocksize, path->nodes[1]->start, btrfs_header_owner(path->nodes[1]), root_gen, 0, 0, 1); if (wret) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 730aae3bc181..138c157bbc45 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -40,7 +40,7 @@ extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; struct btrfs_ordered_sum; -#define BTRFS_MAGIC "_B8RfS_M" +#define BTRFS_MAGIC "_B9RfS_M" #define BTRFS_ACL_NOT_CACHED ((void *)-1) @@ -81,6 +81,9 @@ struct btrfs_ordered_sum; #define BTRFS_TREE_LOG_OBJECTID -6ULL #define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL +/* dummy objectid represents multiple objectids */ +#define BTRFS_MULTIPLE_OBJECTIDS -255ULL + /* * All files have objectids in this range. */ @@ -369,6 +372,7 @@ struct btrfs_extent_ref { __le64 generation; __le64 objectid; __le64 offset; + __le32 num_refs; } __attribute__ ((__packed__)); /* dev extents record free space on individual devices. The owner @@ -1047,9 +1051,6 @@ btrfs_inode_otime(struct btrfs_inode_item *inode_item) BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); -/* struct btrfs_extent_item */ -BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); - /* struct btrfs_dev_extent */ BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, chunk_tree, 64); @@ -1070,14 +1071,20 @@ BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); BTRFS_SETGET_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64); +BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32); BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64); BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref, generation, 64); BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref, objectid, 64); -BTRFS_SETGET_STACK_FUNCS(stack_ref_offset, struct btrfs_extent_ref, offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_ref_offset, struct btrfs_extent_ref, + offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref, + num_refs, 32); +/* struct btrfs_extent_item */ +BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 32); @@ -1474,8 +1481,7 @@ static inline struct dentry *fdentry(struct file *file) { } /* extent-tree.c */ -int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path, - u64 start, u64 len); +int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin); int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, @@ -1495,10 +1501,9 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data, int owner); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u32 blocksize, + u32 blocksize, u64 parent, u64 root_objectid, u64 ref_generation, - u64 first_objectid, int level, u64 hint, u64 empty_size); @@ -1508,23 +1513,24 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, u64 bytenr, + struct btrfs_path *path, + u64 bytenr, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 num_bytes, u64 min_bytes, + u64 num_bytes, u64 parent, u64 min_bytes, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, u64 data); int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, struct btrfs_key *ins); int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, struct btrfs_key *ins); @@ -1535,9 +1541,16 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, u64 search_end, struct btrfs_key *ins, u64 data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int cache_ref); -int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, + struct extent_buffer *orig_buf, struct extent_buffer *buf, + u32 *nr_extents); +int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *buf, u32 nr_extents); +int btrfs_update_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *orig_buf, + struct extent_buffer *buf, int start_slot, int nr); +int btrfs_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner_objectid, u64 owner_offset, int pin); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); @@ -1545,10 +1558,15 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_io_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset); + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset); +int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 orig_parent, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner, u64 owner_offset); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); @@ -1561,7 +1579,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, int type); - +int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *new_key); struct extent_buffer *btrfs_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 25be96946a2f..d35ca6a3f513 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -882,8 +882,8 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, root->ref_cows = 0; root->node = btrfs_alloc_free_block(trans, root, root->leafsize, - BTRFS_TREE_LOG_OBJECTID, - 0, 0, 0, 0, 0); + 0, BTRFS_TREE_LOG_OBJECTID, + trans->transid, 0, 0, 0); btrfs_set_header_nritems(root->node, 0); btrfs_set_header_level(root->node, 0); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 558fbe407368..5258923d621f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -29,6 +29,21 @@ #include "locking.h" #include "ref-cache.h" +#define PENDING_EXTENT_INSERT 0 +#define PENDING_EXTENT_DELETE 1 +#define PENDING_BACKREF_UPDATE 2 + +struct pending_extent_op { + int type; + u64 bytenr; + u64 num_bytes; + u64 parent; + u64 orig_parent; + u64 generation; + u64 orig_generation; + int level; +}; + static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -487,48 +502,15 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, return ret; } -static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset) -{ - u32 high_crc = ~(u32)0; - u32 low_crc = ~(u32)0; - __le64 lenum; - lenum = cpu_to_le64(root_objectid); - high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); - lenum = cpu_to_le64(ref_generation); - low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); - if (owner >= BTRFS_FIRST_FREE_OBJECTID) { - lenum = cpu_to_le64(owner); - low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); - lenum = cpu_to_le64(owner_offset); - low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); - } - return ((u64)high_crc << 32) | (u64)low_crc; -} - -static int match_extent_ref(struct extent_buffer *leaf, - struct btrfs_extent_ref *disk_ref, - struct btrfs_extent_ref *cpu_ref) -{ - int ret; - int len; - - if (cpu_ref->objectid) - len = sizeof(*cpu_ref); - else - len = 2 * sizeof(u64); - ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref, - len); - return ret == 0; -} - /* simple helper to search for an existing extent at a given offset */ -int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path, - u64 start, u64 len) +int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) { int ret; struct btrfs_key key; + struct btrfs_path *path; + path = btrfs_alloc_path(); + BUG_ON(!path); maybe_lock_mutex(root); key.objectid = start; key.offset = len; @@ -536,72 +518,7 @@ int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path, ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 0, 0); maybe_unlock_mutex(root); - return ret; -} - -static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 bytenr, - u64 root_objectid, - u64 ref_generation, u64 owner, - u64 owner_offset, int del) -{ - u64 hash; - struct btrfs_key key; - struct btrfs_key found_key; - struct btrfs_extent_ref ref; - struct extent_buffer *leaf; - struct btrfs_extent_ref *disk_ref; - int ret; - int ret2; - - btrfs_set_stack_ref_root(&ref, root_objectid); - btrfs_set_stack_ref_generation(&ref, ref_generation); - btrfs_set_stack_ref_objectid(&ref, owner); - btrfs_set_stack_ref_offset(&ref, owner_offset); - - hash = hash_extent_ref(root_objectid, ref_generation, owner, - owner_offset); - key.offset = hash; - key.objectid = bytenr; - key.type = BTRFS_EXTENT_REF_KEY; - - while (1) { - ret = btrfs_search_slot(trans, root, &key, path, - del ? -1 : 0, del); - if (ret < 0) - goto out; - leaf = path->nodes[0]; - if (ret != 0) { - u32 nritems = btrfs_header_nritems(leaf); - if (path->slots[0] >= nritems) { - ret2 = btrfs_next_leaf(root, path); - if (ret2) - goto out; - leaf = path->nodes[0]; - } - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != bytenr || - found_key.type != BTRFS_EXTENT_REF_KEY) - goto out; - key.offset = found_key.offset; - if (del) { - btrfs_release_path(root, path); - continue; - } - } - disk_ref = btrfs_item_ptr(path->nodes[0], - path->slots[0], - struct btrfs_extent_ref); - if (match_extent_ref(path->nodes[0], disk_ref, &ref)) { - ret = 0; - goto out; - } - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - key.offset = found_key.offset + 1; - btrfs_release_path(root, path); - } -out: + btrfs_free_path(path); return ret; } @@ -622,7 +539,7 @@ out: * File extents can be referenced by: * * - multiple snapshots, subvolumes, or different generations in one subvol - * - different files inside a single subvolume (in theory, not implemented yet) + * - different files inside a single subvolume * - different offsets inside a file (bookend extents in file.c) * * The extent ref structure has fields for: @@ -631,119 +548,284 @@ out: * - Generation number of the tree holding the reference * - objectid of the file holding the reference * - offset in the file corresponding to the key holding the reference + * - number of references holding by parent node (alway 1 for tree blocks) + * + * Btree leaf may hold multiple references to a file extent. In most cases, + * these references are from same file and the corresponding offsets inside + * the file are close together. So inode objectid and offset in file are + * just hints, they provide hints about where in the btree the references + * can be found and when we can stop searching. * * When a file extent is allocated the fields are filled in: - * (root_key.objectid, trans->transid, inode objectid, offset in file) + * (root_key.objectid, trans->transid, inode objectid, offset in file, 1) * * When a leaf is cow'd new references are added for every file extent found - * in the leaf. It looks the same as the create case, but trans->transid - * will be different when the block is cow'd. + * in the leaf. It looks similar to the create case, but trans->transid will + * be different when the block is cow'd. * - * (root_key.objectid, trans->transid, inode objectid, offset in file) + * (root_key.objectid, trans->transid, inode objectid, offset in file, + * number of references in the leaf) * - * When a file extent is removed either during snapshot deletion or file - * truncation, the corresponding back reference is found - * by searching for: + * Because inode objectid and offset in file are just hints, they are not + * used when backrefs are deleted. When a file extent is removed either + * during snapshot deletion or file truncation, we find the corresponding + * back back reference and check the following fields. * - * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), - * inode objectid, offset in file) + * (btrfs_header_owner(leaf), btrfs_header_generation(leaf)) * * Btree extents can be referenced by: * * - Different subvolumes * - Different generations of the same subvolume * - * Storing sufficient information for a full reverse mapping of a btree - * block would require storing the lowest key of the block in the backref, - * and it would require updating that lowest key either before write out or - * every time it changed. Instead, the objectid of the lowest key is stored - * along with the level of the tree block. This provides a hint - * about where in the btree the block can be found. Searches through the - * btree only need to look for a pointer to that block, so they stop one - * level higher than the level recorded in the backref. - * - * Some btrees do not do reference counting on their extents. These - * include the extent tree and the tree of tree roots. Backrefs for these - * trees always have a generation of zero. - * * When a tree block is created, back references are inserted: * - * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid) + * (root->root_key.objectid, trans->transid, level, 0, 1) * - * When a tree block is cow'd in a reference counted root, - * new back references are added for all the blocks it points to. - * These are of the form (trans->transid will have increased since creation): + * When a tree block is cow'd, new back references are added for all the + * blocks it points to. If the tree block isn't in reference counted root, + * the old back references are removed. These new back references are of + * the form (trans->transid will have increased since creation): * - * (root->root_key.objectid, trans->transid, level, lowest_key_objectid) + * (root->root_key.objectid, trans->transid, level, 0, 1) * - * Because the lowest_key_objectid and the level are just hints - * they are not used when backrefs are deleted. When a backref is deleted: + * When a backref is in deleting, the following fields are checked: * * if backref was for a tree root: - * root_objectid = root->root_key.objectid + * (btrfs_header_owner(itself), btrfs_header_generation(itself)) * else - * root_objectid = btrfs_header_owner(parent) + * (btrfs_header_owner(parent), btrfs_header_generation(parent)) * - * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0) + * Back Reference Key composing: * - * Back Reference Key hashing: - * - * Back references have four fields, each 64 bits long. Unfortunately, - * This is hashed into a single 64 bit number and placed into the key offset. - * The key objectid corresponds to the first byte in the extent, and the - * key type is set to BTRFS_EXTENT_REF_KEY + * The key objectid corresponds to the first byte in the extent, the key + * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first + * byte of parent extent. If a extent is tree root, the key offset is set + * to the key objectid. */ -int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 bytenr, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset) + +static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr, + u64 parent, u64 ref_root, + u64 ref_generation, int del) { - u64 hash; struct btrfs_key key; - struct btrfs_extent_ref ref; - struct btrfs_extent_ref *disk_ref; + struct btrfs_extent_ref *ref; + struct extent_buffer *leaf; int ret; - btrfs_set_stack_ref_root(&ref, root_objectid); - btrfs_set_stack_ref_generation(&ref, ref_generation); - btrfs_set_stack_ref_objectid(&ref, owner); - btrfs_set_stack_ref_offset(&ref, owner_offset); - - hash = hash_extent_ref(root_objectid, ref_generation, owner, - owner_offset); - key.offset = hash; key.objectid = bytenr; key.type = BTRFS_EXTENT_REF_KEY; + key.offset = parent; - ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref)); - while (ret == -EEXIST) { - disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_ref); - if (match_extent_ref(path->nodes[0], disk_ref, &ref)) - goto out; - key.offset++; - btrfs_release_path(root, path); - ret = btrfs_insert_empty_item(trans, root, path, &key, - sizeof(ref)); - } - if (ret) + ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1); + if (ret < 0) goto out; - disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_ref); - write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref, - sizeof(ref)); + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); + if (btrfs_ref_root(leaf, ref) != ref_root || + btrfs_ref_generation(leaf, ref) != ref_generation) { + ret = -EIO; + WARN_ON(1); + goto out; + } + ret = 0; +out: + return ret; +} + +static int noinline insert_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 parent, + u64 ref_root, u64 ref_generation, + u64 owner_objectid, u64 owner_offset) +{ + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_extent_ref *ref; + u32 num_refs; + int ret; + + key.objectid = bytenr; + key.type = BTRFS_EXTENT_REF_KEY; + key.offset = parent; + + ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref)); + if (ret == 0) { + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref); + btrfs_set_ref_root(leaf, ref, ref_root); + btrfs_set_ref_generation(leaf, ref, ref_generation); + btrfs_set_ref_objectid(leaf, ref, owner_objectid); + btrfs_set_ref_offset(leaf, ref, owner_offset); + btrfs_set_ref_num_refs(leaf, ref, 1); + } else if (ret == -EEXIST) { + u64 existing_owner; + BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref); + if (btrfs_ref_root(leaf, ref) != ref_root || + btrfs_ref_generation(leaf, ref) != ref_generation) { + ret = -EIO; + WARN_ON(1); + goto out; + } + + num_refs = btrfs_ref_num_refs(leaf, ref); + BUG_ON(num_refs == 0); + btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); + + existing_owner = btrfs_ref_objectid(leaf, ref); + if (existing_owner == owner_objectid && + btrfs_ref_offset(leaf, ref) > owner_offset) { + btrfs_set_ref_offset(leaf, ref, owner_offset); + } else if (existing_owner != owner_objectid && + existing_owner != BTRFS_MULTIPLE_OBJECTIDS) { + btrfs_set_ref_objectid(leaf, ref, + BTRFS_MULTIPLE_OBJECTIDS); + btrfs_set_ref_offset(leaf, ref, 0); + } + ret = 0; + } else { + goto out; + } btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_release_path(root, path); return ret; } +static int noinline remove_extent_backref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path) +{ + struct extent_buffer *leaf; + struct btrfs_extent_ref *ref; + u32 num_refs; + int ret = 0; + + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); + num_refs = btrfs_ref_num_refs(leaf, ref); + BUG_ON(num_refs == 0); + num_refs -= 1; + if (num_refs == 0) { + ret = btrfs_del_item(trans, root, path); + } else { + btrfs_set_ref_num_refs(leaf, ref, num_refs); + btrfs_mark_buffer_dirty(leaf); + } + btrfs_release_path(root, path); + return ret; +} + +static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 orig_parent, u64 parent, + u64 orig_root, u64 ref_root, + u64 orig_generation, u64 ref_generation, + u64 owner_objectid, u64 owner_offset) +{ + int ret; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_path *path; + + if (root == root->fs_info->extent_root) { + struct pending_extent_op *extent_op; + u64 num_bytes; + + BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL); + num_bytes = btrfs_level_size(root, (int)owner_objectid); + if (test_range_bit(&root->fs_info->extent_ins, bytenr, + bytenr + num_bytes - 1, EXTENT_LOCKED, 0)) { + u64 priv; + ret = get_state_private(&root->fs_info->extent_ins, + bytenr, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *) + (unsigned long)priv; + BUG_ON(extent_op->parent != orig_parent); + BUG_ON(extent_op->generation != orig_generation); + extent_op->parent = parent; + extent_op->generation = ref_generation; + } else { + extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); + BUG_ON(!extent_op); + + extent_op->type = PENDING_BACKREF_UPDATE; + extent_op->bytenr = bytenr; + extent_op->num_bytes = num_bytes; + extent_op->parent = parent; + extent_op->orig_parent = orig_parent; + extent_op->generation = ref_generation; + extent_op->orig_generation = orig_generation; + extent_op->level = (int)owner_objectid; + + set_extent_bits(&root->fs_info->extent_ins, + bytenr, bytenr + num_bytes - 1, + EXTENT_LOCKED, GFP_NOFS); + set_state_private(&root->fs_info->extent_ins, + bytenr, (unsigned long)extent_op); + } + return 0; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + ret = lookup_extent_backref(trans, extent_root, path, + bytenr, orig_parent, orig_root, + orig_generation, 1); + if (ret) + goto out; + ret = remove_extent_backref(trans, extent_root, path); + if (ret) + goto out; + ret = insert_extent_backref(trans, extent_root, path, bytenr, + parent, ref_root, ref_generation, + owner_objectid, owner_offset); + BUG_ON(ret); + finish_current_insert(trans, extent_root); + del_pending_extents(trans, extent_root); +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 orig_parent, u64 parent, + u64 ref_root, u64 ref_generation, + u64 owner_objectid, u64 owner_offset) +{ + int ret; + if (ref_root == BTRFS_TREE_LOG_OBJECTID && + owner_objectid < BTRFS_FIRST_FREE_OBJECTID) + return 0; + maybe_lock_mutex(root); + ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, + parent, ref_root, ref_root, + ref_generation, ref_generation, + owner_objectid, owner_offset); + maybe_unlock_mutex(root); + return ret; +} + static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset) + struct btrfs_root *root, u64 bytenr, + u64 orig_parent, u64 parent, + u64 orig_root, u64 ref_root, + u64 orig_generation, u64 ref_generation, + u64 owner_objectid, u64 owner_offset) { struct btrfs_path *path; int ret; @@ -752,24 +834,28 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_extent_item *item; u32 refs; - WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); if (!path) return -ENOMEM; path->reada = 1; key.objectid = bytenr; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = num_bytes; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = (u64)-1; + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); if (ret < 0) return ret; - if (ret != 0) { - BUG(); - } - BUG_ON(ret != 0); + BUG_ON(ret == 0 || path->slots[0] == 0); + + path->slots[0]--; l = path->nodes[0]; + + btrfs_item_key_to_cpu(l, &key, path->slots[0]); + BUG_ON(key.objectid != bytenr); + BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(l, item); btrfs_set_extent_refs(l, item, refs + 1); @@ -778,9 +864,10 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_release_path(root->fs_info->extent_root, path); path->reada = 1; - ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, - path, bytenr, root_objectid, - ref_generation, owner, owner_offset); + ret = insert_extent_backref(trans, root->fs_info->extent_root, + path, bytenr, parent, + ref_root, ref_generation, + owner_objectid, owner_offset); BUG_ON(ret); finish_current_insert(trans, root->fs_info->extent_root); del_pending_extents(trans, root->fs_info->extent_root); @@ -790,18 +877,20 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, } int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset) + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 ref_root, u64 ref_generation, + u64 owner_objectid, u64 owner_offset) { int ret; - - mutex_lock(&root->fs_info->alloc_mutex); - ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, - root_objectid, ref_generation, - owner, owner_offset); - mutex_unlock(&root->fs_info->alloc_mutex); + if (ref_root == BTRFS_TREE_LOG_OBJECTID && + owner_objectid < BTRFS_FIRST_FREE_OBJECTID) + return 0; + maybe_lock_mutex(root); + ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, + 0, ref_root, 0, ref_generation, + owner_objectid, owner_offset); + maybe_unlock_mutex(root); return ret; } @@ -813,9 +902,9 @@ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, return 0; } -static int lookup_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u32 *refs) +int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u32 *refs) { struct btrfs_path *path; int ret; @@ -846,7 +935,6 @@ out: return 0; } - static int get_reference_status(struct btrfs_root *root, u64 bytenr, u64 parent_gen, u64 ref_objectid, u64 *min_generation, u32 *ref_count) @@ -863,7 +951,7 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, int ret; key.objectid = bytenr; - key.offset = 0; + key.offset = (u64)-1; key.type = BTRFS_EXTENT_ITEM_KEY; path = btrfs_alloc_path(); @@ -872,7 +960,10 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, if (ret < 0) goto out; BUG_ON(ret == 0); + if (ret < 0 || path->slots[0] == 0) + goto out; + path->slots[0]--; leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); @@ -909,7 +1000,7 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, struct btrfs_extent_ref); ref_generation = btrfs_ref_generation(leaf, ref_item); /* - * For (parent_gen > 0 && parent_gen > ref_gen): + * For (parent_gen > 0 && parent_gen > ref_generation): * * we reach here through the oldest root, therefore * all other reference from same snapshot should have @@ -919,8 +1010,7 @@ static int get_reference_status(struct btrfs_root *root, u64 bytenr, (parent_gen > 0 && parent_gen > ref_generation) || (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID && ref_objectid != btrfs_ref_objectid(leaf, ref_item))) { - if (ref_count) - *ref_count = 2; + *ref_count = 2; break; } @@ -1020,80 +1110,29 @@ out: return ret; } -int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int cache_ref) +int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *buf, u32 nr_extents) { - u64 bytenr; u32 nritems; struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int level; - int ret; - int faili; - int nr_file_extents = 0; + int ret = 0; if (!root->ref_cows) return 0; level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); - for (i = 0; i < nritems; i++) { - cond_resched(); - if (level == 0) { - u64 disk_bytenr; - btrfs_item_key_to_cpu(buf, &key, i); - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(buf, i, - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(buf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); - if (disk_bytenr == 0) - continue; - if (buf != root->commit_root) - nr_file_extents++; - - mutex_lock(&root->fs_info->alloc_mutex); - ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr, - btrfs_file_extent_disk_num_bytes(buf, fi), - root->root_key.objectid, trans->transid, - key.objectid, key.offset); - mutex_unlock(&root->fs_info->alloc_mutex); - if (ret) { - faili = i; - WARN_ON(1); - goto fail; - } - } else { - bytenr = btrfs_node_blockptr(buf, i); - btrfs_node_key_to_cpu(buf, &key, i); - - mutex_lock(&root->fs_info->alloc_mutex); - ret = __btrfs_inc_extent_ref(trans, root, bytenr, - btrfs_level_size(root, level - 1), - root->root_key.objectid, - trans->transid, - level - 1, key.objectid); - mutex_unlock(&root->fs_info->alloc_mutex); - if (ret) { - faili = i; - WARN_ON(1); - goto fail; - } - } - } - /* cache orignal leaf block's references */ - if (level == 0 && cache_ref && buf != root->commit_root) { + if (level == 0) { struct btrfs_leaf_ref *ref; struct btrfs_extent_info *info; - ref = btrfs_alloc_leaf_ref(root, nr_file_extents); + ref = btrfs_alloc_leaf_ref(root, nr_extents); if (!ref) { - WARN_ON(1); + ret = -ENOMEM; goto out; } @@ -1101,10 +1140,10 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, ref->bytenr = buf->start; ref->owner = btrfs_header_owner(buf); ref->generation = btrfs_header_generation(buf); - ref->nritems = nr_file_extents; + ref->nritems = nr_extents; info = ref->extents; - for (i = 0; nr_file_extents > 0 && i < nritems; i++) { + for (i = 0; nr_extents > 0 && i < nritems; i++) { u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) @@ -1132,13 +1171,52 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_free_leaf_ref(root, ref); } out: - return 0; -fail: - WARN_ON(1); -#if 0 - for (i =0; i < faili; i++) { + return ret; +} + +int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *orig_buf, struct extent_buffer *buf, + u32 *nr_extents) +{ + u64 bytenr; + u64 ref_root; + u64 orig_root; + u64 ref_generation; + u64 orig_generation; + u32 nritems; + u32 nr_file_extents = 0; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + int i; + int level; + int ret = 0; + int faili = 0; + int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, + u64, u64, u64, u64, u64, u64, u64, u64, u64); + + ref_root = btrfs_header_owner(buf); + ref_generation = btrfs_header_generation(buf); + orig_root = btrfs_header_owner(orig_buf); + orig_generation = btrfs_header_generation(orig_buf); + + nritems = btrfs_header_nritems(buf); + level = btrfs_header_level(buf); + + if (root->ref_cows) { + process_func = __btrfs_inc_extent_ref; + } else { + if (level == 0 && + root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) + goto out; + if (level != 0 && + root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) + goto out; + process_func = __btrfs_update_extent_ref; + } + + for (i = 0; i < nritems; i++) { + cond_resched(); if (level == 0) { - u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -1147,24 +1225,131 @@ fail: if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); - if (disk_bytenr == 0) + bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (bytenr == 0) continue; - err = btrfs_free_extent(trans, root, disk_bytenr, - btrfs_file_extent_disk_num_bytes(buf, - fi), 0); - BUG_ON(err); + + nr_file_extents++; + + maybe_lock_mutex(root); + ret = process_func(trans, root, bytenr, + orig_buf->start, buf->start, + orig_root, ref_root, + orig_generation, ref_generation, + key.objectid, key.offset); + maybe_unlock_mutex(root); + + if (ret) { + faili = i; + WARN_ON(1); + goto fail; + } } else { bytenr = btrfs_node_blockptr(buf, i); - err = btrfs_free_extent(trans, root, bytenr, - btrfs_level_size(root, level - 1), 0); - BUG_ON(err); + maybe_lock_mutex(root); + ret = process_func(trans, root, bytenr, + orig_buf->start, buf->start, + orig_root, ref_root, + orig_generation, ref_generation, + level - 1, 0); + maybe_unlock_mutex(root); + if (ret) { + faili = i; + WARN_ON(1); + goto fail; + } } } -#endif +out: + if (nr_extents) { + if (level == 0) + *nr_extents = nr_file_extents; + else + *nr_extents = nritems; + } + return 0; +fail: + WARN_ON(1); return ret; } +int btrfs_update_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *orig_buf, + struct extent_buffer *buf, int start_slot, int nr) + +{ + u64 bytenr; + u64 ref_root; + u64 orig_root; + u64 ref_generation; + u64 orig_generation; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + int i; + int ret; + int slot; + int level; + + BUG_ON(start_slot < 0); + BUG_ON(start_slot + nr > btrfs_header_nritems(buf)); + + ref_root = btrfs_header_owner(buf); + ref_generation = btrfs_header_generation(buf); + orig_root = btrfs_header_owner(orig_buf); + orig_generation = btrfs_header_generation(orig_buf); + level = btrfs_header_level(buf); + + if (!root->ref_cows) { + if (level == 0 && + root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) + return 0; + if (level != 0 && + root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) + return 0; + } + + for (i = 0, slot = start_slot; i < nr; i++, slot++) { + cond_resched(); + if (level == 0) { + btrfs_item_key_to_cpu(buf, &key, slot); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf, slot, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(buf, fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (bytenr == 0) + continue; + maybe_lock_mutex(root); + ret = __btrfs_update_extent_ref(trans, root, bytenr, + orig_buf->start, buf->start, + orig_root, ref_root, + orig_generation, ref_generation, + key.objectid, key.offset); + maybe_unlock_mutex(root); + if (ret) + goto fail; + } else { + bytenr = btrfs_node_blockptr(buf, slot); + maybe_lock_mutex(root); + ret = __btrfs_update_extent_ref(trans, root, bytenr, + orig_buf->start, buf->start, + orig_root, ref_root, + orig_generation, ref_generation, + level - 1, 0); + maybe_unlock_mutex(root); + if (ret) + goto fail; + } + } + return 0; +fail: + WARN_ON(1); + return -1; +} + static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -1539,19 +1724,18 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, { u64 start; u64 end; + u64 priv; struct btrfs_fs_info *info = extent_root->fs_info; - struct extent_buffer *eb; struct btrfs_path *path; - struct btrfs_key ins; - struct btrfs_disk_key first; + struct btrfs_extent_ref *ref; + struct pending_extent_op *extent_op; + struct btrfs_key key; struct btrfs_extent_item extent_item; int ret; - int level; int err = 0; WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); btrfs_set_stack_extent_refs(&extent_item, 1); - btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); path = btrfs_alloc_path(); while(1) { @@ -1560,37 +1744,54 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, if (ret) break; - ins.objectid = start; - ins.offset = end + 1 - start; - err = btrfs_insert_item(trans, extent_root, &ins, + ret = get_state_private(&info->extent_ins, start, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *)(unsigned long)priv; + + if (extent_op->type == PENDING_EXTENT_INSERT) { + key.objectid = start; + key.offset = end + 1 - start; + key.type = BTRFS_EXTENT_ITEM_KEY; + err = btrfs_insert_item(trans, extent_root, &key, &extent_item, sizeof(extent_item)); - clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, - GFP_NOFS); + BUG_ON(err); - eb = btrfs_find_create_tree_block(extent_root, ins.objectid, - ins.offset); + clear_extent_bits(&info->extent_ins, start, end, + EXTENT_LOCKED, GFP_NOFS); - if (!btrfs_buffer_uptodate(eb, trans->transid)) - btrfs_read_buffer(eb, trans->transid); + err = insert_extent_backref(trans, extent_root, path, + start, extent_op->parent, + extent_root->root_key.objectid, + extent_op->generation, + extent_op->level, 0); + BUG_ON(err); + } else if (extent_op->type == PENDING_BACKREF_UPDATE) { + err = lookup_extent_backref(trans, extent_root, path, + start, extent_op->orig_parent, + extent_root->root_key.objectid, + extent_op->orig_generation, 0); + BUG_ON(err); - btrfs_tree_lock(eb); - level = btrfs_header_level(eb); - if (level == 0) { - btrfs_item_key(eb, &first, 0); + clear_extent_bits(&info->extent_ins, start, end, + EXTENT_LOCKED, GFP_NOFS); + + key.objectid = start; + key.offset = extent_op->parent; + key.type = BTRFS_EXTENT_REF_KEY; + err = btrfs_set_item_key_safe(trans, extent_root, path, + &key); + BUG_ON(err); + ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_ref); + btrfs_set_ref_generation(path->nodes[0], ref, + extent_op->generation); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(extent_root, path); } else { - btrfs_node_key(eb, &first, 0); + BUG_ON(1); } - btrfs_tree_unlock(eb); - free_extent_buffer(eb); - /* - * the first key is just a hint, so the race we've created - * against reading it is fine - */ - err = btrfs_insert_extent_backref(trans, extent_root, path, - start, extent_root->root_key.objectid, - 0, level, - btrfs_disk_key_objectid(&first)); - BUG_ON(err); + kfree(extent_op); + if (need_resched()) { mutex_unlock(&extent_root->fs_info->alloc_mutex); cond_resched(); @@ -1601,52 +1802,44 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, return 0; } -static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, - int is_data, int pending) +static int pin_down_bytes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int is_data) { int err = 0; + struct extent_buffer *buf; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); - if (!pending) { - struct extent_buffer *buf; + if (is_data) + goto pinit; - if (is_data) - goto pinit; + buf = btrfs_find_tree_block(root, bytenr, num_bytes); + if (!buf) + goto pinit; - buf = btrfs_find_tree_block(root, bytenr, num_bytes); - if (buf) { - /* we can reuse a block if it hasn't been written - * and it is from this transaction. We can't - * reuse anything from the tree log root because - * it has tiny sub-transactions. - */ - if (btrfs_buffer_uptodate(buf, 0) && - btrfs_try_tree_lock(buf)) { - u64 transid = - root->fs_info->running_transaction->transid; - u64 header_transid = - btrfs_header_generation(buf); - if (btrfs_header_owner(buf) != - BTRFS_TREE_LOG_OBJECTID && - header_transid == transid && - !btrfs_header_flag(buf, - BTRFS_HEADER_FLAG_WRITTEN)) { - clean_tree_block(NULL, root, buf); - btrfs_tree_unlock(buf); - free_extent_buffer(buf); - return 1; - } - btrfs_tree_unlock(buf); - } + /* we can reuse a block if it hasn't been written + * and it is from this transaction. We can't + * reuse anything from the tree log root because + * it has tiny sub-transactions. + */ + if (btrfs_buffer_uptodate(buf, 0) && + btrfs_try_tree_lock(buf)) { + u64 header_owner = btrfs_header_owner(buf); + u64 header_transid = btrfs_header_generation(buf); + if (header_owner != BTRFS_TREE_LOG_OBJECTID && + header_transid == trans->transid && + !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { + clean_tree_block(NULL, root, buf); + btrfs_tree_unlock(buf); free_extent_buffer(buf); + return 1; } -pinit: - btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); - } else { - set_extent_bits(&root->fs_info->pending_del, - bytenr, bytenr + num_bytes - 1, - EXTENT_LOCKED, GFP_NOFS); + btrfs_tree_unlock(buf); } + free_extent_buffer(buf); +pinit: + btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); + BUG_ON(err < 0); return 0; } @@ -1654,11 +1847,12 @@ pinit: /* * remove an extent from the root, returns 0 on success */ -static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 bytenr, u64 num_bytes, +static int __free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, int pin, - int mark_free) + u64 owner_objectid, u64 owner_offset, + int pin, int mark_free) { struct btrfs_path *path; struct btrfs_key key; @@ -1681,10 +1875,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root return -ENOMEM; path->reada = 1; - ret = lookup_extent_backref(trans, extent_root, path, - bytenr, root_objectid, - ref_generation, - owner_objectid, owner_offset, 1); + ret = lookup_extent_backref(trans, extent_root, path, bytenr, parent, + root_objectid, ref_generation, 1); if (ret == 0) { struct btrfs_key found_key; extent_slot = path->slots[0]; @@ -1702,8 +1894,15 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (path->slots[0] - extent_slot > 5) break; } - if (!found_extent) - ret = btrfs_del_item(trans, extent_root, path); + if (!found_extent) { + ret = remove_extent_backref(trans, extent_root, path); + BUG_ON(ret); + btrfs_release_path(extent_root, path); + ret = btrfs_search_slot(trans, extent_root, + &key, path, -1, 1); + BUG_ON(ret); + extent_slot = path->slots[0]; + } } else { btrfs_print_leaf(extent_root, path->nodes[0]); WARN_ON(1); @@ -1712,14 +1911,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root root_objectid, ref_generation, owner_objectid, owner_offset); } - if (!found_extent) { - btrfs_release_path(extent_root, path); - ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); - if (ret < 0) - return ret; - BUG_ON(ret); - extent_slot = path->slots[0]; - } leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, extent_slot, @@ -1732,6 +1923,10 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(leaf); if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { + struct btrfs_extent_ref *ref; + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref); + BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); /* if the back ref and the extent are next to each other * they get deleted below in one shot */ @@ -1739,15 +1934,13 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_to_del = 2; } else if (found_extent) { /* otherwise delete the extent back ref */ - ret = btrfs_del_item(trans, extent_root, path); + ret = remove_extent_backref(trans, extent_root, path); BUG_ON(ret); /* if refs are 0, we need to setup the path for deletion */ if (refs == 0) { btrfs_release_path(extent_root, path); ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); - if (ret < 0) - return ret; BUG_ON(ret); } } @@ -1761,8 +1954,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root #endif if (pin) { - ret = pin_down_bytes(root, bytenr, num_bytes, - owner_objectid >= BTRFS_FIRST_FREE_OBJECTID, 0); + ret = pin_down_bytes(trans, root, bytenr, num_bytes, + owner_objectid >= BTRFS_FIRST_FREE_OBJECTID); if (ret > 0) mark_free = 1; BUG_ON(ret < 0); @@ -1781,9 +1974,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root root_used - num_bytes); ret = btrfs_del_items(trans, extent_root, path, path->slots[0], num_to_del); - if (ret) { - return ret; - } + BUG_ON(ret); ret = update_block_group(trans, root, bytenr, num_bytes, 0, mark_free); BUG_ON(ret); @@ -1822,33 +2013,61 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct { int ret; int err = 0; + int mark_free = 0; u64 start; u64 end; + u64 priv; struct extent_io_tree *pending_del; - struct extent_io_tree *pinned_extents; + struct extent_io_tree *extent_ins; + struct pending_extent_op *extent_op; WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); + extent_ins = &extent_root->fs_info->extent_ins; pending_del = &extent_root->fs_info->pending_del; - pinned_extents = &extent_root->fs_info->pinned_extents; while(1) { ret = find_first_extent_bit(pending_del, 0, &start, &end, EXTENT_LOCKED); if (ret) break; + + ret = get_state_private(pending_del, start, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *)(unsigned long)priv; + clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, GFP_NOFS); - if (!test_range_bit(&extent_root->fs_info->extent_ins, - start, end, EXTENT_LOCKED, 0)) { - btrfs_update_pinned_extents(extent_root, start, - end + 1 - start, 1); + + ret = pin_down_bytes(trans, extent_root, start, + end + 1 - start, 0); + mark_free = ret > 0; + if (!test_range_bit(extent_ins, start, end, + EXTENT_LOCKED, 0)) { +free_extent: ret = __free_extent(trans, extent_root, - start, end + 1 - start, - extent_root->root_key.objectid, - 0, 0, 0, 0, 0); + start, end + 1 - start, + extent_op->orig_parent, + extent_root->root_key.objectid, + extent_op->orig_generation, + extent_op->level, 0, 0, mark_free); + kfree(extent_op); } else { - clear_extent_bits(&extent_root->fs_info->extent_ins, - start, end, EXTENT_LOCKED, GFP_NOFS); + kfree(extent_op); + ret = get_state_private(extent_ins, start, &priv); + BUG_ON(ret); + extent_op = (struct pending_extent_op *) + (unsigned long)priv; + + clear_extent_bits(extent_ins, start, end, + EXTENT_LOCKED, GFP_NOFS); + + if (extent_op->type == PENDING_BACKREF_UPDATE) + goto free_extent; + + ret = update_block_group(trans, extent_root, start, + end + 1 - start, 0, mark_free); + BUG_ON(ret); + kfree(extent_op); } if (ret) err = ret; @@ -1866,21 +2085,36 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct * remove an extent from the root, returns 0 on success */ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 root_objectid, - u64 ref_generation, u64 owner_objectid, - u64 owner_offset, int pin) + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid, u64 owner_offset, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; int pending_ret; int ret; WARN_ON(num_bytes < root->sectorsize); - if (!root->ref_cows) - ref_generation = 0; - if (root == extent_root) { - pin_down_bytes(root, bytenr, num_bytes, 0, 1); + struct pending_extent_op *extent_op; + + extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); + BUG_ON(!extent_op); + + extent_op->type = PENDING_EXTENT_DELETE; + extent_op->bytenr = bytenr; + extent_op->num_bytes = num_bytes; + extent_op->parent = parent; + extent_op->orig_parent = parent; + extent_op->generation = ref_generation; + extent_op->orig_generation = ref_generation; + extent_op->level = (int)owner_objectid; + + set_extent_bits(&root->fs_info->pending_del, + bytenr, bytenr + num_bytes - 1, + EXTENT_LOCKED, GFP_NOFS); + set_state_private(&root->fs_info->pending_del, + bytenr, (unsigned long)extent_op); return 0; } /* if metadata always pin */ @@ -1901,9 +2135,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (ref_generation != trans->transid) pin = 1; - ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid, - ref_generation, owner_objectid, owner_offset, - pin, pin == 0); + ret = __free_extent(trans, root, bytenr, num_bytes, parent, + root_objectid, ref_generation, owner_objectid, + owner_offset, pin, pin == 0); finish_current_insert(trans, root->fs_info->extent_root); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); @@ -1911,15 +2145,15 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } int btrfs_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 root_objectid, - u64 ref_generation, u64 owner_objectid, - u64 owner_offset, int pin) + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid, u64 owner_offset, int pin) { int ret; maybe_lock_mutex(root); - ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, + ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, root_objectid, ref_generation, owner_objectid, owner_offset, pin); maybe_unlock_mutex(root); @@ -2271,7 +2505,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, } static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, struct btrfs_key *ins) @@ -2289,6 +2523,9 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key keys[2]; + if (parent == 0) + parent = ins->objectid; + /* block accounting for super block */ spin_lock_irq(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); @@ -2300,17 +2537,32 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, btrfs_set_root_used(&root->root_item, root_used + num_bytes); if (root == extent_root) { + struct pending_extent_op *extent_op; + + extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); + BUG_ON(!extent_op); + + extent_op->type = PENDING_EXTENT_INSERT; + extent_op->bytenr = ins->objectid; + extent_op->num_bytes = ins->offset; + extent_op->parent = parent; + extent_op->orig_parent = 0; + extent_op->generation = ref_generation; + extent_op->orig_generation = 0; + extent_op->level = (int)owner; + set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, EXTENT_LOCKED, GFP_NOFS); + set_state_private(&root->fs_info->extent_ins, + ins->objectid, (unsigned long)extent_op); goto update_block; } memcpy(&keys[0], ins, sizeof(*ins)); - keys[1].offset = hash_extent_ref(root_objectid, ref_generation, - owner, owner_offset); keys[1].objectid = ins->objectid; keys[1].type = BTRFS_EXTENT_REF_KEY; + keys[1].offset = parent; sizes[0] = sizeof(*extent_item); sizes[1] = sizeof(*ref); @@ -2331,6 +2583,7 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); btrfs_set_ref_objectid(path->nodes[0], ref, owner); btrfs_set_ref_offset(path->nodes[0], ref, owner_offset); + btrfs_set_ref_num_refs(path->nodes[0], ref, 1); btrfs_mark_buffer_dirty(path->nodes[0]); @@ -2359,7 +2612,7 @@ out: } int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, struct btrfs_key *ins) @@ -2369,9 +2622,9 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, if (root_objectid == BTRFS_TREE_LOG_OBJECTID) return 0; maybe_lock_mutex(root); - ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, - ref_generation, owner, - owner_offset, ins); + ret = __btrfs_alloc_reserved_extent(trans, root, parent, + root_objectid, ref_generation, + owner, owner_offset, ins); maybe_unlock_mutex(root); return ret; } @@ -2382,7 +2635,7 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, * space cache bits as well */ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset, struct btrfs_key *ins) @@ -2396,10 +2649,9 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset); BUG_ON(ret); - - ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, - ref_generation, owner, - owner_offset, ins); + ret = __btrfs_alloc_reserved_extent(trans, root, parent, + root_objectid, ref_generation, + owner, owner_offset, ins); maybe_unlock_mutex(root); return ret; } @@ -2413,9 +2665,9 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 num_bytes, u64 min_alloc_size, + u64 num_bytes, u64 parent, u64 min_alloc_size, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, + u64 owner_objectid, u64 owner_offset, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, u64 data) { @@ -2428,9 +2680,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_end, ins, data); BUG_ON(ret); if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { - ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, - ref_generation, owner, - owner_offset, ins); + ret = __btrfs_alloc_reserved_extent(trans, root, parent, + root_objectid, ref_generation, + owner_objectid, owner_offset, ins); BUG_ON(ret); } @@ -2468,10 +2720,9 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, */ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u32 blocksize, + u32 blocksize, u64 parent, u64 root_objectid, u64 ref_generation, - u64 first_objectid, int level, u64 hint, u64 empty_size) @@ -2480,10 +2731,9 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int ret; struct extent_buffer *buf; - ret = btrfs_alloc_extent(trans, root, blocksize, blocksize, - root_objectid, ref_generation, - level, first_objectid, empty_size, hint, - (u64)-1, &ins, 0); + ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize, + root_objectid, ref_generation, level, 0, + empty_size, hint, (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); return ERR_PTR(ret); @@ -2531,15 +2781,14 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), - leaf_owner, leaf_generation, + leaf->start, leaf_owner, leaf_generation, key.objectid, key.offset, 0); mutex_unlock(&root->fs_info->alloc_mutex); + BUG_ON(ret); atomic_inc(&root->fs_info->throttle_gen); wake_up(&root->fs_info->transaction_throttle); cond_resched(); - - BUG_ON(ret); } return 0; } @@ -2554,10 +2803,10 @@ static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, for (i = 0; i < ref->nritems; i++) { mutex_lock(&root->fs_info->alloc_mutex); - ret = __btrfs_free_extent(trans, root, - info->bytenr, info->num_bytes, - ref->owner, ref->generation, - info->objectid, info->offset, 0); + ret = __btrfs_free_extent(trans, root, info->bytenr, + info->num_bytes, ref->bytenr, + ref->owner, ref->generation, + info->objectid, info->offset, 0); mutex_unlock(&root->fs_info->alloc_mutex); atomic_inc(&root->fs_info->throttle_gen); @@ -2576,7 +2825,7 @@ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, { int ret; - ret = lookup_extent_ref(NULL, root, start, len, refs); + ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); BUG_ON(ret); #if 0 // some debugging code in case we see problems here @@ -2672,8 +2921,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, - blocksize, root_owner, - root_gen, 0, 0, 1); + blocksize, parent->start, + root_owner, root_gen, 0, 0, 1); BUG_ON(ret); mutex_unlock(&root->fs_info->alloc_mutex); @@ -2690,8 +2939,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, * So, we don't need to check it again */ if (*level == 1) { - struct btrfs_key key; - btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); ref = btrfs_lookup_leaf_ref(root, bytenr); if (ref) { ret = cache_drop_leaf_ref(trans, root, ref); @@ -2750,12 +2997,13 @@ out: mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, - root_owner, root_gen, 0, 0, 1); + parent->start, root_owner, root_gen, + 0, 0, 1); + mutex_unlock(&root->fs_info->alloc_mutex); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; BUG_ON(ret); - mutex_unlock(&root->fs_info->alloc_mutex); cond_resched(); return 0; @@ -2792,19 +3040,18 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans, root_item->drop_level = i; return 0; } else { - if (path->nodes[*level] == root->node) { - root_owner = root->root_key.objectid; - root_gen = - btrfs_header_generation(path->nodes[*level]); - } else { - struct extent_buffer *node; - node = path->nodes[*level + 1]; - root_owner = btrfs_header_owner(node); - root_gen = btrfs_header_generation(node); - } + struct extent_buffer *parent; + if (path->nodes[*level] == root->node) + parent = path->nodes[*level]; + else + parent = path->nodes[*level + 1]; + + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, path->nodes[*level]->len, + parent->start, root_owner, root_gen, 0, 0, 1); BUG_ON(ret); free_extent_buffer(path->nodes[*level]); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8624f3e88036..58ad25838a41 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2201,9 +2201,10 @@ retry: } if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; - +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) if (wbc->range_cont) wbc->range_start = index << PAGE_CACHE_SHIFT; +#endif return ret; } EXPORT_SYMBOL(extent_write_cache_pages); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 58b329ddb426..48a702d41c8c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -524,6 +524,9 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, { u64 extent_end = 0; u64 search_start = start; + u64 leaf_start; + u64 root_gen; + u64 root_owner; struct extent_buffer *leaf; struct btrfs_file_extent_item *extent; struct btrfs_path *path; @@ -562,6 +565,9 @@ next_slot: bookend = 0; found_extent = 0; found_inline = 0; + leaf_start = 0; + root_gen = 0; + root_owner = 0; extent = NULL; leaf = path->nodes[0]; slot = path->slots[0]; @@ -628,27 +634,18 @@ next_slot: search_start = extent_end; if (end <= extent_end && start >= key.offset && found_inline) { *hint_byte = EXTENT_MAP_INLINE; - continue; + goto out; } + + if (found_extent) { + read_extent_buffer(leaf, &old, (unsigned long)extent, + sizeof(old)); + root_gen = btrfs_header_generation(leaf); + root_owner = btrfs_header_owner(leaf); + leaf_start = leaf->start; + } + if (end < extent_end && end >= key.offset) { - if (found_extent) { - u64 disk_bytenr = - btrfs_file_extent_disk_bytenr(leaf, extent); - u64 disk_num_bytes = - btrfs_file_extent_disk_num_bytes(leaf, - extent); - read_extent_buffer(leaf, &old, - (unsigned long)extent, - sizeof(old)); - if (disk_bytenr != 0) { - ret = btrfs_inc_extent_ref(trans, root, - disk_bytenr, disk_num_bytes, - root->root_key.objectid, - trans->transid, - key.objectid, end); - BUG_ON(ret); - } - } bookend = 1; if (found_inline && start <= key.offset) keep = 1; @@ -687,49 +684,12 @@ next_slot: } /* delete the entire extent */ if (!keep) { - u64 disk_bytenr = 0; - u64 disk_num_bytes = 0; - u64 extent_num_bytes = 0; - u64 root_gen; - u64 root_owner; - - root_gen = btrfs_header_generation(leaf); - root_owner = btrfs_header_owner(leaf); - if (found_extent) { - disk_bytenr = - btrfs_file_extent_disk_bytenr(leaf, - extent); - disk_num_bytes = - btrfs_file_extent_disk_num_bytes(leaf, - extent); - extent_num_bytes = - btrfs_file_extent_num_bytes(leaf, extent); - *hint_byte = - btrfs_file_extent_disk_bytenr(leaf, - extent); - } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ BUG_ON(ret); - btrfs_release_path(root, path); extent = NULL; - if (found_extent && disk_bytenr != 0) { - dec_i_blocks(inode, extent_num_bytes); - ret = btrfs_free_extent(trans, root, - disk_bytenr, - disk_num_bytes, - root_owner, - root_gen, inode->i_ino, - key.offset, 0); - } - - BUG_ON(ret); - if (!bookend && search_start >= end) { - ret = 0; - goto out; - } - if (!bookend) - continue; + btrfs_release_path(root, path); + /* the extent will be freed later */ } if (bookend && found_inline && start <= key.offset) { u32 new_size; @@ -737,10 +697,13 @@ next_slot: extent_end - end); dec_i_blocks(inode, (extent_end - key.offset) - (extent_end - end)); - btrfs_truncate_item(trans, root, path, new_size, 0); + ret = btrfs_truncate_item(trans, root, path, + new_size, 0); + BUG_ON(ret); } /* create bookend, splitting the extent in two */ if (bookend && found_extent) { + u64 disk_bytenr; struct btrfs_key ins; ins.objectid = inode->i_ino; ins.offset = end; @@ -748,13 +711,9 @@ next_slot: btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); + BUG_ON(ret); leaf = path->nodes[0]; - if (ret) { - btrfs_print_leaf(root, leaf); - printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.type, ins.offset, start, end, key.offset, extent_end, keep); - } - BUG_ON(ret); extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); write_extent_buffer(leaf, &old, @@ -770,11 +729,43 @@ next_slot: BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(path->nodes[0]); - if (le64_to_cpu(old.disk_bytenr) != 0) { + + disk_bytenr = le64_to_cpu(old.disk_bytenr); + if (disk_bytenr != 0) { + ret = btrfs_inc_extent_ref(trans, root, + disk_bytenr, + le64_to_cpu(old.disk_num_bytes), + leaf->start, + root->root_key.objectid, + trans->transid, + ins.objectid, ins.offset); + BUG_ON(ret); + } + btrfs_release_path(root, path); + if (disk_bytenr != 0) { inode->i_blocks += btrfs_file_extent_num_bytes(leaf, extent) >> 9; } + } + + if (found_extent && !keep) { + u64 disk_bytenr = le64_to_cpu(old.disk_bytenr); + + if (disk_bytenr != 0) { + dec_i_blocks(inode, le64_to_cpu(old.num_bytes)); + ret = btrfs_free_extent(trans, root, + disk_bytenr, + le64_to_cpu(old.disk_num_bytes), + leaf_start, root_owner, + root_gen, key.objectid, + key.offset, 0); + BUG_ON(ret); + *hint_byte = disk_bytenr; + } + } + + if (search_start >= end) { ret = 0; goto out; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 65b4f864b0df..2e7d82ec5d18 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -528,6 +528,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_file_extent_item *extent_item; + struct btrfs_path *path = NULL; + struct extent_buffer *leaf; u64 alloc_hint = 0; struct list_head list; struct btrfs_key ins; @@ -544,20 +547,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) goto nocow; + path = btrfs_alloc_path(); + BUG_ON(!path); + lock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); INIT_LIST_HEAD(&list); - ins.objectid = ordered_extent->start; - ins.offset = ordered_extent->len; - ins.type = BTRFS_EXTENT_ITEM_KEY; - - ret = btrfs_alloc_reserved_extent(trans, root, root->root_key.objectid, - trans->transid, inode->i_ino, - ordered_extent->file_offset, &ins); - BUG_ON(ret); mutex_lock(&BTRFS_I(inode)->extent_mutex); ret = btrfs_drop_extents(trans, root, inode, @@ -566,18 +564,42 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->len, ordered_extent->file_offset, &alloc_hint); BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - ordered_extent->file_offset, - ordered_extent->start, - ordered_extent->len, - ordered_extent->len, 0); + + ins.objectid = inode->i_ino; + ins.offset = ordered_extent->file_offset; + ins.type = BTRFS_EXTENT_DATA_KEY; + ret = btrfs_insert_empty_item(trans, root, path, &ins, + sizeof(*extent_item)); BUG_ON(ret); + leaf = path->nodes[0]; + extent_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, extent_item, trans->transid); + btrfs_set_file_extent_type(leaf, extent_item, BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_disk_bytenr(leaf, extent_item, + ordered_extent->start); + btrfs_set_file_extent_disk_num_bytes(leaf, extent_item, + ordered_extent->len); + btrfs_set_file_extent_offset(leaf, extent_item, 0); + btrfs_set_file_extent_num_bytes(leaf, extent_item, + ordered_extent->len); + btrfs_mark_buffer_dirty(leaf); btrfs_drop_extent_cache(inode, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1); mutex_unlock(&BTRFS_I(inode)->extent_mutex); + ins.objectid = ordered_extent->start; + ins.offset = ordered_extent->len; + ins.type = BTRFS_EXTENT_ITEM_KEY; + ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, + root->root_key.objectid, + trans->transid, inode->i_ino, + ordered_extent->file_offset, &ins); + BUG_ON(ret); + btrfs_release_path(root, path); + inode->i_blocks += ordered_extent->len >> 9; unlock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, @@ -596,6 +618,8 @@ nocow: btrfs_put_ordered_extent(ordered_extent); btrfs_end_transaction(trans, root); + if (path) + btrfs_free_path(path); return 0; } @@ -1433,10 +1457,7 @@ search_again: if (root->ref_cows) dec_i_blocks(inode, num_dec); } - if (root->ref_cows) { - root_gen = - btrfs_header_generation(leaf); - } + root_gen = btrfs_header_generation(leaf); root_owner = btrfs_header_owner(leaf); } } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { @@ -1477,7 +1498,7 @@ delete: if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, - root_owner, + leaf->start, root_owner, root_gen, inode->i_ino, found_key.offset, 0); BUG_ON(ret); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f84b5f6991cc..4c6e0c15754d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -76,9 +76,8 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, if (ret) goto fail; - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, - objectid, trans->transid, 0, 0, - 0, 0); + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, + objectid, trans->transid, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); goto fail; @@ -525,13 +524,10 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) struct file *src_file; struct inode *src; struct btrfs_trans_handle *trans; - struct btrfs_ordered_extent *ordered; struct btrfs_path *path; struct extent_buffer *leaf; char *buf; struct btrfs_key key; - struct btrfs_key new_key; - u32 size; u32 nritems; int slot; int ret; @@ -576,6 +572,7 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) /* do any pending delalloc/csum calc on src, one way or another, and lock file content */ while (1) { + struct btrfs_ordered_extent *ordered; lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) @@ -619,6 +616,32 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) key.objectid != src->i_ino) break; + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY || + btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { + u32 size; + struct btrfs_key new_key; + + size = btrfs_item_size_nr(leaf, slot); + read_extent_buffer(leaf, buf, + btrfs_item_ptr_offset(leaf, slot), + size); + btrfs_release_path(root, path); + + memcpy(&new_key, &key, sizeof(new_key)); + new_key.objectid = inode->i_ino; + ret = btrfs_insert_empty_item(trans, root, path, + &new_key, size); + if (ret) + goto out; + + leaf = path->nodes[0]; + slot = path->slots[0]; + write_extent_buffer(leaf, buf, + btrfs_item_ptr_offset(leaf, slot), + size); + btrfs_mark_buffer_dirty(leaf); + } + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { struct btrfs_file_extent_item *extent; int found_type; @@ -634,31 +657,15 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) /* ds == 0 means there's a hole */ if (ds != 0) { ret = btrfs_inc_extent_ref(trans, root, - ds, dl, + ds, dl, leaf->start, root->root_key.objectid, trans->transid, inode->i_ino, key.offset); - if (ret) - goto out; + BUG_ON(ret); } } } - - if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY || - btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { - size = btrfs_item_size_nr(leaf, slot); - read_extent_buffer(leaf, buf, - btrfs_item_ptr_offset(leaf, slot), - size); - btrfs_release_path(root, path); - memcpy(&new_key, &key, sizeof(new_key)); - new_key.objectid = inode->i_ino; - ret = btrfs_insert_item(trans, root, &new_key, - buf, size); - BUG_ON(ret); - } else { - btrfs_release_path(root, path); - } + btrfs_release_path(root, path); key.offset++; } ret = 0; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index f1374d597a17..3577badfa5bc 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -102,11 +102,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_EXTENT_REF_KEY: ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref); printk("\t\textent back ref root %llu gen %llu " - "owner %llu offset %llu\n", + "owner %llu offset %llu num_refs %lu\n", (unsigned long long)btrfs_ref_root(l, ref), (unsigned long long)btrfs_ref_generation(l, ref), (unsigned long long)btrfs_ref_objectid(l, ref), - (unsigned long long)btrfs_ref_offset(l, ref)); + (unsigned long long)btrfs_ref_offset(l, ref), + (unsigned long)btrfs_ref_num_refs(l, ref)); break; case BTRFS_EXTENT_DATA_KEY: diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 56de3fb2d8d5..88bbfd959f18 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -89,9 +89,9 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, int ret; u64 objectid = root->root_key.objectid; - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, BTRFS_TREE_LOG_OBJECTID, - 0, 0, 0, 0, 0); + trans->transid, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); return ret; @@ -433,6 +433,49 @@ insert: trans->transid); } } + + if (overwrite_root && + key->type == BTRFS_EXTENT_DATA_KEY) { + int extent_type; + struct btrfs_file_extent_item *fi; + + fi = (struct btrfs_file_extent_item *)dst_ptr; + extent_type = btrfs_file_extent_type(path->nodes[0], fi); + if (extent_type == BTRFS_FILE_EXTENT_REG) { + struct btrfs_key ins; + ins.objectid = btrfs_file_extent_disk_bytenr( + path->nodes[0], fi); + ins.offset = btrfs_file_extent_disk_num_bytes( + path->nodes[0], fi); + ins.type = BTRFS_EXTENT_ITEM_KEY; + + /* + * is this extent already allocated in the extent + * allocation tree? If so, just add a reference + */ + ret = btrfs_lookup_extent(root, ins.objectid, + ins.offset); + if (ret == 0) { + ret = btrfs_inc_extent_ref(trans, root, + ins.objectid, ins.offset, + path->nodes[0]->start, + root->root_key.objectid, + trans->transid, + key->objectid, key->offset); + } else { + /* + * insert the extent pointer in the extent + * allocation tree + */ + ret = btrfs_alloc_logged_extent(trans, root, + path->nodes[0]->start, + root->root_key.objectid, + trans->transid, key->objectid, + key->offset, &ins); + BUG_ON(ret); + } + } + } no_copy: btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root, path); @@ -551,45 +594,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, start, extent_end, start, &alloc_hint); BUG_ON(ret); + /* insert the extent */ + ret = overwrite_item(trans, root, path, eb, slot, key); BUG_ON(ret); - if (found_type == BTRFS_FILE_EXTENT_REG) { - struct btrfs_key ins; - ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); - ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); - ins.type = BTRFS_EXTENT_ITEM_KEY; - - /* insert the extent pointer in the file */ - ret = overwrite_item(trans, root, path, eb, slot, key); - BUG_ON(ret); - - /* - * is this extent already allocated in the extent - * allocation tree? If so, just add a reference - */ - ret = btrfs_lookup_extent(root, path, ins.objectid, ins.offset); - btrfs_release_path(root, path); - if (ret == 0) { - ret = btrfs_inc_extent_ref(trans, root, - ins.objectid, ins.offset, - root->root_key.objectid, - trans->transid, key->objectid, start); - } else { - /* - * insert the extent pointer in the extent - * allocation tree - */ - ret = btrfs_alloc_logged_extent(trans, root, - root->root_key.objectid, - trans->transid, key->objectid, - start, &ins); - BUG_ON(ret); - } - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - /* inline extents are easy, we just overwrite them */ - ret = overwrite_item(trans, root, path, eb, slot, key); - BUG_ON(ret); - } /* btrfs_drop_extents changes i_blocks, update it here */ inode->i_blocks += (extent_end - start) >> 9; btrfs_update_inode(trans, root, inode); @@ -1806,16 +1814,14 @@ static int noinline walk_up_log_tree(struct btrfs_trans_handle *trans, WARN_ON(*level == 0); return 0; } else { - if (path->nodes[*level] == root->node) { - root_owner = root->root_key.objectid; - root_gen = - btrfs_header_generation(path->nodes[*level]); - } else { - struct extent_buffer *node; - node = path->nodes[*level + 1]; - root_owner = btrfs_header_owner(node); - root_gen = btrfs_header_generation(node); - } + struct extent_buffer *parent; + if (path->nodes[*level] == root->node) + parent = path->nodes[*level]; + else + parent = path->nodes[*level + 1]; + + root_owner = btrfs_header_owner(parent); + root_gen = btrfs_header_generation(parent); wc->process_func(root, path->nodes[*level], wc, btrfs_header_generation(path->nodes[*level])); if (wc->free) { @@ -2525,8 +2531,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (ds != 0) { ret = btrfs_inc_extent_ref(trans, log, ds, dl, + dst_path->nodes[0]->start, BTRFS_TREE_LOG_OBJECTID, - 0, ins_keys[i].objectid, + trans->transid, + ins_keys[i].objectid, ins_keys[i].offset); BUG_ON(ret); } From 34353029534a08e41cfb8be647d734b9ce9ebff8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 23 Sep 2008 20:19:49 -0400 Subject: [PATCH 0714/2985] Btrfs: Fix race against disk_i_size updates The code to update the on disk i_size happens before the ordered_extent record is removed. So, it is possible for multiple ordered_extent completion routines to run at the same time, and to find each other in the ordered tree. The end result is they both decide not to update disk_i_size, leaving it too small. This temporary fix just puts the updates inside the extent_mutex. A real solution would be stronger ordering of disk_i_size updates against removing the ordered extent from the tree. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2e7d82ec5d18..adb169d739ce 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -608,9 +608,11 @@ nocow: add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); + mutex_lock(&BTRFS_I(inode)->extent_mutex); btrfs_ordered_update_i_size(inode, ordered_extent); btrfs_update_inode(trans, root, inode); btrfs_remove_ordered_extent(inode, ordered_extent); + mutex_unlock(&BTRFS_I(inode)->extent_mutex); /* once for us */ btrfs_put_ordered_extent(ordered_extent); From b4f6c45dfbf84f47c21f73f6370ad1292b0627fd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 24 Sep 2008 11:05:53 -0400 Subject: [PATCH 0715/2985] Update Btrfs files for in-kernel usage btrfs had magic to put the chagneset id into a printk on module load. This removes that from the Makefile and hardcodes the printk to print "Btrfs" Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 7 +------ fs/btrfs/version.h | 4 ++++ 2 files changed, 5 insertions(+), 6 deletions(-) create mode 100644 fs/btrfs/version.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index eb36ae981bdc..d5c28557fba9 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -13,17 +13,12 @@ else # Normal Makefile KERNELDIR := /lib/modules/`uname -r`/build -all: version +all: $(MAKE) -C $(KERNELDIR) M=`pwd` modules -version: - bash version.sh - modules_install: $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install clean: $(MAKE) -C $(KERNELDIR) M=`pwd` clean -tester: - $(MAKE) -C $(KERNELDIR) M=`pwd` tree-defrag.o transaction.o sysfs.o super.o root-tree.o inode-map.o inode-item.o inode.o file-item.o file.o extent_map.o disk-io.o ctree.o dir-item.o extent-tree.o endif diff --git a/fs/btrfs/version.h b/fs/btrfs/version.h new file mode 100644 index 000000000000..9bf3946d5ef2 --- /dev/null +++ b/fs/btrfs/version.h @@ -0,0 +1,4 @@ +#ifndef __BTRFS_VERSION_H +#define __BTRFS_VERSION_H +#define BTRFS_BUILD_VERSION "Btrfs" +#endif From 60582d1e933649f14f46f816bb75049058679e01 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 25 Sep 2008 12:25:16 -0400 Subject: [PATCH 0716/2985] Add Btrfs to fs/Kconfig and fs/Makefile Signed-off-by: Chris Mason --- fs/Kconfig | 17 +++++++++++++++++ fs/Makefile | 1 + 2 files changed, 18 insertions(+) diff --git a/fs/Kconfig b/fs/Kconfig index abccb5dab9a8..18f5a85b47c6 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -497,6 +497,23 @@ config OCFS2_DEBUG_FS this option for debugging only as it is likely to decrease performance of the filesystem. +config BTRFS_FS + tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format" + depends on EXPERIMENTAL + select LIBCRC32C + help + Btrfs is a new filesystem with extents, writable snapshotting, + support for multiple devices and many more features. + + Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET + FINALIZED. You should say N here unless you are interested in + testing Btrfs with non-critical data. + + To compile this file system support as a module, choose M here. The + module will be called btrfs. + + If unsure, say N. + endif # BLOCK config DNOTIFY diff --git a/fs/Makefile b/fs/Makefile index a1482a5eff15..41fcc858c972 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -121,4 +121,5 @@ obj-$(CONFIG_HOSTFS) += hostfs/ obj-$(CONFIG_HPPFS) += hppfs/ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ +obj-$(CONFIG_BTRFS_FS) += btrfs/ obj-$(CONFIG_GFS2_FS) += gfs2/ From 9b49c9b9f93e148815f2544d0c91f43b6d72eea9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 24 Sep 2008 11:23:25 -0400 Subject: [PATCH 0717/2985] Btrfs: Fix allocation completions in tree log replay After a crash, the tree log code uses btrfs_alloc_logged_extent to record allocations of data extents that it finds in the log tree. These come in basically random order, which does not fit how btrfs_remove_free_space() expects to be called. btrfs_remove_free_space was changed to support recording an extent allocation in the middle of a region of free space. Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 01c26e8ae555..f1d9b6bc23ba 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -306,11 +306,45 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, ret = link_free_space(block_group, info); BUG_ON(ret); + } else if (info && info->offset < offset && + info->offset + info->bytes >= offset + bytes) { + u64 old_start = info->offset; + /* + * we're freeing space in the middle of the info, + * this can happen during tree log replay + * + * first unlink the old info and then + * insert it again after the hole we're creating + */ + unlink_free_space(block_group, info); + if (offset + bytes < info->offset + info->bytes) { + u64 old_end = info->offset + info->bytes; + + info->offset = offset + bytes; + info->bytes = old_end - info->offset; + ret = link_free_space(block_group, info); + BUG_ON(ret); + } else { + /* the hole we're creating ends at the end + * of the info struct, just free the info + */ + kfree(info); + } + + /* step two, insert a new info struct to cover anything + * before the hole + */ + spin_unlock(&block_group->lock); + ret = btrfs_add_free_space(block_group, old_start, + offset - old_start); + BUG_ON(ret); + goto out_nolock; } else { WARN_ON(1); } out: spin_unlock(&block_group->lock); +out_nolock: return ret; } From 2b1f55b0f0d0d1a66470ef4ea2696cd5dd741a12 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 24 Sep 2008 11:48:04 -0400 Subject: [PATCH 0718/2985] Remove Btrfs compat code for older kernels Btrfs had compatibility code for kernels back to 2.6.18. These have been removed, and will be maintained in a separate backport git tree from now on. Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 6 -- fs/btrfs/compat.h | 40 +----------- fs/btrfs/crc32c.h | 6 -- fs/btrfs/ctree.h | 7 +-- fs/btrfs/disk-io.c | 28 --------- fs/btrfs/export.c | 6 -- fs/btrfs/extent_io.c | 66 +------------------- fs/btrfs/file.c | 24 +------ fs/btrfs/inode.c | 135 +--------------------------------------- fs/btrfs/super.c | 7 --- fs/btrfs/sysfs.c | 33 ---------- fs/btrfs/volumes.c | 20 ------ 12 files changed, 10 insertions(+), 368 deletions(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 2ee301740195..4e780b279de6 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -20,13 +20,7 @@ #include #include #include - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) # include -#else -# include -#endif - #include "async-thread.h" /* diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h index b0ed1887d9b1..cd6598b169df 100644 --- a/fs/btrfs/compat.h +++ b/fs/btrfs/compat.h @@ -1,9 +1,8 @@ #ifndef _COMPAT_H_ #define _COMPAT_H_ -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,26) -#define trylock_page(page) (!TestSetPageLocked(page)) -#endif +#define btrfs_drop_nlink(inode) drop_nlink(inode) +#define btrfs_inc_nlink(inode) inc_nlink(inode) #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27) static inline struct dentry *d_obtain_alias(struct inode *inode) @@ -22,39 +21,4 @@ static inline struct dentry *d_obtain_alias(struct inode *inode) } #endif -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -static inline void btrfs_drop_nlink(struct inode *inode) -{ - inode->i_nlink--; -} - -static inline void btrfs_inc_nlink(struct inode *inode) -{ - inode->i_nlink++; -} -#else -# define btrfs_drop_nlink(inode) drop_nlink(inode) -# define btrfs_inc_nlink(inode) inc_nlink(inode) -#endif - -/* - * Even if AppArmor isn't enabled, it still has different prototypes. - * Add more distro/version pairs here to declare which has AppArmor applied. - */ -#if defined(CONFIG_SUSE_KERNEL) -# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) -# define REMOVE_SUID_PATH 1 -# endif -#endif - -/* - * catch any other distros that have patched in apparmor. This isn't - * 100% reliable because it won't catch people that hand compile their - * own distro kernels without apparmor compiled in. But, it is better - * than nothing. - */ -#ifdef CONFIG_SECURITY_APPARMOR -# define REMOVE_SUID_PATH 1 -#endif - #endif /* _COMPAT_H_ */ diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h index bf6c12e85730..4f0fefed132a 100644 --- a/fs/btrfs/crc32c.h +++ b/fs/btrfs/crc32c.h @@ -96,13 +96,7 @@ static inline u32 __btrfs_crc32c(u32 crc, unsigned char const *address, * We must workaround older implementations of crc32c_le() * found on older kernel versions. */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) -#define btrfs_crc32c(seed, data, length) \ - __cpu_to_le32( __btrfs_crc32c( __le32_to_cpu(seed), \ - (unsigned char const *)data, length) ) -#else #define btrfs_crc32c(seed, data, length) \ __btrfs_crc32c(seed, (unsigned char const *)data, length) #endif -#endif diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 138c157bbc45..3b3c1ca50c5d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1472,12 +1472,9 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { ((unsigned long)(btrfs_leaf_data(leaf) + \ btrfs_item_offset_nr(leaf, slot))) -static inline struct dentry *fdentry(struct file *file) { -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - return file->f_dentry; -#else +static inline struct dentry *fdentry(struct file *file) +{ return file->f_path.dentry; -#endif } /* extent-tree.c */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d35ca6a3f513..dffb8dabd533 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -26,11 +26,7 @@ #include // for block_sync_page #include #include -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) # include -#else -# include -#endif #include "crc32c.h" #include "ctree.h" #include "disk-io.h" @@ -373,21 +369,11 @@ out: return ret; } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) static void end_workqueue_bio(struct bio *bio, int err) -#else -static int end_workqueue_bio(struct bio *bio, - unsigned int bytes_done, int err) -#endif { struct end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - fs_info = end_io_wq->info; end_io_wq->error = err; end_io_wq->work.func = end_workqueue_fn; @@ -397,10 +383,6 @@ static int end_workqueue_bio(struct bio *bio, &end_io_wq->work); else btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif } int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, @@ -1161,9 +1143,7 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_init(bdi); -#endif bdi->ra_pages = default_backing_dev_info.ra_pages; bdi->state = 0; bdi->capabilities = default_backing_dev_info.capabilities; @@ -1242,11 +1222,7 @@ static void end_workqueue_fn(struct btrfs_work *work) bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; kfree(end_io_wq); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - bio_endio(bio, bio->bi_size, error); -#else bio_endio(bio, error); -#endif } static int cleaner_kthread(void *arg) @@ -1673,9 +1649,7 @@ fail: kfree(extent_root); kfree(tree_root); -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); -#endif kfree(fs_info); return ERR_PTR(err); } @@ -1936,9 +1910,7 @@ int close_ctree(struct btrfs_root *root) btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); -#endif kfree(fs_info->extent_root); kfree(fs_info->tree_root); diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 2b357a6d2407..48b82cd7583c 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -7,12 +7,6 @@ #include "export.h" #include "compat.h" -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -#define FILEID_BTRFS_WITHOUT_PARENT 0x4d -#define FILEID_BTRFS_WITH_PARENT 0x4e -#define FILEID_BTRFS_WITH_PARENT_ROOT 0x4f -#endif - #define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, parent_objectid)/4) #define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, parent_root_objectid)/4) #define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid)/4) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 58ad25838a41..e3a25be5c663 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1397,12 +1397,7 @@ static int check_page_writeback(struct extent_io_tree *tree, * Scheduling is not allowed, so the extent state tree is expected * to have one and only one object corresponding to this IO. */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) static void end_bio_extent_writepage(struct bio *bio, int err) -#else -static int end_bio_extent_writepage(struct bio *bio, - unsigned int bytes_done, int err) -#endif { int uptodate = err == 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -1412,10 +1407,6 @@ static int end_bio_extent_writepage(struct bio *bio, int whole_page; int ret; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif do { struct page *page = bvec->bv_page; tree = &BTRFS_I(page->mapping->host)->io_tree; @@ -1461,10 +1452,8 @@ static int end_bio_extent_writepage(struct bio *bio, else check_page_writeback(tree, page); } while (bvec >= bio->bi_io_vec); + bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif } /* @@ -1478,12 +1467,7 @@ static int end_bio_extent_writepage(struct bio *bio, * Scheduling is not allowed, so the extent state tree is expected * to have one and only one object corresponding to this IO. */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) static void end_bio_extent_readpage(struct bio *bio, int err) -#else -static int end_bio_extent_readpage(struct bio *bio, - unsigned int bytes_done, int err) -#endif { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -1493,11 +1477,6 @@ static int end_bio_extent_readpage(struct bio *bio, int whole_page; int ret; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - do { struct page *page = bvec->bv_page; tree = &BTRFS_I(page->mapping->host)->io_tree; @@ -1556,9 +1535,6 @@ static int end_bio_extent_readpage(struct bio *bio, } while (bvec >= bio->bi_io_vec); bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif } /* @@ -1566,12 +1542,7 @@ static int end_bio_extent_readpage(struct bio *bio, * the structs in the extent tree when done, and set the uptodate bits * as appropriate. */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) static void end_bio_extent_preparewrite(struct bio *bio, int err) -#else -static int end_bio_extent_preparewrite(struct bio *bio, - unsigned int bytes_done, int err) -#endif { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -1579,11 +1550,6 @@ static int end_bio_extent_preparewrite(struct bio *bio, u64 start; u64 end; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - do { struct page *page = bvec->bv_page; tree = &BTRFS_I(page->mapping->host)->io_tree; @@ -1607,9 +1573,6 @@ static int end_bio_extent_preparewrite(struct bio *bio, } while (bvec >= bio->bi_io_vec); bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif } static struct bio * @@ -2079,12 +2042,6 @@ done: return 0; } -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) -/* Taken directly from 2.6.23 with a mod for a lockpage hook */ -typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, - void *data); -#endif - /** * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. * @mapping: address space structure to write @@ -2201,10 +2158,9 @@ retry: } if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) + if (wbc->range_cont) wbc->range_start = index << PAGE_CACHE_SHIFT; -#endif return ret; } EXPORT_SYMBOL(extent_write_cache_pages); @@ -2560,18 +2516,10 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, * by increasing the reference count. So we know the page must * be in the radix tree. */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) rcu_read_lock(); -#else - read_lock_irq(&mapping->tree_lock); -#endif p = radix_tree_lookup(&mapping->page_tree, i); - -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) rcu_read_unlock(); -#else - read_unlock_irq(&mapping->tree_lock); -#endif + return p; } @@ -2773,21 +2721,13 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, } } clear_page_dirty_for_io(page); -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) spin_lock_irq(&page->mapping->tree_lock); -#else - read_lock_irq(&page->mapping->tree_lock); -#endif if (!PageDirty(page)) { radix_tree_tag_clear(&page->mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) spin_unlock_irq(&page->mapping->tree_lock); -#else - read_unlock_irq(&page->mapping->tree_lock); -#endif unlock_page(page); } return 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 48a702d41c8c..8856570a0ebd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -871,15 +871,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out_nolock; if (count == 0) goto out_nolock; -#ifdef REMOVE_SUID_PATH - err = remove_suid(&file->f_path); -#else -# if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) + err = file_remove_suid(file); -# else - err = remove_suid(fdentry(file)); -# endif -#endif if (err) goto out_nolock; file_update_time(file); @@ -1003,17 +996,10 @@ out_nolock: btrfs_commit_transaction(trans, root); } } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) - do_sync_file_range(file, start_pos, - start_pos + num_written - 1, - SYNC_FILE_RANGE_WRITE | - SYNC_FILE_RANGE_WAIT_AFTER); -#else do_sync_mapping_range(inode->i_mapping, start_pos, start_pos + num_written - 1, SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER); -#endif invalidate_mapping_pages(inode->i_mapping, start_pos >> PAGE_CACHE_SHIFT, (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); @@ -1097,12 +1083,7 @@ out: } static struct vm_operations_struct btrfs_file_vm_ops = { -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - .nopage = filemap_nopage, - .populate = filemap_populate, -#else .fault = filemap_fault, -#endif .page_mkwrite = btrfs_page_mkwrite, }; @@ -1118,9 +1099,6 @@ struct file_operations btrfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .splice_read = generic_file_splice_read, -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - .sendfile = generic_file_sendfile, -#endif .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index adb169d739ce..48a3dc030807 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2073,104 +2073,6 @@ err: return ret; } -/* Kernels earlier than 2.6.28 still have the NFS deadlock where nfsd - will call the file system's ->lookup() method from within its - filldir callback, which in turn was called from the file system's - ->readdir() method. And will deadlock for many file systems. */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - -struct nfshack_dirent { - u64 ino; - loff_t offset; - int namlen; - unsigned int d_type; - char name[]; -}; - -struct nfshack_readdir { - char *dirent; - size_t used; - int full; -}; - - - -static int btrfs_nfshack_filldir(void *__buf, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type) -{ - struct nfshack_readdir *buf = __buf; - struct nfshack_dirent *de = (void *)(buf->dirent + buf->used); - unsigned int reclen; - - reclen = ALIGN(sizeof(struct nfshack_dirent) + namlen, sizeof(u64)); - if (buf->used + reclen > PAGE_SIZE) { - buf->full = 1; - return -EINVAL; - } - - de->namlen = namlen; - de->offset = offset; - de->ino = ino; - de->d_type = d_type; - memcpy(de->name, name, namlen); - buf->used += reclen; - - return 0; -} - -static int btrfs_nfshack_readdir(struct file *file, void *dirent, - filldir_t filldir) -{ - struct nfshack_readdir buf; - struct nfshack_dirent *de; - int err; - int size; - loff_t offset; - - buf.dirent = (void *)__get_free_page(GFP_KERNEL); - if (!buf.dirent) - return -ENOMEM; - - offset = file->f_pos; - - do { - unsigned int reclen; - - buf.used = 0; - buf.full = 0; - err = btrfs_real_readdir(file, &buf, btrfs_nfshack_filldir); - if (err) - break; - - size = buf.used; - - if (!size) - break; - - de = (struct nfshack_dirent *)buf.dirent; - while (size > 0) { - offset = de->offset; - - if (filldir(dirent, de->name, de->namlen, de->offset, - de->ino, de->d_type)) - goto done; - offset = file->f_pos; - - reclen = ALIGN(sizeof(*de) + de->namlen, - sizeof(u64)); - size -= reclen; - de = (struct nfshack_dirent *)((char *)de + reclen); - } - } while (buf.full); - - done: - free_page((unsigned long)buf.dirent); - file->f_pos = offset; - - return err; -} -#endif - int btrfs_write_inode(struct inode *inode, int wait) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -3311,13 +3213,8 @@ unsigned long btrfs_force_ra(struct address_space *mapping, { pgoff_t req_size = last_index - offset + 1; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - offset = page_cache_readahead(mapping, ra, file, offset, req_size); - return offset; -#else page_cache_sync_readahead(mapping, ra, file, offset, req_size); return offset + req_size; -#endif } struct inode *btrfs_alloc_inode(struct super_block *sb) @@ -3373,14 +3270,7 @@ void btrfs_destroy_inode(struct inode *inode) kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) static void init_once(void *foo) -#elif LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void init_once(struct kmem_cache * cachep, void *foo) -#else -static void init_once(void * foo, struct kmem_cache * cachep, - unsigned long flags) -#endif { struct btrfs_inode *ei = (struct btrfs_inode *) foo; @@ -3403,22 +3293,10 @@ void btrfs_destroy_cachep(void) struct kmem_cache *btrfs_cache_create(const char *name, size_t size, unsigned long extra_flags, -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) - void (*ctor)(void *) -#elif LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) - void (*ctor)(struct kmem_cache *, void *) -#else - void (*ctor)(void *, struct kmem_cache *, - unsigned long) -#endif - ) + void (*ctor)(void *)) { return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT | - SLAB_MEM_SPREAD | extra_flags), ctor -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - ,NULL -#endif - ); + SLAB_MEM_SPREAD | extra_flags), ctor); } int btrfs_init_cachep(void) @@ -3666,12 +3544,7 @@ static int btrfs_set_page_dirty(struct page *page) return __set_page_dirty_nobuffers(page); } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) static int btrfs_permission(struct inode *inode, int mask) -#else -static int btrfs_permission(struct inode *inode, int mask, - struct nameidata *nd) -#endif { if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE)) return -EACCES; @@ -3702,11 +3575,7 @@ static struct inode_operations btrfs_dir_ro_inode_operations = { static struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - .readdir = btrfs_nfshack_readdir, -#else /* NFSd readdir/lookup deadlock is fixed */ .readdir = btrfs_real_readdir, -#endif .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_ioctl, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f7b3eac7ac6d..8399d6d05d63 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -349,10 +349,7 @@ static int btrfs_fill_super(struct super_block * sb, sb->s_root = root_dentry; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) save_mount_options(sb, data); -#endif - return 0; fail_close: @@ -566,11 +563,7 @@ static struct super_operations btrfs_super_ops = { .put_super = btrfs_put_super, .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) - .read_inode = btrfs_read_locked_inode, -#else .show_options = generic_show_options, -#endif .write_inode = btrfs_write_inode, .dirty_inode = btrfs_dirty_inode, .alloc_inode = btrfs_alloc_inode, diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 61af5d8446e3..300076e66765 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -28,7 +28,6 @@ #include "disk-io.h" #include "transaction.h" -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", @@ -267,35 +266,3 @@ void btrfs_exit_sysfs(void) kset_unregister(btrfs_kset); } -#else - -int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) -{ - return 0; -} - -int btrfs_sysfs_add_root(struct btrfs_root *root) -{ - return 0; -} - -void btrfs_sysfs_del_root(struct btrfs_root *root) -{ - return; -} - -void btrfs_sysfs_del_super(struct btrfs_fs_info *fs) -{ - return; -} - -int btrfs_init_sysfs(void) -{ - return 0; -} - -void btrfs_exit_sysfs(void) -{ - return; -} -#endif diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b9e5c2d82dde..ddf89626498a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2080,20 +2080,11 @@ int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) static void end_bio_multi_stripe(struct bio *bio, int err) -#else -static int end_bio_multi_stripe(struct bio *bio, - unsigned int bytes_done, int err) -#endif { struct btrfs_multi_bio *multi = bio->bi_private; int is_orig_bio = 0; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif if (err) atomic_inc(&multi->error); @@ -2122,17 +2113,10 @@ static int end_bio_multi_stripe(struct bio *bio, } kfree(multi); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - bio_endio(bio, bio->bi_size, err); -#else bio_endio(bio, err); -#endif } else if (!is_orig_bio) { bio_put(bio); } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif } struct async_sched { @@ -2248,11 +2232,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } else { bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; bio->bi_sector = logical >> 9; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - bio_endio(bio, bio->bi_size, -EIO); -#else bio_endio(bio, -EIO); -#endif } dev_nr++; } From 4434c33c7f233a7ee5dcaf357fbf795818ea8861 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 24 Sep 2008 12:50:10 -0400 Subject: [PATCH 0719/2985] Btrfs: fix sleep with spinlock held during unmount The code to free block groups needs to drop the space info spin lock before calling btrfs_remove_free_space_cache (which can schedule). This is safe because at unmount time, nobody else is going to play with the block groups. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5258923d621f..fe4e11b31a43 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3157,9 +3157,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) block_group = rb_entry(n, struct btrfs_block_group_cache, cache_node); + spin_unlock(&info->block_group_cache_lock); btrfs_remove_free_space_cache(block_group); + spin_lock(&info->block_group_cache_lock); + rb_erase(&block_group->cache_node, &info->block_group_cache_tree); + spin_lock(&block_group->space_info->lock); list_del(&block_group->list); spin_unlock(&block_group->space_info->lock); From 24ab9cd85c11bccacbd0cce7f8e1aebd4930404c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 24 Sep 2008 14:51:30 -0400 Subject: [PATCH 0720/2985] Btrfs: Raise thresholds for metadata writeback Btrfs metadata writeback is fairly expensive. Once a tree block is written it must be cowed before it can be changed again. The btree writepages code has a threshold based on a count of dirty btree bytes which is updated as IO is sent out. This changes btree_writepages to skip the writeout if there are less than 32MB of dirty bytes from the btrees, improving performance across many workloads. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dffb8dabd533..71e81f3a765b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -556,7 +556,7 @@ static int btree_writepages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; - unsigned long thresh = 8 * 1024 * 1024; + unsigned long thresh = 32 * 1024 * 1024; if (wbc->for_kupdate) return 0; @@ -690,7 +690,7 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, int btrfs_write_tree_block(struct extent_buffer *buf) { return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, - buf->start + buf->len - 1, WB_SYNC_NONE); + buf->start + buf->len - 1, WB_SYNC_ALL); } int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) From e8569813849b5da394a195c7e76b4faa452b12d1 Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 26 Sep 2008 10:05:48 -0400 Subject: [PATCH 0721/2985] Btrfs: allocator fixes for space balancing update * Reserved extent accounting: reserved extents have been allocated in the rbtrees that track free space but have not been allocated on disk. They were never properly accounted for in the past, making it hard to know how much space was really free. * btrfs_find_block_group used to return NULL for block groups that had been removed by the space balancing code. This made it hard to account for space during the final stages of a balance run. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 + fs/btrfs/extent-tree.c | 136 ++++++++++++++++++++--------------------- 2 files changed, 67 insertions(+), 71 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3b3c1ca50c5d..c683aaa925fa 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -498,6 +498,7 @@ struct btrfs_space_info { u64 total_bytes; u64 bytes_used; u64 bytes_pinned; + u64 bytes_reserved; int full; int force_alloc; struct list_head list; @@ -519,6 +520,7 @@ struct btrfs_block_group_cache { struct btrfs_block_group_item item; spinlock_t lock; u64 pinned; + u64 reserved; u64 flags; int cached; int ro; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe4e11b31a43..3e2f969de42d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -325,12 +325,9 @@ static int noinline find_free_space(struct btrfs_root *root, struct btrfs_block_group_cache *cache = *cache_ret; struct btrfs_free_space *info = NULL; u64 last; - u64 total_fs_bytes; u64 search_start = *start_ret; WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); - total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - if (!cache) goto out; @@ -354,7 +351,7 @@ new_group: last = cache->key.objectid + cache->key.offset; cache = btrfs_lookup_first_block_group(root->fs_info, last); - if (!cache || cache->key.objectid >= total_fs_bytes) + if (!cache) goto out; *cache_ret = cache; @@ -385,7 +382,6 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, return found; } return NULL; - } static struct btrfs_block_group_cache * @@ -396,7 +392,6 @@ __btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *cache; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; - struct btrfs_space_info *sinfo; u64 used; u64 last = 0; u64 free_check; @@ -413,7 +408,7 @@ __btrfs_find_block_group(struct btrfs_root *root, if (shint && block_group_bits(shint, data) && !shint->ro) { spin_lock(&shint->lock); used = btrfs_block_group_used(&shint->item); - if (used + shint->pinned < + if (used + shint->pinned + shint->reserved < div_factor(shint->key.offset, factor)) { spin_unlock(&shint->lock); return shint; @@ -424,7 +419,7 @@ __btrfs_find_block_group(struct btrfs_root *root, if (hint && !hint->ro && block_group_bits(hint, data)) { spin_lock(&hint->lock); used = btrfs_block_group_used(&hint->item); - if (used + hint->pinned < + if (used + hint->pinned + hint->reserved < div_factor(hint->key.offset, factor)) { spin_unlock(&hint->lock); return hint; @@ -437,27 +432,9 @@ __btrfs_find_block_group(struct btrfs_root *root, else last = search_start; } - sinfo = __find_space_info(root->fs_info, data); - if (!sinfo) - goto found; again: - while(1) { - struct list_head *l; - - cache = NULL; - - spin_lock(&sinfo->lock); - list_for_each(l, &sinfo->block_groups) { - struct btrfs_block_group_cache *entry; - entry = list_entry(l, struct btrfs_block_group_cache, - list); - if ((entry->key.objectid >= last) && - (!cache || (entry->key.objectid < - cache->key.objectid))) - cache = entry; - } - spin_unlock(&sinfo->lock); - + while (1) { + cache = btrfs_lookup_first_block_group(root->fs_info, last); if (!cache) break; @@ -467,7 +444,8 @@ again: if (!cache->ro && block_group_bits(cache, data)) { free_check = div_factor(cache->key.offset, factor); - if (used + cache->pinned < free_check) { + if (used + cache->pinned + cache->reserved < + free_check) { found_group = cache; spin_unlock(&cache->lock); goto found; @@ -1414,6 +1392,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, if (!cache) break; + cache->dirty = 0; last += cache->key.offset; err = write_one_cache_group(trans, root, @@ -1427,8 +1406,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, werr = err; continue; } - - cache->dirty = 0; } btrfs_free_path(path); mutex_unlock(&root->fs_info->alloc_mutex); @@ -1460,6 +1437,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->total_bytes = total_bytes; found->bytes_used = bytes_used; found->bytes_pinned = 0; + found->bytes_reserved = 0; found->full = 0; found->force_alloc = 0; *space_info = found; @@ -1539,8 +1517,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, thresh = div_factor(space_info->total_bytes, 6); if (!force && - (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < - thresh) + (space_info->bytes_used + space_info->bytes_pinned + + space_info->bytes_reserved + alloc_bytes) < thresh) goto out; mutex_lock(&extent_root->fs_info->chunk_mutex); @@ -1621,7 +1599,6 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) return cache->key.objectid; } - int btrfs_update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin) { @@ -1639,29 +1616,20 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, } while (num > 0) { cache = btrfs_lookup_block_group(fs_info, bytenr); - if (!cache) { - u64 first = first_logical_byte(root, bytenr); - WARN_ON(first < bytenr); - len = min(first - bytenr, num); - } else { - len = min(num, cache->key.offset - - (bytenr - cache->key.objectid)); - } + BUG_ON(!cache); + len = min(num, cache->key.offset - + (bytenr - cache->key.objectid)); if (pin) { - if (cache) { - spin_lock(&cache->lock); - cache->pinned += len; - cache->space_info->bytes_pinned += len; - spin_unlock(&cache->lock); - } + spin_lock(&cache->lock); + cache->pinned += len; + cache->space_info->bytes_pinned += len; + spin_unlock(&cache->lock); fs_info->total_pinned += len; } else { - if (cache) { - spin_lock(&cache->lock); - cache->pinned -= len; - cache->space_info->bytes_pinned -= len; - spin_unlock(&cache->lock); - } + spin_lock(&cache->lock); + cache->pinned -= len; + cache->space_info->bytes_pinned -= len; + spin_unlock(&cache->lock); fs_info->total_pinned -= len; } bytenr += len; @@ -1670,6 +1638,36 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, return 0; } +static int update_reserved_extents(struct btrfs_root *root, + u64 bytenr, u64 num, int reserve) +{ + u64 len; + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *fs_info = root->fs_info; + + WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); + while (num > 0) { + cache = btrfs_lookup_block_group(fs_info, bytenr); + BUG_ON(!cache); + len = min(num, cache->key.offset - + (bytenr - cache->key.objectid)); + if (reserve) { + spin_lock(&cache->lock); + cache->reserved += len; + cache->space_info->bytes_reserved += len; + spin_unlock(&cache->lock); + } else { + spin_lock(&cache->lock); + cache->reserved -= len; + cache->space_info->bytes_reserved -= len; + spin_unlock(&cache->lock); + } + bytenr += len; + num -= len; + } + return 0; +} + int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) { u64 last = 0; @@ -2126,6 +2124,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, cache = btrfs_lookup_block_group(root->fs_info, bytenr); BUG_ON(!cache); btrfs_add_free_space(cache, bytenr, num_bytes); + update_reserved_extents(root, bytenr, num_bytes, 0); return 0; } pin = 1; @@ -2225,14 +2224,11 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, search_start = max(search_start, first_logical_byte(root, 0)); orig_search_start = search_start; - if (search_end == (u64)-1) - search_end = btrfs_super_total_bytes(&info->super_copy); - search_start = max(search_start, hint_byte); total_needed += empty_size; new_group: - block_group = btrfs_lookup_block_group(info, search_start); + block_group = btrfs_lookup_first_block_group(info, search_start); /* * Ok this looks a little tricky, buts its really simple. First if we @@ -2257,12 +2253,8 @@ new_group: ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024, data, 1); - if (ret < 0) { - struct btrfs_space_info *info; - - info = __find_space_info(root->fs_info, data); + if (ret < 0) goto error; - } BUG_ON(ret); chunk_alloc_done = 1; search_start = orig_search_start; @@ -2378,22 +2370,24 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) struct list_head *l; printk(KERN_INFO "space_info has %Lu free, is %sfull\n", - info->total_bytes - info->bytes_used - info->bytes_pinned, - (info->full) ? "" : "not "); + info->total_bytes - info->bytes_used - info->bytes_pinned - + info->bytes_reserved, (info->full) ? "" : "not "); spin_lock(&info->lock); list_for_each(l, &info->block_groups) { cache = list_entry(l, struct btrfs_block_group_cache, list); spin_lock(&cache->lock); printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used " - "%Lu pinned\n", + "%Lu pinned %Lu reserved\n", cache->key.objectid, cache->key.offset, - btrfs_block_group_used(&cache->item), cache->pinned); + btrfs_block_group_used(&cache->item), + cache->pinned, cache->reserved); btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } spin_unlock(&info->lock); } + static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, @@ -2500,6 +2494,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, empty_size, hint_byte, search_end, ins, data); + update_reserved_extents(root, ins->objectid, ins->offset, 1); maybe_unlock_mutex(root); return ret; } @@ -2625,6 +2620,7 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, ref_generation, owner, owner_offset, ins); + update_reserved_extents(root, ins->objectid, ins->offset, 0); maybe_unlock_mutex(root); return ret; } @@ -2685,6 +2681,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, owner_objectid, owner_offset, ins); BUG_ON(ret); + } else { + update_reserved_extents(root, ins->objectid, ins->offset, 1); } maybe_unlock_mutex(root); return ret; @@ -3974,10 +3972,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) ret = btrfs_add_block_group_cache(root->fs_info, cache); BUG_ON(ret); - - if (key.objectid >= - btrfs_super_total_bytes(&info->super_copy)) - break; } ret = 0; error: From e465768938f95388723b0fd3c50a0ae48173edb9 Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 26 Sep 2008 10:04:53 -0400 Subject: [PATCH 0722/2985] Btrfs: Add shared reference cache Btrfs has a cache of reference counts in leaves, allowing it to avoid reading tree leaves while deleting snapshots. To reduce contention with multiple subvolumes, this cache is private to each subvolume. This patch adds shared reference cache support. The new space balancing code plays with multiple subvols at the same time, So the old per-subvol reference cache is not well suited. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 19 +++++++++----- fs/btrfs/disk-io.c | 2 ++ fs/btrfs/extent-tree.c | 17 ++++++++++--- fs/btrfs/ref-cache.c | 58 +++++++++++++++++++++++++++--------------- fs/btrfs/ref-cache.h | 7 +++-- fs/btrfs/transaction.c | 2 +- 6 files changed, 71 insertions(+), 34 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c683aaa925fa..b9f9f815ed09 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -81,6 +81,10 @@ struct btrfs_ordered_sum; #define BTRFS_TREE_LOG_OBJECTID -6ULL #define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL +/* for space balancing */ +#define BTRFS_TREE_RELOC_OBJECTID -8ULL +#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL + /* dummy objectid represents multiple objectids */ #define BTRFS_MULTIPLE_OBJECTIDS -255ULL @@ -539,6 +543,12 @@ struct btrfs_block_group_cache { struct list_head list; }; +struct btrfs_leaf_ref_tree { + struct rb_root root; + struct list_head list; + spinlock_t lock; +}; + struct btrfs_device; struct btrfs_fs_devices; struct btrfs_fs_info { @@ -637,6 +647,8 @@ struct btrfs_fs_info { struct task_struct *cleaner_kthread; int thread_pool_size; + struct btrfs_leaf_ref_tree shared_ref_tree; + struct kobject super_kobj; struct completion kobj_unregister; int do_barriers; @@ -670,13 +682,6 @@ struct btrfs_fs_info { void *bdev_holder; }; -struct btrfs_leaf_ref_tree { - struct rb_root root; - struct btrfs_leaf_ref *last; - struct list_head list; - spinlock_t lock; -}; - /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 71e81f3a765b..8969fee23318 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1406,6 +1406,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; + btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); + BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3e2f969de42d..9ab099bc01a4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1091,16 +1091,26 @@ out: int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, u32 nr_extents) { - u32 nritems; struct btrfs_key key; struct btrfs_file_extent_item *fi; + u64 root_gen; + u32 nritems; int i; int level; int ret = 0; + int shared = 0; if (!root->ref_cows) return 0; + if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { + shared = 0; + root_gen = root->root_key.offset; + } else { + shared = 1; + root_gen = trans->transid - 1; + } + level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); @@ -1114,7 +1124,7 @@ int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, goto out; } - ref->root_gen = root->root_key.offset; + ref->root_gen = root_gen; ref->bytenr = buf->start; ref->owner = btrfs_header_owner(buf); ref->generation = btrfs_header_generation(buf); @@ -1143,8 +1153,7 @@ int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, info++; } - BUG_ON(!root->ref_tree); - ret = btrfs_add_leaf_ref(root, ref); + ret = btrfs_add_leaf_ref(root, ref, shared); WARN_ON(ret); btrfs_free_leaf_ref(root, ref); } diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index 272b9890c982..c5809988c875 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -78,7 +78,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, } entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); - entry->in_tree = 1; rb_link_node(node, parent, p); rb_insert_color(node, root); return NULL; @@ -103,23 +102,29 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) return NULL; } -int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen) +int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, + int shared) { struct btrfs_leaf_ref *ref = NULL; struct btrfs_leaf_ref_tree *tree = root->ref_tree; + if (shared) + tree = &root->fs_info->shared_ref_tree; if (!tree) return 0; spin_lock(&tree->lock); while(!list_empty(&tree->list)) { ref = list_entry(tree->list.next, struct btrfs_leaf_ref, list); - BUG_ON(!ref->in_tree); + BUG_ON(ref->tree != tree); if (ref->root_gen > max_root_gen) break; + if (!xchg(&ref->in_tree, 0)) { + cond_resched_lock(&tree->lock); + continue; + } rb_erase(&ref->rb_node, &tree->root); - ref->in_tree = 0; list_del_init(&ref->list); spin_unlock(&tree->lock); @@ -137,32 +142,43 @@ struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, struct rb_node *rb; struct btrfs_leaf_ref *ref = NULL; struct btrfs_leaf_ref_tree *tree = root->ref_tree; - - if (!tree) - return NULL; - - spin_lock(&tree->lock); - rb = tree_search(&tree->root, bytenr); - if (rb) - ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); - if (ref) - atomic_inc(&ref->usage); - spin_unlock(&tree->lock); - return ref; +again: + if (tree) { + spin_lock(&tree->lock); + rb = tree_search(&tree->root, bytenr); + if (rb) + ref = rb_entry(rb, struct btrfs_leaf_ref, rb_node); + if (ref) + atomic_inc(&ref->usage); + spin_unlock(&tree->lock); + if (ref) + return ref; + } + if (tree != &root->fs_info->shared_ref_tree) { + tree = &root->fs_info->shared_ref_tree; + goto again; + } + return NULL; } -int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) +int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, + int shared) { int ret = 0; struct rb_node *rb; struct btrfs_leaf_ref_tree *tree = root->ref_tree; + if (shared) + tree = &root->fs_info->shared_ref_tree; + spin_lock(&tree->lock); rb = tree_insert(&tree->root, ref->bytenr, &ref->rb_node); if (rb) { ret = -EEXIST; } else { atomic_inc(&ref->usage); + ref->tree = tree; + ref->in_tree = 1; list_add_tail(&ref->list, &tree->list); } spin_unlock(&tree->lock); @@ -171,13 +187,15 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { - struct btrfs_leaf_ref_tree *tree = root->ref_tree; + struct btrfs_leaf_ref_tree *tree; - BUG_ON(!ref->in_tree); + if (!xchg(&ref->in_tree, 0)) + return 0; + + tree = ref->tree; spin_lock(&tree->lock); rb_erase(&ref->rb_node, &tree->root); - ref->in_tree = 0; list_del_init(&ref->list); spin_unlock(&tree->lock); diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h index c361b321c0c3..617564787f52 100644 --- a/fs/btrfs/ref-cache.h +++ b/fs/btrfs/ref-cache.h @@ -27,6 +27,7 @@ struct btrfs_extent_info { struct btrfs_leaf_ref { struct rb_node rb_node; + struct btrfs_leaf_ref_tree *tree; int in_tree; atomic_t usage; @@ -64,8 +65,10 @@ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, u64 bytenr); -int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); -int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen); +int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, + int shared); +int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, + int shared); int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 151b00d52593..656baefa5255 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -650,7 +650,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); - ret = btrfs_remove_leaf_refs(root, max_useless); + ret = btrfs_remove_leaf_refs(root, max_useless, 0); BUG_ON(ret); free_extent_buffer(dirty->root->node); From 5b21f2ed3f2947b5195b65c9fdbdd9e52904cc03 Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 26 Sep 2008 10:05:38 -0400 Subject: [PATCH 0723/2985] Btrfs: extent_map and data=ordered fixes for space balancing * Add an EXTENT_BOUNDARY state bit to keep the writepage code from merging data extents that are in the process of being relocated. This allows us to do accounting for them properly. * The balancing code relocates data extents indepdent of the underlying inode. The extent_map code was modified to properly account for things moving around (invalidating extent_map caches in the inode). * Don't take the drop_mutex in the create_subvol ioctl. It isn't required. * Fix walking of the ordered extent list to avoid races with sys_unlink * Change the lock ordering rules. Transaction start goes outside the drop_mutex. This allows btrfs_commit_transaction to directly drop the relocation trees. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 9 +++---- fs/btrfs/ctree.h | 11 ++++++++- fs/btrfs/extent_io.c | 13 +++++++---- fs/btrfs/extent_io.h | 1 + fs/btrfs/file.c | 31 +++++++++++++++++++----- fs/btrfs/inode-map.c | 4 ++++ fs/btrfs/inode.c | 52 +++++++++++++++++++++++++++++------------ fs/btrfs/ioctl.c | 2 -- fs/btrfs/ordered-data.c | 26 ++++++++++----------- fs/btrfs/transaction.c | 8 +++---- 10 files changed, 108 insertions(+), 49 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 50aea8cb653a..f9cd40967d04 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -290,7 +290,6 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer **cow_ret, u64 prealloc_dest) { u64 search_start; - u64 header_trans; int ret; if (trans->transaction != root->fs_info->running_transaction) { @@ -304,9 +303,9 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(1); } - header_trans = btrfs_header_generation(buf); spin_lock(&root->fs_info->hash_lock); - if (header_trans == trans->transid && + if (btrfs_header_generation(buf) == trans->transid && + btrfs_header_owner(buf) == root->root_key.objectid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { *cow_ret = buf; spin_unlock(&root->fs_info->hash_lock); @@ -1300,6 +1299,7 @@ again: /* is a cow on this block not required */ spin_lock(&root->fs_info->hash_lock); if (btrfs_header_generation(b) == trans->transid && + btrfs_header_owner(b) == root->root_key.objectid && !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { spin_unlock(&root->fs_info->hash_lock); goto cow_done; @@ -1396,7 +1396,8 @@ cow_done: /* this is only true while dropping a snapshot */ if (level == lowest_level) { - break; + ret = 0; + goto done; } blocknr = btrfs_node_blockptr(b, slot); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b9f9f815ed09..3e62a1b0a1f7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1486,6 +1486,9 @@ static inline struct dentry *fdentry(struct file *file) /* extent-tree.c */ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); +int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u32 *refs); int btrfs_update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin); int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, @@ -1812,6 +1815,8 @@ void btrfs_destroy_inode(struct inode *inode); int btrfs_init_cachep(void); void btrfs_destroy_cachep(void); long btrfs_ioctl_trans_end(struct file *file); +struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, + struct btrfs_root *root, int wait); struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, @@ -1824,13 +1829,17 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); +int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); +int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); +void btrfs_orphan_cleanup(struct btrfs_root *root); /* ioctl.c */ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* file.c */ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); -int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end); +int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, + int skip_pinned); int btrfs_check_file(struct btrfs_root *root, struct inode *inode); extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e3a25be5c663..8bd1b402f3fd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -292,7 +292,7 @@ static int merge_state(struct extent_io_tree *tree, struct extent_state *other; struct rb_node *other_node; - if (state->state & EXTENT_IOBITS) + if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) return 0; other_node = rb_prev(&state->rb_node); @@ -1070,7 +1070,8 @@ search_again: while(1) { state = rb_entry(node, struct extent_state, rb_node); - if (found && state->start != cur_start) { + if (found && (state->start != cur_start || + (state->state & EXTENT_BOUNDARY))) { goto out; } if (!(state->state & EXTENT_DELALLOC)) { @@ -1078,7 +1079,7 @@ search_again: *end = state->end; goto out; } - if (!found) { + if (!found && !(state->state & EXTENT_BOUNDARY)) { struct extent_state *prev_state; struct rb_node *prev_node = node; while(1) { @@ -1088,7 +1089,11 @@ search_again: prev_state = rb_entry(prev_node, struct extent_state, rb_node); - if (!(prev_state->state & EXTENT_DELALLOC)) + if ((prev_state->end + 1 != state->start) || + !(prev_state->state & EXTENT_DELALLOC)) + break; + if ((cur_start - prev_state->start) * 2 > + max_bytes) break; state = prev_state; node = prev_node; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 3cb411a5f4d3..c9d1908a1ae3 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -15,6 +15,7 @@ #define EXTENT_BUFFER_FILLED (1 << 8) #define EXTENT_ORDERED (1 << 9) #define EXTENT_ORDERED_METADATA (1 << 10) +#define EXTENT_BOUNDARY (1 << 11) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8856570a0ebd..1b7e51a9db0f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -294,7 +294,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, last_pos_in_file, 0, 0, hole_size, 0); btrfs_drop_extent_cache(inode, last_pos_in_file, - last_pos_in_file + hole_size -1); + last_pos_in_file + hole_size - 1, 0); mutex_unlock(&BTRFS_I(inode)->extent_mutex); btrfs_check_file(root, inode); } @@ -337,7 +337,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size -= start_pos; err = insert_inline_extent(trans, root, inode, start_pos, inline_size, pages, 0, num_pages); - btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); + btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1, 0); BUG_ON(err); mutex_unlock(&BTRFS_I(inode)->extent_mutex); @@ -362,7 +362,8 @@ out_unlock: return err; } -int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) +int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, + int skip_pinned) { struct extent_map *em; struct extent_map *split = NULL; @@ -371,6 +372,7 @@ int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) u64 len = end - start + 1; int ret; int testend = 1; + unsigned long flags; WARN_ON(end < start); if (end == (u64)-1) { @@ -389,6 +391,23 @@ int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) spin_unlock(&em_tree->lock); break; } + flags = em->flags; + if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { + spin_unlock(&em_tree->lock); + if (em->start <= start && + (!testend || em->start + em->len >= start + len)) { + free_extent_map(em); + break; + } + if (start < em->start) { + len = em->start - start; + } else { + len = start + len - (em->start + em->len); + start = em->start + em->len; + } + free_extent_map(em); + continue; + } clear_bit(EXTENT_FLAG_PINNED, &em->flags); remove_extent_mapping(em_tree, em); @@ -398,7 +417,7 @@ int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) split->len = start - em->start; split->block_start = em->block_start; split->bdev = em->bdev; - split->flags = em->flags; + split->flags = flags; ret = add_extent_mapping(em_tree, split); BUG_ON(ret); free_extent_map(split); @@ -412,7 +431,7 @@ int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) split->start = start + len; split->len = em->start + em->len - (start + len); split->bdev = em->bdev; - split->flags = em->flags; + split->flags = flags; split->block_start = em->block_start + diff; @@ -541,7 +560,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, int recow; int ret; - btrfs_drop_extent_cache(inode, start, end - 1); + btrfs_drop_extent_cache(inode, start, end - 1, 0); path = btrfs_alloc_path(); if (!path) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index cd6171c2da42..80038c5ef7cf 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -117,10 +117,14 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, *objectid = last_ino; goto found; } + } else if (key.objectid > search_start) { + *objectid = search_start; + goto found; } } if (key.objectid >= BTRFS_LAST_FREE_OBJECTID) break; + start_found = 1; last_ino = key.objectid + 1; path->slots[0]++; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 48a3dc030807..4516fbf01671 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -135,7 +135,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); mutex_lock(&BTRFS_I(inode)->extent_mutex); - btrfs_drop_extent_cache(inode, start, start + num_bytes - 1); + btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); mutex_unlock(&BTRFS_I(inode)->extent_mutex); while(num_bytes > 0) { @@ -163,7 +163,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) break; } btrfs_drop_extent_cache(inode, start, - start + ins.offset - 1); + start + ins.offset - 1, 0); } mutex_unlock(&BTRFS_I(inode)->extent_mutex); @@ -587,7 +587,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) btrfs_drop_extent_cache(inode, ordered_extent->file_offset, ordered_extent->file_offset + - ordered_extent->len - 1); + ordered_extent->len - 1, 0); mutex_unlock(&BTRFS_I(inode)->extent_mutex); ins.objectid = ordered_extent->start; @@ -880,7 +880,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) int ret = 0, nr_unlink = 0, nr_truncate = 0; /* don't do orphan cleanup if the fs is readonly. */ - if (root->inode->i_sb->s_flags & MS_RDONLY) + if (root->fs_info->sb->s_flags & MS_RDONLY) return; path = btrfs_alloc_path(); @@ -892,8 +892,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); key.offset = (u64)-1; - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, root->inode); while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -933,7 +931,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) * crossing root thing. we store the inode number in the * offset of the orphan item. */ - inode = btrfs_iget_locked(root->inode->i_sb, + inode = btrfs_iget_locked(root->fs_info->sb, found_key.offset, root); if (!inode) break; @@ -965,7 +963,9 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) * do a destroy_inode */ if (is_bad_inode(inode)) { + trans = btrfs_start_transaction(root, 1); btrfs_orphan_del(trans, inode); + btrfs_end_transaction(trans, root); iput(inode); continue; } @@ -988,7 +988,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); btrfs_free_path(path); - btrfs_end_transaction(trans, root); } void btrfs_read_locked_inode(struct inode *inode) @@ -1343,8 +1342,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u64 mask = root->sectorsize - 1; if (root->ref_cows) - btrfs_drop_extent_cache(inode, - new_size & (~mask), (u64)-1); + btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); path = btrfs_alloc_path(); path->reada = -1; BUG_ON(!path); @@ -1677,7 +1675,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) hole_start, 0, 0, hole_size, 0); btrfs_drop_extent_cache(inode, hole_start, - (u64)-1); + (u64)-1, 0); btrfs_check_file(root, inode); } mutex_unlock(&BTRFS_I(inode)->extent_mutex); @@ -1843,6 +1841,24 @@ static int btrfs_find_actor(struct inode *inode, void *opaque) args->root == BTRFS_I(inode)->root); } +struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, + struct btrfs_root *root, int wait) +{ + struct inode *inode; + struct btrfs_iget_args args; + args.ino = objectid; + args.root = root; + + if (wait) { + inode = ilookup5(s, objectid, btrfs_find_actor, + (void *)&args); + } else { + inode = ilookup5_nowait(s, objectid, btrfs_find_actor, + (void *)&args); + } + return inode; +} + struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root) { @@ -3266,7 +3282,7 @@ void btrfs_destroy_inode(struct inode *inode) btrfs_put_ordered_extent(ordered); } } - btrfs_drop_extent_cache(inode, 0, (u64)-1); + btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } @@ -3412,16 +3428,22 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) { struct list_head *head = &root->fs_info->delalloc_inodes; struct btrfs_inode *binode; + struct inode *inode; unsigned long flags; spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); while(!list_empty(head)) { binode = list_entry(head->next, struct btrfs_inode, delalloc_inodes); - atomic_inc(&binode->vfs_inode.i_count); + inode = igrab(&binode->vfs_inode); + if (!inode) + list_del_init(&binode->delalloc_inodes); spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); - filemap_write_and_wait(binode->vfs_inode.i_mapping); - iput(&binode->vfs_inode); + if (inode) { + filemap_write_and_wait(inode->i_mapping); + iput(inode); + } + cond_resched(); spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); } spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4c6e0c15754d..04de767a8db2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -444,12 +444,10 @@ static noinline int btrfs_ioctl_snap_create(struct btrfs_root *root, goto out; } - mutex_lock(&root->fs_info->drop_mutex); if (root == root->fs_info->tree_root) ret = create_subvol(root, vol_args->name, namelen); else ret = create_snapshot(root, vol_args->name, namelen); - mutex_unlock(&root->fs_info->drop_mutex); out: kfree(vol_args); return ret; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index da6d43eb41db..951eacff2420 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -309,7 +309,6 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) { struct list_head splice; struct list_head *cur; - struct list_head *tmp; struct btrfs_ordered_extent *ordered; struct inode *inode; @@ -317,37 +316,38 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) spin_lock(&root->fs_info->ordered_extent_lock); list_splice_init(&root->fs_info->ordered_extents, &splice); - list_for_each_safe(cur, tmp, &splice) { + while (!list_empty(&splice)) { cur = splice.next; ordered = list_entry(cur, struct btrfs_ordered_extent, root_extent_list); if (nocow_only && !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { + list_move(&ordered->root_extent_list, + &root->fs_info->ordered_extents); cond_resched_lock(&root->fs_info->ordered_extent_lock); continue; } list_del_init(&ordered->root_extent_list); atomic_inc(&ordered->refs); - inode = ordered->inode; /* - * the inode can't go away until all the pages are gone - * and the pages won't go away while there is still - * an ordered extent and the ordered extent won't go - * away until it is off this list. So, we can safely - * increment i_count here and call iput later + * the inode may be getting freed (in sys_unlink path). */ - atomic_inc(&inode->i_count); + inode = igrab(ordered->inode); + spin_unlock(&root->fs_info->ordered_extent_lock); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - iput(inode); + if (inode) { + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + iput(inode); + } else { + btrfs_put_ordered_extent(ordered); + } spin_lock(&root->fs_info->ordered_extent_lock); } - list_splice_init(&splice, &root->fs_info->ordered_extents); spin_unlock(&root->fs_info->ordered_extent_lock); return 0; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 656baefa5255..8c83cf464c83 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -109,6 +109,7 @@ noinline int btrfs_record_root_in_trans(struct btrfs_root *root) spin_lock_init(&dirty->root->node_lock); spin_lock_init(&dirty->root->list_lock); mutex_init(&dirty->root->objectid_mutex); + mutex_init(&dirty->root->log_mutex); INIT_LIST_HEAD(&dirty->root->dead_list); dirty->root->node = root->commit_root; dirty->root->commit_root = NULL; @@ -590,13 +591,14 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, root = dirty->latest_root; atomic_inc(&root->fs_info->throttles); - mutex_lock(&root->fs_info->drop_mutex); while(1) { trans = btrfs_start_transaction(tree_root, 1); + mutex_lock(&root->fs_info->drop_mutex); ret = btrfs_drop_snapshot(trans, dirty->root); if (ret != -EAGAIN) { break; } + mutex_unlock(&root->fs_info->drop_mutex); err = btrfs_update_root(trans, tree_root, @@ -608,10 +610,8 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); - mutex_unlock(&root->fs_info->drop_mutex); btrfs_btree_balance_dirty(tree_root, nr); cond_resched(); - mutex_lock(&root->fs_info->drop_mutex); } BUG_ON(ret); atomic_dec(&root->fs_info->throttles); @@ -689,7 +689,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); key.objectid = objectid; - key.offset = 1; + key.offset = trans->transid; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); old = btrfs_lock_root_node(root); From 1a40e23b95da45051ee4d74374c58ae87a14051c Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 26 Sep 2008 10:09:34 -0400 Subject: [PATCH 0724/2985] Btrfs: update space balancing code This patch updates the space balancing code to utilize the new backref format. Before, btrfs-vol -b would break any COW links on data blocks or metadata. This was slow and caused the amount of space used to explode if a large number of snapshots were present. The new code can keeps the sharing of all data extents and most of the tree blocks. To maintain the sharing of data extents, the space balance code uses a seperate inode hold data extent pointers, then updates the references to point to the new location. To maintain the sharing of tree blocks, the space balance code uses reloc trees to relocate tree blocks in reference counted roots. There is one reloc tree for each subvol, and all reloc trees share same root key objectid. Reloc trees are snapshots of the latest committed roots of subvols (root->commit_root). To relocate a tree block referenced by a subvol, there are two steps. COW the block through subvol's reloc tree, then update block pointer in the subvol to point to the new block. Since all reloc trees share same root key objectid, doing special handing for tree blocks owned by them is easy. Once a tree block has been COWed in one reloc tree, we can use the resulting new block directly when the same block is required to COW again through other reloc trees. In this way, relocated tree blocks are shared between reloc trees, so they are also shared between subvols. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 155 ++- fs/btrfs/ctree.h | 26 +- fs/btrfs/disk-io.c | 9 + fs/btrfs/extent-tree.c | 2146 +++++++++++++++++++++++++++++++--------- fs/btrfs/root-tree.c | 5 +- fs/btrfs/transaction.c | 15 +- fs/btrfs/volumes.c | 9 +- 7 files changed, 1884 insertions(+), 481 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f9cd40967d04..50e81f43e6d4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -179,7 +179,6 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer *cow; u32 nritems; int ret = 0; - int different_trans = 0; int level; int unlock_orig = 0; @@ -233,13 +232,33 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(buf) > trans->transid); if (btrfs_header_generation(buf) != trans->transid) { u32 nr_extents; - different_trans = 1; ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents); if (ret) return ret; ret = btrfs_cache_ref(trans, root, buf, nr_extents); WARN_ON(ret); + } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) { + /* + * There are only two places that can drop reference to + * tree blocks owned by living reloc trees, one is here, + * the other place is btrfs_merge_path. In both places, + * we check reference count while tree block is locked. + * Furthermore, if reference count is one, it won't get + * increased by someone else. + */ + u32 refs; + ret = btrfs_lookup_extent_ref(trans, root, buf->start, + buf->len, &refs); + BUG_ON(ret); + if (refs == 1) { + ret = btrfs_update_ref(trans, root, buf, cow, + 0, nritems); + clean_tree_block(trans, root, buf); + } else { + ret = btrfs_inc_ref(trans, root, buf, cow, NULL); + } + BUG_ON(ret); } else { ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems); if (ret) @@ -247,6 +266,14 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, clean_tree_block(trans, root, buf); } + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { + ret = btrfs_add_reloc_mapping(root, buf->start, + buf->len, cow->start); + BUG_ON(ret); + ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); + WARN_ON(ret); + } + if (buf == root->node) { WARN_ON(parent && parent != buf); @@ -1466,6 +1493,130 @@ done: return ret; } +int btrfs_merge_path(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *node_keys, + u64 *nodes, int lowest_level) +{ + struct extent_buffer *eb; + struct extent_buffer *parent; + struct btrfs_key key; + u64 bytenr; + u64 generation; + u32 blocksize; + int level; + int slot; + int key_match; + int ret; + + eb = btrfs_lock_root_node(root); + ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); + BUG_ON(ret); + + parent = eb; + while (1) { + level = btrfs_header_level(parent); + if (level == 0 || level <= lowest_level) + break; + + ret = bin_search(parent, &node_keys[lowest_level], level, + &slot); + if (ret && slot > 0) + slot--; + + bytenr = btrfs_node_blockptr(parent, slot); + if (nodes[level - 1] == bytenr) + break; + + blocksize = btrfs_level_size(root, level - 1); + generation = btrfs_node_ptr_generation(parent, slot); + btrfs_node_key_to_cpu(eb, &key, slot); + key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); + + /* + * if node keys match and node pointer hasn't been modified + * in the running transaction, we can merge the path. for + * reloc trees, the node pointer check is skipped, this is + * because the reloc trees are fully controlled by the space + * balance code, no one else can modify them. + */ + if (!nodes[level - 1] || !key_match || + (generation == trans->transid && + root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) { +next_level: + if (level == 1 || level == lowest_level + 1) + break; + + eb = read_tree_block(root, bytenr, blocksize, + generation); + btrfs_tree_lock(eb); + + ret = btrfs_cow_block(trans, root, eb, parent, slot, + &eb, 0); + BUG_ON(ret); + + btrfs_tree_unlock(parent); + free_extent_buffer(parent); + parent = eb; + continue; + } + + if (generation == trans->transid) { + u32 refs; + BUG_ON(btrfs_header_owner(eb) != + BTRFS_TREE_RELOC_OBJECTID); + /* + * lock the block to keep __btrfs_cow_block from + * changing the reference count. + */ + eb = read_tree_block(root, bytenr, blocksize, + generation); + btrfs_tree_lock(eb); + + ret = btrfs_lookup_extent_ref(trans, root, bytenr, + blocksize, &refs); + BUG_ON(ret); + /* + * if replace block whose reference count is one, + * we have to "drop the subtree". so skip it for + * simplicity + */ + if (refs == 1) { + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + goto next_level; + } + } + + btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); + btrfs_set_node_ptr_generation(parent, slot, trans->transid); + btrfs_mark_buffer_dirty(parent); + + ret = btrfs_inc_extent_ref(trans, root, + nodes[level - 1], + blocksize, parent->start, + btrfs_header_owner(parent), + btrfs_header_generation(parent), + level - 1, 0); + BUG_ON(ret); + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, parent->start, + btrfs_header_owner(parent), + btrfs_header_generation(parent), + level - 1, 0, 1); + BUG_ON(ret); + + if (generation == trans->transid) { + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + } + break; + } + btrfs_tree_unlock(parent); + free_extent_buffer(parent); + return 0; +} + /* * adjust the pointers going up the tree, starting at level * making sure the right key of each node is points to 'key'. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3e62a1b0a1f7..2775e270881e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -604,6 +604,7 @@ struct btrfs_fs_info { struct mutex chunk_mutex; struct mutex drop_mutex; struct mutex volume_mutex; + struct mutex tree_reloc_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; @@ -647,6 +648,10 @@ struct btrfs_fs_info { struct task_struct *cleaner_kthread; int thread_pool_size; + /* tree relocation relocated fields */ + struct extent_io_tree reloc_mapping_tree; + struct list_head dead_reloc_roots; + struct btrfs_leaf_ref_tree reloc_ref_tree; struct btrfs_leaf_ref_tree shared_ref_tree; struct kobject super_kobj; @@ -698,6 +703,7 @@ struct btrfs_root { struct btrfs_leaf_ref_tree ref_tree_struct; struct btrfs_dirty_root *dirty_root; struct btrfs_root *log_root; + struct btrfs_root *reloc_root; struct btrfs_root_item root_item; struct btrfs_key root_key; @@ -1517,7 +1523,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u32 blocksize); -int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -1582,10 +1587,29 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, u64 type, u64 chunk_objectid, u64 chunk_offset, u64 size); +int btrfs_remove_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 group_start); +int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); +int btrfs_free_reloc_root(struct btrfs_root *root); +int btrfs_drop_dead_reloc_roots(struct btrfs_root *root); +int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, + u64 num_bytes, u64 new_bytenr); +int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, + u64 num_bytes, u64 *new_bytenr); +void btrfs_free_reloc_mappings(struct btrfs_root *root); +int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, u64 orig_start); +int btrfs_add_dead_reloc_root(struct btrfs_root *root); +int btrfs_cleanup_reloc_trees(struct btrfs_root *root); /* ctree.c */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, int type); +int btrfs_merge_path(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *node_keys, + u64 *nodes, int lowest_level); int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *new_key); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8969fee23318..45bc3132b054 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1406,6 +1406,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; + extent_io_tree_init(&fs_info->reloc_mapping_tree, + fs_info->btree_inode->i_mapping, GFP_NOFS); + INIT_LIST_HEAD(&fs_info->dead_reloc_roots); + btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); BTRFS_I(fs_info->btree_inode)->root = tree_root; @@ -1421,6 +1425,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); + mutex_init(&fs_info->tree_reloc_mutex); init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->async_submit_wait); @@ -1627,6 +1632,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_recover_log_trees(log_tree_root); BUG_ON(ret); } + + ret = btrfs_cleanup_reloc_trees(tree_root); + BUG_ON(ret); + fs_info->last_trans_committed = btrfs_super_generation(disk_super); return tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9ab099bc01a4..8043b9d584a9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1834,6 +1834,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, u64 header_owner = btrfs_header_owner(buf); u64 header_transid = btrfs_header_generation(buf); if (header_owner != BTRFS_TREE_LOG_OBJECTID && + header_owner != BTRFS_TREE_RELOC_OBJECTID && header_transid == trans->transid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { clean_tree_block(NULL, root, buf); @@ -2487,6 +2488,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) return -ENOSPC; } btrfs_add_free_space(cache, start, len); + update_reserved_extents(root, start, len, 0); maybe_unlock_mutex(root); return 0; } @@ -2947,6 +2949,10 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, */ if (*level == 1) { ref = btrfs_lookup_leaf_ref(root, bytenr); + if (ref && ref->generation != ptr_gen) { + btrfs_free_leaf_ref(root, ref); + ref = NULL; + } if (ref) { ret = cache_drop_leaf_ref(trans, root, ref); BUG_ON(ret); @@ -3153,34 +3159,6 @@ out: return ret; } -int btrfs_free_block_groups(struct btrfs_fs_info *info) -{ - struct btrfs_block_group_cache *block_group; - struct rb_node *n; - - mutex_lock(&info->alloc_mutex); - spin_lock(&info->block_group_cache_lock); - while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { - block_group = rb_entry(n, struct btrfs_block_group_cache, - cache_node); - - spin_unlock(&info->block_group_cache_lock); - btrfs_remove_free_space_cache(block_group); - spin_lock(&info->block_group_cache_lock); - - rb_erase(&block_group->cache_node, - &info->block_group_cache_tree); - - spin_lock(&block_group->space_info->lock); - list_del(&block_group->list); - spin_unlock(&block_group->space_info->lock); - kfree(block_group); - } - spin_unlock(&info->block_group_cache_lock); - mutex_unlock(&info->alloc_mutex); - return 0; -} - static unsigned long calc_ra(unsigned long start, unsigned long last, unsigned long nr) { @@ -3192,37 +3170,43 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, { u64 page_start; u64 page_end; + unsigned long first_index; unsigned long last_index; unsigned long i; struct page *page; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct file_ra_state *ra; - unsigned long total_read = 0; - unsigned long ra_pages; struct btrfs_ordered_extent *ordered; - struct btrfs_trans_handle *trans; + unsigned int total_read = 0; + unsigned int total_dirty = 0; + int ret = 0; ra = kzalloc(sizeof(*ra), GFP_NOFS); mutex_lock(&inode->i_mutex); - i = start >> PAGE_CACHE_SHIFT; + first_index = start >> PAGE_CACHE_SHIFT; last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; - ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; + /* make sure the dirty trick played by the caller work */ + ret = invalidate_inode_pages2_range(inode->i_mapping, + first_index, last_index); + if (ret) + goto out_unlock; file_ra_state_init(ra, inode->i_mapping); - for (; i <= last_index; i++) { - if (total_read % ra_pages == 0) { + for (i = first_index ; i <= last_index; i++) { + if (total_read % ra->ra_pages == 0) { btrfs_force_ra(inode->i_mapping, ra, NULL, i, - calc_ra(i, last_index, ra_pages)); + calc_ra(i, last_index, ra->ra_pages)); } total_read++; again: if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) - goto truncate_racing; + BUG_ON(1); page = grab_cache_page(inode->i_mapping, i); if (!page) { + ret = -ENOMEM; goto out_unlock; } if (!PageUptodate(page)) { @@ -3231,6 +3215,7 @@ again: if (!PageUptodate(page)) { unlock_page(page); page_cache_release(page); + ret = -EIO; goto out_unlock; } } @@ -3251,14 +3236,13 @@ again: } set_page_extent_mapped(page); - /* - * make sure page_mkwrite is called for this page if userland - * wants to change it from mmap - */ - clear_page_dirty_for_io(page); - btrfs_set_extent_delalloc(inode, page_start, page_end); + if (i == first_index) + set_extent_bits(io_tree, page_start, page_end, + EXTENT_BOUNDARY, GFP_NOFS); + set_page_dirty(page); + total_dirty++; unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); @@ -3266,347 +3250,1457 @@ again: } out_unlock: - /* we have to start the IO in order to get the ordered extents - * instantiated. This allows the relocation to code to wait - * for all the ordered extents to hit the disk. - * - * Otherwise, it would constantly loop over the same extents - * because the old ones don't get deleted until the IO is - * started - */ - btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1, - WB_SYNC_NONE); kfree(ra); - trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); - if (trans) { - btrfs_end_transaction(trans, BTRFS_I(inode)->root); - mark_inode_dirty(inode); - } mutex_unlock(&inode->i_mutex); - return 0; - -truncate_racing: - vmtruncate(inode, inode->i_size); - balance_dirty_pages_ratelimited_nr(inode->i_mapping, - total_read); - goto out_unlock; -} - -/* - * The back references tell us which tree holds a ref on a block, - * but it is possible for the tree root field in the reference to - * reflect the original root before a snapshot was made. In this - * case we should search through all the children of a given root - * to find potential holders of references on a block. - * - * Instead, we do something a little less fancy and just search - * all the roots for a given key/block combination. - */ -static int find_root_for_ref(struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_key *key0, - int level, - int file_key, - struct btrfs_root **found_root, - u64 bytenr) -{ - struct btrfs_key root_location; - struct btrfs_root *cur_root = *found_root; - struct btrfs_file_extent_item *file_extent; - u64 root_search_start = BTRFS_FS_TREE_OBJECTID; - u64 found_bytenr; - int ret; - - root_location.offset = (u64)-1; - root_location.type = BTRFS_ROOT_ITEM_KEY; - path->lowest_level = level; - path->reada = 0; - while(1) { - ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0); - found_bytenr = 0; - if (ret == 0 && file_key) { - struct extent_buffer *leaf = path->nodes[0]; - file_extent = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, file_extent) == - BTRFS_FILE_EXTENT_REG) { - found_bytenr = - btrfs_file_extent_disk_bytenr(leaf, - file_extent); - } - } else if (!file_key) { - if (path->nodes[level]) - found_bytenr = path->nodes[level]->start; - } - - btrfs_release_path(cur_root, path); - - if (found_bytenr == bytenr) { - *found_root = cur_root; - ret = 0; - goto out; - } - ret = btrfs_search_root(root->fs_info->tree_root, - root_search_start, &root_search_start); - if (ret) - break; - - root_location.objectid = root_search_start; - cur_root = btrfs_read_fs_root_no_name(root->fs_info, - &root_location); - if (!cur_root) { - ret = 1; - break; - } - } -out: - path->lowest_level = 0; + balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); return ret; } -/* - * note, this releases the path - */ -static int noinline relocate_one_reference(struct btrfs_root *extent_root, - struct btrfs_path *path, - struct btrfs_key *extent_key, - u64 *last_file_objectid, - u64 *last_file_offset, - u64 *last_file_root, - u64 last_extent) +static int noinline relocate_data_extent(struct inode *reloc_inode, + struct btrfs_key *extent_key, + u64 offset) { - struct inode *inode; - struct btrfs_root *found_root; - struct btrfs_key root_location; - struct btrfs_key found_key; + struct btrfs_root *root = BTRFS_I(reloc_inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; + struct extent_map *em; + + em = alloc_extent_map(GFP_NOFS); + BUG_ON(!em || IS_ERR(em)); + + em->start = extent_key->objectid - offset; + em->len = extent_key->offset; + em->block_start = extent_key->objectid; + em->bdev = root->fs_info->fs_devices->latest_bdev; + set_bit(EXTENT_FLAG_PINNED, &em->flags); + + /* setup extent map to cheat btrfs_readpage */ + mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex); + while (1) { + int ret; + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret != -EEXIST) { + free_extent_map(em); + break; + } + btrfs_drop_extent_cache(reloc_inode, em->start, + em->start + em->len - 1, 0); + } + mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex); + + return relocate_inode_pages(reloc_inode, extent_key->objectid - offset, + extent_key->offset); +} + +struct btrfs_ref_path { + u64 extent_start; + u64 nodes[BTRFS_MAX_LEVEL]; + u64 root_objectid; + u64 root_generation; + u64 owner_objectid; + u64 owner_offset; + u32 num_refs; + int lowest_level; + int current_level; +}; + +struct disk_extent { + u64 disk_bytenr; + u64 disk_num_bytes; + u64 offset; + u64 num_bytes; +}; + +static int is_cowonly_root(u64 root_objectid) +{ + if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || + root_objectid == BTRFS_EXTENT_TREE_OBJECTID || + root_objectid == BTRFS_CHUNK_TREE_OBJECTID || + root_objectid == BTRFS_DEV_TREE_OBJECTID || + root_objectid == BTRFS_TREE_LOG_OBJECTID) + return 1; + return 0; +} + +static int noinline __next_ref_path(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct btrfs_ref_path *ref_path, + int first_time) +{ + struct extent_buffer *leaf; + struct btrfs_path *path; struct btrfs_extent_ref *ref; - u64 ref_root; - u64 ref_gen; - u64 ref_objectid; - u64 ref_offset; - int ret; - int level; - - WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); - - ref = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_ref); - ref_root = btrfs_ref_root(path->nodes[0], ref); - ref_gen = btrfs_ref_generation(path->nodes[0], ref); - ref_objectid = btrfs_ref_objectid(path->nodes[0], ref); - ref_offset = btrfs_ref_offset(path->nodes[0], ref); - btrfs_release_path(extent_root, path); - - root_location.objectid = ref_root; - if (ref_gen == 0) - root_location.offset = 0; - else - root_location.offset = (u64)-1; - root_location.type = BTRFS_ROOT_ITEM_KEY; - - found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, - &root_location); - BUG_ON(!found_root); - mutex_unlock(&extent_root->fs_info->alloc_mutex); - - if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { - found_key.objectid = ref_objectid; - found_key.type = BTRFS_EXTENT_DATA_KEY; - found_key.offset = ref_offset; - level = 0; - - if (last_extent == extent_key->objectid && - *last_file_objectid == ref_objectid && - *last_file_offset == ref_offset && - *last_file_root == ref_root) - goto out; - - ret = find_root_for_ref(extent_root, path, &found_key, - level, 1, &found_root, - extent_key->objectid); - - if (ret) - goto out; - - if (last_extent == extent_key->objectid && - *last_file_objectid == ref_objectid && - *last_file_offset == ref_offset && - *last_file_root == ref_root) - goto out; - - inode = btrfs_iget_locked(extent_root->fs_info->sb, - ref_objectid, found_root); - if (inode->i_state & I_NEW) { - /* the inode and parent dir are two different roots */ - BTRFS_I(inode)->root = found_root; - BTRFS_I(inode)->location.objectid = ref_objectid; - BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; - BTRFS_I(inode)->location.offset = 0; - btrfs_read_locked_inode(inode); - unlock_new_inode(inode); - - } - /* this can happen if the reference is not against - * the latest version of the tree root - */ - if (is_bad_inode(inode)) - goto out; - - *last_file_objectid = inode->i_ino; - *last_file_root = found_root->root_key.objectid; - *last_file_offset = ref_offset; - - relocate_inode_pages(inode, ref_offset, extent_key->offset); - iput(inode); - } else { - struct btrfs_trans_handle *trans; - struct extent_buffer *eb; - int needs_lock = 0; - - eb = read_tree_block(found_root, extent_key->objectid, - extent_key->offset, 0); - btrfs_tree_lock(eb); - level = btrfs_header_level(eb); - - if (level == 0) - btrfs_item_key_to_cpu(eb, &found_key, 0); - else - btrfs_node_key_to_cpu(eb, &found_key, 0); - - btrfs_tree_unlock(eb); - free_extent_buffer(eb); - - ret = find_root_for_ref(extent_root, path, &found_key, - level, 0, &found_root, - extent_key->objectid); - - if (ret) - goto out; - - /* - * right here almost anything could happen to our key, - * but that's ok. The cow below will either relocate it - * or someone else will have relocated it. Either way, - * it is in a different spot than it was before and - * we're happy. - */ - - trans = btrfs_start_transaction(found_root, 1); - - if (found_root == extent_root->fs_info->extent_root || - found_root == extent_root->fs_info->chunk_root || - found_root == extent_root->fs_info->dev_root) { - needs_lock = 1; - mutex_lock(&extent_root->fs_info->alloc_mutex); - } - - path->lowest_level = level; - path->reada = 2; - ret = btrfs_search_slot(trans, found_root, &found_key, path, - 0, 1); - path->lowest_level = 0; - btrfs_release_path(found_root, path); - - if (found_root == found_root->fs_info->extent_root) - btrfs_extent_post_op(trans, found_root); - if (needs_lock) - mutex_unlock(&extent_root->fs_info->alloc_mutex); - - btrfs_end_transaction(trans, found_root); - - } -out: - mutex_lock(&extent_root->fs_info->alloc_mutex); - return 0; -} - -static int noinline del_extent_zero(struct btrfs_root *extent_root, - struct btrfs_path *path, - struct btrfs_key *extent_key) -{ - int ret; - struct btrfs_trans_handle *trans; - - trans = btrfs_start_transaction(extent_root, 1); - ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); - if (ret > 0) { - ret = -EIO; - goto out; - } - if (ret < 0) - goto out; - ret = btrfs_del_item(trans, extent_root, path); -out: - btrfs_end_transaction(trans, extent_root); - return ret; -} - -static int noinline relocate_one_extent(struct btrfs_root *extent_root, - struct btrfs_path *path, - struct btrfs_key *extent_key) -{ struct btrfs_key key; struct btrfs_key found_key; - struct extent_buffer *leaf; - u64 last_file_objectid = 0; - u64 last_file_root = 0; - u64 last_file_offset = (u64)-1; - u64 last_extent = 0; + u64 bytenr; u32 nritems; - u32 item_size; - int ret = 0; + int level; + int ret = 1; - if (extent_key->objectid == 0) { - ret = del_extent_zero(extent_root, path, extent_key); - goto out; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + mutex_lock(&extent_root->fs_info->alloc_mutex); + + if (first_time) { + ref_path->lowest_level = -1; + ref_path->current_level = -1; + goto walk_up; } - key.objectid = extent_key->objectid; - key.type = BTRFS_EXTENT_REF_KEY; - key.offset = 0; +walk_down: + level = ref_path->current_level - 1; + while (level >= -1) { + u64 parent; + if (level < ref_path->lowest_level) + break; - while(1) { - ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); + if (level >= 0) { + bytenr = ref_path->nodes[level]; + } else { + bytenr = ref_path->extent_start; + } + BUG_ON(bytenr == 0); + parent = ref_path->nodes[level + 1]; + ref_path->nodes[level + 1] = 0; + ref_path->current_level = level; + BUG_ON(parent == 0); + + key.objectid = bytenr; + key.offset = parent + 1; + key.type = BTRFS_EXTENT_REF_KEY; + + ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); if (ret < 0) goto out; + BUG_ON(ret == 0); - ret = 0; leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); - if (path->slots[0] == nritems) { + if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(extent_root, path); - if (ret > 0) { - ret = 0; - goto out; - } if (ret < 0) goto out; + if (ret > 0) + goto next; leaf = path->nodes[0]; } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != extent_key->objectid) { - break; + if (found_key.objectid == bytenr && + found_key.type == BTRFS_EXTENT_REF_KEY) + goto found; +next: + level--; + btrfs_release_path(extent_root, path); + if (need_resched()) { + mutex_unlock(&extent_root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&extent_root->fs_info->alloc_mutex); } - - if (found_key.type != BTRFS_EXTENT_REF_KEY) { - break; + } + /* reached lowest level */ + ret = 1; + goto out; +walk_up: + level = ref_path->current_level; + while (level < BTRFS_MAX_LEVEL - 1) { + u64 ref_objectid; + if (level >= 0) { + bytenr = ref_path->nodes[level]; + } else { + bytenr = ref_path->extent_start; } + BUG_ON(bytenr == 0); - key.offset = found_key.offset + 1; - item_size = btrfs_item_size_nr(leaf, path->slots[0]); + key.objectid = bytenr; + key.offset = 0; + key.type = BTRFS_EXTENT_REF_KEY; - ret = relocate_one_reference(extent_root, path, extent_key, - &last_file_objectid, - &last_file_offset, - &last_file_root, last_extent); - if (ret) + ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); + if (ret < 0) goto out; - last_extent = extent_key->objectid; + + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(extent_root, path); + if (ret < 0) + goto out; + if (ret > 0) { + /* the extent was freed by someone */ + if (ref_path->lowest_level == level) + goto out; + btrfs_release_path(extent_root, path); + goto walk_down; + } + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid != bytenr || + found_key.type != BTRFS_EXTENT_REF_KEY) { + /* the extent was freed by someone */ + if (ref_path->lowest_level == level) { + ret = 1; + goto out; + } + btrfs_release_path(extent_root, path); + goto walk_down; + } +found: + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref); + ref_objectid = btrfs_ref_objectid(leaf, ref); + if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { + if (first_time) { + level = (int)ref_objectid; + BUG_ON(level >= BTRFS_MAX_LEVEL); + ref_path->lowest_level = level; + ref_path->current_level = level; + ref_path->nodes[level] = bytenr; + } else { + WARN_ON(ref_objectid != level); + } + } else { + WARN_ON(level != -1); + } + first_time = 0; + + if (ref_path->lowest_level == level) { + ref_path->owner_objectid = ref_objectid; + ref_path->owner_offset = btrfs_ref_offset(leaf, ref); + ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); + } + + /* + * the block is tree root or the block isn't in reference + * counted tree. + */ + if (found_key.objectid == found_key.offset || + is_cowonly_root(btrfs_ref_root(leaf, ref))) { + ref_path->root_objectid = btrfs_ref_root(leaf, ref); + ref_path->root_generation = + btrfs_ref_generation(leaf, ref); + if (level < 0) { + /* special reference from the tree log */ + ref_path->nodes[0] = found_key.offset; + ref_path->current_level = 0; + } + ret = 0; + goto out; + } + + level++; + BUG_ON(ref_path->nodes[level] != 0); + ref_path->nodes[level] = found_key.offset; + ref_path->current_level = level; + + /* + * the reference was created in the running transaction, + * no need to continue walking up. + */ + if (btrfs_ref_generation(leaf, ref) == trans->transid) { + ref_path->root_objectid = btrfs_ref_root(leaf, ref); + ref_path->root_generation = + btrfs_ref_generation(leaf, ref); + ret = 0; + goto out; + } + + btrfs_release_path(extent_root, path); + if (need_resched()) { + mutex_unlock(&extent_root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&extent_root->fs_info->alloc_mutex); + } + } + /* reached max tree level, but no tree root found. */ + BUG(); +out: + mutex_unlock(&extent_root->fs_info->alloc_mutex); + btrfs_free_path(path); + return ret; +} + +static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct btrfs_ref_path *ref_path, + u64 extent_start) +{ + memset(ref_path, 0, sizeof(*ref_path)); + ref_path->extent_start = extent_start; + + return __next_ref_path(trans, extent_root, ref_path, 1); +} + +static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct btrfs_ref_path *ref_path) +{ + return __next_ref_path(trans, extent_root, ref_path, 0); +} + +static int noinline get_new_locations(struct inode *reloc_inode, + struct btrfs_key *extent_key, + u64 offset, int no_fragment, + struct disk_extent **extents, + int *nr_extents) +{ + struct btrfs_root *root = BTRFS_I(reloc_inode)->root; + struct btrfs_path *path; + struct btrfs_file_extent_item *fi; + struct extent_buffer *leaf; + struct disk_extent *exts = *extents; + struct btrfs_key found_key; + u64 cur_pos; + u64 last_byte; + u32 nritems; + int nr = 0; + int max = *nr_extents; + int ret; + + WARN_ON(!no_fragment && *extents); + if (!exts) { + max = 1; + exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); + if (!exts) + return -ENOMEM; + } + + path = btrfs_alloc_path(); + BUG_ON(!path); + + cur_pos = extent_key->objectid - offset; + last_byte = extent_key->objectid + extent_key->offset; + ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, + cur_pos, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + while (1) { + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret > 0) + break; + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.offset != cur_pos || + found_key.type != BTRFS_EXTENT_DATA_KEY || + found_key.objectid != reloc_inode->i_ino) + break; + + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) != + BTRFS_FILE_EXTENT_REG || + btrfs_file_extent_disk_bytenr(leaf, fi) == 0) + break; + + if (nr == max) { + struct disk_extent *old = exts; + max *= 2; + exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); + memcpy(exts, old, sizeof(*exts) * nr); + if (old != *extents) + kfree(old); + } + + exts[nr].disk_bytenr = + btrfs_file_extent_disk_bytenr(leaf, fi); + exts[nr].disk_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, fi); + exts[nr].offset = btrfs_file_extent_offset(leaf, fi); + exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); + WARN_ON(exts[nr].offset > 0); + WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); + + cur_pos += exts[nr].num_bytes; + nr++; + + if (cur_pos + offset >= last_byte) + break; + + if (no_fragment) { + ret = 1; + goto out; + } + path->slots[0]++; + } + + WARN_ON(cur_pos + offset > last_byte); + if (cur_pos + offset < last_byte) { + ret = -ENOENT; + goto out; } ret = 0; +out: + btrfs_free_path(path); + if (ret) { + if (exts != *extents) + kfree(exts); + } else { + *extents = exts; + *nr_extents = nr; + } + return ret; +} + +static int noinline replace_one_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *extent_key, + struct btrfs_key *leaf_key, + struct btrfs_ref_path *ref_path, + struct disk_extent *new_extents, + int nr_extents) +{ + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + struct inode *inode = NULL; + struct btrfs_key key; + u64 lock_start = 0; + u64 lock_end = 0; + u64 num_bytes; + u64 ext_offset; + u64 first_pos; + u32 nritems; + int extent_locked = 0; + int ret; + + first_pos = ref_path->owner_offset; + if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { + key.objectid = ref_path->owner_objectid; + key.offset = ref_path->owner_offset; + key.type = BTRFS_EXTENT_DATA_KEY; + } else { + memcpy(&key, leaf_key, sizeof(key)); + } + + while (1) { + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) + goto out; + + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); +next: + if (extent_locked && ret > 0) { + /* + * the file extent item was modified by someone + * before the extent got locked. + */ + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, + lock_end, GFP_NOFS); + extent_locked = 0; + } + + if (path->slots[0] >= nritems) { + if (ref_path->owner_objectid == + BTRFS_MULTIPLE_OBJECTIDS) + break; + + BUG_ON(extent_locked); + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret > 0) + break; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { + if ((key.objectid > ref_path->owner_objectid) || + (key.objectid == ref_path->owner_objectid && + key.type > BTRFS_EXTENT_DATA_KEY) || + (key.offset >= first_pos + extent_key->offset)) + break; + } + + if (inode && key.objectid != inode->i_ino) { + BUG_ON(extent_locked); + btrfs_release_path(root, path); + mutex_unlock(&inode->i_mutex); + iput(inode); + inode = NULL; + continue; + } + + if (key.type != BTRFS_EXTENT_DATA_KEY) { + path->slots[0]++; + ret = 1; + goto next; + } + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + if ((btrfs_file_extent_type(leaf, fi) != + BTRFS_FILE_EXTENT_REG) || + (btrfs_file_extent_disk_bytenr(leaf, fi) != + extent_key->objectid)) { + path->slots[0]++; + ret = 1; + goto next; + } + + num_bytes = btrfs_file_extent_num_bytes(leaf, fi); + ext_offset = btrfs_file_extent_offset(leaf, fi); + + if (first_pos > key.offset - ext_offset) + first_pos = key.offset - ext_offset; + + if (!extent_locked) { + lock_start = key.offset; + lock_end = lock_start + num_bytes - 1; + } else { + BUG_ON(lock_start != key.offset); + BUG_ON(lock_end - lock_start + 1 < num_bytes); + } + + if (!inode) { + btrfs_release_path(root, path); + + inode = btrfs_iget_locked(root->fs_info->sb, + key.objectid, root); + if (inode->i_state & I_NEW) { + BTRFS_I(inode)->root = root; + BTRFS_I(inode)->location.objectid = + key.objectid; + BTRFS_I(inode)->location.type = + BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.offset = 0; + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + } + /* + * some code call btrfs_commit_transaction while + * holding the i_mutex, so we can't use mutex_lock + * here. + */ + if (is_bad_inode(inode) || + !mutex_trylock(&inode->i_mutex)) { + iput(inode); + inode = NULL; + key.offset = (u64)-1; + goto skip; + } + } + + if (!extent_locked) { + struct btrfs_ordered_extent *ordered; + + btrfs_release_path(root, path); + + lock_extent(&BTRFS_I(inode)->io_tree, lock_start, + lock_end, GFP_NOFS); + ordered = btrfs_lookup_first_ordered_extent(inode, + lock_end); + if (ordered && + ordered->file_offset <= lock_end && + ordered->file_offset + ordered->len > lock_start) { + unlock_extent(&BTRFS_I(inode)->io_tree, + lock_start, lock_end, GFP_NOFS); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + key.offset += num_bytes; + goto skip; + } + if (ordered) + btrfs_put_ordered_extent(ordered); + + mutex_lock(&BTRFS_I(inode)->extent_mutex); + extent_locked = 1; + continue; + } + + if (nr_extents == 1) { + /* update extent pointer in place */ + btrfs_set_file_extent_generation(leaf, fi, + trans->transid); + btrfs_set_file_extent_disk_bytenr(leaf, fi, + new_extents[0].disk_bytenr); + btrfs_set_file_extent_disk_num_bytes(leaf, fi, + new_extents[0].disk_num_bytes); + ext_offset += new_extents[0].offset; + btrfs_set_file_extent_offset(leaf, fi, ext_offset); + btrfs_mark_buffer_dirty(leaf); + + btrfs_drop_extent_cache(inode, key.offset, + key.offset + num_bytes - 1, 0); + + ret = btrfs_inc_extent_ref(trans, root, + new_extents[0].disk_bytenr, + new_extents[0].disk_num_bytes, + leaf->start, + root->root_key.objectid, + trans->transid, + key.objectid, key.offset); + BUG_ON(ret); + + ret = btrfs_free_extent(trans, root, + extent_key->objectid, + extent_key->offset, + leaf->start, + btrfs_header_owner(leaf), + btrfs_header_generation(leaf), + key.objectid, key.offset, 0); + BUG_ON(ret); + + btrfs_release_path(root, path); + key.offset += num_bytes; + } else { + u64 alloc_hint; + u64 extent_len; + int i; + /* + * drop old extent pointer at first, then insert the + * new pointers one bye one + */ + btrfs_release_path(root, path); + ret = btrfs_drop_extents(trans, root, inode, key.offset, + key.offset + num_bytes, + key.offset, &alloc_hint); + BUG_ON(ret); + + for (i = 0; i < nr_extents; i++) { + if (ext_offset >= new_extents[i].num_bytes) { + ext_offset -= new_extents[i].num_bytes; + continue; + } + extent_len = min(new_extents[i].num_bytes - + ext_offset, num_bytes); + + ret = btrfs_insert_empty_item(trans, root, + path, &key, + sizeof(*fi)); + BUG_ON(ret); + + leaf = path->nodes[0]; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, fi, + trans->transid); + btrfs_set_file_extent_type(leaf, fi, + BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_disk_bytenr(leaf, fi, + new_extents[i].disk_bytenr); + btrfs_set_file_extent_disk_num_bytes(leaf, fi, + new_extents[i].disk_num_bytes); + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_len); + ext_offset += new_extents[i].offset; + btrfs_set_file_extent_offset(leaf, fi, + ext_offset); + btrfs_mark_buffer_dirty(leaf); + + btrfs_drop_extent_cache(inode, key.offset, + key.offset + extent_len - 1, 0); + + ret = btrfs_inc_extent_ref(trans, root, + new_extents[i].disk_bytenr, + new_extents[i].disk_num_bytes, + leaf->start, + root->root_key.objectid, + trans->transid, + key.objectid, key.offset); + BUG_ON(ret); + btrfs_release_path(root, path); + + inode->i_blocks += extent_len >> 9; + + ext_offset = 0; + num_bytes -= extent_len; + key.offset += extent_len; + + if (num_bytes == 0) + break; + } + BUG_ON(i >= nr_extents); + } + + if (extent_locked) { + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, + lock_end, GFP_NOFS); + extent_locked = 0; + } +skip: + if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && + key.offset >= first_pos + extent_key->offset) + break; + + cond_resched(); + } + ret = 0; +out: + btrfs_release_path(root, path); + if (inode) { + mutex_unlock(&inode->i_mutex); + if (extent_locked) { + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, + lock_end, GFP_NOFS); + } + iput(inode); + } + return ret; +} + +int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, + u64 num_bytes, u64 new_bytenr) +{ + set_extent_bits(&root->fs_info->reloc_mapping_tree, + orig_bytenr, orig_bytenr + num_bytes - 1, + EXTENT_LOCKED, GFP_NOFS); + set_state_private(&root->fs_info->reloc_mapping_tree, + orig_bytenr, new_bytenr); + return 0; +} + +int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, + u64 num_bytes, u64 *new_bytenr) +{ + u64 bytenr; + u64 cur_bytenr = orig_bytenr; + u64 prev_bytenr = orig_bytenr; + int ret; + + while (1) { + ret = get_state_private(&root->fs_info->reloc_mapping_tree, + cur_bytenr, &bytenr); + if (ret) + break; + prev_bytenr = cur_bytenr; + cur_bytenr = bytenr; + } + + if (orig_bytenr == cur_bytenr) + return -ENOENT; + + if (prev_bytenr != orig_bytenr) { + set_state_private(&root->fs_info->reloc_mapping_tree, + orig_bytenr, cur_bytenr); + } + *new_bytenr = cur_bytenr; + return 0; +} + +void btrfs_free_reloc_mappings(struct btrfs_root *root) +{ + clear_extent_bits(&root->fs_info->reloc_mapping_tree, + 0, (u64)-1, -1, GFP_NOFS); +} + +int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, u64 orig_start) +{ + int level; + int ret; + + BUG_ON(btrfs_header_generation(buf) != trans->transid); + BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); + + level = btrfs_header_level(buf); + if (level == 0) { + struct btrfs_leaf_ref *ref; + struct btrfs_leaf_ref *orig_ref; + + orig_ref = btrfs_lookup_leaf_ref(root, orig_start); + if (!orig_ref) + return -ENOENT; + + ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); + if (!ref) { + btrfs_free_leaf_ref(root, orig_ref); + return -ENOMEM; + } + + ref->nritems = orig_ref->nritems; + memcpy(ref->extents, orig_ref->extents, + sizeof(ref->extents[0]) * ref->nritems); + + btrfs_free_leaf_ref(root, orig_ref); + + ref->root_gen = trans->transid; + ref->bytenr = buf->start; + ref->owner = btrfs_header_owner(buf); + ref->generation = btrfs_header_generation(buf); + ret = btrfs_add_leaf_ref(root, ref, 0); + WARN_ON(ret); + btrfs_free_leaf_ref(root, ref); + } + return 0; +} + +static int noinline invalidate_extent_cache(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_block_group_cache *group, + struct btrfs_root *target_root) +{ + struct btrfs_key key; + struct inode *inode = NULL; + struct btrfs_file_extent_item *fi; + u64 num_bytes; + u64 skip_objectid = 0; + u32 nritems; + u32 i; + + nritems = btrfs_header_nritems(leaf); + for (i = 0; i < nritems; i++) { + btrfs_item_key_to_cpu(leaf, &key, i); + if (key.objectid == skip_objectid || + key.type != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) + continue; + if (!inode || inode->i_ino != key.objectid) { + iput(inode); + inode = btrfs_ilookup(target_root->fs_info->sb, + key.objectid, target_root, 1); + } + if (!inode) { + skip_objectid = key.objectid; + continue; + } + num_bytes = btrfs_file_extent_num_bytes(leaf, fi); + + lock_extent(&BTRFS_I(inode)->io_tree, key.offset, + key.offset + num_bytes - 1, GFP_NOFS); + mutex_lock(&BTRFS_I(inode)->extent_mutex); + btrfs_drop_extent_cache(inode, key.offset, + key.offset + num_bytes - 1, 1); + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, + key.offset + num_bytes - 1, GFP_NOFS); + cond_resched(); + } + iput(inode); + return 0; +} + +static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_block_group_cache *group, + struct inode *reloc_inode) +{ + struct btrfs_key key; + struct btrfs_key extent_key; + struct btrfs_file_extent_item *fi; + struct btrfs_leaf_ref *ref; + struct disk_extent *new_extent; + u64 bytenr; + u64 num_bytes; + u32 nritems; + u32 i; + int ext_index; + int nr_extent; + int ret; + + new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); + BUG_ON(!new_extent); + + ref = btrfs_lookup_leaf_ref(root, leaf->start); + BUG_ON(!ref); + + ext_index = -1; + nritems = btrfs_header_nritems(leaf); + for (i = 0; i < nritems; i++) { + btrfs_item_key_to_cpu(leaf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); + if (bytenr == 0) + continue; + + ext_index++; + if (bytenr >= group->key.objectid + group->key.offset || + bytenr + num_bytes <= group->key.objectid) + continue; + + extent_key.objectid = bytenr; + extent_key.offset = num_bytes; + extent_key.type = BTRFS_EXTENT_ITEM_KEY; + nr_extent = 1; + ret = get_new_locations(reloc_inode, &extent_key, + group->key.objectid, 1, + &new_extent, &nr_extent); + if (ret > 0) + continue; + BUG_ON(ret < 0); + + BUG_ON(ref->extents[ext_index].bytenr != bytenr); + BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); + ref->extents[ext_index].bytenr = new_extent->disk_bytenr; + ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; + + btrfs_set_file_extent_generation(leaf, fi, trans->transid); + btrfs_set_file_extent_disk_bytenr(leaf, fi, + new_extent->disk_bytenr); + btrfs_set_file_extent_disk_num_bytes(leaf, fi, + new_extent->disk_num_bytes); + new_extent->offset += btrfs_file_extent_offset(leaf, fi); + btrfs_set_file_extent_offset(leaf, fi, new_extent->offset); + btrfs_mark_buffer_dirty(leaf); + + ret = btrfs_inc_extent_ref(trans, root, + new_extent->disk_bytenr, + new_extent->disk_num_bytes, + leaf->start, + root->root_key.objectid, + trans->transid, + key.objectid, key.offset); + BUG_ON(ret); + ret = btrfs_free_extent(trans, root, + bytenr, num_bytes, leaf->start, + btrfs_header_owner(leaf), + btrfs_header_generation(leaf), + key.objectid, key.offset, 0); + BUG_ON(ret); + cond_resched(); + } + kfree(new_extent); + BUG_ON(ext_index + 1 != ref->nritems); + btrfs_free_leaf_ref(root, ref); + return 0; +} + +int btrfs_free_reloc_root(struct btrfs_root *root) +{ + struct btrfs_root *reloc_root; + + if (root->reloc_root) { + reloc_root = root->reloc_root; + root->reloc_root = NULL; + list_add(&reloc_root->dead_list, + &root->fs_info->dead_reloc_roots); + } + return 0; +} + +int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *reloc_root; + struct btrfs_root *prev_root = NULL; + struct list_head dead_roots; + int ret; + unsigned long nr; + + INIT_LIST_HEAD(&dead_roots); + list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); + + while (!list_empty(&dead_roots)) { + reloc_root = list_entry(dead_roots.prev, + struct btrfs_root, dead_list); + list_del_init(&reloc_root->dead_list); + + BUG_ON(reloc_root->commit_root != NULL); + while (1) { + trans = btrfs_join_transaction(root, 1); + BUG_ON(!trans); + + mutex_lock(&root->fs_info->drop_mutex); + ret = btrfs_drop_snapshot(trans, reloc_root); + if (ret != -EAGAIN) + break; + mutex_unlock(&root->fs_info->drop_mutex); + + nr = trans->blocks_used; + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + btrfs_btree_balance_dirty(root, nr); + } + + free_extent_buffer(reloc_root->node); + + ret = btrfs_del_root(trans, root->fs_info->tree_root, + &reloc_root->root_key); + BUG_ON(ret); + mutex_unlock(&root->fs_info->drop_mutex); + + nr = trans->blocks_used; + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + btrfs_btree_balance_dirty(root, nr); + + kfree(prev_root); + prev_root = reloc_root; + } + if (prev_root) { + btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); + kfree(prev_root); + } + return 0; +} + +int btrfs_add_dead_reloc_root(struct btrfs_root *root) +{ + list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); + return 0; +} + +int btrfs_cleanup_reloc_trees(struct btrfs_root *root) +{ + struct btrfs_root *reloc_root; + struct btrfs_trans_handle *trans; + struct btrfs_key location; + int found; + int ret; + + mutex_lock(&root->fs_info->tree_reloc_mutex); + ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); + BUG_ON(ret); + found = !list_empty(&root->fs_info->dead_reloc_roots); + mutex_unlock(&root->fs_info->tree_reloc_mutex); + + if (found) { + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + } + + location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; + location.offset = (u64)-1; + location.type = BTRFS_ROOT_ITEM_KEY; + + reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); + BUG_ON(!reloc_root); + btrfs_orphan_cleanup(reloc_root); + return 0; +} + +static int noinline init_reloc_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_root *reloc_root; + struct extent_buffer *eb; + struct btrfs_root_item *root_item; + struct btrfs_key root_key; + int ret; + + BUG_ON(!root->ref_cows); + if (root->reloc_root) + return 0; + + root_item = kmalloc(sizeof(*root_item), GFP_NOFS); + BUG_ON(!root_item); + + ret = btrfs_copy_root(trans, root, root->commit_root, + &eb, BTRFS_TREE_RELOC_OBJECTID); + BUG_ON(ret); + + root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; + root_key.offset = root->root_key.objectid; + root_key.type = BTRFS_ROOT_ITEM_KEY; + + memcpy(root_item, &root->root_item, sizeof(root_item)); + btrfs_set_root_refs(root_item, 0); + btrfs_set_root_bytenr(root_item, eb->start); + btrfs_set_root_level(root_item, btrfs_header_level(eb)); + memset(&root_item->drop_progress, 0, sizeof(root_item->drop_progress)); + root_item->drop_level = 0; + + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + + ret = btrfs_insert_root(trans, root->fs_info->tree_root, + &root_key, root_item); + BUG_ON(ret); + kfree(root_item); + + reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, + &root_key); + BUG_ON(!reloc_root); + reloc_root->last_trans = trans->transid; + reloc_root->commit_root = NULL; + reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; + + root->reloc_root = reloc_root; + return 0; +} + +/* + * Core function of space balance. + * + * The idea is using reloc trees to relocate tree blocks in reference + * counted roots. There is one reloc tree for each subvol, all reloc + * trees share same key objectid. Reloc trees are snapshots of the + * latest committed roots (subvol root->commit_root). To relocate a tree + * block referenced by a subvol, the code COW the block through the reloc + * tree, then update pointer in the subvol to point to the new block. + * Since all reloc trees share same key objectid, we can easily do special + * handing to share tree blocks between reloc trees. Once a tree block has + * been COWed in one reloc tree, we can use the result when the same block + * is COWed again through other reloc trees. + */ +static int noinline relocate_one_path(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *first_key, + struct btrfs_ref_path *ref_path, + struct btrfs_block_group_cache *group, + struct inode *reloc_inode) +{ + struct btrfs_root *reloc_root; + struct extent_buffer *eb = NULL; + struct btrfs_key *keys; + u64 *nodes; + int level; + int lowest_merge; + int lowest_level = 0; + int update_refs; + int ret; + + if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) + lowest_level = ref_path->owner_objectid; + + if (is_cowonly_root(ref_path->root_objectid)) { + path->lowest_level = lowest_level; + ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); + BUG_ON(ret < 0); + path->lowest_level = 0; + btrfs_release_path(root, path); + return 0; + } + + keys = kzalloc(sizeof(*keys) * BTRFS_MAX_LEVEL, GFP_NOFS); + BUG_ON(!keys); + nodes = kzalloc(sizeof(*nodes) * BTRFS_MAX_LEVEL, GFP_NOFS); + BUG_ON(!nodes); + + mutex_lock(&root->fs_info->tree_reloc_mutex); + ret = init_reloc_tree(trans, root); + BUG_ON(ret); + reloc_root = root->reloc_root; + + path->lowest_level = lowest_level; + ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 0); + BUG_ON(ret); + /* + * get relocation mapping for tree blocks in the path + */ + lowest_merge = BTRFS_MAX_LEVEL; + for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { + u64 new_bytenr; + eb = path->nodes[level]; + if (!eb || eb == reloc_root->node) + continue; + ret = btrfs_get_reloc_mapping(reloc_root, eb->start, eb->len, + &new_bytenr); + if (ret) + continue; + if (level == 0) + btrfs_item_key_to_cpu(eb, &keys[level], 0); + else + btrfs_node_key_to_cpu(eb, &keys[level], 0); + nodes[level] = new_bytenr; + lowest_merge = level; + } + + update_refs = 0; + if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + eb = path->nodes[0]; + if (btrfs_header_generation(eb) < trans->transid) + update_refs = 1; + } + + btrfs_release_path(reloc_root, path); + /* + * merge tree blocks that already relocated in other reloc trees + */ + if (lowest_merge != BTRFS_MAX_LEVEL) { + ret = btrfs_merge_path(trans, reloc_root, keys, nodes, + lowest_merge); + BUG_ON(ret < 0); + } + /* + * cow any tree blocks that still haven't been relocated + */ + ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 1); + BUG_ON(ret); + /* + * if we are relocating data block group, update extent pointers + * in the newly created tree leaf. + */ + eb = path->nodes[0]; + if (update_refs && nodes[0] != eb->start) { + ret = replace_extents_in_leaf(trans, reloc_root, eb, group, + reloc_inode); + BUG_ON(ret); + } + + memset(keys, 0, sizeof(*keys) * BTRFS_MAX_LEVEL); + memset(nodes, 0, sizeof(*nodes) * BTRFS_MAX_LEVEL); + for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) { + eb = path->nodes[level]; + if (!eb || eb == reloc_root->node) + continue; + BUG_ON(btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID); + nodes[level] = eb->start; + if (level == 0) + btrfs_item_key_to_cpu(eb, &keys[level], 0); + else + btrfs_node_key_to_cpu(eb, &keys[level], 0); + } + + if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + eb = path->nodes[0]; + extent_buffer_get(eb); + } + btrfs_release_path(reloc_root, path); + /* + * replace tree blocks in the fs tree with tree blocks in + * the reloc tree. + */ + ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); + BUG_ON(ret < 0); + + if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + ret = invalidate_extent_cache(reloc_root, eb, group, root); + BUG_ON(ret); + free_extent_buffer(eb); + } + mutex_unlock(&root->fs_info->tree_reloc_mutex); + + path->lowest_level = 0; + kfree(nodes); + kfree(keys); + return 0; +} + +static int noinline relocate_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *first_key, + struct btrfs_ref_path *ref_path) +{ + int ret; + int needs_lock = 0; + + if (root == root->fs_info->extent_root || + root == root->fs_info->chunk_root || + root == root->fs_info->dev_root) { + needs_lock = 1; + mutex_lock(&root->fs_info->alloc_mutex); + } + + ret = relocate_one_path(trans, root, path, first_key, + ref_path, NULL, NULL); + BUG_ON(ret); + + if (root == root->fs_info->extent_root) + btrfs_extent_post_op(trans, root); + if (needs_lock) + mutex_unlock(&root->fs_info->alloc_mutex); + + return 0; +} + +static int noinline del_extent_zero(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key) +{ + int ret; + + mutex_lock(&extent_root->fs_info->alloc_mutex); + ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); + if (ret) + goto out; + ret = btrfs_del_item(trans, extent_root, path); out: btrfs_release_path(extent_root, path); + mutex_unlock(&extent_root->fs_info->alloc_mutex); + return ret; +} + +static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info, + struct btrfs_ref_path *ref_path) +{ + struct btrfs_key root_key; + + root_key.objectid = ref_path->root_objectid; + root_key.type = BTRFS_ROOT_ITEM_KEY; + if (is_cowonly_root(ref_path->root_objectid)) + root_key.offset = 0; + else + root_key.offset = (u64)-1; + + return btrfs_read_fs_root_no_name(fs_info, &root_key); +} + +static int noinline relocate_one_extent(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key, + struct btrfs_block_group_cache *group, + struct inode *reloc_inode, int pass) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *found_root; + struct btrfs_ref_path *ref_path = NULL; + struct disk_extent *new_extents = NULL; + int nr_extents = 0; + int loops; + int ret; + int level; + struct btrfs_key first_key; + u64 prev_block = 0; + + mutex_unlock(&extent_root->fs_info->alloc_mutex); + + trans = btrfs_start_transaction(extent_root, 1); + BUG_ON(!trans); + + if (extent_key->objectid == 0) { + ret = del_extent_zero(trans, extent_root, path, extent_key); + goto out; + } + + ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); + if (!ref_path) { + ret = -ENOMEM; + goto out; + } + + for (loops = 0; ; loops++) { + if (loops == 0) { + ret = btrfs_first_ref_path(trans, extent_root, ref_path, + extent_key->objectid); + } else { + ret = btrfs_next_ref_path(trans, extent_root, ref_path); + } + if (ret < 0) + goto out; + if (ret > 0) + break; + + if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || + ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) + continue; + + found_root = read_ref_root(extent_root->fs_info, ref_path); + BUG_ON(!found_root); + /* + * for reference counted tree, only process reference paths + * rooted at the latest committed root. + */ + if (found_root->ref_cows && + ref_path->root_generation != found_root->root_key.offset) + continue; + + if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + if (pass == 0) { + /* + * copy data extents to new locations + */ + u64 group_start = group->key.objectid; + ret = relocate_data_extent(reloc_inode, + extent_key, + group_start); + if (ret < 0) + goto out; + break; + } + level = 0; + } else { + level = ref_path->owner_objectid; + } + + if (prev_block != ref_path->nodes[level]) { + struct extent_buffer *eb; + u64 block_start = ref_path->nodes[level]; + u64 block_size = btrfs_level_size(found_root, level); + + eb = read_tree_block(found_root, block_start, + block_size, 0); + btrfs_tree_lock(eb); + BUG_ON(level != btrfs_header_level(eb)); + + if (level == 0) + btrfs_item_key_to_cpu(eb, &first_key, 0); + else + btrfs_node_key_to_cpu(eb, &first_key, 0); + + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + prev_block = block_start; + } + + if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID && + pass >= 2) { + /* + * use fallback method to process the remaining + * references. + */ + if (!new_extents) { + u64 group_start = group->key.objectid; + ret = get_new_locations(reloc_inode, + extent_key, + group_start, 0, + &new_extents, + &nr_extents); + if (ret < 0) + goto out; + } + btrfs_record_root_in_trans(found_root); + ret = replace_one_extent(trans, found_root, + path, extent_key, + &first_key, ref_path, + new_extents, nr_extents); + if (ret < 0) + goto out; + continue; + } + + btrfs_record_root_in_trans(found_root); + if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { + ret = relocate_tree_block(trans, found_root, path, + &first_key, ref_path); + } else { + /* + * try to update data extent references while + * keeping metadata shared between snapshots. + */ + ret = relocate_one_path(trans, found_root, path, + &first_key, ref_path, + group, reloc_inode); + } + if (ret < 0) + goto out; + } + ret = 0; +out: + btrfs_end_transaction(trans, extent_root); + kfree(new_extents); + kfree(ref_path); + mutex_lock(&extent_root->fs_info->alloc_mutex); return ret; } @@ -3686,84 +4780,155 @@ int __alloc_chunk_for_shrink(struct btrfs_root *root, return 0; } -int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) +static int __insert_orphan_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 size) +{ + struct btrfs_path *path; + struct btrfs_inode_item *item; + struct extent_buffer *leaf; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_insert_empty_inode(trans, root, path, objectid); + if (ret) + goto out; + + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); + memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); + btrfs_set_inode_generation(leaf, item, 1); + btrfs_set_inode_size(leaf, item, size); + btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); + btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(root, path); +out: + btrfs_free_path(path); + return ret; +} + +static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *group) +{ + struct inode *inode = NULL; + struct btrfs_trans_handle *trans; + struct btrfs_root *root; + struct btrfs_key root_key; + u64 objectid = BTRFS_FIRST_FREE_OBJECTID; + int err = 0; + + root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; + root_key.type = BTRFS_ROOT_ITEM_KEY; + root_key.offset = (u64)-1; + root = btrfs_read_fs_root_no_name(fs_info, &root_key); + if (IS_ERR(root)) + return ERR_CAST(root); + + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + err = btrfs_find_free_objectid(trans, root, objectid, &objectid); + if (err) + goto out; + + err = __insert_orphan_inode(trans, root, objectid, group->key.offset); + BUG_ON(err); + + err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, + group->key.offset, 0); + BUG_ON(err); + + inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); + if (inode->i_state & I_NEW) { + BTRFS_I(inode)->root = root; + BTRFS_I(inode)->location.objectid = objectid; + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.offset = 0; + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + BUG_ON(is_bad_inode(inode)); + } else { + BUG_ON(1); + } + + err = btrfs_orphan_add(trans, inode); +out: + btrfs_end_transaction(trans, root); + if (err) { + if (inode) + iput(inode); + inode = ERR_PTR(err); + } + return inode; +} + +int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) { struct btrfs_trans_handle *trans; - struct btrfs_root *tree_root = root->fs_info->tree_root; struct btrfs_path *path; + struct btrfs_fs_info *info = root->fs_info; + struct extent_buffer *leaf; + struct inode *reloc_inode; + struct btrfs_block_group_cache *block_group; + struct btrfs_key key; u64 cur_byte; u64 total_found; - u64 shrink_last_byte; - struct btrfs_block_group_cache *shrink_block_group; - struct btrfs_key key; - struct btrfs_key found_key; - struct extent_buffer *leaf; u32 nritems; int ret; int progress; + int pass = 0; - mutex_lock(&root->fs_info->alloc_mutex); - shrink_block_group = btrfs_lookup_block_group(root->fs_info, - shrink_start); - BUG_ON(!shrink_block_group); - - shrink_last_byte = shrink_block_group->key.objectid + - shrink_block_group->key.offset; - - shrink_block_group->space_info->total_bytes -= - shrink_block_group->key.offset; - path = btrfs_alloc_path(); root = root->fs_info->extent_root; - path->reada = 2; + + block_group = btrfs_lookup_block_group(info, group_start); + BUG_ON(!block_group); printk("btrfs relocating block group %llu flags %llu\n", - (unsigned long long)shrink_start, - (unsigned long long)shrink_block_group->flags); + (unsigned long long)block_group->key.objectid, + (unsigned long long)block_group->flags); - __alloc_chunk_for_shrink(root, shrink_block_group, 1); + path = btrfs_alloc_path(); + BUG_ON(!path); + reloc_inode = create_reloc_inode(info, block_group); + BUG_ON(IS_ERR(reloc_inode)); + + mutex_lock(&root->fs_info->alloc_mutex); + + __alloc_chunk_for_shrink(root, block_group, 1); + block_group->ro = 1; + block_group->space_info->total_bytes -= block_group->key.offset; + + mutex_unlock(&root->fs_info->alloc_mutex); + + btrfs_start_delalloc_inodes(info->tree_root); + btrfs_wait_ordered_extents(info->tree_root, 0); again: - - shrink_block_group->ro = 1; - total_found = 0; progress = 0; - key.objectid = shrink_start; + key.objectid = block_group->key.objectid; key.offset = 0; key.type = 0; cur_byte = key.objectid; - mutex_unlock(&root->fs_info->alloc_mutex); + trans = btrfs_start_transaction(info->tree_root, 1); + btrfs_commit_transaction(trans, info->tree_root); - btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(tree_root, 0); + mutex_lock(&root->fs_info->cleaner_mutex); + btrfs_clean_old_snapshots(info->tree_root); + btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); + mutex_unlock(&root->fs_info->cleaner_mutex); mutex_lock(&root->fs_info->alloc_mutex); - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out; - - ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); - if (ret < 0) - goto out; - - if (ret == 0) { - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid + found_key.offset > shrink_start && - found_key.objectid < shrink_last_byte) { - cur_byte = found_key.objectid; - key.objectid = cur_byte; - } - } - btrfs_release_path(root, path); - while(1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; - next: leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); @@ -3779,109 +4944,76 @@ next: nritems = btrfs_header_nritems(leaf); } - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (found_key.objectid >= shrink_last_byte) + if (key.objectid >= block_group->key.objectid + + block_group->key.offset) break; if (progress && need_resched()) { - memcpy(&key, &found_key, sizeof(key)); - cond_resched(); btrfs_release_path(root, path); - btrfs_search_slot(NULL, root, &key, path, 0, 0); + mutex_unlock(&root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&root->fs_info->alloc_mutex); progress = 0; - goto next; + continue; } progress = 1; - if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || - found_key.objectid + found_key.offset <= cur_byte) { - memcpy(&key, &found_key, sizeof(key)); - key.offset++; + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || + key.objectid + key.offset <= cur_byte) { path->slots[0]++; goto next; } total_found++; - cur_byte = found_key.objectid + found_key.offset; - key.objectid = cur_byte; + cur_byte = key.objectid + key.offset; btrfs_release_path(root, path); - ret = relocate_one_extent(root, path, &found_key); - __alloc_chunk_for_shrink(root, shrink_block_group, 0); + + __alloc_chunk_for_shrink(root, block_group, 0); + ret = relocate_one_extent(root, path, &key, block_group, + reloc_inode, pass); + BUG_ON(ret < 0); + + key.objectid = cur_byte; + key.type = 0; + key.offset = 0; } btrfs_release_path(root, path); + mutex_unlock(&root->fs_info->alloc_mutex); + + if (pass == 0) { + btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); + invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); + WARN_ON(reloc_inode->i_mapping->nrpages); + } if (total_found > 0) { - printk("btrfs relocate found %llu last extent was %llu\n", - (unsigned long long)total_found, - (unsigned long long)found_key.objectid); - mutex_unlock(&root->fs_info->alloc_mutex); - trans = btrfs_start_transaction(tree_root, 1); - btrfs_commit_transaction(trans, tree_root); - - btrfs_clean_old_snapshots(tree_root); - - btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(tree_root, 0); - - trans = btrfs_start_transaction(tree_root, 1); - btrfs_commit_transaction(trans, tree_root); - mutex_lock(&root->fs_info->alloc_mutex); + printk("btrfs found %llu extents in pass %d\n", + (unsigned long long)total_found, pass); + pass++; goto again; } - /* - * we've freed all the extents, now remove the block - * group item from the tree - */ - mutex_unlock(&root->fs_info->alloc_mutex); + /* delete reloc_inode */ + iput(reloc_inode); - trans = btrfs_start_transaction(root, 1); - - mutex_lock(&root->fs_info->alloc_mutex); - memcpy(&key, &shrink_block_group->key, sizeof(key)); - - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret > 0) - ret = -EIO; - if (ret < 0) { - btrfs_end_transaction(trans, root); - goto out; - } - - spin_lock(&root->fs_info->block_group_cache_lock); - rb_erase(&shrink_block_group->cache_node, - &root->fs_info->block_group_cache_tree); - spin_unlock(&root->fs_info->block_group_cache_lock); - - ret = btrfs_remove_free_space(shrink_block_group, key.objectid, - key.offset); - if (ret) { - btrfs_end_transaction(trans, root); - goto out; - } - /* - memset(shrink_block_group, 0, sizeof(*shrink_block_group)); - kfree(shrink_block_group); - */ - - btrfs_del_item(trans, root, path); - btrfs_release_path(root, path); - mutex_unlock(&root->fs_info->alloc_mutex); - btrfs_commit_transaction(trans, root); + /* unpin extents in this range */ + trans = btrfs_start_transaction(info->tree_root, 1); + btrfs_commit_transaction(trans, info->tree_root); mutex_lock(&root->fs_info->alloc_mutex); - /* the code to unpin extents might set a few bits in the free - * space cache for this range again - */ - /* XXX? */ - ret = btrfs_remove_free_space(shrink_block_group, key.objectid, - key.offset); + spin_lock(&block_group->lock); + WARN_ON(block_group->pinned > 0); + WARN_ON(block_group->reserved > 0); + WARN_ON(btrfs_block_group_used(&block_group->item) > 0); + spin_unlock(&block_group->lock); + ret = 0; out: - btrfs_free_path(path); mutex_unlock(&root->fs_info->alloc_mutex); + btrfs_free_path(path); return ret; } @@ -3922,6 +5054,33 @@ out: return ret; } +int btrfs_free_block_groups(struct btrfs_fs_info *info) +{ + struct btrfs_block_group_cache *block_group; + struct rb_node *n; + + mutex_lock(&info->alloc_mutex); + spin_lock(&info->block_group_cache_lock); + while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { + block_group = rb_entry(n, struct btrfs_block_group_cache, + cache_node); + + spin_unlock(&info->block_group_cache_lock); + btrfs_remove_free_space_cache(block_group); + spin_lock(&info->block_group_cache_lock); + + rb_erase(&block_group->cache_node, + &info->block_group_cache_tree); + spin_lock(&block_group->space_info->lock); + list_del(&block_group->list); + spin_unlock(&block_group->space_info->lock); + kfree(block_group); + } + spin_unlock(&info->block_group_cache_lock); + mutex_unlock(&info->alloc_mutex); + return 0; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -4039,3 +5198,46 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, return 0; } + +int btrfs_remove_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 group_start) +{ + struct btrfs_path *path; + struct btrfs_block_group_cache *block_group; + struct btrfs_key key; + int ret; + + BUG_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); + root = root->fs_info->extent_root; + + block_group = btrfs_lookup_block_group(root->fs_info, group_start); + BUG_ON(!block_group); + + memcpy(&key, &block_group->key, sizeof(key)); + + path = btrfs_alloc_path(); + BUG_ON(!path); + + btrfs_remove_free_space_cache(block_group); + rb_erase(&block_group->cache_node, + &root->fs_info->block_group_cache_tree); + spin_lock(&block_group->space_info->lock); + list_del(&block_group->list); + spin_unlock(&block_group->space_info->lock); + + /* + memset(shrink_block_group, 0, sizeof(*shrink_block_group)); + kfree(shrink_block_group); + */ + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) + ret = -EIO; + if (ret < 0) + goto out; + + ret = btrfs_del_item(trans, root, path); +out: + btrfs_free_path(path); + return ret; +} diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index e3984f902e71..0091c01abb06 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -210,7 +210,10 @@ again: goto err; } - ret = btrfs_add_dead_root(dead_root, latest); + if (objectid == BTRFS_TREE_RELOC_OBJECTID) + ret = btrfs_add_dead_reloc_root(dead_root); + else + ret = btrfs_add_dead_root(dead_root, latest); if (ret) goto err; goto again; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8c83cf464c83..444abe0796ae 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -477,6 +477,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, dirty = root->dirty_root; btrfs_free_log(trans, root); + btrfs_free_reloc_root(root); if (root->commit_root == root->node) { WARN_ON(root->node->start != @@ -855,6 +856,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, * with the tree-log code. */ mutex_lock(&root->fs_info->tree_log_mutex); + /* + * keep tree reloc code from adding new reloc trees + */ + mutex_lock(&root->fs_info->tree_reloc_mutex); + ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); @@ -865,6 +871,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, */ btrfs_free_log_root_tree(trans, root->fs_info); + btrfs_free_reloc_mappings(root); + ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); @@ -910,10 +918,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->tree_log_mutex); btrfs_finish_extent_commit(trans, root, pinned_copy); - mutex_lock(&root->fs_info->trans_mutex); - kfree(pinned_copy); + btrfs_drop_dead_reloc_roots(root); + mutex_unlock(&root->fs_info->tree_reloc_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + cur_trans->commit_done = 1; root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ddf89626498a..51f113119b25 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1268,7 +1268,7 @@ int btrfs_relocate_chunk(struct btrfs_root *root, em_tree = &root->fs_info->mapping_tree.map_tree; /* step one, relocate all the extents inside this chunk */ - ret = btrfs_shrink_extent_tree(extent_root, chunk_offset); + ret = btrfs_relocate_block_group(extent_root, chunk_offset); BUG_ON(ret); trans = btrfs_start_transaction(root, 1); @@ -1308,15 +1308,18 @@ int btrfs_relocate_chunk(struct btrfs_root *root, BUG_ON(ret); } + ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); + BUG_ON(ret); + spin_lock(&em_tree->lock); remove_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + kfree(map); em->bdev = NULL; /* once for the tree */ free_extent_map(em); - spin_unlock(&em_tree->lock); - /* once for us */ free_extent_map(em); From 8c8bee1d7ca47fc75b6bd24a8085c525a2394c02 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 29 Sep 2008 11:19:10 -0400 Subject: [PATCH 0725/2985] Btrfs: Wait for IO on the block device inodes of newly added devices btrfs-vol -a /dev/xxx will zero the first and last two MB of the device. The kernel code needs to wait for this IO to finish before it adds the device. btrfs metadata IO does not happen through the block device inode. A separate address space is used, allowing the zero filled buffer heads in the block device inode to be written to disk after FS metadata starts going down to the disk via the btrfs metadata inode. The end result is zero filled metadata blocks after adding new devices into the filesystem. The fix is a simple filemap_write_and_wait on the block device inode before actually inserting it into the pool of available devices. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 11 ++++++++--- fs/btrfs/inode.c | 13 ++++++++++++- fs/btrfs/volumes.c | 1 + 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2775e270881e..0079b60b18f3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -610,6 +610,7 @@ struct btrfs_fs_info { struct list_head dead_roots; atomic_t nr_async_submits; + atomic_t async_submit_draining; atomic_t nr_async_bios; atomic_t tree_log_writers; atomic_t tree_log_commit; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 45bc3132b054..45b4f7285275 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -460,6 +460,13 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->submit_bio_hook = submit_bio_hook; async->work.func = run_one_async_submit; async->work.flags = 0; + + while(atomic_read(&fs_info->async_submit_draining) && + atomic_read(&fs_info->nr_async_submits)) { + wait_event(fs_info->async_submit_wait, + (atomic_read(&fs_info->nr_async_submits) == 0)); + } + atomic_inc(&fs_info->nr_async_submits); btrfs_queue_worker(&fs_info->workers, &async->work); @@ -495,11 +502,8 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; - u64 offset; int ret; - offset = bio->bi_sector << 9; - /* * when we're called for a write, we're already in the async * submission context. Just jump into btrfs_map_bio @@ -1360,6 +1364,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); + atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->throttles, 0); atomic_set(&fs_info->throttle_gen, 0); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4516fbf01671..404704d26822 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3440,13 +3440,24 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) list_del_init(&binode->delalloc_inodes); spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); if (inode) { - filemap_write_and_wait(inode->i_mapping); + filemap_flush(inode->i_mapping); iput(inode); } cond_resched(); spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); } spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); + + /* the filemap_flush will queue IO into the worker threads, but + * we have to make sure the IO is actually started and that + * ordered extents get created before we return + */ + atomic_inc(&root->fs_info->async_submit_draining); + while(atomic_read(&root->fs_info->nr_async_submits)) { + wait_event(root->fs_info->async_submit_wait, + (atomic_read(&root->fs_info->nr_async_submits) == 0)); + } + atomic_dec(&root->fs_info->async_submit_draining); return 0; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 51f113119b25..f63cf7621a01 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1038,6 +1038,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) return -EIO; } + filemap_write_and_wait(bdev->bd_inode->i_mapping); mutex_lock(&root->fs_info->volume_mutex); trans = btrfs_start_transaction(root, 1); From 9a5e1ea1e1e539e244a54afffc330fc368376ab9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 29 Sep 2008 11:24:41 -0400 Subject: [PATCH 0726/2985] Btrfs: drop WARN_ON from btrfs_add_leaf_ref btrfs_add_leaf_ref was doing checks on the objects it found in the rbtree to make sure they were properly linked into the tree. But, the field it was checking can be safely changed outside of the tree spin lock. The WARN_ON was for debugging the initial implementation and can be safely removed. Signed-off-by: Chris Mason --- fs/btrfs/ref-cache.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index c5809988c875..30fcb7aea5b5 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -67,7 +67,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, while(*p) { parent = *p; entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); - WARN_ON(!entry->in_tree); if (bytenr < entry->bytenr) p = &(*p)->rb_left; From d352ac68148b69937d39ca5d48bcc4478e118dbf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 29 Sep 2008 15:18:18 -0400 Subject: [PATCH 0727/2985] Btrfs: add and improve comments This improves the comments at the top of many functions. It didn't dive into the guts of functions because I was trying to avoid merging problems with the new allocator and back reference work. extent-tree.c and volumes.c were both skipped, and there is definitely more work todo in cleaning and commenting the code. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/TODO | 20 ----- fs/btrfs/async-thread.c | 10 ++- fs/btrfs/async-thread.h | 7 +- fs/btrfs/bit-radix.c | 130 --------------------------- fs/btrfs/bit-radix.h | 33 ------- fs/btrfs/btrfs_inode.h | 54 +++++++++++- fs/btrfs/crc32c.h | 18 ++++ fs/btrfs/ctree.c | 127 ++++++++++++++++++++++++--- fs/btrfs/ctree.h | 1 - fs/btrfs/dir-item.c | 41 +++++++++ fs/btrfs/disk-io.c | 33 ++++++- fs/btrfs/extent_io.c | 34 +++++++- fs/btrfs/extent_map.c | 10 +++ fs/btrfs/file.c | 44 +++++++++- fs/btrfs/inode.c | 189 ++++++++++++++++++++++++++-------------- fs/btrfs/locking.c | 13 +++ fs/btrfs/ordered-data.c | 19 +++- fs/btrfs/ref-cache.c | 26 ++++++ fs/btrfs/ref-cache.h | 3 + fs/btrfs/root-tree.c | 21 ++++- fs/btrfs/struct-funcs.c | 21 +++++ fs/btrfs/super.c | 3 + fs/btrfs/transaction.c | 67 +++++++++++++- fs/btrfs/tree-defrag.c | 4 + 25 files changed, 653 insertions(+), 277 deletions(-) delete mode 100644 fs/btrfs/TODO delete mode 100644 fs/btrfs/bit-radix.c delete mode 100644 fs/btrfs/bit-radix.h diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d5c28557fba9..48b7909ca8d1 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ file-item.o inode-item.o inode-map.o disk-io.o \ - transaction.o bit-radix.o inode.o file.o tree-defrag.o \ + transaction.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ ref-cache.o export.o tree-log.o acl.o free-space-cache.o diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO deleted file mode 100644 index d9b6d38c603a..000000000000 --- a/fs/btrfs/TODO +++ /dev/null @@ -1,20 +0,0 @@ -* cleanup, add more error checking, get rid of BUG_ONs -* Fix ENOSPC handling -* Make allocator smarter -* add a block group to struct inode -* Do actual block accounting -* Check compat and incompat flags on the inode -* Get rid of struct ctree_path, limiting tree levels held at one time -* Add generation number to key pointer in nodes -* Add generation number to inode -* forbid cross subvolume renames and hardlinks -* Release -* Do real tree locking -* Add extent mirroring (backup copies of blocks) -* Add fancy interface to get access to incremental backups -* Add fancy striped extents to make big reads faster -* Use relocation to try and fix write errors -* Make allocator much smarter -* xattrs (directory streams for regular files) -* Scrub & defrag - diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 4e780b279de6..04fb9702d14c 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -231,17 +231,25 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) /* * if we pick a busy task, move the task to the end of the list. - * hopefully this will keep things somewhat evenly balanced + * hopefully this will keep things somewhat evenly balanced. + * Do the move in batches based on the sequence number. This groups + * requests submitted at roughly the same time onto the same worker. */ next = workers->worker_list.next; worker = list_entry(next, struct btrfs_worker_thread, worker_list); atomic_inc(&worker->num_pending); worker->sequence++; + if (worker->sequence % workers->idle_thresh == 0) list_move_tail(next, &workers->worker_list); return worker; } +/* + * selects a worker thread to take the next job. This will either find + * an idle worker, start a new worker up to the max count, or just return + * one of the existing busy workers. + */ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) { struct btrfs_worker_thread *worker; diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 43e44d115dd1..4ec9a2ee0f9d 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -63,14 +63,17 @@ struct btrfs_workers { /* once a worker has this many requests or fewer, it is idle */ int idle_thresh; - /* list with all the work threads */ + /* list with all the work threads. The workers on the idle thread + * may be actively servicing jobs, but they haven't yet hit the + * idle thresh limit above. + */ struct list_head worker_list; struct list_head idle_list; /* lock for finding the next worker thread to queue on */ spinlock_t lock; - /* extra name for this worker */ + /* extra name for this worker, used for current->name */ char *name; }; diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c deleted file mode 100644 index e8bf876db393..000000000000 --- a/fs/btrfs/bit-radix.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#include "bit-radix.h" - -#define BIT_ARRAY_BYTES 256 -#define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8) - -extern struct kmem_cache *btrfs_bit_radix_cachep; -int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) -{ - unsigned long *bits; - unsigned long slot; - int bit_slot; - int ret; - - slot = bit / BIT_RADIX_BITS_PER_ARRAY; - bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; - - bits = radix_tree_lookup(radix, slot); - if (!bits) { - bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS); - if (!bits) - return -ENOMEM; - memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); - bits[0] = slot; - ret = radix_tree_insert(radix, slot, bits); - if (ret) - return ret; - } - ret = test_and_set_bit(bit_slot, bits + 1); - if (ret < 0) - ret = 1; - return ret; -} - -int test_radix_bit(struct radix_tree_root *radix, unsigned long bit) -{ - unsigned long *bits; - unsigned long slot; - int bit_slot; - - slot = bit / BIT_RADIX_BITS_PER_ARRAY; - bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; - - bits = radix_tree_lookup(radix, slot); - if (!bits) - return 0; - return test_bit(bit_slot, bits + 1); -} - -int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) -{ - unsigned long *bits; - unsigned long slot; - int bit_slot; - int i; - int empty = 1; - - slot = bit / BIT_RADIX_BITS_PER_ARRAY; - bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; - - bits = radix_tree_lookup(radix, slot); - if (!bits) - return 0; - clear_bit(bit_slot, bits + 1); - for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) { - if (bits[i]) { - empty = 0; - break; - } - } - if (empty) { - bits = radix_tree_delete(radix, slot); - BUG_ON(!bits); - kmem_cache_free(btrfs_bit_radix_cachep, bits); - } - return 0; -} - -int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, - unsigned long start, int nr) -{ - unsigned long *bits; - unsigned long *gang[4]; - int found; - int ret; - int i; - int total_found = 0; - unsigned long slot; - - slot = start / BIT_RADIX_BITS_PER_ARRAY; - ret = radix_tree_gang_lookup(radix, (void **)gang, slot, - ARRAY_SIZE(gang)); - found = start % BIT_RADIX_BITS_PER_ARRAY; - for (i = 0; i < ret && nr > 0; i++) { - bits = gang[i]; - while(nr > 0) { - found = find_next_bit(bits + 1, - BIT_RADIX_BITS_PER_ARRAY, - found); - if (found < BIT_RADIX_BITS_PER_ARRAY) { - *retbits = bits[0] * - BIT_RADIX_BITS_PER_ARRAY + found; - retbits++; - nr--; - total_found++; - found++; - } else - break; - } - found = 0; - } - return total_found; -} diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h deleted file mode 100644 index c100f54d5c32..000000000000 --- a/fs/btrfs/bit-radix.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#ifndef __BIT_RADIX__ -#define __BIT_RADIX__ -#include - -int set_radix_bit(struct radix_tree_root *radix, unsigned long bit); -int test_radix_bit(struct radix_tree_root *radix, unsigned long bit); -int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit); -int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, - unsigned long start, int nr); - -static inline void init_bit_radix(struct radix_tree_root *radix) -{ - INIT_RADIX_TREE(radix, GFP_NOFS); -} -#endif diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 0577fda2168a..0b2e623cf421 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -25,27 +25,58 @@ /* in memory btrfs inode */ struct btrfs_inode { + /* which subvolume this inode belongs to */ struct btrfs_root *root; + + /* the block group preferred for allocations. This pointer is buggy + * and needs to be replaced with a bytenr instead + */ struct btrfs_block_group_cache *block_group; + + /* key used to find this inode on disk. This is used by the code + * to read in roots of subvolumes + */ struct btrfs_key location; + + /* the extent_tree has caches of all the extent mappings to disk */ struct extent_map_tree extent_tree; + + /* the io_tree does range state (DIRTY, LOCKED etc) */ struct extent_io_tree io_tree; + + /* special utility tree used to record which mirrors have already been + * tried when checksums fail for a given block + */ struct extent_io_tree io_failure_tree; + + /* held while inserting checksums to avoid races */ struct mutex csum_mutex; + + /* held while inesrting or deleting extents from files */ struct mutex extent_mutex; + + /* held while logging the inode in tree-log.c */ struct mutex log_mutex; - struct inode vfs_inode; + + /* used to order data wrt metadata */ struct btrfs_ordered_inode_tree ordered_tree; + /* standard acl pointers */ struct posix_acl *i_acl; struct posix_acl *i_default_acl; /* for keeping track of orphaned inodes */ struct list_head i_orphan; + /* list of all the delalloc inodes in the FS. There are times we need + * to write all the delalloc pages to disk, and this list is used + * to walk them all. + */ struct list_head delalloc_inodes; - /* full 64 bit generation number */ + /* full 64 bit generation number, struct vfs_inode doesn't have a big + * enough field for this. + */ u64 generation; /* @@ -57,10 +88,25 @@ struct btrfs_inode { */ u64 logged_trans; - /* trans that last made a change that should be fully fsync'd */ + /* + * trans that last made a change that should be fully fsync'd. This + * gets reset to zero each time the inode is logged + */ u64 log_dirty_trans; + + /* total number of bytes pending delalloc, used by stat to calc the + * real block usage of the file + */ u64 delalloc_bytes; + + /* + * the size of the file stored in the metadata on disk. data=ordered + * means the in-memory i_size might be larger than the size on disk + * because not all the blocks are written yet. + */ u64 disk_i_size; + + /* flags field from the on disk inode */ u32 flags; /* @@ -68,6 +114,8 @@ struct btrfs_inode { * number for new files that are created */ u64 index_cnt; + + struct inode vfs_inode; }; static inline struct btrfs_inode *BTRFS_I(struct inode *inode) diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h index 4f0fefed132a..1eaf11d334fd 100644 --- a/fs/btrfs/crc32c.h +++ b/fs/btrfs/crc32c.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BTRFS_CRC32C__ #define __BTRFS_CRC32C__ #include diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 50e81f43e6d4..ff3261ff2e19 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Oracle. All rights reserved. + * Copyright (C) 2007,2008 Oracle. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public @@ -54,12 +54,19 @@ struct btrfs_path *btrfs_alloc_path(void) return path; } +/* this also releases the path */ void btrfs_free_path(struct btrfs_path *p) { btrfs_release_path(NULL, p); kmem_cache_free(btrfs_path_cachep, p); } +/* + * path release drops references on the extent buffers in the path + * and it drops any locks held by this path + * + * It is safe to call this on paths that no locks or extent buffers held. + */ void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; @@ -77,6 +84,16 @@ void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) } } +/* + * safely gets a reference on the root node of a tree. A lock + * is not taken, so a concurrent writer may put a different node + * at the root of the tree. See btrfs_lock_root_node for the + * looping required. + * + * The extent buffer returned by this has a reference taken, so + * it won't disappear. It may stop being the root of the tree + * at any time because there are no locks held. + */ struct extent_buffer *btrfs_root_node(struct btrfs_root *root) { struct extent_buffer *eb; @@ -87,6 +104,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root) return eb; } +/* loop around taking references on and locking the root node of the + * tree until you end up with a lock on the root. A locked buffer + * is returned, with a reference held. + */ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) { struct extent_buffer *eb; @@ -108,6 +129,10 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) return eb; } +/* cowonly root (everything not a reference counted cow subvolume), just get + * put onto a simple dirty list. transaction.c walks this to make sure they + * get properly updated on disk. + */ static void add_root_to_dirty_list(struct btrfs_root *root) { if (root->track_dirty && list_empty(&root->dirty_list)) { @@ -116,6 +141,11 @@ static void add_root_to_dirty_list(struct btrfs_root *root) } } +/* + * used by snapshot creation to make a copy of a root for a tree with + * a given objectid. The buffer with the new root node is returned in + * cow_ret, and this func returns zero on success or a negative error code. + */ int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -167,6 +197,22 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, return 0; } +/* + * does the dirty work in cow of a single block. The parent block + * (if supplied) is updated to point to the new cow copy. The new + * buffer is marked dirty and returned locked. If you modify the block + * it needs to be marked dirty again. + * + * search_start -- an allocation hint for the new block + * + * empty_size -- a hint that you plan on doing more cow. This is the size in bytes + * the allocator should try to find free next to the block it returns. This is + * just a hint and may be ignored by the allocator. + * + * prealloc_dest -- if you have already reserved a destination for the cow, + * this uses that block instead of allocating a new one. btrfs_alloc_reserved_extent + * is used to finish the allocation. + */ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -311,6 +357,11 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } +/* + * cows a single block, see __btrfs_cow_block for the real work. + * This version of it has extra checks so that a block isn't cow'd more than + * once per transaction, as long as it hasn't been written yet + */ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, @@ -347,6 +398,10 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, return ret; } +/* + * helper function for defrag to decide if two blocks pointed to by a + * node are actually close by + */ static int close_blocks(u64 blocknr, u64 other, u32 blocksize) { if (blocknr < other && other - (blocknr + blocksize) < 32768) @@ -381,6 +436,11 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) } +/* + * this is used by the defrag code to go through all the + * leaves pointed to by a node and reallocate them so that + * disk order is close to key order + */ int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int start_slot, int cache_only, u64 *last_ret, @@ -521,6 +581,10 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root, return btrfs_item_offset_nr(leaf, nr - 1); } +/* + * extra debugging checks to make sure all the items in a key are + * well formed and in the proper order + */ static int check_node(struct btrfs_root *root, struct btrfs_path *path, int level) { @@ -561,6 +625,10 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, return 0; } +/* + * extra checking to make sure all the items in a leaf are + * well formed and in the proper order + */ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { @@ -782,6 +850,10 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, return -1; } +/* given a node and slot number, this reads the blocks it points to. The + * extent buffer is returned with a reference taken (but unlocked). + * NULL is returned on error. + */ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, struct extent_buffer *parent, int slot) { @@ -798,6 +870,11 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, btrfs_node_ptr_generation(parent, slot)); } +/* + * node level balancing, used to make sure nodes are in proper order for + * item deletion. We balance from the top down, so we have to make sure + * that a deletion won't leave an node completely empty later on. + */ static noinline int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) @@ -1024,7 +1101,10 @@ enospc: return ret; } -/* returns zero if the push worked, non-zero otherwise */ +/* Node balancing for insertion. Here we only split or push nodes around + * when they are completely full. This is also done top down, so we + * have to be pessimistic. + */ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) @@ -1150,7 +1230,8 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, } /* - * readahead one full node of leaves + * readahead one full node of leaves, finding things that are close + * to the block in 'slot', and triggering ra on them. */ static noinline void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, @@ -1226,6 +1307,19 @@ static noinline void reada_for_search(struct btrfs_root *root, } } +/* + * when we walk down the tree, it is usually safe to unlock the higher layers in + * the tree. The exceptions are when our path goes through slot 0, because operations + * on the tree might require changing key pointers higher up in the tree. + * + * callers might also have set path->keep_locks, which tells this code to + * keep the lock if the path points to the last slot in the block. This is + * part of walking through the tree, and selecting the next slot in the higher + * block. + * + * lowest_unlock sets the lowest level in the tree we're allowed to unlock. + * so if lowest_unlock is 1, level 0 won't be unlocked + */ static noinline void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) { @@ -2705,6 +2799,12 @@ again: return ret; } +/* + * make the item pointed to by the path smaller. new_size indicates + * how small to make it, and from_end tells us if we just chop bytes + * off the end of the item or if we shift the item to chop bytes off + * the front. + */ int btrfs_truncate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -2818,6 +2918,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, return ret; } +/* + * make the item pointed to by the path bigger, data_size is the new size. + */ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size) @@ -2897,7 +3000,7 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, } /* - * Given a key and some data, insert an item into the tree. + * Given a key and some data, insert items into the tree. * This does all the path init required, making room in the tree if needed. */ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, @@ -3046,9 +3149,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root /* * delete the pointer from a given node. * - * If the delete empties a node, the node is removed from the tree, - * continuing all the way the root if required. The root is converted into - * a leaf if all the nodes are emptied. + * the tree should have been previously balanced so the deletion does not + * empty a node. */ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot) @@ -3233,6 +3335,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, * search the tree again to find a leaf with lesser keys * returns 0 if it found something or 1 if there are no lesser leaves. * returns < 0 on io errors. + * + * This may release the path, and so you may lose any locks held at the + * time you call it. */ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { @@ -3265,9 +3370,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) /* * A helper function to walk down the tree starting at min_key, and looking * for nodes or leaves that are either in cache or have a minimum - * transaction id. This is used by the btree defrag code, but could - * also be used to search for blocks that have changed since a given - * transaction id. + * transaction id. This is used by the btree defrag code, and tree logging * * This does not cow, but it does stuff the starting key it finds back * into min_key, so you can call btrfs_search_slot with cow=1 on the @@ -3279,6 +3382,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) * This honors path->lowest_level to prevent descent past a given level * of the tree. * + * min_trans indicates the oldest transaction that you are interested + * in walking through. Any nodes or leaves older than min_trans are + * skipped over (without reading them). + * * returns zero if something useful was found, < 0 on error and 1 if there * was nothing in the tree that matched the search criteria. */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0079b60b18f3..ded1643c0273 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -27,7 +27,6 @@ #include #include #include -#include "bit-radix.h" #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index e4f30090d640..5040b71f1900 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -21,6 +21,14 @@ #include "hash.h" #include "transaction.h" +/* + * insert a name into a directory, doing overflow properly if there is a hash + * collision. data_size indicates how big the item inserted should be. On + * success a struct btrfs_dir_item pointer is returned, otherwise it is + * an ERR_PTR. + * + * The name is not copied into the dir item, you have to do that yourself. + */ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -55,6 +63,10 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle return (struct btrfs_dir_item *)ptr; } +/* + * xattrs work a lot like directories, this inserts an xattr item + * into the tree + */ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, u16 name_len, const void *data, u16 data_len, @@ -109,6 +121,13 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, return ret; } +/* + * insert a directory item in the tree, doing all the magic for + * both indexes. 'dir' indicates which objectid to insert it into, + * 'location' is the key to stuff into the directory item, 'type' is the + * type of the inode we're pointing to, and 'index' is the sequence number + * to use for the second index (if one is created). + */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, struct btrfs_key *location, u8 type, u64 index) @@ -184,6 +203,11 @@ out: return 0; } +/* + * lookup a directory item based on name. 'dir' is the objectid + * we're searching in, and 'mod' tells us if you plan on deleting the + * item (use mod < 0) or changing the options (use mod > 0) + */ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, @@ -222,6 +246,14 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, return btrfs_match_dir_item_name(root, path, name, name_len); } +/* + * lookup a directory item based on index. 'dir' is the objectid + * we're searching in, and 'mod' tells us if you plan on deleting the + * item (use mod < 0) or changing the options (use mod > 0) + * + * The name is used to make sure the index really points to the name you were + * looking for. + */ struct btrfs_dir_item * btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -282,6 +314,11 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, return btrfs_match_dir_item_name(root, path, name, name_len); } +/* + * helper function to look at the directory item pointed to by 'path' + * this walks through all the entries in a dir item and finds one + * for a specific name. + */ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) @@ -313,6 +350,10 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, return NULL; } +/* + * given a pointer into a directory item, delete it. This + * handles items that have more than one entry in them. + */ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 45b4f7285275..5ee10d3136f5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -55,6 +55,11 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); +/* + * end_io_wq structs are used to do processing in task context when an IO is + * complete. This is used during reads to verify checksums, and it is used + * by writes to insert metadata for new file extents after IO is complete. + */ struct end_io_wq { struct bio *bio; bio_end_io_t *end_io; @@ -66,6 +71,11 @@ struct end_io_wq { struct btrfs_work work; }; +/* + * async submit bios are used to offload expensive checksumming + * onto the worker threads. They checksum file and metadata bios + * just before they are sent down the IO stack. + */ struct async_submit_bio { struct inode *inode; struct bio *bio; @@ -76,6 +86,10 @@ struct async_submit_bio { struct btrfs_work work; }; +/* + * extents on the btree inode are pretty simple, there's one extent + * that covers the entire device + */ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 len, int create) @@ -151,6 +165,10 @@ void btrfs_csum_final(u32 crc, char *result) *(__le32 *)result = ~cpu_to_le32(crc); } +/* + * compute the csum for a btree block, and either verify it or write it + * into the csum field of the block. + */ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { @@ -204,6 +222,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, return 0; } +/* + * we can't consider a given block up to date unless the transid of the + * block matches the transid in the parent node's pointer. This is how we + * detect blocks that either didn't get written at all or got written + * in the wrong place. + */ static int verify_parent_transid(struct extent_io_tree *io_tree, struct extent_buffer *eb, u64 parent_transid) { @@ -228,9 +252,12 @@ out: unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); return ret; - } +/* + * helper to read a given tree block, doing retries as required when + * the checksums don't match and we have alternate mirrors to try. + */ static int btree_read_extent_buffer_pages(struct btrfs_root *root, struct extent_buffer *eb, u64 start, u64 parent_transid) @@ -260,6 +287,10 @@ printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror return -EIO; } +/* + * checksum a dirty tree block before IO. This has extra checks to make + * sure we only fill in the checksum field in the first page of a multi-page block + */ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { struct extent_io_tree *tree; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8bd1b402f3fd..563b2d12f4f2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -914,6 +914,10 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(wait_on_extent_writeback); +/* + * either insert or lock state struct between start and end use mask to tell + * us if waiting is desired. + */ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { int err; @@ -982,6 +986,13 @@ int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(set_range_writeback); +/* + * find the first offset in the io tree with 'bits' set. zero is + * returned if we find something, and *start_ret and *end_ret are + * set to reflect the state struct that was found. + * + * If nothing was found, 1 is returned, < 0 on error + */ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, int bits) { @@ -1017,6 +1028,10 @@ out: } EXPORT_SYMBOL(find_first_extent_bit); +/* find the first state struct with 'bits' set after 'start', and + * return it. tree->lock must be held. NULL will returned if + * nothing was found after 'start' + */ struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, u64 start, int bits) { @@ -1046,8 +1061,14 @@ out: } EXPORT_SYMBOL(find_first_extent_bit_state); -u64 find_lock_delalloc_range(struct extent_io_tree *tree, - u64 *start, u64 *end, u64 max_bytes) +/* + * find a contiguous range of bytes in the file marked as delalloc, not + * more than 'max_bytes'. start and end are used to return the range, + * + * 1 is returned if we find something, 0 if nothing was in the tree + */ +static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree, + u64 *start, u64 *end, u64 max_bytes) { struct rb_node *node; struct extent_state *state; @@ -1130,6 +1151,11 @@ out: return found; } +/* + * count the number of bytes in the tree that have a given bit(s) + * set. This can be fairly slow, except for EXTENT_DIRTY which is + * cached. The total number found is returned. + */ u64 count_range_bits(struct extent_io_tree *tree, u64 *start, u64 search_end, u64 max_bytes, unsigned long bits) @@ -1245,6 +1271,10 @@ int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(unlock_range); +/* + * set the private field for a given byte offset in the tree. If there isn't + * an extent_state there already, this does nothing. + */ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) { struct rb_node *node; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 78ced11d18c7..74b2a29880d3 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -114,6 +114,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, return NULL; } +/* + * search through the tree for an extent_map with a given offset. If + * it can't be found, try to find some neighboring extents + */ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, struct rb_node **prev_ret, struct rb_node **next_ret) @@ -160,6 +164,10 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, return NULL; } +/* + * look for an offset in the tree, and if it can't be found, return + * the first offset we can find smaller than 'offset'. + */ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) { struct rb_node *prev; @@ -170,6 +178,7 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) return ret; } +/* check to see if two extent_map structs are adjacent and safe to merge */ static int mergable_maps(struct extent_map *prev, struct extent_map *next) { if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) @@ -250,6 +259,7 @@ out: } EXPORT_SYMBOL(add_extent_mapping); +/* simple helper to do math around the end of an extent, handling wrap */ static u64 range_end(u64 start, u64 len) { if (start + len < start) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1b7e51a9db0f..3088a1184483 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -41,6 +41,9 @@ #include "compat.h" +/* simple helper to fault in pages and copy. This should go away + * and be replaced with calls into generic code. + */ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, struct page **prepared_pages, @@ -72,12 +75,19 @@ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, return page_fault ? -EFAULT : 0; } +/* + * unlocks pages after btrfs_file_write is done with them + */ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) { size_t i; for (i = 0; i < num_pages; i++) { if (!pages[i]) break; + /* page checked is some magic around finding pages that + * have been modified without going through btrfs_set_page_dirty + * clear it here + */ ClearPageChecked(pages[i]); unlock_page(pages[i]); mark_page_accessed(pages[i]); @@ -85,6 +95,10 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) } } +/* this does all the hard work for inserting an inline extent into + * the btree. Any existing inline extent is extended as required to make room, + * otherwise things are inserted as required into the btree + */ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 offset, size_t size, @@ -228,6 +242,14 @@ fail: return err; } +/* + * after copy_from_user, pages need to be dirtied and we need to make + * sure holes are created between the current EOF and the start of + * any next extents (if required). + * + * this also makes the decision about creating an inline extent vs + * doing real data extents, marking pages dirty and delalloc as required. + */ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, @@ -362,6 +384,10 @@ out_unlock: return err; } +/* + * this drops all the extents in the cache that intersect the range + * [start, end]. Existing extents are split as required. + */ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned) { @@ -536,6 +562,9 @@ out: * If an extent intersects the range but is not entirely inside the range * it is either truncated or split. Anything entirely inside the range * is deleted from the tree. + * + * inline_limit is used to tell this code which offsets in the file to keep + * if they contain inline extents. */ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, @@ -796,7 +825,9 @@ out: } /* - * this gets pages into the page cache and locks them down + * this gets pages into the page cache and locks them down, it also properly + * waits for data=ordered extents to finish before allowing the pages to be + * modified. */ static int noinline prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, @@ -1034,6 +1065,17 @@ int btrfs_release_file(struct inode * inode, struct file * filp) return 0; } +/* + * fsync call for both files and directories. This logs the inode into + * the tree log instead of forcing full commits whenever possible. + * + * It needs to call filemap_fdatawait so that all ordered extent updates are + * in the metadata btree are up to date for copying to the log. + * + * It drops the inode mutex before doing the tree log commit. This is an + * important optimization for directories because holding the mutex prevents + * new operations on the dir while we write to disk. + */ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 404704d26822..f3abecc2d14c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -83,6 +83,10 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { static void btrfs_truncate(struct inode *inode); +/* + * a very lame attempt at stopping writes when the FS is 85% full. There + * are countless ways this is incorrect, but it is better than nothing. + */ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int for_del) { @@ -108,6 +112,12 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, return ret; } +/* + * when extent_io.c finds a delayed allocation range in the file, + * the call backs end up in this code. The basic idea is to + * allocate extents on disk for the range, and create ordered data structs + * in ram to track those extents. + */ static int cow_file_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -185,6 +195,13 @@ out: return ret; } +/* + * when nowcow writeback call back. This checks for snapshots or COW copies + * of the extents that exist in the file, and COWs the file as required. + * + * If no cow copies or snapshots exist, we write directly to the existing + * blocks on disk + */ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) { u64 extent_start; @@ -291,6 +308,9 @@ out: return err; } +/* + * extent_io.c call back to do delayed allocation processing + */ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -305,6 +325,11 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) return ret; } +/* + * extent_io.c set_bit_hook, used to track delayed allocation + * bytes in this file, and to maintain the list of inodes that + * have pending delalloc work to be done. + */ int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { @@ -323,6 +348,9 @@ int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, return 0; } +/* + * extent_io.c clear_bit_hook, see set_bit_hook for why + */ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { @@ -349,6 +377,10 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, return 0; } +/* + * extent_io.c merge_bio_hook, this must check the chunk tree to make sure + * we don't create bios that span stripes or chunks + */ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio) { @@ -371,6 +403,14 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, return 0; } +/* + * in order to insert checksums into the metadata in large chunks, + * we wait until bio submission time. All the pages in the bio are + * checksummed and sums are attached onto the ordered extent record. + * + * At IO completion time the cums attached on the ordered extent record + * are inserted into the btree + */ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { @@ -383,6 +423,10 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, return btrfs_map_bio(root, rw, bio, mirror_num, 1); } +/* + * extent_io.c submission hook. This does the right thing for csum calculation on write, + * or reading the csums from the tree before a read + */ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { @@ -408,6 +452,10 @@ mapit: return btrfs_map_bio(root, rw, bio, mirror_num, 0); } +/* + * given a list of ordered sums record them in the inode. This happens + * at IO completion time based on sums calculated at bio submission time. + */ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct inode *inode, u64 file_offset, struct list_head *list) @@ -430,12 +478,12 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) GFP_NOFS); } +/* see btrfs_writepage_start_hook for details on why this is required */ struct btrfs_writepage_fixup { struct page *page; struct btrfs_work work; }; -/* see btrfs_writepage_start_hook for details on why this is required */ void btrfs_writepage_fixup_worker(struct btrfs_work *work) { struct btrfs_writepage_fixup *fixup; @@ -522,6 +570,10 @@ int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) return -EAGAIN; } +/* as ordered data IO finishes, this gets called so we can finish + * an ordered extent if the range of bytes in the file it covers are + * fully written. + */ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -631,6 +683,14 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, return btrfs_finish_ordered_io(page->mapping->host, start, end); } +/* + * When IO fails, either with EIO or csum verification fails, we + * try other mirrors that might have a good copy of the data. This + * io_failure_record is used to record state as we go through all the + * mirrors. If another mirror has good data, the page is set up to date + * and things continue. If a good mirror can't be found, the original + * bio end_io callback is called to indicate things have failed. + */ struct io_failure_record { struct page *page; u64 start; @@ -725,6 +785,10 @@ int btrfs_io_failed_hook(struct bio *failed_bio, return 0; } +/* + * each time an IO finishes, we do a fast check in the IO failure tree + * to see if we need to process or clean up an io_failure_record + */ int btrfs_clean_io_failures(struct inode *inode, u64 start) { u64 private; @@ -753,6 +817,11 @@ int btrfs_clean_io_failures(struct inode *inode, u64 start) return 0; } +/* + * when reads are done, we need to check csums to verify the data is correct + * if there's a match, we allow the bio to finish. If not, we go through + * the io_failure_record routines to find good copies + */ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { @@ -990,6 +1059,9 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) btrfs_free_path(path); } +/* + * read an inode from the btree into the in-memory inode + */ void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -1083,6 +1155,9 @@ make_bad: make_bad_inode(inode); } +/* + * given a leaf and an inode, copy the inode fields into the leaf + */ static void fill_inode_item(struct btrfs_trans_handle *trans, struct extent_buffer *leaf, struct btrfs_inode_item *item, @@ -1118,6 +1193,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, BTRFS_I(inode)->block_group->key.objectid); } +/* + * copy everything in the in-memory inode into the btree. + */ int noinline btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) @@ -1151,6 +1229,11 @@ failed: } +/* + * unlink helper that gets used here in inode.c and in the tree logging + * recovery code. It remove a link in a directory with a given name, and + * also drops the back refs in the inode to the directory + */ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, struct inode *inode, @@ -1309,7 +1392,7 @@ fail: /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find - * any higher than i_size. + * any higher than new_size * * csum items that cross the new i_size are truncated to the new size * as well. @@ -2123,6 +2206,11 @@ void btrfs_dirty_inode(struct inode *inode) btrfs_end_transaction(trans, root); } +/* + * find the highest existing sequence number in a directory + * and then set the in-memory index_cnt variable to reflect + * free sequence numbers + */ static int btrfs_set_inode_index_count(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -2175,6 +2263,10 @@ out: return ret; } +/* + * helper to find a free sequence number in a given directory. This current + * code is very simple, later versions will do smarter things in the btree + */ static int btrfs_set_inode_index(struct inode *dir, struct inode *inode, u64 *index) { @@ -2305,6 +2397,12 @@ static inline u8 btrfs_inode_type(struct inode *inode) return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; } +/* + * utility function to add 'inode' into 'parent_inode' with + * a give name and a given sequence number. + * if 'add_backref' is true, also insert a backref from the + * inode to the parent directory. + */ int btrfs_add_link(struct btrfs_trans_handle *trans, struct inode *parent_inode, struct inode *inode, const char *name, int name_len, int add_backref, u64 index) @@ -2611,6 +2709,10 @@ out_unlock: return err; } +/* helper for btfs_get_extent. Given an existing extent in the tree, + * and an extent that you want to insert, deal with overlap and insert + * the new extent into the tree. + */ static int merge_extent_mapping(struct extent_map_tree *em_tree, struct extent_map *existing, struct extent_map *em, @@ -2627,6 +2729,14 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree, return add_extent_mapping(em_tree, em); } +/* + * a bit scary, this does extent mapping from logical file offset to the disk. + * the ugly parts come from merging extents from the disk with the + * in-ram representation. This gets more complex because of the data=ordered code, + * where the in-ram extents might be locked pending data=ordered completion. + * + * This also copies inline extents directly into the page. + */ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) @@ -2869,76 +2979,11 @@ out: return em; } -#if 0 /* waiting for O_DIRECT reads */ -static int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - struct extent_map *em; - u64 start = (u64)iblock << inode->i_blkbits; - struct btrfs_multi_bio *multi = NULL; - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 len; - u64 logical; - u64 map_length; - int ret = 0; - - em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0); - - if (!em || IS_ERR(em)) - goto out; - - if (em->start > start || em->start + em->len <= start) { - goto out; - } - - if (em->block_start == EXTENT_MAP_INLINE) { - ret = -EINVAL; - goto out; - } - - len = em->start + em->len - start; - len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size))); - - if (em->block_start == EXTENT_MAP_HOLE || - em->block_start == EXTENT_MAP_DELALLOC) { - bh_result->b_size = len; - goto out; - } - - logical = start - em->start; - logical = em->block_start + logical; - - map_length = len; - ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - logical, &map_length, &multi, 0); - BUG_ON(ret); - bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits; - bh_result->b_size = min(map_length, len); - - bh_result->b_bdev = multi->stripes[0].dev->bdev; - set_buffer_mapped(bh_result); - kfree(multi); -out: - free_extent_map(em); - return ret; -} -#endif - static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { return -EINVAL; -#if 0 - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - - if (rw == WRITE) - return -EINVAL; - - return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, btrfs_get_block, NULL); -#endif } static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) @@ -3202,6 +3247,9 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, } } +/* + * create a new subvolume directory/inode (helper for the ioctl). + */ int btrfs_create_subvol_root(struct btrfs_root *new_root, struct btrfs_trans_handle *trans, u64 new_dirid, struct btrfs_block_group_cache *block_group) @@ -3223,6 +3271,9 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, return btrfs_update_inode(trans, new_root, inode); } +/* helper function for file defrag and space balancing. This + * forces readahead on a given range of bytes in an inode + */ unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index) @@ -3424,6 +3475,10 @@ out_unlock: return ret; } +/* + * some fairly slow code that needs optimization. This walks the list + * of all the inodes with pending delalloc and forces them to disk. + */ int btrfs_start_delalloc_inodes(struct btrfs_root *root) { struct list_head *head = &root->fs_info->delalloc_inodes; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 0cc314c10d66..e30aa6e2958f 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -25,6 +25,15 @@ #include "extent_io.h" #include "locking.h" +/* + * locks the per buffer mutex in an extent buffer. This uses adaptive locks + * and the spin is not tuned very extensively. The spinning does make a big + * difference in almost every workload, but spinning for the right amount of + * time needs some help. + * + * In general, we want to spin as long as the lock holder is doing btree searches, + * and we should give up if they are in more expensive code. + */ int btrfs_tree_lock(struct extent_buffer *eb) { int i; @@ -57,6 +66,10 @@ int btrfs_tree_locked(struct extent_buffer *eb) return mutex_is_locked(&eb->mutex); } +/* + * btrfs_search_slot uses this to decide if it should drop its locks + * before doing something expensive like allocating free blocks for cow. + */ int btrfs_path_lock_waiting(struct btrfs_path *path, int level) { int i; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 951eacff2420..dcc1730dd837 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -26,7 +26,6 @@ #include "btrfs_inode.h" #include "extent_io.h" - static u64 entry_end(struct btrfs_ordered_extent *entry) { if (entry->file_offset + entry->len < entry->file_offset) @@ -34,6 +33,9 @@ static u64 entry_end(struct btrfs_ordered_extent *entry) return entry->file_offset + entry->len; } +/* returns NULL if the insertion worked, or it returns the node it did find + * in the tree + */ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, struct rb_node *node) { @@ -58,6 +60,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, return NULL; } +/* + * look for a given offset in the tree, and if it can't be found return the + * first lesser offset + */ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, struct rb_node **prev_ret) { @@ -108,6 +114,9 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, return NULL; } +/* + * helper to check if a given offset is inside a given entry + */ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) { if (file_offset < entry->file_offset || @@ -116,6 +125,10 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) return 1; } +/* + * look find the first ordered struct that has this offset, otherwise + * the first one less than this offset + */ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, u64 file_offset) { @@ -305,6 +318,10 @@ int btrfs_remove_ordered_extent(struct inode *inode, return 0; } +/* + * wait for all the ordered extents in a root. This is done when balancing + * space between drives. + */ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) { struct list_head splice; diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index 30fcb7aea5b5..a50ebb67055d 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -21,6 +21,16 @@ #include "ref-cache.h" #include "transaction.h" +/* + * leaf refs are used to cache the information about which extents + * a given leaf has references on. This allows us to process that leaf + * in btrfs_drop_snapshot without needing to read it back from disk. + */ + +/* + * kmalloc a leaf reference struct and update the counters for the + * total ref cache size + */ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, int nr_extents) { @@ -40,6 +50,10 @@ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, return ref; } +/* + * free a leaf reference struct and update the counters for the + * total ref cache size + */ void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { if (!ref) @@ -135,6 +149,10 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, return 0; } +/* + * find the leaf ref for a given extent. This returns the ref struct with + * a usage reference incremented + */ struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, u64 bytenr) { @@ -160,6 +178,10 @@ again: return NULL; } +/* + * add a fully filled in leaf ref struct + * remove all the refs older than a given root generation + */ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, int shared) { @@ -184,6 +206,10 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, return ret; } +/* + * remove a single leaf ref from the tree. This drops the ref held by the tree + * only + */ int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { struct btrfs_leaf_ref_tree *tree; diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h index 617564787f52..16f3183d7c59 100644 --- a/fs/btrfs/ref-cache.h +++ b/fs/btrfs/ref-cache.h @@ -19,8 +19,11 @@ #define __REFCACHE__ struct btrfs_extent_info { + /* bytenr and num_bytes find the extent in the extent allocation tree */ u64 bytenr; u64 num_bytes; + + /* objectid and offset find the back reference for the file */ u64 objectid; u64 offset; }; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0091c01abb06..eb7f7655e9d5 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -22,8 +22,10 @@ #include "print-tree.h" /* - * returns 0 on finding something, 1 if no more roots are there - * and < 0 on error + * search forward for a root, starting with objectid 'search_start' + * if a root key is found, the objectid we find is filled into 'found_objectid' + * and 0 is returned. < 0 is returned on error, 1 if there is nothing + * left in the tree. */ int btrfs_search_root(struct btrfs_root *root, u64 search_start, u64 *found_objectid) @@ -66,6 +68,11 @@ out: return ret; } +/* + * lookup the root with the highest offset for a given objectid. The key we do + * find is copied into 'key'. If we find something return 0, otherwise 1, < 0 + * on error. + */ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key) { @@ -104,6 +111,9 @@ out: return ret; } +/* + * copy the data in 'item' into the btree + */ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item) @@ -147,6 +157,12 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +/* + * at mount time we want to find all the old transaction snapshots that were in + * the process of being deleted if we crashed. This is any root item with an offset + * lower than the latest root. They need to be queued for deletion to finish + * what was happening when we crashed. + */ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_root *latest) { @@ -227,6 +243,7 @@ err: return ret; } +/* drop the root item for 'key' from 'root' */ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key) { diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index ad03a32d1116..cdedbe144d45 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -17,6 +17,27 @@ */ #include + +/* this is some deeply nasty code. ctree.h has a different + * definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef + * + * The end result is that anyone who #includes ctree.h gets a + * declaration for the btrfs_set_foo functions and btrfs_foo functions + * + * This file declares the macros and then #includes ctree.h, which results + * in cpp creating the function here based on the template below. + * + * These setget functions do all the extent_buffer related mapping + * required to efficiently read and write specific fields in the extent + * buffers. Every pointer to metadata items in btrfs is really just + * an unsigned long offset into the extent buffer which has been + * cast to a specific type. This gives us all the gcc type checking. + * + * The extent buffer api is used to do all the kmapping and page + * spanning work required to get extent buffers in highmem and have + * a metadata blocksize different from the page size. + */ + #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ u##bits btrfs_##name(struct extent_buffer *eb, \ type *s) \ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8399d6d05d63..2e6039825b7b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -519,6 +519,9 @@ static struct file_system_type btrfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +/* + * used by btrfsctl to scan devices when no FS is mounted + */ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 444abe0796ae..11266d68a6c9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -46,6 +46,9 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) } } +/* + * either allocate a new transaction or hop into the existing one + */ static noinline int join_transaction(struct btrfs_root *root) { struct btrfs_transaction *cur_trans; @@ -85,6 +88,12 @@ static noinline int join_transaction(struct btrfs_root *root) return 0; } +/* + * this does all the record keeping required to make sure that a + * reference counted root is properly recorded in a given transaction. + * This is required to make sure the old root from before we joined the transaction + * is deleted when the transaction commits + */ noinline int btrfs_record_root_in_trans(struct btrfs_root *root) { struct btrfs_dirty_root *dirty; @@ -127,6 +136,10 @@ noinline int btrfs_record_root_in_trans(struct btrfs_root *root) return 0; } +/* wait for commit against the current transaction to become unblocked + * when this is done, it is safe to start a new transaction, but the current + * transaction might not be fully on disk. + */ static void wait_current_trans(struct btrfs_root *root) { struct btrfs_transaction *cur_trans; @@ -198,7 +211,7 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, return start_transaction(r, num_blocks, 2); } - +/* wait for a transaction commit to be fully complete */ static noinline int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { @@ -218,6 +231,10 @@ static noinline int wait_for_commit(struct btrfs_root *root, return 0; } +/* + * rate limit against the drop_snapshot code. This helps to slow down new operations + * if the drop_snapshot code isn't able to keep up. + */ static void throttle_on_drops(struct btrfs_root *root) { struct btrfs_fs_info *info = root->fs_info; @@ -302,7 +319,11 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, return __btrfs_end_transaction(trans, root, 1); } - +/* + * when btree blocks are allocated, they have some corresponding bits set for + * them in one of two extent_io trees. This is used to make sure all of + * those extents are on disk for transaction or log commit + */ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages) { @@ -393,6 +414,16 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, &trans->transaction->dirty_pages); } +/* + * this is used to update the root pointer in the tree of tree roots. + * + * But, in the case of the extent allocation tree, updating the root + * pointer may allocate blocks which may change the root of the extent + * allocation tree. + * + * So, this loops and repeats and makes sure the cowonly root didn't + * change while the root pointer was being updated in the metadata. + */ static int update_cowonly_root(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -418,6 +449,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, return 0; } +/* + * update all the cowonly tree roots on disk + */ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -433,6 +467,11 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, return 0; } +/* + * dead roots are old snapshots that need to be deleted. This allocates + * a dirty root struct and adds it into the list of dead roots that need to + * be deleted + */ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) { struct btrfs_dirty_root *dirty; @@ -449,6 +488,12 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) return 0; } +/* + * at transaction commit time we need to schedule the old roots for + * deletion via btrfs_drop_snapshot. This runs through all the + * reference counted roots that were modified in the current + * transaction and puts them into the drop list + */ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, struct radix_tree_root *radix, struct list_head *list) @@ -541,6 +586,10 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, return err; } +/* + * defrag a given btree. If cacheonly == 1, this won't read from the disk, + * otherwise every leaf in the btree is read and defragged. + */ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) { struct btrfs_fs_info *info = root->fs_info; @@ -570,6 +619,10 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) return 0; } +/* + * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on + * all of them + */ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { @@ -664,6 +717,10 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, return ret; } +/* + * new snapshots need to be created at a very specific time in the + * transaction commit. This does the actual creation + */ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_pending_snapshot *pending) @@ -734,6 +791,9 @@ fail: return ret; } +/* + * create all the snapshots we've scheduled for creation + */ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { @@ -944,6 +1004,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } +/* + * interface function to delete all the snapshots we have scheduled for deletion + */ int btrfs_clean_old_snapshots(struct btrfs_root *root) { struct list_head dirty_roots; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b3bb5bbad76e..6f57d0889b1e 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -23,6 +23,10 @@ #include "transaction.h" #include "locking.h" +/* defrag all the leaves in a given btree. If cache_only == 1, don't read things + * from disk, otherwise read all the leaves and try to get key order to + * better reflect disk order + */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only) { From 45b8c9a8b1e15bf79c2c17ec217adf96785f8011 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 30 Sep 2008 14:40:06 -0400 Subject: [PATCH 0728/2985] Btrfs: fix seekiness due to finding the wrong block group This patch fixes a problem where we end up seeking too much when *last_ptr is valid. This happens because btrfs_lookup_first_block_group only returns a block group that starts on or after the given search start, so if the search_start is in the middle of a block group it will return the block group after the given search_start, which is suboptimal. This patch fixes that by doing a btrfs_lookup_block_group, which will return the block group that contains the given search start. If we fail to find a block group, we fall back on btrfs_lookup_first_block_group so we can find the next block group, not sure if this is absolutely needed, but better safe than sorry. Also if we can't find the block group that we need, or it happens to not be of the right type, we need to add empty_cluster since *last_ptr could point to a mismatched block group, which means we need to start over with empty_cluster added to total needed. Thank you, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8043b9d584a9..64e14ddf6232 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2238,7 +2238,10 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, total_needed += empty_size; new_group: - block_group = btrfs_lookup_first_block_group(info, search_start); + block_group = btrfs_lookup_block_group(info, search_start); + if (!block_group) + block_group = btrfs_lookup_first_block_group(info, + search_start); /* * Ok this looks a little tricky, buts its really simple. First if we @@ -2255,8 +2258,10 @@ new_group: if (!block_group || (!block_group_bits(block_group, data) && last_ptr && *last_ptr)) { if (search_start != orig_search_start) { - if (last_ptr && *last_ptr) + if (last_ptr && *last_ptr) { + total_needed += empty_cluster; *last_ptr = 0; + } search_start = orig_search_start; goto new_group; } else if (!chunk_alloc_done && allowed_chunk_alloc) { From 75ccf47d13bfb66de7faf596bfe497b9af7aaa40 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 30 Sep 2008 19:24:06 -0400 Subject: [PATCH 0729/2985] Btrfs: fix multi-device code to use raid policies set by mkfs When reading in block groups, a global mask of the available raid policies should be adjusted based on the types of block groups found on disk. This global mask is then used to decide which raid policy to use for new block groups. The recent allocator changes dropped the call that updated the global mask, making all the block groups allocated at run time single striped onto a single drive. This also fixes the async worker threads to set any thread that uses the requeue mechanism as busy. This allows us to avoid blocking on get_request_wait for the async bio submission threads. Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 14 +++++++++++++- fs/btrfs/extent-tree.c | 2 ++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 04fb9702d14c..d82efd722a48 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -302,8 +302,20 @@ int btrfs_requeue_work(struct btrfs_work *work) spin_lock_irqsave(&worker->lock, flags); atomic_inc(&worker->num_pending); list_add_tail(&work->list, &worker->pending); - check_busy_worker(worker); + + /* by definition we're busy, take ourselves off the idle + * list + */ + if (worker->idle) { + spin_lock_irqsave(&worker->workers->lock, flags); + worker->idle = 0; + list_move_tail(&worker->worker_list, + &worker->workers->worker_list); + spin_unlock_irqrestore(&worker->workers->lock, flags); + } + spin_unlock_irqrestore(&worker->lock, flags); + out: return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 64e14ddf6232..677d5e774fad 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5145,6 +5145,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) ret = btrfs_add_block_group_cache(root->fs_info, cache); BUG_ON(ret); + + set_avail_alloc_bits(root->fs_info, cache->flags); } ret = 0; error: From 0463bb4e8d2f717a9bf3be6cc12c0aec51cc261d Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Wed, 1 Oct 2008 19:09:04 -0400 Subject: [PATCH 0730/2985] Btrfs: disk-io.c (open_ctree): Don't deref. NULL upon failed kzalloc Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5ee10d3136f5..41b7d24d07e2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1371,7 +1371,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_super_block *disk_super; - if (!extent_root || !tree_root || !fs_info) { + if (!extent_root || !tree_root || !fs_info || + !chunk_root || !dev_root) { err = -ENOMEM; goto fail; } From 83afeac42c5680b0b70d64fb8c4724cf05483fc2 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Wed, 1 Oct 2008 19:09:51 -0400 Subject: [PATCH 0731/2985] Btrfs: disk-io.c (open_ctree): avoid leaks upon allocation failure Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 41b7d24d07e2..0be044bb6194 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1699,6 +1699,8 @@ fail: kfree(tree_root); bdi_destroy(&fs_info->bdi); kfree(fs_info); + kfree(chunk_root); + kfree(dev_root); return ERR_PTR(err); } From cf749823857230017c86504bfdc70524f929ba96 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 1 Oct 2008 19:11:18 -0400 Subject: [PATCH 0732/2985] Btrfs: fix deadlock between alloc_mutex/chunk_mutex This fixes a deadlock that happens between the alloc_mutex and chunk_mutex. Process A comes in, decides to do a do_chunk_alloc, which takes the chunk_mutex, and is holding the alloc_mutex because the only way you get to do_chunk_alloc is by holding the alloc_mutex. btrfs_alloc_chunk does its thing and goes to insert a new item, which results in a cow of the block. We get into del_pending_extents from there, where if we need to be rescheduled we drop the alloc_mutex and schedule. At this point process B comes in to do an allocation and gets the alloc_mutex, and because process A did not do the chunk allocation completely it thinks its a good time to do a chunk allocation as well, and hangs on the chunk_mutex. Process A wakes up and tries to take the alloc_mutex and cannot. The way to fix this is do a mutex_trylock() on chunk_mutex. If we return 0 we didn't get the lock, and if this is just a "hey it may be a good time to allocate a chunk" then we just exit. If we are trying to force an allocation then we reschedule and keep trying to acquire the chunk_mutex. If once we acquire it the space is already full then we can just exit, otherwise we can continue with the chunk allocation. Thank you, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 677d5e774fad..db37b867e4f1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1505,7 +1505,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 thresh; u64 start; u64 num_bytes; - int ret = 0; + int ret = 0, waited = 0; flags = reduce_alloc_profile(extent_root, flags); @@ -1530,7 +1530,18 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, space_info->bytes_reserved + alloc_bytes) < thresh) goto out; - mutex_lock(&extent_root->fs_info->chunk_mutex); + while (!mutex_trylock(&extent_root->fs_info->chunk_mutex)) { + if (!force) + goto out; + mutex_unlock(&extent_root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&extent_root->fs_info->alloc_mutex); + waited = 1; + } + + if (waited && space_info->full) + goto out_unlock; + ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags); if (ret == -ENOSPC) { printk("space info full %Lu\n", flags); From 323ac95bce442bbde514e3ce57e840402f80d909 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 1 Oct 2008 19:05:46 -0400 Subject: [PATCH 0733/2985] Btrfs: don't read leaf blocks containing only checksums during truncate Checksum items take up a significant portion of the metadata for large files. It is possible to avoid reading them during truncates by checking the keys in the higher level nodes. If a given leaf is followed by another leaf where the lowest key is a checksum item from the same file, we know we can safely delete the leaf without reading it. For a 32GB file on a 6 drive raid0 array, Btrfs needs 8s to delete the file with a cold cache. It is read bound during the run. With this change, Btrfs is able to delete the file in 0.5s Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 69 +++++++++++---------- fs/btrfs/ctree.h | 4 +- fs/btrfs/inode.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 193 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ff3261ff2e19..2eab4643dcbc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1388,7 +1388,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key prealloc_block; lowest_level = p->lowest_level; - WARN_ON(lowest_level && ins_len); + WARN_ON(lowest_level && ins_len > 0); WARN_ON(p->nodes[0] != NULL); WARN_ON(cow && root == root->fs_info->extent_root && !mutex_is_locked(&root->fs_info->alloc_mutex)); @@ -3186,6 +3186,36 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, return ret; } +/* + * a helper function to delete the leaf pointed to by path->slots[1] and + * path->nodes[1]. bytenr is the node block pointer, but since the callers + * already know it, it is faster to have them pass it down than to + * read it out of the node again. + * + * This deletes the pointer in path->nodes[1] and frees the leaf + * block extent. zero is returned if it all worked out, < 0 otherwise. + * + * The path must have already been setup for deleting the leaf, including + * all the proper balancing. path->nodes[1] must be locked. + */ +noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr) +{ + int ret; + u64 root_gen = btrfs_header_generation(path->nodes[1]); + + ret = del_ptr(trans, root, path, 1, path->slots[1]); + if (ret) + return ret; + + ret = btrfs_free_extent(trans, root, bytenr, + btrfs_level_size(root, 0), + path->nodes[1]->start, + btrfs_header_owner(path->nodes[1]), + root_gen, 0, 0, 1); + return ret; +} /* * delete the item at the leaf level in path. If that empties * the leaf, remove it from the tree @@ -3251,17 +3281,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (leaf == root->node) { btrfs_set_header_level(leaf, 0); } else { - u64 root_gen = btrfs_header_generation(path->nodes[1]); - wret = del_ptr(trans, root, path, 1, path->slots[1]); - if (wret) - ret = wret; - wret = btrfs_free_extent(trans, root, - leaf->start, leaf->len, - path->nodes[1]->start, - btrfs_header_owner(path->nodes[1]), - root_gen, 0, 0, 1); - if (wret) - ret = wret; + ret = btrfs_del_leaf(trans, root, path, leaf->start); + BUG_ON(ret); } } else { int used = leaf_space_used(leaf, 0, nritems); @@ -3296,24 +3317,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, } if (btrfs_header_nritems(leaf) == 0) { - u64 root_gen; - u64 bytenr = leaf->start; - u32 blocksize = leaf->len; - - root_gen = btrfs_header_generation( - path->nodes[1]); - - wret = del_ptr(trans, root, path, 1, slot); - if (wret) - ret = wret; - + path->slots[1] = slot; + ret = btrfs_del_leaf(trans, root, path, leaf->start); + BUG_ON(ret); free_extent_buffer(leaf); - wret = btrfs_free_extent(trans, root, bytenr, - blocksize, path->nodes[1]->start, - btrfs_header_owner(path->nodes[1]), - root_gen, 0, 0, 1); - if (wret) - ret = wret; } else { /* if we're still in the path, make sure * we're dirty. Otherwise, one of the @@ -3418,8 +3425,8 @@ again: level = btrfs_header_level(cur); sret = bin_search(cur, min_key, level, &slot); - /* at level = 0, we're done, setup the path and exit */ - if (level == 0) { + /* at the lowest level, we're done, setup the path and exit */ + if (level == path->lowest_level) { if (slot >= nritems) goto find_next_key; ret = 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ded1643c0273..94e0cdfddc0c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1649,7 +1649,9 @@ void btrfs_free_path(struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int slot, int nr); - +int btrfs_del_leaf(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 bytenr); static inline int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f3abecc2d14c..e5c9261dcbaa 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1389,6 +1389,154 @@ fail: return err; } +/* + * when truncating bytes in a file, it is possible to avoid reading + * the leaves that contain only checksum items. This can be the + * majority of the IO required to delete a large file, but it must + * be done carefully. + * + * The keys in the level just above the leaves are checked to make sure + * the lowest key in a given leaf is a csum key, and starts at an offset + * after the new size. + * + * Then the key for the next leaf is checked to make sure it also has + * a checksum item for the same file. If it does, we know our target leaf + * contains only checksum items, and it can be safely freed without reading + * it. + * + * This is just an optimization targeted at large files. It may do + * nothing. It will return 0 unless things went badly. + */ +static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct inode *inode, u64 new_size) +{ + struct btrfs_key key; + int ret; + int nritems; + struct btrfs_key found_key; + struct btrfs_key other_key; + + path->lowest_level = 1; + key.objectid = inode->i_ino; + key.type = BTRFS_CSUM_ITEM_KEY; + key.offset = new_size; +again: + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; + + if (path->nodes[1] == NULL) { + ret = 0; + goto out; + } + ret = 0; + btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]); + nritems = btrfs_header_nritems(path->nodes[1]); + + if (!nritems) + goto out; + + if (path->slots[1] >= nritems) + goto next_node; + + /* did we find a key greater than anything we want to delete? */ + if (found_key.objectid > inode->i_ino || + (found_key.objectid == inode->i_ino && found_key.type > key.type)) + goto out; + + /* we check the next key in the node to make sure the leave contains + * only checksum items. This comparison doesn't work if our + * leaf is the last one in the node + */ + if (path->slots[1] + 1 >= nritems) { +next_node: + /* search forward from the last key in the node, this + * will bring us into the next node in the tree + */ + btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1); + + /* unlikely, but we inc below, so check to be safe */ + if (found_key.offset == (u64)-1) + goto out; + + /* search_forward needs a path with locks held, do the + * search again for the original key. It is possible + * this will race with a balance and return a path that + * we could modify, but this drop is just an optimization + * and is allowed to miss some leaves. + */ + btrfs_release_path(root, path); + found_key.offset++; + + /* setup a max key for search_forward */ + other_key.offset = (u64)-1; + other_key.type = key.type; + other_key.objectid = key.objectid; + + path->keep_locks = 1; + ret = btrfs_search_forward(root, &found_key, &other_key, + path, 0, 0); + path->keep_locks = 0; + if (ret || found_key.objectid != key.objectid || + found_key.type != key.type) { + ret = 0; + goto out; + } + + key.offset = found_key.offset; + btrfs_release_path(root, path); + cond_resched(); + goto again; + } + + /* we know there's one more slot after us in the tree, + * read that key so we can verify it is also a checksum item + */ + btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1); + + if (found_key.objectid < inode->i_ino) + goto next_key; + + if (found_key.type != key.type || found_key.offset < new_size) + goto next_key; + + /* + * if the key for the next leaf isn't a csum key from this objectid, + * we can't be sure there aren't good items inside this leaf. + * Bail out + */ + if (other_key.objectid != inode->i_ino || other_key.type != key.type) + goto out; + + /* + * it is safe to delete this leaf, it contains only + * csum items from this inode at an offset >= new_size + */ + ret = btrfs_del_leaf(trans, root, path, + btrfs_node_blockptr(path->nodes[1], + path->slots[1])); + BUG_ON(ret); + +next_key: + btrfs_release_path(root, path); + + if (other_key.objectid == inode->i_ino && + other_key.type == key.type && other_key.offset > key.offset) { + key.offset = other_key.offset; + cond_resched(); + goto again; + } + ret = 0; +out: + /* fixup any changes we've made to the path */ + path->lowest_level = 0; + path->keep_locks = 0; + btrfs_release_path(root, path); + return ret; +} + /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find @@ -1436,6 +1584,10 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, key.type = (u8)-1; btrfs_init_path(path); + + ret = drop_csum_leaves(trans, root, path, inode, new_size); + BUG_ON(ret); + search_again: ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { From cb843a6f513a1a91c54951005e60bd9b95bdf973 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 3 Oct 2008 12:30:02 -0400 Subject: [PATCH 0734/2985] Btrfs: O_DIRECT writes via buffered writes + invaldiate This reworks the btrfs O_DIRECT write code a bit. It had always fallen back to buffered IO and done an invalidate, but needed to be updated for the data=ordered code. The invalidate wasn't actually removing pages because they were still inside an ordered extent. This also combines the O_DIRECT/O_SYNC paths where possible, and kicks off IO in the main btrfs_file_write loop to keep the pipe down the the disk full as we process long writes. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 64 ++++++++++++++++++++++------------------- fs/btrfs/ordered-data.c | 3 +- fs/btrfs/ordered-data.h | 2 +- 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3088a1184483..a03d1bbb19ad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -905,6 +905,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct page *pinned[2]; unsigned long first_index; unsigned long last_index; + int will_write; + + will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || + (file->f_flags & O_DIRECT)); nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / (sizeof(struct page *))); @@ -1001,15 +1005,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (ret) goto out; + if (will_write) { + btrfs_fdatawrite_range(inode->i_mapping, pos, + pos + write_bytes - 1, + WB_SYNC_NONE); + } else { + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + num_pages); + if (num_pages < + (root->leafsize >> PAGE_CACHE_SHIFT) + 1) + btrfs_btree_balance_dirty(root, 1); + btrfs_throttle(root); + } + buf += write_bytes; count -= write_bytes; pos += write_bytes; num_written += write_bytes; - balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); - if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) - btrfs_btree_balance_dirty(root, 1); - btrfs_throttle(root); cond_resched(); } out: @@ -1023,36 +1036,29 @@ out_nolock: page_cache_release(pinned[1]); *ppos = pos; - if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { + if (num_written > 0 && will_write) { struct btrfs_trans_handle *trans; - err = btrfs_fdatawrite_range(inode->i_mapping, start_pos, - start_pos + num_written -1, - WB_SYNC_NONE); - if (err < 0) + err = btrfs_wait_ordered_range(inode, start_pos, num_written); + if (err) num_written = err; - err = btrfs_wait_on_page_writeback_range(inode->i_mapping, - start_pos, start_pos + num_written - 1); - if (err < 0) - num_written = err; - - trans = btrfs_start_transaction(root, 1); - ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); - if (ret == 0) { - btrfs_sync_log(trans, root); - btrfs_end_transaction(trans, root); - } else { - btrfs_commit_transaction(trans, root); + if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { + trans = btrfs_start_transaction(root, 1); + ret = btrfs_log_dentry_safe(trans, root, + file->f_dentry); + if (ret == 0) { + btrfs_sync_log(trans, root); + btrfs_end_transaction(trans, root); + } else { + btrfs_commit_transaction(trans, root); + } + } + if (file->f_flags & O_DIRECT) { + invalidate_mapping_pages(inode->i_mapping, + start_pos >> PAGE_CACHE_SHIFT, + (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } - } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { - do_sync_mapping_range(inode->i_mapping, start_pos, - start_pos + num_written - 1, - SYNC_FILE_RANGE_WRITE | - SYNC_FILE_RANGE_WAIT_AFTER); - invalidate_mapping_pages(inode->i_mapping, - start_pos >> PAGE_CACHE_SHIFT, - (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; return num_written ? num_written : err; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index dcc1730dd837..2eb6caba57c2 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -397,7 +397,7 @@ void btrfs_start_ordered_extent(struct inode *inode, /* * Used to wait on ordered extents across a large range of bytes. */ -void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) +int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) { u64 end; u64 orig_end; @@ -451,6 +451,7 @@ again: (unsigned long long)orig_end); goto again; } + return 0; } /* diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index fd45519f30a8..f50f8870a144 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -135,7 +135,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, u64 file_offset); void btrfs_start_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry, int wait); -void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); +int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); struct btrfs_ordered_extent * btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); int btrfs_ordered_update_i_size(struct inode *inode, From 30c43e2444c16afe3b2130f40ad273541bf3dc36 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 3 Oct 2008 12:24:01 -0400 Subject: [PATCH 0735/2985] Btrfs: remove last_log_alloc allocator optimization The tree logging code was trying to separate tree log allocations from normal metadata allocations to improve writeback patterns during an fsync. But, the code was not effective and ended up just mixing tree log blocks with regular metadata. That seems to be working fairly well, so the last_log_alloc code can be removed. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 - fs/btrfs/extent-tree.c | 7 ------- fs/btrfs/transaction.c | 1 - 3 files changed, 9 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 94e0cdfddc0c..8566eb30f567 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -672,7 +672,6 @@ struct btrfs_fs_info { u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; - u64 last_log_alloc; spinlock_t ref_cache_lock; u64 total_ref_cache_size; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index db37b867e4f1..29380467b671 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2228,13 +2228,6 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) last_ptr = &root->fs_info->last_data_alloc; - if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { - last_ptr = &root->fs_info->last_log_alloc; - if (!last_ptr == 0 && root->fs_info->last_alloc) { - *last_ptr = root->fs_info->last_alloc + empty_cluster; - } - } - if (last_ptr) { if (*last_ptr) hint_byte = *last_ptr; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 11266d68a6c9..5ecc24d634a2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -61,7 +61,6 @@ static noinline int join_transaction(struct btrfs_root *root) root->fs_info->generation++; root->fs_info->last_alloc = 0; root->fs_info->last_data_alloc = 0; - root->fs_info->last_log_alloc = 0; cur_trans->num_writers = 1; cur_trans->num_joined = 0; cur_trans->transid = root->fs_info->generation; From a62b940160d8125016e85046e68ae621c99e751f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 3 Oct 2008 16:31:08 -0400 Subject: [PATCH 0736/2985] Btrfs: cast bio->bi_sector to a u64 before shifting On 32 bit machines without CONFIG_LBD, the bi_sector field is only 32 bits. Btrfs needs to cast it before shifting up, or we end up doing IO into the wrong place. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- fs/btrfs/volumes.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e5c9261dcbaa..ff0c35976657 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -386,7 +386,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, { struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; struct btrfs_mapping_tree *map_tree; - u64 logical = bio->bi_sector << 9; + u64 logical = (u64)bio->bi_sector << 9; u64 length = 0; u64 map_length; int ret; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f63cf7621a01..2eed7f91f51a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2187,7 +2187,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, struct btrfs_mapping_tree *map_tree; struct btrfs_device *dev; struct bio *first_bio = bio; - u64 logical = bio->bi_sector << 9; + u64 logical = (u64)bio->bi_sector << 9; u64 length = 0; u64 map_length; struct btrfs_multi_bio *multi = NULL; From a76a3cd40c1127ca199d4f7f37bf0d541bf44eb2 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 9 Oct 2008 11:46:29 -0400 Subject: [PATCH 0737/2985] Btrfs: Count space allocated to file in bytes This patch makes btrfs count space allocated to file in bytes instead of 512 byte sectors. Everything else in btrfs uses a byte count instead of sector sizes or blocks sizes, so this fits better. Signed-off-by: Yan Zheng --- fs/btrfs/ctree.h | 13 ++----------- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/file.c | 24 +++++++++++++----------- fs/btrfs/inode.c | 23 ++++++++++++----------- fs/btrfs/ioctl.c | 4 ++-- fs/btrfs/tree-log.c | 6 +++--- 6 files changed, 33 insertions(+), 39 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8566eb30f567..50fbcc9ec45f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -411,7 +411,7 @@ struct btrfs_inode_item { /* transid that last touched this inode */ __le64 transid; __le64 size; - __le64 nblocks; + __le64 nbytes; __le64 block_group; __le32 nlink; __le32 uid; @@ -1017,7 +1017,7 @@ BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64); BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); -BTRFS_SETGET_FUNCS(inode_nblocks, struct btrfs_inode_item, nblocks, 64); +BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64); BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); @@ -1814,15 +1814,6 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio); -static inline void dec_i_blocks(struct inode *inode, u64 dec) -{ - dec = dec >> 9; - if (dec <= inode->i_blocks) - inode->i_blocks -= dec; - else - inode->i_blocks = 0; -} - unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 29380467b671..69db54e09fb9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3930,7 +3930,7 @@ next: BUG_ON(ret); btrfs_release_path(root, path); - inode->i_blocks += extent_len >> 9; + inode_add_bytes(inode, extent_len); ext_offset = 0; num_bytes -= extent_len; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a03d1bbb19ad..18dfdf5f91d1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -193,7 +193,7 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - inode->i_blocks += (offset + size - found_end) >> 9; + inode_add_bytes(inode, offset + size - found_end); } if (found_end < offset) { ptr = btrfs_file_extent_inline_start(ei) + found_size; @@ -203,7 +203,7 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, insert: btrfs_release_path(root, path); datasize = offset + size - key.offset; - inode->i_blocks += datasize >> 9; + inode_add_bytes(inode, datasize); datasize = btrfs_file_extent_calc_inline_size(datasize); ret = btrfs_insert_empty_item(trans, root, path, &key, datasize); @@ -713,7 +713,8 @@ next_slot: extent); if (btrfs_file_extent_disk_bytenr(leaf, extent)) { - dec_i_blocks(inode, old_num - new_num); + inode_sub_bytes(inode, old_num - + new_num); } btrfs_set_file_extent_num_bytes(leaf, extent, new_num); @@ -724,14 +725,17 @@ next_slot: u32 new_size; new_size = btrfs_file_extent_calc_inline_size( inline_limit - key.offset); - dec_i_blocks(inode, (extent_end - key.offset) - - (inline_limit - key.offset)); + inode_sub_bytes(inode, extent_end - + inline_limit); btrfs_truncate_item(trans, root, path, new_size, 1); } } /* delete the entire extent */ if (!keep) { + if (found_inline) + inode_sub_bytes(inode, extent_end - + key.offset); ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ BUG_ON(ret); @@ -743,8 +747,7 @@ next_slot: u32 new_size; new_size = btrfs_file_extent_calc_inline_size( extent_end - end); - dec_i_blocks(inode, (extent_end - key.offset) - - (extent_end - end)); + inode_sub_bytes(inode, end - key.offset); ret = btrfs_truncate_item(trans, root, path, new_size, 0); BUG_ON(ret); @@ -791,9 +794,7 @@ next_slot: } btrfs_release_path(root, path); if (disk_bytenr != 0) { - inode->i_blocks += - btrfs_file_extent_num_bytes(leaf, - extent) >> 9; + inode_add_bytes(inode, extent_end - end); } } @@ -801,7 +802,8 @@ next_slot: u64 disk_bytenr = le64_to_cpu(old.disk_bytenr); if (disk_bytenr != 0) { - dec_i_blocks(inode, le64_to_cpu(old.num_bytes)); + inode_sub_bytes(inode, + le64_to_cpu(old.num_bytes)); ret = btrfs_free_extent(trans, root, disk_bytenr, le64_to_cpu(old.disk_num_bytes), diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ff0c35976657..f9df89c5fdfc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -652,7 +652,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) BUG_ON(ret); btrfs_release_path(root, path); - inode->i_blocks += ordered_extent->len >> 9; + inode_add_bytes(inode, ordered_extent->len); unlock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); @@ -1104,7 +1104,7 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec); inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); - inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item); + inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); inode->i_generation = BTRFS_I(inode)->generation; inode->i_rdev = 0; @@ -1184,7 +1184,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), inode->i_ctime.tv_nsec); - btrfs_set_inode_nblocks(leaf, item, inode->i_blocks); + btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); btrfs_set_inode_transid(leaf, item, trans->transid); btrfs_set_inode_rdev(leaf, item, inode->i_rdev); @@ -1679,7 +1679,7 @@ search_again: num_dec = (orig_num_bytes - extent_num_bytes); if (root->ref_cows && extent_start != 0) - dec_i_blocks(inode, num_dec); + inode_sub_bytes(inode, num_dec); btrfs_mark_buffer_dirty(leaf); } else { extent_num_bytes = @@ -1690,7 +1690,7 @@ search_again: if (extent_start != 0) { found_extent = 1; if (root->ref_cows) - dec_i_blocks(inode, num_dec); + inode_sub_bytes(inode, num_dec); } root_gen = btrfs_header_generation(leaf); root_owner = btrfs_header_owner(leaf); @@ -1700,8 +1700,8 @@ search_again: u32 size = new_size - found_key.offset; if (root->ref_cows) { - dec_i_blocks(inode, item_end + 1 - - found_key.offset - size); + inode_sub_bytes(inode, item_end + 1 - + new_size); } size = btrfs_file_extent_calc_inline_size(size); @@ -1709,8 +1709,8 @@ search_again: size, 1); BUG_ON(ret); } else if (root->ref_cows) { - dec_i_blocks(inode, item_end + 1 - - found_key.offset); + inode_sub_bytes(inode, item_end + 1 - + found_key.offset); } } delete: @@ -2514,7 +2514,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_gid = current->fsgid; inode->i_mode = mode; inode->i_ino = objectid; - inode->i_blocks = 0; + inode_set_bytes(inode, 0); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); @@ -3557,7 +3557,8 @@ static int btrfs_getattr(struct vfsmount *mnt, struct inode *inode = dentry->d_inode; generic_fillattr(inode, stat); stat->blksize = PAGE_CACHE_SIZE; - stat->blocks = inode->i_blocks + (BTRFS_I(inode)->delalloc_bytes >> 9); + stat->blocks = (inode_get_bytes(inode) + + BTRFS_I(inode)->delalloc_bytes) >> 9; return 0; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 04de767a8db2..ab7a0f61ded8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -99,7 +99,7 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, inode_item->generation = cpu_to_le64(1); inode_item->size = cpu_to_le64(3); inode_item->nlink = cpu_to_le32(1); - inode_item->nblocks = cpu_to_le64(1); + inode_item->nbytes = cpu_to_le64(root->leafsize); inode_item->mode = cpu_to_le32(S_IFDIR | 0755); btrfs_set_root_bytenr(&root_item, leaf->start); @@ -671,7 +671,7 @@ out: btrfs_release_path(root, path); if (ret == 0) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_blocks = src->i_blocks; + inode_set_bytes(inode, inode_get_bytes(src)); btrfs_i_size_write(inode, src->i_size); BTRFS_I(inode)->flags = BTRFS_I(src)->flags; ret = btrfs_update_inode(trans, root, inode); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 88bbfd959f18..8df719a73d82 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -113,7 +113,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, inode_item->generation = cpu_to_le64(1); inode_item->size = cpu_to_le64(3); inode_item->nlink = cpu_to_le32(1); - inode_item->nblocks = cpu_to_le64(1); + inode_item->nbytes = cpu_to_le64(root->leafsize); inode_item->mode = cpu_to_le32(S_IFDIR | 0755); btrfs_set_root_bytenr(&root_item, leaf->start); @@ -598,8 +598,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ret = overwrite_item(trans, root, path, eb, slot, key); BUG_ON(ret); - /* btrfs_drop_extents changes i_blocks, update it here */ - inode->i_blocks += (extent_end - start) >> 9; + /* btrfs_drop_extents changes i_bytes & i_blocks, update it here */ + inode_add_bytes(inode, extent_end - start); btrfs_update_inode(trans, root, inode); out: if (inode) From 3bb1a1bc42f2ae9582c28adf620484efcd4da38d Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 9 Oct 2008 11:46:24 -0400 Subject: [PATCH 0738/2985] Btrfs: Remove offset field from struct btrfs_extent_ref The offset field in struct btrfs_extent_ref records the position inside file that file extent is referenced by. In the new back reference system, tree leaves holding references to file extent are recorded explicitly. We can scan these tree leaves very quickly, so the offset field is not required. This patch also makes the back reference system check the objectid when extents are in deleting. Signed-off-by: Yan Zheng --- fs/btrfs/ctree.c | 24 +++--- fs/btrfs/ctree.h | 25 ++---- fs/btrfs/extent-tree.c | 185 ++++++++++++++++++++--------------------- fs/btrfs/file.c | 6 +- fs/btrfs/inode.c | 6 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/print-tree.c | 3 +- fs/btrfs/tree-log.c | 8 +- 8 files changed, 115 insertions(+), 144 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2eab4643dcbc..9caeb377de63 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -254,8 +254,7 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, ret = btrfs_alloc_reserved_extent(trans, root, parent_start, root->root_key.objectid, - trans->transid, level, 0, - &ins); + trans->transid, level, &ins); BUG_ON(ret); cow = btrfs_init_new_buffer(trans, root, prealloc_dest, buf->len); @@ -333,7 +332,7 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, buf->len, buf->start, root->root_key.objectid, btrfs_header_generation(buf), - 0, 0, 1); + level, 1); } free_extent_buffer(buf); add_root_to_dirty_list(root); @@ -347,7 +346,7 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(parent) != trans->transid); btrfs_free_extent(trans, root, buf->start, buf->len, parent_start, btrfs_header_owner(parent), - btrfs_header_generation(parent), 0, 0, 1); + btrfs_header_generation(parent), level, 1); } if (unlock_orig) btrfs_tree_unlock(buf); @@ -927,7 +926,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, ret = btrfs_update_extent_ref(trans, root, child->start, mid->start, child->start, root->root_key.objectid, - trans->transid, level - 1, 0); + trans->transid, level - 1); BUG_ON(ret); add_root_to_dirty_list(root); @@ -940,7 +939,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, free_extent_buffer(mid); ret = btrfs_free_extent(trans, root, mid->start, mid->len, mid->start, root->root_key.objectid, - btrfs_header_generation(mid), 0, 0, 1); + btrfs_header_generation(mid), + level, 1); /* once for the root ptr */ free_extent_buffer(mid); return ret; @@ -1006,7 +1006,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, wret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, btrfs_header_owner(parent), - generation, 0, 0, 1); + generation, level, 1); if (wret) ret = wret; } else { @@ -1055,7 +1055,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, wret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, btrfs_header_owner(parent), - root_gen, 0, 0, 1); + root_gen, level, 1); if (wret) ret = wret; } else { @@ -1691,13 +1691,13 @@ next_level: blocksize, parent->start, btrfs_header_owner(parent), btrfs_header_generation(parent), - level - 1, 0); + level - 1); BUG_ON(ret); ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, btrfs_header_owner(parent), btrfs_header_generation(parent), - level - 1, 0, 1); + level - 1, 1); BUG_ON(ret); if (generation == trans->transid) { @@ -1973,7 +1973,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans, ret = btrfs_update_extent_ref(trans, root, lower->start, lower->start, c->start, root->root_key.objectid, - trans->transid, level - 1, 0); + trans->transid, level - 1); BUG_ON(ret); /* the super has an extra ref to root->node */ @@ -3213,7 +3213,7 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, btrfs_level_size(root, 0), path->nodes[1]->start, btrfs_header_owner(path->nodes[1]), - root_gen, 0, 0, 1); + root_gen, 0, 1); return ret; } /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 50fbcc9ec45f..a37fd783407c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -374,7 +374,6 @@ struct btrfs_extent_ref { __le64 root; __le64 generation; __le64 objectid; - __le64 offset; __le32 num_refs; } __attribute__ ((__packed__)); @@ -1082,7 +1081,6 @@ static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); -BTRFS_SETGET_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64); BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32); BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64); @@ -1090,8 +1088,6 @@ BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref, generation, 64); BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref, objectid, 64); -BTRFS_SETGET_STACK_FUNCS(stack_ref_offset, struct btrfs_extent_ref, - offset, 64); BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref, num_refs, 32); @@ -1522,29 +1518,20 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u32 blocksize); -int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - u64 bytenr, u64 parent, - u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 parent, u64 min_bytes, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - u64 empty_size, u64 hint_byte, + u64 owner, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, u64 data); int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - struct btrfs_key *ins); + u64 owner, struct btrfs_key *ins); int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - struct btrfs_key *ins); + u64 owner, struct btrfs_key *ins); int btrfs_reserve_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, @@ -1563,7 +1550,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, int pin); + u64 owner_objectid, int pin); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -1572,12 +1559,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset); + u64 owner_objectid); int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 orig_parent, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset); + u64 owner_objectid); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 69db54e09fb9..ab36769c356c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -525,31 +525,28 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) * - Objectid of the subvolume root * - Generation number of the tree holding the reference * - objectid of the file holding the reference - * - offset in the file corresponding to the key holding the reference * - number of references holding by parent node (alway 1 for tree blocks) * * Btree leaf may hold multiple references to a file extent. In most cases, * these references are from same file and the corresponding offsets inside - * the file are close together. So inode objectid and offset in file are - * just hints, they provide hints about where in the btree the references - * can be found and when we can stop searching. + * the file are close together. * * When a file extent is allocated the fields are filled in: - * (root_key.objectid, trans->transid, inode objectid, offset in file, 1) + * (root_key.objectid, trans->transid, inode objectid, 1) * * When a leaf is cow'd new references are added for every file extent found * in the leaf. It looks similar to the create case, but trans->transid will * be different when the block is cow'd. * - * (root_key.objectid, trans->transid, inode objectid, offset in file, + * (root_key.objectid, trans->transid, inode objectid, * number of references in the leaf) * - * Because inode objectid and offset in file are just hints, they are not - * used when backrefs are deleted. When a file extent is removed either - * during snapshot deletion or file truncation, we find the corresponding - * back back reference and check the following fields. + * When a file extent is removed either during snapshot deletion or + * file truncation, we find the corresponding back reference and check + * the following fields: * - * (btrfs_header_owner(leaf), btrfs_header_generation(leaf)) + * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), + * inode objectid) * * Btree extents can be referenced by: * @@ -558,21 +555,21 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) * * When a tree block is created, back references are inserted: * - * (root->root_key.objectid, trans->transid, level, 0, 1) + * (root->root_key.objectid, trans->transid, level, 1) * * When a tree block is cow'd, new back references are added for all the * blocks it points to. If the tree block isn't in reference counted root, * the old back references are removed. These new back references are of * the form (trans->transid will have increased since creation): * - * (root->root_key.objectid, trans->transid, level, 0, 1) + * (root->root_key.objectid, trans->transid, level, 1) * * When a backref is in deleting, the following fields are checked: * * if backref was for a tree root: - * (btrfs_header_owner(itself), btrfs_header_generation(itself)) + * (btrfs_header_owner(itself), btrfs_header_generation(itself), level) * else - * (btrfs_header_owner(parent), btrfs_header_generation(parent)) + * (btrfs_header_owner(parent), btrfs_header_generation(parent), level) * * Back Reference Key composing: * @@ -584,13 +581,15 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, u64 bytenr, - u64 parent, u64 ref_root, - u64 ref_generation, int del) + struct btrfs_path *path, + u64 bytenr, u64 parent, + u64 ref_root, u64 ref_generation, + u64 owner_objectid, int del) { struct btrfs_key key; struct btrfs_extent_ref *ref; struct extent_buffer *leaf; + u64 ref_objectid; int ret; key.objectid = bytenr; @@ -607,8 +606,11 @@ static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); + ref_objectid = btrfs_ref_objectid(leaf, ref); if (btrfs_ref_root(leaf, ref) != ref_root || - btrfs_ref_generation(leaf, ref) != ref_generation) { + btrfs_ref_generation(leaf, ref) != ref_generation || + (ref_objectid != owner_objectid && + ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { ret = -EIO; WARN_ON(1); goto out; @@ -623,7 +625,7 @@ static int noinline insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_path *path, u64 bytenr, u64 parent, u64 ref_root, u64 ref_generation, - u64 owner_objectid, u64 owner_offset) + u64 owner_objectid) { struct btrfs_key key; struct extent_buffer *leaf; @@ -643,7 +645,6 @@ static int noinline insert_extent_backref(struct btrfs_trans_handle *trans, btrfs_set_ref_root(leaf, ref, ref_root); btrfs_set_ref_generation(leaf, ref, ref_generation); btrfs_set_ref_objectid(leaf, ref, owner_objectid); - btrfs_set_ref_offset(leaf, ref, owner_offset); btrfs_set_ref_num_refs(leaf, ref, 1); } else if (ret == -EEXIST) { u64 existing_owner; @@ -663,14 +664,10 @@ static int noinline insert_extent_backref(struct btrfs_trans_handle *trans, btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); existing_owner = btrfs_ref_objectid(leaf, ref); - if (existing_owner == owner_objectid && - btrfs_ref_offset(leaf, ref) > owner_offset) { - btrfs_set_ref_offset(leaf, ref, owner_offset); - } else if (existing_owner != owner_objectid && - existing_owner != BTRFS_MULTIPLE_OBJECTIDS) { + if (existing_owner != owner_objectid && + existing_owner != BTRFS_MULTIPLE_OBJECTIDS) { btrfs_set_ref_objectid(leaf, ref, BTRFS_MULTIPLE_OBJECTIDS); - btrfs_set_ref_offset(leaf, ref, 0); } ret = 0; } else { @@ -711,7 +708,7 @@ static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, u64 orig_parent, u64 parent, u64 orig_root, u64 ref_root, u64 orig_generation, u64 ref_generation, - u64 owner_objectid, u64 owner_offset) + u64 owner_objectid) { int ret; struct btrfs_root *extent_root = root->fs_info->extent_root; @@ -762,7 +759,7 @@ static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, return -ENOMEM; ret = lookup_extent_backref(trans, extent_root, path, bytenr, orig_parent, orig_root, - orig_generation, 1); + orig_generation, owner_objectid, 1); if (ret) goto out; ret = remove_extent_backref(trans, extent_root, path); @@ -770,7 +767,7 @@ static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, goto out; ret = insert_extent_backref(trans, extent_root, path, bytenr, parent, ref_root, ref_generation, - owner_objectid, owner_offset); + owner_objectid); BUG_ON(ret); finish_current_insert(trans, extent_root); del_pending_extents(trans, extent_root); @@ -783,7 +780,7 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 orig_parent, u64 parent, u64 ref_root, u64 ref_generation, - u64 owner_objectid, u64 owner_offset) + u64 owner_objectid) { int ret; if (ref_root == BTRFS_TREE_LOG_OBJECTID && @@ -793,7 +790,7 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, parent, ref_root, ref_root, ref_generation, ref_generation, - owner_objectid, owner_offset); + owner_objectid); maybe_unlock_mutex(root); return ret; } @@ -803,7 +800,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u64 orig_parent, u64 parent, u64 orig_root, u64 ref_root, u64 orig_generation, u64 ref_generation, - u64 owner_objectid, u64 owner_offset) + u64 owner_objectid) { struct btrfs_path *path; int ret; @@ -845,7 +842,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ret = insert_extent_backref(trans, root->fs_info->extent_root, path, bytenr, parent, ref_root, ref_generation, - owner_objectid, owner_offset); + owner_objectid); BUG_ON(ret); finish_current_insert(trans, root->fs_info->extent_root); del_pending_extents(trans, root->fs_info->extent_root); @@ -858,7 +855,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 ref_generation, - u64 owner_objectid, u64 owner_offset) + u64 owner_objectid) { int ret; if (ref_root == BTRFS_TREE_LOG_OBJECTID && @@ -867,7 +864,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, maybe_lock_mutex(root); ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, 0, ref_root, 0, ref_generation, - owner_objectid, owner_offset); + owner_objectid); maybe_unlock_mutex(root); return ret; } @@ -1179,7 +1176,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int ret = 0; int faili = 0; int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, - u64, u64, u64, u64, u64, u64, u64, u64, u64); + u64, u64, u64, u64, u64, u64, u64, u64); ref_root = btrfs_header_owner(buf); ref_generation = btrfs_header_generation(buf); @@ -1223,7 +1220,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, - key.objectid, key.offset); + key.objectid); maybe_unlock_mutex(root); if (ret) { @@ -1238,7 +1235,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, - level - 1, 0); + level - 1); maybe_unlock_mutex(root); if (ret) { faili = i; @@ -1314,7 +1311,7 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, - key.objectid, key.offset); + key.objectid); maybe_unlock_mutex(root); if (ret) goto fail; @@ -1325,7 +1322,7 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, - level - 1, 0); + level - 1); maybe_unlock_mutex(root); if (ret) goto fail; @@ -1781,13 +1778,14 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, start, extent_op->parent, extent_root->root_key.objectid, extent_op->generation, - extent_op->level, 0); + extent_op->level); BUG_ON(err); } else if (extent_op->type == PENDING_BACKREF_UPDATE) { err = lookup_extent_backref(trans, extent_root, path, start, extent_op->orig_parent, extent_root->root_key.objectid, - extent_op->orig_generation, 0); + extent_op->orig_generation, + extent_op->level, 0); BUG_ON(err); clear_extent_bits(&info->extent_ins, start, end, @@ -1870,8 +1868,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, - int pin, int mark_free) + u64 owner_objectid, int pin, int mark_free) { struct btrfs_path *path; struct btrfs_key key; @@ -1894,8 +1891,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, return -ENOMEM; path->reada = 1; - ret = lookup_extent_backref(trans, extent_root, path, bytenr, parent, - root_objectid, ref_generation, 1); + ret = lookup_extent_backref(trans, extent_root, path, + bytenr, parent, root_objectid, + ref_generation, owner_objectid, 1); if (ret == 0) { struct btrfs_key found_key; extent_slot = path->slots[0]; @@ -1926,9 +1924,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, btrfs_print_leaf(extent_root, path->nodes[0]); WARN_ON(1); printk("Unable to find ref byte nr %Lu root %Lu " - " gen %Lu owner %Lu offset %Lu\n", bytenr, - root_objectid, ref_generation, owner_objectid, - owner_offset); + "gen %Lu owner %Lu\n", bytenr, + root_objectid, ref_generation, owner_objectid); } leaf = path->nodes[0]; @@ -2068,7 +2065,7 @@ free_extent: extent_op->orig_parent, extent_root->root_key.objectid, extent_op->orig_generation, - extent_op->level, 0, 0, mark_free); + extent_op->level, 0, mark_free); kfree(extent_op); } else { kfree(extent_op); @@ -2107,7 +2104,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, int pin) + u64 owner_objectid, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; int pending_ret; @@ -2156,8 +2153,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, pin = 1; ret = __free_extent(trans, root, bytenr, num_bytes, parent, - root_objectid, ref_generation, owner_objectid, - owner_offset, pin, pin == 0); + root_objectid, ref_generation, + owner_objectid, pin, pin == 0); finish_current_insert(trans, root->fs_info->extent_root); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); @@ -2168,14 +2165,14 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, int pin) + u64 owner_objectid, int pin) { int ret; maybe_lock_mutex(root); ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, root_objectid, ref_generation, - owner_objectid, owner_offset, pin); + owner_objectid, pin); maybe_unlock_mutex(root); return ret; } @@ -2522,8 +2519,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - struct btrfs_key *ins) + u64 owner, struct btrfs_key *ins) { int ret; int pending_ret; @@ -2597,7 +2593,6 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, btrfs_set_ref_root(path->nodes[0], ref, root_objectid); btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); btrfs_set_ref_objectid(path->nodes[0], ref, owner); - btrfs_set_ref_offset(path->nodes[0], ref, owner_offset); btrfs_set_ref_num_refs(path->nodes[0], ref, 1); btrfs_mark_buffer_dirty(path->nodes[0]); @@ -2629,17 +2624,15 @@ out: int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - struct btrfs_key *ins) + u64 owner, struct btrfs_key *ins) { int ret; if (root_objectid == BTRFS_TREE_LOG_OBJECTID) return 0; maybe_lock_mutex(root); - ret = __btrfs_alloc_reserved_extent(trans, root, parent, - root_objectid, ref_generation, - owner, owner_offset, ins); + ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, + ref_generation, owner, ins); update_reserved_extents(root, ins->objectid, ins->offset, 0); maybe_unlock_mutex(root); return ret; @@ -2653,8 +2646,7 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, u64 owner_offset, - struct btrfs_key *ins) + u64 owner, struct btrfs_key *ins) { int ret; struct btrfs_block_group_cache *block_group; @@ -2665,9 +2657,8 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset); BUG_ON(ret); - ret = __btrfs_alloc_reserved_extent(trans, root, parent, - root_objectid, ref_generation, - owner, owner_offset, ins); + ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, + ref_generation, owner, ins); maybe_unlock_mutex(root); return ret; } @@ -2683,8 +2674,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 parent, u64 min_alloc_size, u64 root_objectid, u64 ref_generation, - u64 owner_objectid, u64 owner_offset, - u64 empty_size, u64 hint_byte, + u64 owner_objectid, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, u64 data) { int ret; @@ -2698,7 +2688,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, ref_generation, - owner_objectid, owner_offset, ins); + owner_objectid, ins); BUG_ON(ret); } else { @@ -2750,7 +2740,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *buf; ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize, - root_objectid, ref_generation, level, 0, + root_objectid, ref_generation, level, empty_size, hint, (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); @@ -2800,7 +2790,7 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, ret = __btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), leaf->start, leaf_owner, leaf_generation, - key.objectid, key.offset, 0); + key.objectid, 0); mutex_unlock(&root->fs_info->alloc_mutex); BUG_ON(ret); @@ -2824,7 +2814,7 @@ static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans, ret = __btrfs_free_extent(trans, root, info->bytenr, info->num_bytes, ref->bytenr, ref->owner, ref->generation, - info->objectid, info->offset, 0); + info->objectid, 0); mutex_unlock(&root->fs_info->alloc_mutex); atomic_inc(&root->fs_info->throttle_gen); @@ -2940,7 +2930,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, - root_owner, root_gen, 0, 0, 1); + root_owner, root_gen, + *level - 1, 1); BUG_ON(ret); mutex_unlock(&root->fs_info->alloc_mutex); @@ -2970,9 +2961,10 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, *level = 0; break; } - if (printk_ratelimit()) + if (printk_ratelimit()) { printk("leaf ref miss for bytenr %llu\n", (unsigned long long)bytenr); + } } next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { @@ -3020,7 +3012,7 @@ out: mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, root_owner, root_gen, - 0, 0, 1); + *level, 1); mutex_unlock(&root->fs_info->alloc_mutex); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; @@ -3073,8 +3065,8 @@ static int noinline walk_up_tree(struct btrfs_trans_handle *trans, ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, path->nodes[*level]->len, - parent->start, - root_owner, root_gen, 0, 0, 1); + parent->start, root_owner, + root_gen, *level, 1); BUG_ON(ret); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; @@ -3308,7 +3300,6 @@ struct btrfs_ref_path { u64 root_objectid; u64 root_generation; u64 owner_objectid; - u64 owner_offset; u32 num_refs; int lowest_level; int current_level; @@ -3480,7 +3471,6 @@ found: if (ref_path->lowest_level == level) { ref_path->owner_objectid = ref_objectid; - ref_path->owner_offset = btrfs_ref_offset(leaf, ref); ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); } @@ -3686,16 +3676,20 @@ static int noinline replace_one_extent(struct btrfs_trans_handle *trans, u64 ext_offset; u64 first_pos; u32 nritems; + int nr_scaned = 0; int extent_locked = 0; int ret; - first_pos = ref_path->owner_offset; + memcpy(&key, leaf_key, sizeof(key)); + first_pos = INT_LIMIT(loff_t) - extent_key->offset; if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { - key.objectid = ref_path->owner_objectid; - key.offset = ref_path->owner_offset; - key.type = BTRFS_EXTENT_DATA_KEY; - } else { - memcpy(&key, leaf_key, sizeof(key)); + if (key.objectid < ref_path->owner_objectid || + (key.objectid == ref_path->owner_objectid && + key.type < BTRFS_EXTENT_DATA_KEY)) { + key.objectid = ref_path->owner_objectid; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + } } while (1) { @@ -3718,8 +3712,7 @@ next: } if (path->slots[0] >= nritems) { - if (ref_path->owner_objectid == - BTRFS_MULTIPLE_OBJECTIDS) + if (++nr_scaned > 2) break; BUG_ON(extent_locked); @@ -3858,7 +3851,7 @@ next: leaf->start, root->root_key.objectid, trans->transid, - key.objectid, key.offset); + key.objectid); BUG_ON(ret); ret = btrfs_free_extent(trans, root, @@ -3867,7 +3860,7 @@ next: leaf->start, btrfs_header_owner(leaf), btrfs_header_generation(leaf), - key.objectid, key.offset, 0); + key.objectid, 0); BUG_ON(ret); btrfs_release_path(root, path); @@ -3925,8 +3918,7 @@ next: new_extents[i].disk_num_bytes, leaf->start, root->root_key.objectid, - trans->transid, - key.objectid, key.offset); + trans->transid, key.objectid); BUG_ON(ret); btrfs_release_path(root, path); @@ -4182,14 +4174,13 @@ static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans, new_extent->disk_num_bytes, leaf->start, root->root_key.objectid, - trans->transid, - key.objectid, key.offset); + trans->transid, key.objectid); BUG_ON(ret); ret = btrfs_free_extent(trans, root, bytenr, num_bytes, leaf->start, btrfs_header_owner(leaf), btrfs_header_generation(leaf), - key.objectid, key.offset, 0); + key.objectid, 0); BUG_ON(ret); cond_resched(); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 18dfdf5f91d1..69abbe19add2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -788,8 +788,7 @@ next_slot: le64_to_cpu(old.disk_num_bytes), leaf->start, root->root_key.objectid, - trans->transid, - ins.objectid, ins.offset); + trans->transid, ins.objectid); BUG_ON(ret); } btrfs_release_path(root, path); @@ -808,8 +807,7 @@ next_slot: disk_bytenr, le64_to_cpu(old.disk_num_bytes), leaf_start, root_owner, - root_gen, key.objectid, - key.offset, 0); + root_gen, key.objectid, 0); BUG_ON(ret); *hint_byte = disk_bytenr; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f9df89c5fdfc..3ab147dc3c05 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -647,8 +647,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ins.type = BTRFS_EXTENT_ITEM_KEY; ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, root->root_key.objectid, - trans->transid, inode->i_ino, - ordered_extent->file_offset, &ins); + trans->transid, inode->i_ino, &ins); BUG_ON(ret); btrfs_release_path(root, path); @@ -1734,8 +1733,7 @@ delete: ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, leaf->start, root_owner, - root_gen, inode->i_ino, - found_key.offset, 0); + root_gen, inode->i_ino, 0); BUG_ON(ret); } next: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ab7a0f61ded8..50c8a066d1f0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -658,7 +658,7 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) ds, dl, leaf->start, root->root_key.objectid, trans->transid, - inode->i_ino, key.offset); + inode->i_ino); BUG_ON(ret); } } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 3577badfa5bc..bd9ab3e9a7f2 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -102,11 +102,10 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_EXTENT_REF_KEY: ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref); printk("\t\textent back ref root %llu gen %llu " - "owner %llu offset %llu num_refs %lu\n", + "owner %llu num_refs %lu\n", (unsigned long long)btrfs_ref_root(l, ref), (unsigned long long)btrfs_ref_generation(l, ref), (unsigned long long)btrfs_ref_objectid(l, ref), - (unsigned long long)btrfs_ref_offset(l, ref), (unsigned long)btrfs_ref_num_refs(l, ref)); break; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8df719a73d82..cf618cc8b34a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -460,8 +460,7 @@ insert: ins.objectid, ins.offset, path->nodes[0]->start, root->root_key.objectid, - trans->transid, - key->objectid, key->offset); + trans->transid, key->objectid); } else { /* * insert the extent pointer in the extent @@ -471,7 +470,7 @@ insert: path->nodes[0]->start, root->root_key.objectid, trans->transid, key->objectid, - key->offset, &ins); + &ins); BUG_ON(ret); } } @@ -2534,8 +2533,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, dst_path->nodes[0]->start, BTRFS_TREE_LOG_OBJECTID, trans->transid, - ins_keys[i].objectid, - ins_keys[i].offset); + ins_keys[i].objectid); BUG_ON(ret); } } From 5b84e8d6eef82fcf7b1b16d92e29375b85b6549a Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 9 Oct 2008 11:46:19 -0400 Subject: [PATCH 0739/2985] Btrfs: Fix leaf reference cache miss Due to the optimization for truncate, tree leaves only containing checksum items can be deleted without being COW'ed first. This causes reference cache misses. The way to fix the miss is create cache entries for tree leaves only contain checksum. This patch also fixes a -EEXIST issue in shared reference cache. Signed-off-by: Yan Zheng --- fs/btrfs/extent-tree.c | 8 ++++++++ fs/btrfs/inode.c | 26 +++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ab36769c356c..280ac1aa9b6d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1151,6 +1151,14 @@ int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } ret = btrfs_add_leaf_ref(root, ref, shared); + if (ret == -EEXIST && shared) { + struct btrfs_leaf_ref *old; + old = btrfs_lookup_leaf_ref(root, ref->bytenr); + BUG_ON(!old); + btrfs_remove_leaf_ref(root, old); + btrfs_free_leaf_ref(root, old); + ret = btrfs_add_leaf_ref(root, ref, shared); + } WARN_ON(ret); btrfs_free_leaf_ref(root, ref); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3ab147dc3c05..11bfe131fde6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -48,6 +48,7 @@ #include "xattr.h" #include "compat.h" #include "tree-log.h" +#include "ref-cache.h" struct btrfs_iget_args { u64 ino; @@ -1416,6 +1417,9 @@ static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans, int nritems; struct btrfs_key found_key; struct btrfs_key other_key; + struct btrfs_leaf_ref *ref; + u64 leaf_gen; + u64 leaf_start; path->lowest_level = 1; key.objectid = inode->i_ino; @@ -1509,15 +1513,31 @@ next_node: if (other_key.objectid != inode->i_ino || other_key.type != key.type) goto out; + leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]); + leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]); /* * it is safe to delete this leaf, it contains only * csum items from this inode at an offset >= new_size */ - ret = btrfs_del_leaf(trans, root, path, - btrfs_node_blockptr(path->nodes[1], - path->slots[1])); + ret = btrfs_del_leaf(trans, root, path, leaf_start); BUG_ON(ret); + if (root->ref_cows && leaf_gen < trans->transid) { + ref = btrfs_alloc_leaf_ref(root, 0); + if (ref) { + ref->root_gen = root->root_key.offset; + ref->bytenr = leaf_start; + ref->owner = 0; + ref->generation = leaf_gen; + ref->nritems = 0; + + ret = btrfs_add_leaf_ref(root, ref, 0); + WARN_ON(ret); + btrfs_free_leaf_ref(root, ref); + } else { + WARN_ON(1); + } + } next_key: btrfs_release_path(root, path); From 61f8c86ee8f9ca55488449db886104a0ab4e1f98 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 9 Oct 2008 11:52:35 -0400 Subject: [PATCH 0740/2985] Btrfs: Fix makefile for builing btrfs static This fixes the btrfs makefile for building in the tree and out of the tree both as a module and static. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 48b7909ca8d1..7125716e142b 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -1,7 +1,7 @@ ifneq ($(KERNELRELEASE),) # kbuild part of makefile -obj-m := btrfs.o +obj-$(CONFIG_BTRFS_FS) := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o inode.o file.o tree-defrag.o \ @@ -14,7 +14,7 @@ else KERNELDIR := /lib/modules/`uname -r`/build all: - $(MAKE) -C $(KERNELDIR) M=`pwd` modules + $(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules modules_install: $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install From 833023e46c2a0180ff07d90252c24cb3fdea811d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 9 Oct 2008 11:55:03 -0400 Subject: [PATCH 0741/2985] Btrfs: Rev the disk format for the new back reference format Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a37fd783407c..3fa9b8d6751d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -39,7 +39,7 @@ extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; struct btrfs_ordered_sum; -#define BTRFS_MAGIC "_B9RfS_M" +#define BTRFS_MAGIC "_BBRfS_M" #define BTRFS_ACL_NOT_CACHED ((void *)-1) From cb8e70901d36f32017614f16d2cf7cc089544574 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Oct 2008 13:39:39 -0400 Subject: [PATCH 0742/2985] Btrfs: Fix subvolume creation locking rules Creating a subvolume is in many ways like a normal VFS ->mkdir, and we really need to play with the VFS topology locking rules. So instead of just creating the snapshot on disk and then later getting rid of confliting aliases do it correctly from the start. This will become especially important once we allow for subvolumes anywhere in the tree, and not just below a hidden root. Note that snapshots will need the same treatment, but do to the delay in creating them we can't do it currently. Chris promised to fix that issue, so I'll wait on that. Signed-off-by: Christoph Hellwig --- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 10 ++++- fs/btrfs/ioctl.c | 102 ++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 100 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3fa9b8d6751d..8559f39fd47f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1791,7 +1791,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); -int btrfs_create_subvol_root(struct btrfs_root *new_root, +int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry, struct btrfs_trans_handle *trans, u64 new_dirid, struct btrfs_block_group_cache *block_group); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 11bfe131fde6..bf4bed6ca4d6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3420,11 +3420,12 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, /* * create a new subvolume directory/inode (helper for the ioctl). */ -int btrfs_create_subvol_root(struct btrfs_root *new_root, +int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry, struct btrfs_trans_handle *trans, u64 new_dirid, struct btrfs_block_group_cache *block_group) { struct inode *inode; + int error; u64 index = 0; inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, @@ -3438,7 +3439,12 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, inode->i_nlink = 1; btrfs_i_size_write(inode, 0); - return btrfs_update_inode(trans, new_root, inode); + error = btrfs_update_inode(trans, new_root, inode); + if (error) + return error; + + d_instantiate(dentry, inode); + return 0; } /* helper function for file defrag and space balancing. This diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 50c8a066d1f0..3d85f18bbba6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -28,12 +29,15 @@ #include #include #include +#include #include +#include #include #include #include #include #include +#include #include #include #include @@ -48,8 +52,9 @@ -static noinline int create_subvol(struct btrfs_root *root, char *name, - int namelen) +static noinline int create_subvol(struct btrfs_root *root, + struct dentry *dentry, + char *name, int namelen) { struct btrfs_trans_handle *trans; struct btrfs_key key; @@ -151,14 +156,11 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, trans = btrfs_start_transaction(new_root, 1); BUG_ON(!trans); - ret = btrfs_create_subvol_root(new_root, trans, new_dirid, + ret = btrfs_create_subvol_root(new_root, dentry, trans, new_dirid, BTRFS_I(dir)->block_group); if (ret) goto fail; - /* Invalidate existing dcache entry for new subvolume. */ - btrfs_invalidate_dcache_root(root, name, namelen); - fail: nr = trans->blocks_used; err = btrfs_commit_transaction(trans, new_root); @@ -210,6 +212,79 @@ fail_unlock: return ret; } +/* copy of may_create in fs/namei.c() */ +static inline int btrfs_may_create(struct inode *dir, struct dentry *child) +{ + if (child->d_inode) + return -EEXIST; + if (IS_DEADDIR(dir)) + return -ENOENT; + return inode_permission(dir, MAY_WRITE | MAY_EXEC); +} + +/* + * Create a new subvolume below @parent. This is largely modeled after + * sys_mkdirat and vfs_mkdir, but we only do a single component lookup + * inside this filesystem so it's quite a bit simpler. + */ +static noinline int btrfs_mksubvol(struct path *parent, char *name, + int mode, int namelen) +{ + struct dentry *dentry; + int error; + + mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT); + + dentry = lookup_one_len(name, parent->dentry, namelen); + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out_unlock; + + error = -EEXIST; + if (dentry->d_inode) + goto out_dput; + + if (!IS_POSIXACL(parent->dentry->d_inode)) + mode &= ~current->fs->umask; + error = mnt_want_write(parent->mnt); + if (error) + goto out_dput; + + error = btrfs_may_create(parent->dentry->d_inode, dentry); + if (error) + goto out_drop_write; + + mode &= (S_IRWXUGO|S_ISVTX); + error = security_inode_mkdir(parent->dentry->d_inode, dentry, mode); + if (error) + goto out_drop_write; + + /* + * Actually perform the low-level subvolume creation after all + * this VFS fuzz. + * + * Eventually we want to pass in an inode under which we create this + * subvolume, but for now all are under the filesystem root. + * + * Also we should pass on the mode eventually to allow creating new + * subvolume with specific mode bits. + */ + error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, dentry, + name, namelen); + if (error) + goto out_drop_write; + + fsnotify_mkdir(parent->dentry->d_inode, dentry); +out_drop_write: + mnt_drop_write(parent->mnt); +out_dput: + dput(dentry); +out_unlock: + mutex_unlock(&parent->dentry->d_inode->i_mutex); + return error; +} + + int btrfs_defrag_file(struct file *file) { struct inode *inode = fdentry(file)->d_inode; @@ -395,9 +470,10 @@ out: return ret; } -static noinline int btrfs_ioctl_snap_create(struct btrfs_root *root, +static noinline int btrfs_ioctl_snap_create(struct file *file, void __user *arg) { + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_vol_args *vol_args; struct btrfs_dir_item *di; struct btrfs_path *path; @@ -444,10 +520,14 @@ static noinline int btrfs_ioctl_snap_create(struct btrfs_root *root, goto out; } - if (root == root->fs_info->tree_root) - ret = create_subvol(root, vol_args->name, namelen); - else + if (root == root->fs_info->tree_root) { + ret = btrfs_mksubvol(&file->f_path, vol_args->name, + file->f_path.dentry->d_inode->i_mode, + namelen); + } else { ret = create_snapshot(root, vol_args->name, namelen); + } + out: kfree(vol_args); return ret; @@ -761,7 +841,7 @@ long btrfs_ioctl(struct file *file, unsigned int switch (cmd) { case BTRFS_IOC_SNAP_CREATE: - return btrfs_ioctl_snap_create(root, (void __user *)arg); + return btrfs_ioctl_snap_create(file, (void __user *)arg); case BTRFS_IOC_DEFRAG: return btrfs_ioctl_defrag(file); case BTRFS_IOC_RESIZE: From a3dddf3fc82a1f5942c0928abfd114e9a8c0d0c8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 10 Oct 2008 10:23:22 -0400 Subject: [PATCH 0743/2985] Btrfs: Don't call security_inode_mkdir during subvol creation Subvol creation already requires privs, and security_inode_mkdir isn't exported. For now we don't need it. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3d85f18bbba6..1136ce2febcc 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -254,11 +254,6 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, if (error) goto out_drop_write; - mode &= (S_IRWXUGO|S_ISVTX); - error = security_inode_mkdir(parent->dentry->d_inode, dentry, mode); - if (error) - goto out_drop_write; - /* * Actually perform the low-level subvolume creation after all * this VFS fuzz. From 37d3cdddf5378606f6eab99982ca16819745d8f4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 10 Oct 2008 10:24:32 -0400 Subject: [PATCH 0744/2985] Btrfs: make tree_search_offset more flexible in its searching Sometimes we end up freeing a reserved extent because we don't need it, however this means that its possible for transaction->last_alloc to point to the middle of a free area. When we search for free space in find_free_space we do a tree_search_offset with contains set to 0, because we want it to find the next best free area if we do not have an offset starting on the given offset. Unfortunately that currently means that if the offset we were given as a hint points to the middle of a free area, we won't find anything. This is especially bad if we happened to last allocate from the big huge chunk of a newly formed block group, since we won't find anything and have to go back and search the long way around. This fixes this problem by making it so that we return the free space area regardless of the contains variable. This made cache missing happen _alot_ less, and speeds things up considerably. Signed-off-by: Josef Bacik --- fs/btrfs/free-space-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f1d9b6bc23ba..96241f01fa0a 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -90,8 +90,8 @@ static struct btrfs_free_space *tree_search_offset(struct rb_root *root, ret = entry; n = n->rb_left; } else if (offset > entry->offset) { - if (contains && - (entry->offset + entry->bytes - 1) >= offset) { + if ((entry->offset + entry->bytes - 1) >= offset && + bytes <= entry->bytes) { ret = entry; break; } From f940260a980a1ad7570dfd7a4b73c6ad2207f738 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 27 Oct 2008 18:17:41 +0800 Subject: [PATCH 0745/2985] Blackfin arch: unify duplicated bss init code Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/mach-common/head.S | 66 +++++++++----------------------- 1 file changed, 19 insertions(+), 47 deletions(-) diff --git a/arch/blackfin/mach-common/head.S b/arch/blackfin/mach-common/head.S index f123a62e2451..42ee2b1831bb 100644 --- a/arch/blackfin/mach-common/head.S +++ b/arch/blackfin/mach-common/head.S @@ -195,6 +195,19 @@ ENDPROC(__start) # define WDOG_CTL WDOGA_CTL #endif +ENTRY(__init_clear_bss) + r2 = r2 - r1; + cc = r2 == 0; + if cc jump .L_bss_done; + r2 >>= 2; + p1 = r1; + p2 = r2; + lsetup (1f, 1f) lc0 = p2; +1: [p1++] = r0; +.L_bss_done: + rts; +ENDPROC(__init_clear_bss) + ENTRY(_real_start) /* Enable nested interrupts */ [--sp] = reti; @@ -206,75 +219,34 @@ ENTRY(_real_start) w[p0] = r0; ssync; + r0 = 0 (x); + /* Zero out all of the fun bss regions */ #if L1_DATA_A_LENGTH > 0 r1.l = __sbss_l1; r1.h = __sbss_l1; r2.l = __ebss_l1; r2.h = __ebss_l1; - r0 = 0 (z); - r2 = r2 - r1; - cc = r2 == 0; - if cc jump .L_a_l1_done; - r2 >>= 2; - p1 = r1; - p2 = r2; - lsetup (.L_clear_a_l1, .L_clear_a_l1 ) lc0 = p2; -.L_clear_a_l1: - [p1++] = r0; -.L_a_l1_done: + call __init_clear_bss #endif - #if L1_DATA_B_LENGTH > 0 r1.l = __sbss_b_l1; r1.h = __sbss_b_l1; r2.l = __ebss_b_l1; r2.h = __ebss_b_l1; - r0 = 0 (z); - r2 = r2 - r1; - cc = r2 == 0; - if cc jump .L_b_l1_done; - r2 >>= 2; - p1 = r1; - p2 = r2; - lsetup (.L_clear_b_l1, .L_clear_b_l1 ) lc0 = p2; -.L_clear_b_l1: - [p1++] = r0; -.L_b_l1_done: + call __init_clear_bss #endif - #if L2_LENGTH > 0 r1.l = __sbss_l2; r1.h = __sbss_l2; r2.l = __ebss_l2; r2.h = __ebss_l2; - r0 = 0 (z); - r2 = r2 - r1; - cc = r2 == 0; - if cc jump .L_l2_done; - r2 >>= 2; - p1 = r1; - p2 = r2; - lsetup (.L_clear_l2, .L_clear_l2 ) lc0 = p2; -.L_clear_l2: - [p1++] = r0; -.L_l2_done: + call __init_clear_bss #endif - - /* Zero out the bss region - * Note: this will fail if bss is 0 bytes ... - */ - r0 = 0 (z); r1.l = ___bss_start; r1.h = ___bss_start; r2.l = ___bss_stop; r2.h = ___bss_stop; - r2 = r2 - r1; - r2 >>= 2; - p1 = r1; - p2 = r2; - lsetup (.L_clear_bss, .L_clear_bss) lc0 = p2; -.L_clear_bss: - [p1++] = r0; + call __init_clear_bss /* In case there is a NULL pointer reference, * zero out region before stext From 2d56f3a32c0e62f99c043d2579840f9731fe5855 Mon Sep 17 00:00:00 2001 From: Philip Langdale Date: Thu, 16 Oct 2008 22:31:42 -0400 Subject: [PATCH 0746/2985] Input: refactor evdev 32bit compat to be shareable with uinput Currently, evdev has working 32bit compatibility and uinput does not. uinput needs the input_event code that evdev uses, so let's refactor it so it can be shared. [dtor@mail.ru: add fix for force feedback compat issues] Signed-off-by: Philip Langdale Signed-off-by: Dmitry Torokhov --- drivers/input/Makefile | 2 +- drivers/input/evdev.c | 197 ++--------------------------------- drivers/input/input-compat.c | 135 ++++++++++++++++++++++++ drivers/input/input-compat.h | 94 +++++++++++++++++ drivers/input/misc/uinput.c | 172 +++++++++++++++++++++++++----- 5 files changed, 383 insertions(+), 217 deletions(-) create mode 100644 drivers/input/input-compat.c create mode 100644 drivers/input/input-compat.h diff --git a/drivers/input/Makefile b/drivers/input/Makefile index 98c4f9a77876..4c9c745a7020 100644 --- a/drivers/input/Makefile +++ b/drivers/input/Makefile @@ -5,7 +5,7 @@ # Each configuration option enables a list of files. obj-$(CONFIG_INPUT) += input-core.o -input-core-objs := input.o ff-core.o +input-core-objs := input.o input-compat.o ff-core.o obj-$(CONFIG_INPUT_FF_MEMLESS) += ff-memless.o obj-$(CONFIG_INPUT_POLLDEV) += input-polldev.o diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 3524bef62be6..377b2007377e 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include "input-compat.h" struct evdev { int exist; @@ -291,187 +291,6 @@ static int evdev_open(struct inode *inode, struct file *file) return error; } -#ifdef CONFIG_COMPAT - -struct input_event_compat { - struct compat_timeval time; - __u16 type; - __u16 code; - __s32 value; -}; - -struct ff_periodic_effect_compat { - __u16 waveform; - __u16 period; - __s16 magnitude; - __s16 offset; - __u16 phase; - - struct ff_envelope envelope; - - __u32 custom_len; - compat_uptr_t custom_data; -}; - -struct ff_effect_compat { - __u16 type; - __s16 id; - __u16 direction; - struct ff_trigger trigger; - struct ff_replay replay; - - union { - struct ff_constant_effect constant; - struct ff_ramp_effect ramp; - struct ff_periodic_effect_compat periodic; - struct ff_condition_effect condition[2]; /* One for each axis */ - struct ff_rumble_effect rumble; - } u; -}; - -/* Note to the author of this code: did it ever occur to - you why the ifdefs are needed? Think about it again. -AK */ -#ifdef CONFIG_X86_64 -# define COMPAT_TEST is_compat_task() -#elif defined(CONFIG_IA64) -# define COMPAT_TEST IS_IA32_PROCESS(task_pt_regs(current)) -#elif defined(CONFIG_S390) -# define COMPAT_TEST test_thread_flag(TIF_31BIT) -#elif defined(CONFIG_MIPS) -# define COMPAT_TEST test_thread_flag(TIF_32BIT_ADDR) -#else -# define COMPAT_TEST test_thread_flag(TIF_32BIT) -#endif - -static inline size_t evdev_event_size(void) -{ - return COMPAT_TEST ? - sizeof(struct input_event_compat) : sizeof(struct input_event); -} - -static int evdev_event_from_user(const char __user *buffer, - struct input_event *event) -{ - if (COMPAT_TEST) { - struct input_event_compat compat_event; - - if (copy_from_user(&compat_event, buffer, - sizeof(struct input_event_compat))) - return -EFAULT; - - event->time.tv_sec = compat_event.time.tv_sec; - event->time.tv_usec = compat_event.time.tv_usec; - event->type = compat_event.type; - event->code = compat_event.code; - event->value = compat_event.value; - - } else { - if (copy_from_user(event, buffer, sizeof(struct input_event))) - return -EFAULT; - } - - return 0; -} - -static int evdev_event_to_user(char __user *buffer, - const struct input_event *event) -{ - if (COMPAT_TEST) { - struct input_event_compat compat_event; - - compat_event.time.tv_sec = event->time.tv_sec; - compat_event.time.tv_usec = event->time.tv_usec; - compat_event.type = event->type; - compat_event.code = event->code; - compat_event.value = event->value; - - if (copy_to_user(buffer, &compat_event, - sizeof(struct input_event_compat))) - return -EFAULT; - - } else { - if (copy_to_user(buffer, event, sizeof(struct input_event))) - return -EFAULT; - } - - return 0; -} - -static int evdev_ff_effect_from_user(const char __user *buffer, size_t size, - struct ff_effect *effect) -{ - if (COMPAT_TEST) { - struct ff_effect_compat *compat_effect; - - if (size != sizeof(struct ff_effect_compat)) - return -EINVAL; - - /* - * It so happens that the pointer which needs to be changed - * is the last field in the structure, so we can copy the - * whole thing and replace just the pointer. - */ - - compat_effect = (struct ff_effect_compat *)effect; - - if (copy_from_user(compat_effect, buffer, - sizeof(struct ff_effect_compat))) - return -EFAULT; - - if (compat_effect->type == FF_PERIODIC && - compat_effect->u.periodic.waveform == FF_CUSTOM) - effect->u.periodic.custom_data = - compat_ptr(compat_effect->u.periodic.custom_data); - } else { - if (size != sizeof(struct ff_effect)) - return -EINVAL; - - if (copy_from_user(effect, buffer, sizeof(struct ff_effect))) - return -EFAULT; - } - - return 0; -} - -#else - -static inline size_t evdev_event_size(void) -{ - return sizeof(struct input_event); -} - -static int evdev_event_from_user(const char __user *buffer, - struct input_event *event) -{ - if (copy_from_user(event, buffer, sizeof(struct input_event))) - return -EFAULT; - - return 0; -} - -static int evdev_event_to_user(char __user *buffer, - const struct input_event *event) -{ - if (copy_to_user(buffer, event, sizeof(struct input_event))) - return -EFAULT; - - return 0; -} - -static int evdev_ff_effect_from_user(const char __user *buffer, size_t size, - struct ff_effect *effect) -{ - if (size != sizeof(struct ff_effect)) - return -EINVAL; - - if (copy_from_user(effect, buffer, sizeof(struct ff_effect))) - return -EFAULT; - - return 0; -} - -#endif /* CONFIG_COMPAT */ - static ssize_t evdev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { @@ -491,14 +310,14 @@ static ssize_t evdev_write(struct file *file, const char __user *buffer, while (retval < count) { - if (evdev_event_from_user(buffer + retval, &event)) { + if (input_event_from_user(buffer + retval, &event)) { retval = -EFAULT; goto out; } input_inject_event(&evdev->handle, event.type, event.code, event.value); - retval += evdev_event_size(); + retval += input_event_size(); } out: @@ -532,7 +351,7 @@ static ssize_t evdev_read(struct file *file, char __user *buffer, struct input_event event; int retval; - if (count < evdev_event_size()) + if (count < input_event_size()) return -EINVAL; if (client->head == client->tail && evdev->exist && @@ -547,13 +366,13 @@ static ssize_t evdev_read(struct file *file, char __user *buffer, if (!evdev->exist) return -ENODEV; - while (retval + evdev_event_size() <= count && + while (retval + input_event_size() <= count && evdev_fetch_next_event(client, &event)) { - if (evdev_event_to_user(buffer + retval, &event)) + if (input_event_to_user(buffer + retval, &event)) return -EFAULT; - retval += evdev_event_size(); + retval += input_event_size(); } return retval; @@ -824,7 +643,7 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, if (_IOC_NR(cmd) == _IOC_NR(EVIOCSFF)) { - if (evdev_ff_effect_from_user(p, _IOC_SIZE(cmd), &effect)) + if (input_ff_effect_from_user(p, _IOC_SIZE(cmd), &effect)) return -EFAULT; error = input_ff_upload(dev, &effect, file); diff --git a/drivers/input/input-compat.c b/drivers/input/input-compat.c new file mode 100644 index 000000000000..1accb89ae66f --- /dev/null +++ b/drivers/input/input-compat.c @@ -0,0 +1,135 @@ +/* + * 32bit compatibility wrappers for the input subsystem. + * + * Very heavily based on evdev.c - Copyright (c) 1999-2002 Vojtech Pavlik + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include "input-compat.h" + +#ifdef CONFIG_COMPAT + +int input_event_from_user(const char __user *buffer, + struct input_event *event) +{ + if (INPUT_COMPAT_TEST) { + struct input_event_compat compat_event; + + if (copy_from_user(&compat_event, buffer, + sizeof(struct input_event_compat))) + return -EFAULT; + + event->time.tv_sec = compat_event.time.tv_sec; + event->time.tv_usec = compat_event.time.tv_usec; + event->type = compat_event.type; + event->code = compat_event.code; + event->value = compat_event.value; + + } else { + if (copy_from_user(event, buffer, sizeof(struct input_event))) + return -EFAULT; + } + + return 0; +} + +int input_event_to_user(char __user *buffer, + const struct input_event *event) +{ + if (INPUT_COMPAT_TEST) { + struct input_event_compat compat_event; + + compat_event.time.tv_sec = event->time.tv_sec; + compat_event.time.tv_usec = event->time.tv_usec; + compat_event.type = event->type; + compat_event.code = event->code; + compat_event.value = event->value; + + if (copy_to_user(buffer, &compat_event, + sizeof(struct input_event_compat))) + return -EFAULT; + + } else { + if (copy_to_user(buffer, event, sizeof(struct input_event))) + return -EFAULT; + } + + return 0; +} + +int input_ff_effect_from_user(const char __user *buffer, size_t size, + struct ff_effect *effect) +{ + if (INPUT_COMPAT_TEST) { + struct ff_effect_compat *compat_effect; + + if (size != sizeof(struct ff_effect_compat)) + return -EINVAL; + + /* + * It so happens that the pointer which needs to be changed + * is the last field in the structure, so we can retrieve the + * whole thing and replace just the pointer. + */ + compat_effect = (struct ff_effect_compat *)effect; + + if (copy_from_user(compat_effect, buffer, + sizeof(struct ff_effect_compat))) + return -EFAULT; + + if (compat_effect->type == FF_PERIODIC && + compat_effect->u.periodic.waveform == FF_CUSTOM) + effect->u.periodic.custom_data = + compat_ptr(compat_effect->u.periodic.custom_data); + } else { + if (size != sizeof(struct ff_effect)) + return -EINVAL; + + if (copy_from_user(effect, buffer, sizeof(struct ff_effect))) + return -EFAULT; + } + + return 0; +} + +#else + +int input_event_from_user(const char __user *buffer, + struct input_event *event) +{ + if (copy_from_user(event, buffer, sizeof(struct input_event))) + return -EFAULT; + + return 0; +} + +int input_event_to_user(char __user *buffer, + const struct input_event *event) +{ + if (copy_to_user(buffer, event, sizeof(struct input_event))) + return -EFAULT; + + return 0; +} + +int input_ff_effect_from_user(const char __user *buffer, size_t size, + struct ff_effect *effect) +{ + if (size != sizeof(struct ff_effect)) + return -EINVAL; + + if (copy_from_user(effect, buffer, sizeof(struct ff_effect))) + return -EFAULT; + + return 0; +} + +#endif /* CONFIG_COMPAT */ + +EXPORT_SYMBOL_GPL(input_event_from_user); +EXPORT_SYMBOL_GPL(input_event_to_user); +EXPORT_SYMBOL_GPL(input_ff_effect_from_user); diff --git a/drivers/input/input-compat.h b/drivers/input/input-compat.h new file mode 100644 index 000000000000..47cd9eaee66a --- /dev/null +++ b/drivers/input/input-compat.h @@ -0,0 +1,94 @@ +#ifndef _INPUT_COMPAT_H +#define _INPUT_COMPAT_H + +/* + * 32bit compatibility wrappers for the input subsystem. + * + * Very heavily based on evdev.c - Copyright (c) 1999-2002 Vojtech Pavlik + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include + +#ifdef CONFIG_COMPAT + +/* Note to the author of this code: did it ever occur to + you why the ifdefs are needed? Think about it again. -AK */ +#ifdef CONFIG_X86_64 +# define INPUT_COMPAT_TEST is_compat_task() +#elif defined(CONFIG_IA64) +# define INPUT_COMPAT_TEST IS_IA32_PROCESS(task_pt_regs(current)) +#elif defined(CONFIG_S390) +# define INPUT_COMPAT_TEST test_thread_flag(TIF_31BIT) +#elif defined(CONFIG_MIPS) +# define INPUT_COMPAT_TEST test_thread_flag(TIF_32BIT_ADDR) +#else +# define INPUT_COMPAT_TEST test_thread_flag(TIF_32BIT) +#endif + +struct input_event_compat { + struct compat_timeval time; + __u16 type; + __u16 code; + __s32 value; +}; + +struct ff_periodic_effect_compat { + __u16 waveform; + __u16 period; + __s16 magnitude; + __s16 offset; + __u16 phase; + + struct ff_envelope envelope; + + __u32 custom_len; + compat_uptr_t custom_data; +}; + +struct ff_effect_compat { + __u16 type; + __s16 id; + __u16 direction; + struct ff_trigger trigger; + struct ff_replay replay; + + union { + struct ff_constant_effect constant; + struct ff_ramp_effect ramp; + struct ff_periodic_effect_compat periodic; + struct ff_condition_effect condition[2]; /* One for each axis */ + struct ff_rumble_effect rumble; + } u; +}; + +static inline size_t input_event_size(void) +{ + return INPUT_COMPAT_TEST ? + sizeof(struct input_event_compat) : sizeof(struct input_event); +} + +#else + +static inline size_t input_event_size(void) +{ + return sizeof(struct input_event); +} + +#endif /* CONFIG_COMPAT */ + +int input_event_from_user(const char __user *buffer, + struct input_event *event); + +int input_event_to_user(char __user *buffer, + const struct input_event *event); + +int input_ff_effect_from_user(const char __user *buffer, size_t size, + struct ff_effect *effect); + +#endif /* _INPUT_COMPAT_H */ diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 223d56d5555b..46b7caeb2817 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -37,6 +37,7 @@ #include #include #include +#include "../input-compat.h" static int uinput_dev_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { @@ -78,6 +79,7 @@ static struct uinput_request* uinput_request_find(struct uinput_device *udev, in /* Find an input request, by ID. Returns NULL if the ID isn't valid. */ if (id >= UINPUT_NUM_REQUESTS || id < 0) return NULL; + return udev->requests[id]; } @@ -127,6 +129,17 @@ static int uinput_dev_upload_effect(struct input_dev *dev, struct ff_effect *eff struct uinput_request request; int retval; + /* + * uinput driver does not currently support periodic effects with + * custom waveform since it does not have a way to pass buffer of + * samples (custom_data) to userspace. If ever there is a device + * supporting custom waveforms we would need to define an additional + * ioctl (UI_UPLOAD_SAMPLES) but for now we just bail out. + */ + if (effect->type == FF_PERIODIC && + effect->u.periodic.waveform == FF_CUSTOM) + return -EINVAL; + request.id = -1; init_completion(&request.done); request.code = UI_FF_UPLOAD; @@ -353,15 +366,15 @@ static inline ssize_t uinput_inject_event(struct uinput_device *udev, const char { struct input_event ev; - if (count != sizeof(struct input_event)) + if (count < input_event_size()) return -EINVAL; - if (copy_from_user(&ev, buffer, sizeof(struct input_event))) + if (input_event_from_user(buffer, &ev)) return -EFAULT; input_event(udev->dev, ev.type, ev.code, ev.value); - return sizeof(struct input_event); + return input_event_size(); } static ssize_t uinput_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) @@ -407,13 +420,13 @@ static ssize_t uinput_read(struct file *file, char __user *buffer, size_t count, goto out; } - while (udev->head != udev->tail && retval + sizeof(struct input_event) <= count) { - if (copy_to_user(buffer + retval, &udev->buff[udev->tail], sizeof(struct input_event))) { + while (udev->head != udev->tail && retval + input_event_size() <= count) { + if (input_event_to_user(buffer + retval, &udev->buff[udev->tail])) { retval = -EFAULT; goto out; } udev->tail = (udev->tail + 1) % UINPUT_BUFFER_SIZE; - retval += sizeof(struct input_event); + retval += input_event_size(); } out: @@ -444,6 +457,93 @@ static int uinput_release(struct inode *inode, struct file *file) return 0; } +#ifdef CONFIG_COMPAT +struct uinput_ff_upload_compat { + int request_id; + int retval; + struct ff_effect_compat effect; + struct ff_effect_compat old; +}; + +static int uinput_ff_upload_to_user(char __user *buffer, + const struct uinput_ff_upload *ff_up) +{ + if (INPUT_COMPAT_TEST) { + struct uinput_ff_upload_compat ff_up_compat; + + ff_up_compat.request_id = ff_up->request_id; + ff_up_compat.retval = ff_up->retval; + /* + * It so happens that the pointer that gives us the trouble + * is the last field in the structure. Since we don't support + * custom waveforms in uinput anyway we can just copy the whole + * thing (to the compat size) and ignore the pointer. + */ + memcpy(&ff_up_compat.effect, &ff_up->effect, + sizeof(struct ff_effect_compat)); + memcpy(&ff_up_compat.old, &ff_up->old, + sizeof(struct ff_effect_compat)); + + if (copy_to_user(buffer, &ff_up_compat, + sizeof(struct uinput_ff_upload_compat))) + return -EFAULT; + } else { + if (copy_to_user(buffer, ff_up, + sizeof(struct uinput_ff_upload))) + return -EFAULT; + } + + return 0; +} + +static int uinput_ff_upload_from_user(const char __user *buffer, + struct uinput_ff_upload *ff_up) +{ + if (INPUT_COMPAT_TEST) { + struct uinput_ff_upload_compat ff_up_compat; + + if (copy_from_user(&ff_up_compat, buffer, + sizeof(struct uinput_ff_upload_compat))) + return -EFAULT; + + ff_up->request_id = ff_up_compat.request_id; + ff_up->retval = ff_up_compat.retval; + memcpy(&ff_up->effect, &ff_up_compat.effect, + sizeof(struct ff_effect_compat)); + memcpy(&ff_up->old, &ff_up_compat.old, + sizeof(struct ff_effect_compat)); + + } else { + if (copy_from_user(ff_up, buffer, + sizeof(struct uinput_ff_upload))) + return -EFAULT; + } + + return 0; +} + +#else + +static int uinput_ff_upload_to_user(char __user *buffer, + const struct uinput_ff_upload *ff_up) +{ + if (copy_to_user(buffer, ff_up, sizeof(struct uinput_ff_upload))) + return -EFAULT; + + return 0; +} + +static int uinput_ff_upload_from_user(const char __user *buffer, + struct uinput_ff_upload *ff_up) +{ + if (copy_from_user(ff_up, buffer, sizeof(struct uinput_ff_upload))) + return -EFAULT; + + return 0; +} + +#endif + #define uinput_set_bit(_arg, _bit, _max) \ ({ \ int __ret = 0; \ @@ -455,19 +555,17 @@ static int uinput_release(struct inode *inode, struct file *file) __ret; \ }) -static long uinput_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +static long uinput_ioctl_handler(struct file *file, unsigned int cmd, + unsigned long arg, void __user *p) { int retval; - struct uinput_device *udev; - void __user *p = (void __user *)arg; + struct uinput_device *udev = file->private_data; struct uinput_ff_upload ff_up; struct uinput_ff_erase ff_erase; struct uinput_request *req; int length; char *phys; - udev = file->private_data; - retval = mutex_lock_interruptible(&udev->mutex); if (retval) return retval; @@ -549,26 +647,24 @@ static long uinput_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; case UI_BEGIN_FF_UPLOAD: - if (copy_from_user(&ff_up, p, sizeof(ff_up))) { - retval = -EFAULT; + retval = uinput_ff_upload_from_user(p, &ff_up); + if (retval) break; - } + req = uinput_request_find(udev, ff_up.request_id); - if (!(req && req->code == UI_FF_UPLOAD && req->u.upload.effect)) { + if (!req || req->code != UI_FF_UPLOAD || !req->u.upload.effect) { retval = -EINVAL; break; } + ff_up.retval = 0; - memcpy(&ff_up.effect, req->u.upload.effect, sizeof(struct ff_effect)); + ff_up.effect = *req->u.upload.effect; if (req->u.upload.old) - memcpy(&ff_up.old, req->u.upload.old, sizeof(struct ff_effect)); + ff_up.old = *req->u.upload.old; else memset(&ff_up.old, 0, sizeof(struct ff_effect)); - if (copy_to_user(p, &ff_up, sizeof(ff_up))) { - retval = -EFAULT; - break; - } + retval = uinput_ff_upload_to_user(p, &ff_up); break; case UI_BEGIN_FF_ERASE: @@ -576,29 +672,34 @@ static long uinput_ioctl(struct file *file, unsigned int cmd, unsigned long arg) retval = -EFAULT; break; } + req = uinput_request_find(udev, ff_erase.request_id); - if (!(req && req->code == UI_FF_ERASE)) { + if (!req || req->code != UI_FF_ERASE) { retval = -EINVAL; break; } + ff_erase.retval = 0; ff_erase.effect_id = req->u.effect_id; if (copy_to_user(p, &ff_erase, sizeof(ff_erase))) { retval = -EFAULT; break; } + break; case UI_END_FF_UPLOAD: - if (copy_from_user(&ff_up, p, sizeof(ff_up))) { - retval = -EFAULT; + retval = uinput_ff_upload_from_user(p, &ff_up); + if (retval) break; - } + req = uinput_request_find(udev, ff_up.request_id); - if (!(req && req->code == UI_FF_UPLOAD && req->u.upload.effect)) { + if (!req || req->code != UI_FF_UPLOAD || + !req->u.upload.effect) { retval = -EINVAL; break; } + req->retval = ff_up.retval; uinput_request_done(udev, req); break; @@ -608,11 +709,13 @@ static long uinput_ioctl(struct file *file, unsigned int cmd, unsigned long arg) retval = -EFAULT; break; } + req = uinput_request_find(udev, ff_erase.request_id); - if (!(req && req->code == UI_FF_ERASE)) { + if (!req || req->code != UI_FF_ERASE) { retval = -EINVAL; break; } + req->retval = ff_erase.retval; uinput_request_done(udev, req); break; @@ -626,6 +729,18 @@ static long uinput_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return retval; } +static long uinput_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return uinput_ioctl_handler(file, cmd, arg, (void __user *)arg); +} + +#ifdef CONFIG_COMPAT +static long uinput_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return uinput_ioctl_handler(file, cmd, arg, compat_ptr(arg)); +} +#endif + static const struct file_operations uinput_fops = { .owner = THIS_MODULE, .open = uinput_open, @@ -634,6 +749,9 @@ static const struct file_operations uinput_fops = { .write = uinput_write, .poll = uinput_poll, .unlocked_ioctl = uinput_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = uinput_compat_ioctl, +#endif }; static struct miscdevice uinput_misc = { From b4a3ac9a3c26e82e31f8a8f55cb014bc580b1216 Mon Sep 17 00:00:00 2001 From: John Linn Date: Mon, 27 Oct 2008 22:17:22 -0400 Subject: [PATCH 0747/2985] Input: xilinx_ps2 - various cleanups Review comments were incorporated to improve the driver. 1. Some data was eliminated that was not needed. 2. Renaming of variables for clarity. 3. Removed unneeded type casting. 4. Changed to use dev_err rather than other I/O. 5. Merged together some functions. 6. Added kerneldoc format to functions. Signed-off-by: Sadanand Signed-off-by: John Linn Acked-by: Peter Korsgaard Acked-by: Grant Likely Signed-off-by: Dmitry Torokhov --- drivers/input/serio/xilinx_ps2.c | 218 ++++++++++++++++--------------- 1 file changed, 112 insertions(+), 106 deletions(-) diff --git a/drivers/input/serio/xilinx_ps2.c b/drivers/input/serio/xilinx_ps2.c index 765007899d9a..ebb22f88c842 100644 --- a/drivers/input/serio/xilinx_ps2.c +++ b/drivers/input/serio/xilinx_ps2.c @@ -58,23 +58,20 @@ /* Mask for all the Receive Interrupts */ #define XPS2_IPIXR_RX_ALL (XPS2_IPIXR_RX_OVF | XPS2_IPIXR_RX_ERR | \ - XPS2_IPIXR_RX_FULL) + XPS2_IPIXR_RX_FULL) /* Mask for all the Interrupts */ #define XPS2_IPIXR_ALL (XPS2_IPIXR_TX_ALL | XPS2_IPIXR_RX_ALL | \ - XPS2_IPIXR_WDT_TOUT) + XPS2_IPIXR_WDT_TOUT) /* Global Interrupt Enable mask */ #define XPS2_GIER_GIE_MASK 0x80000000 struct xps2data { int irq; - u32 phys_addr; - u32 remap_size; spinlock_t lock; - u8 rxb; /* Rx buffer */ void __iomem *base_address; /* virt. address of control registers */ - unsigned int dfl; + unsigned int flags; struct serio serio; /* serio */ }; @@ -82,8 +79,13 @@ struct xps2data { /* XPS PS/2 data transmission calls */ /************************************/ -/* - * xps2_recv() will attempt to receive a byte of data from the PS/2 port. +/** + * xps2_recv() - attempts to receive a byte from the PS/2 port. + * @drvdata: pointer to ps2 device private data structure + * @byte: address where the read data will be copied + * + * If there is any data available in the PS/2 receiver, this functions reads + * the data, otherwise it returns error. */ static int xps2_recv(struct xps2data *drvdata, u8 *byte) { @@ -116,33 +118,27 @@ static irqreturn_t xps2_interrupt(int irq, void *dev_id) /* Check which interrupt is active */ if (intr_sr & XPS2_IPIXR_RX_OVF) - printk(KERN_WARNING "%s: receive overrun error\n", - drvdata->serio.name); + dev_warn(drvdata->serio.dev.parent, "receive overrun error\n"); if (intr_sr & XPS2_IPIXR_RX_ERR) - drvdata->dfl |= SERIO_PARITY; + drvdata->flags |= SERIO_PARITY; if (intr_sr & (XPS2_IPIXR_TX_NOACK | XPS2_IPIXR_WDT_TOUT)) - drvdata->dfl |= SERIO_TIMEOUT; + drvdata->flags |= SERIO_TIMEOUT; if (intr_sr & XPS2_IPIXR_RX_FULL) { - status = xps2_recv(drvdata, &drvdata->rxb); + status = xps2_recv(drvdata, &c); /* Error, if a byte is not received */ if (status) { - printk(KERN_ERR - "%s: wrong rcvd byte count (%d)\n", - drvdata->serio.name, status); + dev_err(drvdata->serio.dev.parent, + "wrong rcvd byte count (%d)\n", status); } else { - c = drvdata->rxb; - serio_interrupt(&drvdata->serio, c, drvdata->dfl); - drvdata->dfl = 0; + serio_interrupt(&drvdata->serio, c, drvdata->flags); + drvdata->flags = 0; } } - if (intr_sr & XPS2_IPIXR_TX_ACK) - drvdata->dfl = 0; - return IRQ_HANDLED; } @@ -150,8 +146,15 @@ static irqreturn_t xps2_interrupt(int irq, void *dev_id) /* serio callbacks */ /*******************/ -/* - * sxps2_write() sends a byte out through the PS/2 interface. +/** + * sxps2_write() - sends a byte out through the PS/2 port. + * @pserio: pointer to the serio structure of the PS/2 port + * @c: data that needs to be written to the PS/2 port + * + * This function checks if the PS/2 transmitter is empty and sends a byte. + * Otherwise it returns error. Transmission fails only when nothing is connected + * to the PS/2 port. Thats why, we do not try to resend the data in case of a + * failure. */ static int sxps2_write(struct serio *pserio, unsigned char c) { @@ -174,33 +177,39 @@ static int sxps2_write(struct serio *pserio, unsigned char c) return status; } -/* - * sxps2_open() is called when a port is open by the higher layer. +/** + * sxps2_open() - called when a port is opened by the higher layer. + * @pserio: pointer to the serio structure of the PS/2 device + * + * This function requests irq and enables interrupts for the PS/2 device. */ static int sxps2_open(struct serio *pserio) { struct xps2data *drvdata = pserio->port_data; - int retval; + int error; + u8 c; - retval = request_irq(drvdata->irq, &xps2_interrupt, 0, + error = request_irq(drvdata->irq, &xps2_interrupt, 0, DRIVER_NAME, drvdata); - if (retval) { - printk(KERN_ERR - "%s: Couldn't allocate interrupt %d\n", - drvdata->serio.name, drvdata->irq); - return retval; + if (error) { + dev_err(drvdata->serio.dev.parent, + "Couldn't allocate interrupt %d\n", drvdata->irq); + return error; } /* start reception by enabling the interrupts */ out_be32(drvdata->base_address + XPS2_GIER_OFFSET, XPS2_GIER_GIE_MASK); out_be32(drvdata->base_address + XPS2_IPIER_OFFSET, XPS2_IPIXR_RX_ALL); - (void)xps2_recv(drvdata, &drvdata->rxb); + (void)xps2_recv(drvdata, &c); return 0; /* success */ } -/* - * sxps2_close() frees the interrupt. +/** + * sxps2_close() - frees the interrupt. + * @pserio: pointer to the serio structure of the PS/2 device + * + * This function frees the irq and disables interrupts for the PS/2 device. */ static void sxps2_close(struct serio *pserio) { @@ -212,24 +221,41 @@ static void sxps2_close(struct serio *pserio) free_irq(drvdata->irq, drvdata); } -/*********************/ -/* Device setup code */ -/*********************/ - -static int xps2_setup(struct device *dev, struct resource *regs_res, - struct resource *irq_res) +/** + * xps2_of_probe - probe method for the PS/2 device. + * @of_dev: pointer to OF device structure + * @match: pointer to the stucture used for matching a device + * + * This function probes the PS/2 device in the device tree. + * It initializes the driver data structure and the hardware. + * It returns 0, if the driver is bound to the PS/2 device, or a negative + * value if there is an error. + */ +static int __devinit xps2_of_probe(struct of_device *ofdev, + const struct of_device_id *match) { + struct resource r_irq; /* Interrupt resources */ + struct resource r_mem; /* IO mem resources */ struct xps2data *drvdata; struct serio *serio; - unsigned long remap_size; - int retval; + struct device *dev = &ofdev->dev; + resource_size_t remap_size, phys_addr; + int error; - if (!dev) - return -EINVAL; + dev_info(dev, "Device Tree Probing \'%s\'\n", + ofdev->node->name); - if (!regs_res || !irq_res) { - dev_err(dev, "IO resource(s) not found\n"); - return -EINVAL; + /* Get iospace for the device */ + error = of_address_to_resource(ofdev->node, 0, &r_mem); + if (error) { + dev_err(dev, "invalid address\n"); + return error; + } + + /* Get IRQ for the device */ + if (of_irq_to_resource(ofdev->node, 0, &r_irq) == NO_IRQ) { + dev_err(dev, "no IRQ found\n"); + return -ENODEV; } drvdata = kzalloc(sizeof(struct xps2data), GFP_KERNEL); @@ -241,24 +267,23 @@ static int xps2_setup(struct device *dev, struct resource *regs_res, dev_set_drvdata(dev, drvdata); spin_lock_init(&drvdata->lock); - drvdata->irq = irq_res->start; + drvdata->irq = r_irq.start; - remap_size = regs_res->end - regs_res->start + 1; - if (!request_mem_region(regs_res->start, remap_size, DRIVER_NAME)) { - dev_err(dev, "Couldn't lock memory region at 0x%08X\n", - (unsigned int)regs_res->start); - retval = -EBUSY; + phys_addr = r_mem.start; + remap_size = r_mem.end - r_mem.start + 1; + if (!request_mem_region(phys_addr, remap_size, DRIVER_NAME)) { + dev_err(dev, "Couldn't lock memory region at 0x%08llX\n", + (unsigned long long)phys_addr); + error = -EBUSY; goto failed1; } /* Fill in configuration data and add them to the list */ - drvdata->phys_addr = regs_res->start; - drvdata->remap_size = remap_size; - drvdata->base_address = ioremap(regs_res->start, remap_size); + drvdata->base_address = ioremap(phys_addr, remap_size); if (drvdata->base_address == NULL) { - dev_err(dev, "Couldn't ioremap memory at 0x%08X\n", - (unsigned int)regs_res->start); - retval = -EFAULT; + dev_err(dev, "Couldn't ioremap memory at 0x%08llX\n", + (unsigned long long)phys_addr); + error = -EFAULT; goto failed2; } @@ -269,8 +294,9 @@ static int xps2_setup(struct device *dev, struct resource *regs_res, * we have the PS2 in a good state */ out_be32(drvdata->base_address + XPS2_SRST_OFFSET, XPS2_SRST_RESET); - dev_info(dev, "Xilinx PS2 at 0x%08X mapped to 0x%p, irq=%d\n", - drvdata->phys_addr, drvdata->base_address, drvdata->irq); + dev_info(dev, "Xilinx PS2 at 0x%08llX mapped to 0x%p, irq=%d\n", + (unsigned long long)phys_addr, drvdata->base_address, + drvdata->irq); serio = &drvdata->serio; serio->id.type = SERIO_8042; @@ -280,71 +306,51 @@ static int xps2_setup(struct device *dev, struct resource *regs_res, serio->port_data = drvdata; serio->dev.parent = dev; snprintf(serio->name, sizeof(serio->name), - "Xilinx XPS PS/2 at %08X", drvdata->phys_addr); + "Xilinx XPS PS/2 at %08llX", (unsigned long long)phys_addr); snprintf(serio->phys, sizeof(serio->phys), - "xilinxps2/serio at %08X", drvdata->phys_addr); + "xilinxps2/serio at %08llX", (unsigned long long)phys_addr); + serio_register_port(serio); return 0; /* success */ failed2: - release_mem_region(regs_res->start, remap_size); + release_mem_region(phys_addr, remap_size); failed1: kfree(drvdata); dev_set_drvdata(dev, NULL); - return retval; -} - -/***************************/ -/* OF Platform Bus Support */ -/***************************/ - -static int __devinit xps2_of_probe(struct of_device *ofdev, const struct - of_device_id * match) -{ - struct resource r_irq; /* Interrupt resources */ - struct resource r_mem; /* IO mem resources */ - int rc = 0; - - printk(KERN_INFO "Device Tree Probing \'%s\'\n", - ofdev->node->name); - - /* Get iospace for the device */ - rc = of_address_to_resource(ofdev->node, 0, &r_mem); - if (rc) { - dev_err(&ofdev->dev, "invalid address\n"); - return rc; - } - - /* Get IRQ for the device */ - rc = of_irq_to_resource(ofdev->node, 0, &r_irq); - if (rc == NO_IRQ) { - dev_err(&ofdev->dev, "no IRQ found\n"); - return rc; - } - - return xps2_setup(&ofdev->dev, &r_mem, &r_irq); + return error; } +/** + * xps2_of_remove - unbinds the driver from the PS/2 device. + * @of_dev: pointer to OF device structure + * + * This function is called if a device is physically removed from the system or + * if the driver module is being unloaded. It frees any resources allocated to + * the device. + */ static int __devexit xps2_of_remove(struct of_device *of_dev) { struct device *dev = &of_dev->dev; - struct xps2data *drvdata; - - if (!dev) - return -EINVAL; - - drvdata = dev_get_drvdata(dev); + struct xps2data *drvdata = dev_get_drvdata(dev); + struct resource r_mem; /* IO mem resources */ serio_unregister_port(&drvdata->serio); iounmap(drvdata->base_address); - release_mem_region(drvdata->phys_addr, drvdata->remap_size); + + /* Get iospace of the device */ + if (of_address_to_resource(of_dev->node, 0, &r_mem)) + dev_err(dev, "invalid address\n"); + else + release_mem_region(r_mem.start, r_mem.end - r_mem.start + 1); + kfree(drvdata); dev_set_drvdata(dev, NULL); - return 0; /* success */ + return 0; } /* Match table for of_platform binding */ From b67b4b117746aef686e527c3205792db0f2c9e16 Mon Sep 17 00:00:00 2001 From: Dominic Curran Date: Mon, 27 Oct 2008 22:30:53 -0400 Subject: [PATCH 0748/2985] Input: gpio-keys - add flag to allow auto repeat This patch adds a flag to gpio-key driver to turn on the input subsystems auto repeat feature if needed. Signed-off-by: Dominic Curran Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/gpio_keys.c | 4 ++++ include/linux/gpio_keys.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 05f3f43582c2..ad67d763fdbd 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -98,6 +98,10 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) input->id.product = 0x0001; input->id.version = 0x0100; + /* Enable auto repeat feature of Linux input subsystem */ + if (pdata->rep) + __set_bit(EV_REP, input->evbit); + ddata->input = input; for (i = 0; i < pdata->nbuttons; i++) { diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index ec6ecd74781d..1289fa7623ca 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -15,6 +15,7 @@ struct gpio_keys_button { struct gpio_keys_platform_data { struct gpio_keys_button *buttons; int nbuttons; + unsigned int rep:1; /* enable input subsystem auto repeat */ }; #endif From dc26aec25d1a4e2690df166dbe843344728994ce Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Tue, 18 Nov 2008 17:48:22 +0800 Subject: [PATCH 0749/2985] Blackfin arch: BF538/9 Linux kernel Support Add supporing for Blackfin BF538 and BF539 processors. Signed-off-by: Michael Hennerich Signed-off-by: Bryan Wu --- arch/blackfin/Kconfig | 26 +- arch/blackfin/Makefile | 4 + arch/blackfin/configs/BF538-EZKIT_defconfig | 1288 +++++ arch/blackfin/include/asm/gpio.h | 68 + arch/blackfin/kernel/bfin_gpio.c | 6 +- arch/blackfin/mach-bf538/Kconfig | 164 + arch/blackfin/mach-bf538/Makefile | 7 + arch/blackfin/mach-bf538/boards/Kconfig | 12 + arch/blackfin/mach-bf538/boards/Makefile | 5 + arch/blackfin/mach-bf538/boards/ezkit.c | 538 +++ arch/blackfin/mach-bf538/dma.c | 161 + arch/blackfin/mach-bf538/head.S | 137 + .../mach-bf538/include/mach/anomaly.h | 121 + arch/blackfin/mach-bf538/include/mach/bf538.h | 124 + .../mach-bf538/include/mach/bfin_serial_5xx.h | 183 + .../mach-bf538/include/mach/bfin_sir.h | 159 + .../mach-bf538/include/mach/blackfin.h | 100 + .../mach-bf538/include/mach/cdefBF538.h | 2105 ++++++++ .../mach-bf538/include/mach/cdefBF539.h | 240 + .../mach-bf538/include/mach/defBF539.h | 4243 +++++++++++++++++ arch/blackfin/mach-bf538/include/mach/dma.h | 65 + arch/blackfin/mach-bf538/include/mach/irq.h | 211 + .../mach-bf538/include/mach/mem_init.h | 303 ++ .../mach-bf538/include/mach/mem_map.h | 107 + .../mach-bf538/include/mach/portmux.h | 106 + arch/blackfin/mach-bf538/ints-priority.c | 94 + arch/blackfin/mach-common/dpmc_modes.S | 3 +- arch/blackfin/mach-common/ints-priority.c | 20 +- arch/blackfin/mach-common/pm.c | 3 +- 29 files changed, 10587 insertions(+), 16 deletions(-) create mode 100644 arch/blackfin/configs/BF538-EZKIT_defconfig create mode 100644 arch/blackfin/mach-bf538/Kconfig create mode 100644 arch/blackfin/mach-bf538/Makefile create mode 100644 arch/blackfin/mach-bf538/boards/Kconfig create mode 100644 arch/blackfin/mach-bf538/boards/Makefile create mode 100644 arch/blackfin/mach-bf538/boards/ezkit.c create mode 100644 arch/blackfin/mach-bf538/dma.c create mode 100644 arch/blackfin/mach-bf538/head.S create mode 100644 arch/blackfin/mach-bf538/include/mach/anomaly.h create mode 100644 arch/blackfin/mach-bf538/include/mach/bf538.h create mode 100644 arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h create mode 100644 arch/blackfin/mach-bf538/include/mach/bfin_sir.h create mode 100644 arch/blackfin/mach-bf538/include/mach/blackfin.h create mode 100644 arch/blackfin/mach-bf538/include/mach/cdefBF538.h create mode 100644 arch/blackfin/mach-bf538/include/mach/cdefBF539.h create mode 100644 arch/blackfin/mach-bf538/include/mach/defBF539.h create mode 100644 arch/blackfin/mach-bf538/include/mach/dma.h create mode 100644 arch/blackfin/mach-bf538/include/mach/irq.h create mode 100644 arch/blackfin/mach-bf538/include/mach/mem_init.h create mode 100644 arch/blackfin/mach-bf538/include/mach/mem_map.h create mode 100644 arch/blackfin/mach-bf538/include/mach/portmux.h create mode 100644 arch/blackfin/mach-bf538/ints-priority.c diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index 29e71ed6b8a7..ac76baac1df3 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -137,6 +137,16 @@ config BF537 help BF537 Processor Support. +config BF538 + bool "BF538" + help + BF538 Processor Support. + +config BF539 + bool "BF539" + help + BF539 Processor Support. + config BF542 bool "BF542" help @@ -174,12 +184,13 @@ config BF_REV_MIN default 0 if (BF52x || BF54x) default 2 if (BF537 || BF536 || BF534) default 3 if (BF561 ||BF533 || BF532 || BF531) + default 4 if (BF538 || BF539) config BF_REV_MAX int default 2 if (BF52x || BF54x) default 3 if (BF537 || BF536 || BF534) - default 5 if (BF561) + default 5 if (BF561|| BF538 || BF539) default 6 if (BF533 || BF532 || BF531) choice @@ -206,11 +217,11 @@ config BF_REV_0_3 config BF_REV_0_4 bool "0.4" - depends on (BF561 || BF533 || BF532 || BF531) + depends on (BF561 || BF533 || BF532 || BF531 || BF538 || BF539) config BF_REV_0_5 bool "0.5" - depends on (BF561 || BF533 || BF532 || BF531) + depends on (BF561 || BF533 || BF532 || BF531 || BF538 || BF539) config BF_REV_0_6 bool "0.6" @@ -258,7 +269,7 @@ config MEM_MT48LC16M16A2TG_75 config MEM_MT48LC32M8A2_75 bool - depends on (BFIN537_STAMP || PNAV10) + depends on (BFIN537_STAMP || PNAV10 || BFIN538_EZKIT) default y config MEM_MT48LC8M32B2B5_7 @@ -275,6 +286,7 @@ source "arch/blackfin/mach-bf527/Kconfig" source "arch/blackfin/mach-bf533/Kconfig" source "arch/blackfin/mach-bf561/Kconfig" source "arch/blackfin/mach-bf537/Kconfig" +source "arch/blackfin/mach-bf538/Kconfig" source "arch/blackfin/mach-bf548/Kconfig" menu "Board customizations" @@ -318,7 +330,7 @@ config CLKIN_HZ int "Frequency of the crystal on the board in Hz" default "11059200" if BFIN533_STAMP default "27000000" if BFIN533_EZKIT - default "25000000" if (BFIN537_STAMP || BFIN527_EZKIT || H8606_HVSISTEMAS || BLACKSTAMP || BFIN526_EZBRD) + default "25000000" if (BFIN537_STAMP || BFIN527_EZKIT || H8606_HVSISTEMAS || BLACKSTAMP || BFIN526_EZBRD || BFIN538_EZKIT) default "30000000" if BFIN561_EZKIT default "24576000" if PNAV10 default "10000000" if BFIN532_IP0X @@ -354,7 +366,7 @@ config VCO_MULT range 1 64 default "22" if BFIN533_EZKIT default "45" if BFIN533_STAMP - default "20" if (BFIN537_STAMP || BFIN527_EZKIT || BFIN548_EZKIT || BFIN548_BLUETECHNIX_CM) + default "20" if (BFIN537_STAMP || BFIN527_EZKIT || BFIN548_EZKIT || BFIN548_BLUETECHNIX_CM || BFIN538_EZKIT) default "22" if BFIN533_BLUETECHNIX_CM default "20" if (BFIN537_BLUETECHNIX_CM || BFIN527_BLUETECHNIX_CM || BFIN561_BLUETECHNIX_CM) default "20" if BFIN561_EZKIT @@ -716,7 +728,7 @@ config BFIN_GPTIMERS config BFIN_DMA_5XX bool "Enable DMA Support" - depends on (BF52x || BF53x || BF561 || BF54x) + depends on (BF52x || BF53x || BF561 || BF54x || BF538 || BF539) default y help DMA driver for BF5xx. diff --git a/arch/blackfin/Makefile b/arch/blackfin/Makefile index 6bf50977850c..ce45df3708e8 100644 --- a/arch/blackfin/Makefile +++ b/arch/blackfin/Makefile @@ -33,6 +33,8 @@ machine-$(CONFIG_BF533) := bf533 machine-$(CONFIG_BF534) := bf537 machine-$(CONFIG_BF536) := bf537 machine-$(CONFIG_BF537) := bf537 +machine-$(CONFIG_BF538) := bf538 +machine-$(CONFIG_BF539) := bf538 machine-$(CONFIG_BF542) := bf548 machine-$(CONFIG_BF544) := bf548 machine-$(CONFIG_BF547) := bf548 @@ -54,6 +56,8 @@ cpu-$(CONFIG_BF533) := bf533 cpu-$(CONFIG_BF534) := bf534 cpu-$(CONFIG_BF536) := bf536 cpu-$(CONFIG_BF537) := bf537 +cpu-$(CONFIG_BF538) := bf538 +cpu-$(CONFIG_BF539) := bf539 cpu-$(CONFIG_BF542) := bf542 cpu-$(CONFIG_BF544) := bf544 cpu-$(CONFIG_BF547) := bf547 diff --git a/arch/blackfin/configs/BF538-EZKIT_defconfig b/arch/blackfin/configs/BF538-EZKIT_defconfig new file mode 100644 index 000000000000..b780777ae740 --- /dev/null +++ b/arch/blackfin/configs/BF538-EZKIT_defconfig @@ -0,0 +1,1288 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.26.5 +# Fri Oct 17 15:19:20 2008 +# +# CONFIG_MMU is not set +# CONFIG_FPU is not set +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set +CONFIG_BLACKFIN=y +CONFIG_ZONE_DMA=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_GPIO=y +CONFIG_FORCE_MAX_ZONEORDER=14 +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_CGROUPS is not set +# CONFIG_GROUP_SCHED is not set +# CONFIG_SYSFS_DEPRECATED is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set +# CONFIG_RELAY is not set +# CONFIG_NAMESPACES is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y +CONFIG_EMBEDDED=y +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_COMPAT_BRK=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_HAVE_KPROBES is not set +# CONFIG_HAVE_KRETPROBES is not set +# CONFIG_HAVE_DMA_ATTRS is not set +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_TINY_SHMEM=y +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_BLOCK=y +# CONFIG_LBD is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set +# CONFIG_BLK_DEV_BSG is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +# CONFIG_IOSCHED_DEADLINE is not set +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" +CONFIG_CLASSIC_RCU=y +# CONFIG_PREEMPT_NONE is not set +CONFIG_PREEMPT_VOLUNTARY=y +# CONFIG_PREEMPT is not set + +# +# Blackfin Processor Options +# + +# +# Processor and Board Settings +# +# CONFIG_BF522 is not set +# CONFIG_BF523 is not set +# CONFIG_BF524 is not set +# CONFIG_BF525 is not set +# CONFIG_BF526 is not set +# CONFIG_BF527 is not set +# CONFIG_BF531 is not set +# CONFIG_BF532 is not set +# CONFIG_BF533 is not set +# CONFIG_BF534 is not set +# CONFIG_BF536 is not set +# CONFIG_BF537 is not set +CONFIG_BF538=y +# CONFIG_BF539 is not set +# CONFIG_BF542 is not set +# CONFIG_BF544 is not set +# CONFIG_BF547 is not set +# CONFIG_BF548 is not set +# CONFIG_BF549 is not set +# CONFIG_BF561 is not set +CONFIG_BF_REV_MIN=4 +CONFIG_BF_REV_MAX=5 +# CONFIG_BF_REV_0_0 is not set +# CONFIG_BF_REV_0_1 is not set +# CONFIG_BF_REV_0_2 is not set +# CONFIG_BF_REV_0_3 is not set +CONFIG_BF_REV_0_4=y +# CONFIG_BF_REV_0_5 is not set +# CONFIG_BF_REV_0_6 is not set +# CONFIG_BF_REV_ANY is not set +# CONFIG_BF_REV_NONE is not set +CONFIG_MEM_MT48LC32M8A2_75=y +CONFIG_IRQ_PLL_WAKEUP=7 +CONFIG_IRQ_DMA0_ERROR=7 +CONFIG_IRQ_PPI_ERROR=7 +CONFIG_IRQ_SPORT0_ERROR=7 +CONFIG_IRQ_SPORT1_ERROR=7 +CONFIG_IRQ_UART0_ERROR=7 +CONFIG_IRQ_UART1_ERROR=7 +CONFIG_IRQ_RTC=8 +CONFIG_IRQ_PPI=8 +CONFIG_IRQ_SPORT0_RX=9 +CONFIG_IRQ_SPORT0_TX=9 +CONFIG_IRQ_SPORT1_RX=9 +CONFIG_IRQ_SPORT1_TX=9 +CONFIG_IRQ_UART0_RX=10 +CONFIG_IRQ_UART0_TX=10 +CONFIG_IRQ_UART1_RX=10 +CONFIG_IRQ_UART1_TX=10 +CONFIG_IRQ_TMR0=12 +CONFIG_IRQ_TMR1=12 +CONFIG_IRQ_TMR2=12 +CONFIG_IRQ_WATCH=13 +CONFIG_IRQ_PORTF_INTA=12 +CONFIG_IRQ_PORTF_INTB=12 +CONFIG_IRQ_DMA1_ERROR=7 +CONFIG_IRQ_CAN_RX=11 +CONFIG_IRQ_CAN_TX=11 +CONFIG_BFIN538_EZKIT=y + +# +# BF538 Specific Configuration +# + +# +# Interrupt Priority Assignment +# + +# +# Priority +# +CONFIG_IRQ_SPI0_ERROR=7 +CONFIG_IRQ_SPI0=10 +CONFIG_IRQ_MEM0_DMA0=13 +CONFIG_IRQ_MEM0_DMA1=13 +CONFIG_IRQ_SPORT2_ERROR=7 +CONFIG_IRQ_SPORT3_ERROR=7 +CONFIG_IRQ_SPI1_ERROR=7 +CONFIG_IRQ_SPI2_ERROR=7 +CONFIG_IRQ_UART2_ERROR=7 +CONFIG_IRQ_CAN_ERROR=7 +CONFIG_IRQ_SPORT2_RX=9 +CONFIG_IRQ_SPORT2_TX=9 +CONFIG_IRQ_SPORT3_RX=9 +CONFIG_IRQ_SPORT3_TX=9 +CONFIG_IRQ_SPI1=10 +CONFIG_IRQ_SPI2=10 +CONFIG_IRQ_UART2_RX=10 +CONFIG_IRQ_UART2_TX=10 +CONFIG_IRQ_TWI0=11 +CONFIG_IRQ_TWI1=11 +CONFIG_IRQ_MEM1_DMA0=13 +CONFIG_IRQ_MEM1_DMA1=13 + +# +# Board customizations +# +# CONFIG_CMDLINE_BOOL is not set +CONFIG_BOOT_LOAD=0x1000 +CONFIG_ROM_BASE=0x20040000 + +# +# Clock/PLL Setup +# +CONFIG_CLKIN_HZ=25000000 +# CONFIG_BFIN_KERNEL_CLOCK is not set +CONFIG_MAX_MEM_SIZE=512 +CONFIG_MAX_VCO_HZ=533333333 +CONFIG_MIN_VCO_HZ=50000000 +CONFIG_MAX_SCLK_HZ=133333333 +CONFIG_MIN_SCLK_HZ=27000000 + +# +# Kernel Timer/Scheduler +# +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +# CONFIG_SCHED_HRTICK is not set +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CLOCKEVENTS=y +# CONFIG_CYCLES_CLOCKSOURCE is not set +CONFIG_TICK_ONESHOT=y +# CONFIG_NO_HZ is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y + +# +# Misc +# +CONFIG_BFIN_SCRATCH_REG_RETN=y +# CONFIG_BFIN_SCRATCH_REG_RETE is not set +# CONFIG_BFIN_SCRATCH_REG_CYCLES is not set + +# +# Blackfin Kernel Optimizations +# + +# +# Memory Optimizations +# +CONFIG_I_ENTRY_L1=y +CONFIG_EXCPT_IRQ_SYSC_L1=y +CONFIG_DO_IRQ_L1=y +CONFIG_CORE_TIMER_IRQ_L1=y +CONFIG_IDLE_L1=y +CONFIG_SCHEDULE_L1=y +CONFIG_ARITHMETIC_OPS_L1=y +CONFIG_ACCESS_OK_L1=y +CONFIG_MEMSET_L1=y +CONFIG_MEMCPY_L1=y +CONFIG_SYS_BFIN_SPINLOCK_L1=y +# CONFIG_IP_CHECKSUM_L1 is not set +CONFIG_CACHELINE_ALIGNED_L1=y +# CONFIG_SYSCALL_TAB_L1 is not set +# CONFIG_CPLB_SWITCH_TAB_L1 is not set +CONFIG_APP_STACK_L1=y + +# +# Speed Optimizations +# +CONFIG_BFIN_INS_LOWOVERHEAD=y +CONFIG_RAMKERNEL=y +# CONFIG_ROMKERNEL is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +CONFIG_ZONE_DMA_FLAG=1 +CONFIG_VIRT_TO_BUS=y +CONFIG_BFIN_GPTIMERS=y +CONFIG_BFIN_DMA_5XX=y +# CONFIG_DMA_UNCACHED_4M is not set +# CONFIG_DMA_UNCACHED_2M is not set +CONFIG_DMA_UNCACHED_1M=y +# CONFIG_DMA_UNCACHED_NONE is not set + +# +# Cache Support +# +CONFIG_BFIN_ICACHE=y +CONFIG_BFIN_DCACHE=y +# CONFIG_BFIN_DCACHE_BANKA is not set +# CONFIG_BFIN_ICACHE_LOCK is not set +# CONFIG_BFIN_WB is not set +CONFIG_BFIN_WT=y +# CONFIG_MPU is not set + +# +# Asynchonous Memory Configuration +# + +# +# EBIU_AMGCTL Global Control +# +CONFIG_C_AMCKEN=y +CONFIG_C_CDPRIO=y +# CONFIG_C_AMBEN is not set +# CONFIG_C_AMBEN_B0 is not set +# CONFIG_C_AMBEN_B0_B1 is not set +# CONFIG_C_AMBEN_B0_B1_B2 is not set +CONFIG_C_AMBEN_ALL=y + +# +# EBIU_AMBCTL Control +# +CONFIG_BANK_0=0x7BB0 +CONFIG_BANK_1=0x7BB0 +CONFIG_BANK_2=0x7BB0 +CONFIG_BANK_3=0x99B2 + +# +# Bus options (PCI, PCMCIA, EISA, MCA, ISA) +# +# CONFIG_ARCH_SUPPORTS_MSI is not set +# CONFIG_PCCARD is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF_FDPIC=y +CONFIG_BINFMT_FLAT=y +CONFIG_BINFMT_ZFLAT=y +# CONFIG_BINFMT_SHARED_FLAT is not set +# CONFIG_BINFMT_MISC is not set + +# +# Power management options +# +# CONFIG_PM is not set +CONFIG_ARCH_SUSPEND_POSSIBLE=y +# CONFIG_PM_WAKEUP_BY_GPIO is not set + +# +# CPU Frequency scaling +# +# CONFIG_CPU_FREQ is not set + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_PNP=y +# CONFIG_IP_PNP_DHCP is not set +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IPV6 is not set +# CONFIG_NETLABEL is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +CONFIG_IRDA=m + +# +# IrDA protocols +# +CONFIG_IRLAN=m +CONFIG_IRCOMM=m +# CONFIG_IRDA_ULTRA is not set + +# +# IrDA options +# +CONFIG_IRDA_CACHE_LAST_LSAP=y +# CONFIG_IRDA_FAST_RR is not set +# CONFIG_IRDA_DEBUG is not set + +# +# Infrared-port device drivers +# + +# +# SIR device drivers +# +CONFIG_IRTTY_SIR=m +CONFIG_BFIN_SIR=m +CONFIG_SIR_BFIN_DMA=y +# CONFIG_SIR_BFIN_PIO is not set + +# +# Dongle support +# +# CONFIG_DONGLE is not set + +# +# FIR device drivers +# +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set +# CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_CONCAT is not set +CONFIG_MTD_PARTITIONS=y +# CONFIG_MTD_REDBOOT_PARTS is not set +CONFIG_MTD_CMDLINE_PARTS=y +# CONFIG_MTD_AR7_PARTS is not set + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=m +CONFIG_MTD_BLKDEVS=y +CONFIG_MTD_BLOCK=y +# CONFIG_FTL is not set +# CONFIG_NFTL is not set +# CONFIG_INFTL is not set +# CONFIG_RFD_FTL is not set +# CONFIG_SSFDC is not set +# CONFIG_MTD_OOPS is not set + +# +# RAM/ROM/Flash chip drivers +# +CONFIG_MTD_CFI=m +# CONFIG_MTD_JEDECPROBE is not set +CONFIG_MTD_GEN_PROBE=m +# CONFIG_MTD_CFI_ADV_OPTIONS is not set +CONFIG_MTD_MAP_BANK_WIDTH_1=y +CONFIG_MTD_MAP_BANK_WIDTH_2=y +CONFIG_MTD_MAP_BANK_WIDTH_4=y +# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set +CONFIG_MTD_CFI_I1=y +CONFIG_MTD_CFI_I2=y +# CONFIG_MTD_CFI_I4 is not set +# CONFIG_MTD_CFI_I8 is not set +# CONFIG_MTD_CFI_INTELEXT is not set +CONFIG_MTD_CFI_AMDSTD=m +# CONFIG_MTD_CFI_STAA is not set +CONFIG_MTD_CFI_UTIL=m +CONFIG_MTD_RAM=y +CONFIG_MTD_ROM=m +# CONFIG_MTD_ABSENT is not set + +# +# Mapping drivers for chip access +# +# CONFIG_MTD_COMPLEX_MAPPINGS is not set +CONFIG_MTD_PHYSMAP=m +CONFIG_MTD_PHYSMAP_START=0x20000000 +CONFIG_MTD_PHYSMAP_LEN=0x0 +CONFIG_MTD_PHYSMAP_BANKWIDTH=2 +# CONFIG_MTD_GPIO_ADDR is not set +# CONFIG_MTD_UCLINUX is not set +# CONFIG_MTD_PLATRAM is not set + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_DATAFLASH is not set +# CONFIG_MTD_M25P80 is not set +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_PHRAM is not set +# CONFIG_MTD_MTDRAM is not set +# CONFIG_MTD_BLOCK2MTD is not set + +# +# Disk-On-Chip Device Drivers +# +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOC2001PLUS is not set +CONFIG_MTD_NAND=m +# CONFIG_MTD_NAND_VERIFY_WRITE is not set +# CONFIG_MTD_NAND_ECC_SMC is not set +# CONFIG_MTD_NAND_MUSEUM_IDS is not set +CONFIG_MTD_NAND_BFIN=m +CONFIG_BFIN_NAND_BASE=0x20212000 +CONFIG_BFIN_NAND_CLE=2 +CONFIG_BFIN_NAND_ALE=1 +CONFIG_BFIN_NAND_READY=3 +CONFIG_MTD_NAND_IDS=m +# CONFIG_MTD_NAND_DISKONCHIP is not set +# CONFIG_MTD_NAND_NANDSIM is not set +# CONFIG_MTD_NAND_PLATFORM is not set +# CONFIG_MTD_ONENAND is not set + +# +# UBI - Unsorted block images +# +# CONFIG_MTD_UBI is not set +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_DEV_COW_COMMON is not set +# CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_NBD is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +# CONFIG_BLK_DEV_XIP is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +# CONFIG_MISC_DEVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_ATA is not set +# CONFIG_MD is not set +CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_VETH is not set +CONFIG_PHYLIB=y + +# +# MII PHY device drivers +# +# CONFIG_MARVELL_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_LXT_PHY is not set +# CONFIG_CICADA_PHY is not set +# CONFIG_VITESSE_PHY is not set +CONFIG_SMSC_PHY=y +# CONFIG_BROADCOM_PHY is not set +# CONFIG_ICPLUS_PHY is not set +# CONFIG_REALTEK_PHY is not set +# CONFIG_FIXED_PHY is not set +# CONFIG_MDIO_BITBANG is not set +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +CONFIG_SMC91X=y +# CONFIG_SMSC911X is not set +# CONFIG_DM9000 is not set +# CONFIG_ENC28J60 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_B44 is not set +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_WAN is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set + +# +# Userland interfaces +# +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +CONFIG_INPUT_EVDEV=m +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_ADS7846 is not set +# CONFIG_TOUCHSCREEN_AD7877 is not set +CONFIG_TOUCHSCREEN_AD7879=m +# CONFIG_TOUCHSCREEN_AD7879_I2C is not set +CONFIG_TOUCHSCREEN_AD7879_SPI=y +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_UCB1400 is not set +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_UINPUT is not set +# CONFIG_TWI_KEYPAD is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_AD9960 is not set +# CONFIG_SPI_ADC_BF533 is not set +# CONFIG_BF5xx_PPIFCD is not set +# CONFIG_BFIN_SIMPLE_TIMER is not set +# CONFIG_BF5xx_PPI is not set +CONFIG_BFIN_SPORT=y +# CONFIG_BFIN_TIMER_LATENCY is not set +# CONFIG_TWI_LCD is not set +CONFIG_SIMPLE_GPIO=m +# CONFIG_VT is not set +# CONFIG_DEVKMEM is not set +# CONFIG_BFIN_JTAG_COMM is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_BFIN=y +CONFIG_SERIAL_BFIN_CONSOLE=y +CONFIG_SERIAL_BFIN_DMA=y +# CONFIG_SERIAL_BFIN_PIO is not set +CONFIG_SERIAL_BFIN_UART0=y +# CONFIG_BFIN_UART0_CTSRTS is not set +CONFIG_SERIAL_BFIN_UART1=y +# CONFIG_BFIN_UART1_CTSRTS is not set +CONFIG_SERIAL_BFIN_UART2=y +# CONFIG_BFIN_UART2_CTSRTS is not set +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_BFIN_SPORT is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_LEGACY_PTYS is not set + +# +# CAN, the car bus and industrial fieldbus +# +# CONFIG_CAN4LINUX is not set +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_R3964 is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_CHARDEV is not set +CONFIG_I2C_HELPER_AUTO=y + +# +# I2C Hardware Bus support +# +CONFIG_I2C_BLACKFIN_TWI=y +CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=50 +# CONFIG_I2C_GPIO is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_SIMTEC is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_PCA_PLATFORM is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_DS1682 is not set +# CONFIG_SENSORS_AD5252 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set +CONFIG_SPI=y +CONFIG_SPI_MASTER=y + +# +# SPI Master Controller Drivers +# +CONFIG_SPI_BFIN=y +# CONFIG_SPI_BITBANG is not set + +# +# SPI Protocol Masters +# +# CONFIG_SPI_AT25 is not set +# CONFIG_SPI_SPIDEV is not set +# CONFIG_SPI_TLE62X0 is not set +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_AD7418 is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1029 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7470 is not set +# CONFIG_SENSORS_ADT7473 is not set +# CONFIG_SENSORS_ATXP1 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_F71882FG is not set +# CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM70 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_MAX6650 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_DME1737 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47M192 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_ADS7828 is not set +# CONFIG_SENSORS_THMC50 is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83791D is not set +# CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83L786NG is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +# CONFIG_HWMON_DEBUG_CHIP is not set +# CONFIG_THERMAL is not set +# CONFIG_THERMAL_HWMON is not set +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +CONFIG_BFIN_WDT=y + +# +# Sonics Silicon Backplane +# +CONFIG_SSB_POSSIBLE=y +# CONFIG_SSB is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set + +# +# Multimedia devices +# + +# +# Multimedia core support +# +# CONFIG_VIDEO_DEV is not set +# CONFIG_DVB_CORE is not set +# CONFIG_VIDEO_MEDIA is not set + +# +# Multimedia drivers +# +# CONFIG_DAB is not set + +# +# Graphics support +# +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +CONFIG_FB=m +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +CONFIG_FB_CFB_FILLRECT=m +CONFIG_FB_CFB_COPYAREA=m +CONFIG_FB_CFB_IMAGEBLIT=m +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +# CONFIG_FB_MODE_HELPERS is not set +# CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_BFIN_T350MCQB is not set +CONFIG_FB_BFIN_LQ035Q1=m +# CONFIG_FB_BFIN_7393 is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_VIRTUAL is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_LOGO is not set + +# +# Sound +# +# CONFIG_SOUND is not set +CONFIG_HID_SUPPORT=y +CONFIG_HID=y +# CONFIG_HID_DEBUG is not set +# CONFIG_HIDRAW is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_NEW_LEDS is not set +# CONFIG_ACCESSIBILITY is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_HCTOSYS_DEVICE="rtc0" +# CONFIG_RTC_DEBUG is not set + +# +# RTC interfaces +# +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +# CONFIG_RTC_DRV_TEST is not set + +# +# I2C RTC drivers +# +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set + +# +# SPI RTC drivers +# +# CONFIG_RTC_DRV_MAX6902 is not set +# CONFIG_RTC_DRV_R9701 is not set +# CONFIG_RTC_DRV_RS5C348 is not set + +# +# Platform RTC drivers +# +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_V3020 is not set + +# +# on-CPU RTC drivers +# +CONFIG_RTC_DRV_BFIN=y +# CONFIG_UIO is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +# CONFIG_EXT3_FS is not set +# CONFIG_EXT4DEV_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +# CONFIG_XFS_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_DNOTIFY is not set +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +# CONFIG_TMPFS is not set +# CONFIG_HUGETLB_PAGE is not set +# CONFIG_CONFIGFS_FS is not set + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +CONFIG_YAFFS_FS=m +CONFIG_YAFFS_YAFFS1=y +# CONFIG_YAFFS_9BYTE_TAGS is not set +# CONFIG_YAFFS_DOES_ECC is not set +CONFIG_YAFFS_YAFFS2=y +CONFIG_YAFFS_AUTO_YAFFS2=y +# CONFIG_YAFFS_DISABLE_LAZY_LOAD is not set +# CONFIG_YAFFS_DISABLE_WIDE_TNODES is not set +# CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED is not set +CONFIG_YAFFS_SHORT_NAMES_IN_RAM=y +CONFIG_JFFS2_FS=m +CONFIG_JFFS2_FS_DEBUG=0 +CONFIG_JFFS2_FS_WRITEBUFFER=y +# CONFIG_JFFS2_FS_WBUF_VERIFY is not set +# CONFIG_JFFS2_SUMMARY is not set +# CONFIG_JFFS2_FS_XATTR is not set +# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set +CONFIG_JFFS2_ZLIB=y +# CONFIG_JFFS2_LZO is not set +CONFIG_JFFS2_RTIME=y +# CONFIG_JFFS2_RUBIN is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFSD is not set +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=m +# CONFIG_SUNRPC_BIND34 is not set +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +CONFIG_SMB_FS=m +# CONFIG_SMB_NLS_DEFAULT is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +CONFIG_NLS=m +CONFIG_NLS_DEFAULT="iso8859-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +# CONFIG_NLS_ISO8859_1 is not set +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set +# CONFIG_DLM is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +# CONFIG_DEBUG_KERNEL is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +CONFIG_DEBUG_VERBOSE=y +CONFIG_DEBUG_MMRS=y +# CONFIG_DEBUG_DOUBLEFAULT is not set +CONFIG_DEBUG_HUNT_FOR_ZERO=y +CONFIG_DEBUG_BFIN_HWTRACE_ON=y +CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y +# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set +# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set +CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0 +# CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set +# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set +CONFIG_EARLY_PRINTK=y +CONFIG_CPLB_INFO=y +CONFIG_ACCESS_CHECK=y + +# +# Security options +# +# CONFIG_KEYS is not set +CONFIG_SECURITY=y +# CONFIG_SECURITY_NETWORK is not set +# CONFIG_SECURITY_CAPABILITIES is not set +CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=0 +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_MANAGER is not set +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +# CONFIG_CRYPTO_CBC is not set +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +# CONFIG_CRYPTO_MD5 is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_DES is not set +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_HW=y + +# +# Library routines +# +CONFIG_BITREVERSE=y +# CONFIG_GENERIC_FIND_FIRST_BIT is not set +CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y diff --git a/arch/blackfin/include/asm/gpio.h b/arch/blackfin/include/asm/gpio.h index ad33ac271fd9..c7d287ca5d0a 100644 --- a/arch/blackfin/include/asm/gpio.h +++ b/arch/blackfin/include/asm/gpio.h @@ -165,6 +165,74 @@ #endif +#if defined(BF538_FAMILY) + /* FIXME: + * For now only support PORTF GPIOs. + * PORT C,D and E are for peripheral usage only + */ +#define MAX_BLACKFIN_GPIOS 16 + +#define GPIO_PF0 0 /* PF */ +#define GPIO_PF1 1 +#define GPIO_PF2 2 +#define GPIO_PF3 3 +#define GPIO_PF4 4 +#define GPIO_PF5 5 +#define GPIO_PF6 6 +#define GPIO_PF7 7 +#define GPIO_PF8 8 +#define GPIO_PF9 9 +#define GPIO_PF10 10 +#define GPIO_PF11 11 +#define GPIO_PF12 12 +#define GPIO_PF13 13 +#define GPIO_PF14 14 +#define GPIO_PF15 15 +#define GPIO_PC0 16 /* PC */ +#define GPIO_PC1 17 +#define GPIO_PC4 20 +#define GPIO_PC5 21 +#define GPIO_PC6 22 +#define GPIO_PC7 23 +#define GPIO_PC8 24 +#define GPIO_PC9 25 +#define GPIO_PD0 32 /* PD */ +#define GPIO_PD1 33 +#define GPIO_PD2 34 +#define GPIO_PD3 35 +#define GPIO_PD4 36 +#define GPIO_PD5 37 +#define GPIO_PD6 38 +#define GPIO_PD7 39 +#define GPIO_PD8 40 +#define GPIO_PD9 41 +#define GPIO_PD10 42 +#define GPIO_PD11 43 +#define GPIO_PD12 44 +#define GPIO_PD13 45 +#define GPIO_PE0 48 /* PE */ +#define GPIO_PE1 49 +#define GPIO_PE2 50 +#define GPIO_PE3 51 +#define GPIO_PE4 52 +#define GPIO_PE5 53 +#define GPIO_PE6 54 +#define GPIO_PE7 55 +#define GPIO_PE8 56 +#define GPIO_PE9 57 +#define GPIO_PE10 58 +#define GPIO_PE11 59 +#define GPIO_PE12 60 +#define GPIO_PE13 61 +#define GPIO_PE14 62 +#define GPIO_PE15 63 + +#define PORT_F GPIO_PF0 +#define PORT_C GPIO_PC0 +#define PORT_D GPIO_PD0 +#define PORT_E GPIO_PE0 +#endif + #if defined(BF527_FAMILY) || defined(BF537_FAMILY) #define MAX_BLACKFIN_GPIOS 48 diff --git a/arch/blackfin/kernel/bfin_gpio.c b/arch/blackfin/kernel/bfin_gpio.c index 5c0800adb4dd..e129102ad09b 100644 --- a/arch/blackfin/kernel/bfin_gpio.c +++ b/arch/blackfin/kernel/bfin_gpio.c @@ -119,7 +119,7 @@ enum { #define AWA_DUMMY_READ(...) do { } while (0) #endif -#ifdef BF533_FAMILY +#if defined(BF533_FAMILY) || defined(BF538_FAMILY) static struct gpio_port_t *gpio_bankb[gpio_bank(MAX_BLACKFIN_GPIOS)] = { (struct gpio_port_t *) FIO_FLAG_D, }; @@ -202,6 +202,10 @@ static unsigned int sic_iwr_irqs[gpio_bank(MAX_BLACKFIN_GPIOS)] = {IRQ_PROG_INTB static unsigned int sic_iwr_irqs[gpio_bank(MAX_BLACKFIN_GPIOS)] = {IRQ_PROG_INTB, IRQ_PORTG_INTB, IRQ_MAC_TX}; #endif +#ifdef BF538_FAMILY +static unsigned int sic_iwr_irqs[gpio_bank(MAX_BLACKFIN_GPIOS)] = {IRQ_PORTF_INTB}; +#endif + #ifdef BF527_FAMILY static unsigned int sic_iwr_irqs[gpio_bank(MAX_BLACKFIN_GPIOS)] = {IRQ_PORTF_INTB, IRQ_PORTG_INTB, IRQ_PORTH_INTB}; #endif diff --git a/arch/blackfin/mach-bf538/Kconfig b/arch/blackfin/mach-bf538/Kconfig new file mode 100644 index 000000000000..a6f3307758c0 --- /dev/null +++ b/arch/blackfin/mach-bf538/Kconfig @@ -0,0 +1,164 @@ +if (BF538 || BF539) + +source "arch/blackfin/mach-bf538/boards/Kconfig" + +menu "BF538 Specific Configuration" + +comment "Interrupt Priority Assignment" +menu "Priority" + +config IRQ_PLL_WAKEUP + int "IRQ_PLL_WAKEUP" + default 7 +config IRQ_DMA0_ERROR + int "IRQ_DMA0_ERROR" + default 7 +config IRQ_PPI_ERROR + int "IRQ_PPI_ERROR" + default 7 +config IRQ_SPORT0_ERROR + int "IRQ_SPORT0_ERROR" + default 7 +config IRQ_SPORT1_ERROR + int "IRQ_SPORT1_ERROR" + default 7 +config IRQ_SPI0_ERROR + int "IRQ_SPI0_ERROR" + default 7 +config IRQ_UART0_ERROR + int "IRQ_UART0_ERROR" + default 7 +config IRQ_RTC + int "IRQ_RTC" + default 8 +config IRQ_PPI + int "IRQ_PPI" + default 8 +config IRQ_SPORT0_RX + int "IRQ_SPORT0_RX" + default 9 +config IRQ_SPORT0_TX + int "IRQ_SPORT0_TX" + default 9 +config IRQ_SPORT1_RX + int "IRQ_SPORT1_RX" + default 9 +config IRQ_SPORT1_TX + int "IRQ_SPORT1_TX" + default 9 +config IRQ_SPI0 + int "IRQ_SPI0" + default 10 +config IRQ_UART0_RX + int "IRQ_UART0_RX" + default 10 +config IRQ_UART0_TX + int "IRQ_UART0_TX" + default 10 +config IRQ_TMR0 + int "IRQ_TMR0" + default 11 +config IRQ_TMR1 + int "IRQ_TMR1" + default 11 +config IRQ_TMR2 + int "IRQ_TMR2" + default 11 +config IRQ_PORTF_INTA + int "IRQ_PORTF_INTA" + default 12 +config IRQ_PORTF_INTB + int "IRQ_PORTF_INTB" + default 12 +config IRQ_MEM0_DMA0 + int "IRQ_MEM0_DMA0" + default 13 +config IRQ_MEM0_DMA1 + int "IRQ_MEM0_DMA1" + default 13 +config IRQ_WATCH + int "IRQ_WATCH" + default 13 +config IRQ_DMA1_ERROR + int "IRQ_DMA1_ERROR" + default 7 +config IRQ_SPORT2_ERROR + int "IRQ_SPORT2_ERROR" + default 7 +config IRQ_SPORT3_ERROR + int "IRQ_SPORT3_ERROR" + default 7 +config IRQ_SPI1_ERROR + int "IRQ_SPI1_ERROR" + default 7 +config IRQ_SPI2_ERROR + int "IRQ_SPI2_ERROR" + default 7 +config IRQ_UART1_ERROR + int "IRQ_UART1_ERROR" + default 7 +config IRQ_UART2_ERROR + int "IRQ_UART2_ERROR" + default 7 +config IRQ_CAN_ERROR + int "IRQ_CAN_ERROR" + default 7 +config IRQ_SPORT2_RX + int "IRQ_SPORT2_RX" + default 9 +config IRQ_SPORT2_TX + int "IRQ_SPORT2_TX" + default 9 +config IRQ_SPORT3_RX + int "IRQ_SPORT3_RX" + default 9 +config IRQ_SPORT3_TX + int "IRQ_SPORT3_TX" + default 9 +config IRQ_SPI1 + int "IRQ_SPI1" + default 10 +config IRQ_SPI2 + int "IRQ_SPI2" + default 10 +config IRQ_UART1_RX + int "IRQ_UART1_RX" + default 10 +config IRQ_UART1_TX + int "IRQ_UART1_TX" + default 10 +config IRQ_UART2_RX + int "IRQ_UART2_RX" + default 10 +config IRQ_UART2_TX + int "IRQ_UART2_TX" + default 10 +config IRQ_TWI0 + int "IRQ_TWI0" + default 11 +config IRQ_TWI1 + int "IRQ_TWI1" + default 11 +config IRQ_CAN_RX + int "IRQ_CAN_RX" + default 11 +config IRQ_CAN_TX + int "IRQ_CAN_TX" + default 11 +config IRQ_MEM1_DMA0 + int "IRQ_MEM1_DMA0" + default 13 +config IRQ_MEM1_DMA1 + int "IRQ_MEM1_DMA1" + default 13 + + help + Enter the priority numbers between 7-13 ONLY. Others are Reserved. + This applies to all the above. It is not recommended to assign the + highest priority number 7 to UART or any other device. + +endmenu + +endmenu + +endif diff --git a/arch/blackfin/mach-bf538/Makefile b/arch/blackfin/mach-bf538/Makefile new file mode 100644 index 000000000000..1f093c4f7235 --- /dev/null +++ b/arch/blackfin/mach-bf538/Makefile @@ -0,0 +1,7 @@ +# +# arch/blackfin/mach-bf538/Makefile +# + +extra-y := head.o + +obj-y := ints-priority.o dma.o diff --git a/arch/blackfin/mach-bf538/boards/Kconfig b/arch/blackfin/mach-bf538/boards/Kconfig new file mode 100644 index 000000000000..215249ba58bb --- /dev/null +++ b/arch/blackfin/mach-bf538/boards/Kconfig @@ -0,0 +1,12 @@ +choice + prompt "System type" + default BFIN538_EZKIT + help + Select your board! + +config BFIN538_EZKIT + bool "BF538-EZKIT" + help + BF538-EZKIT-LITE board support. + +endchoice diff --git a/arch/blackfin/mach-bf538/boards/Makefile b/arch/blackfin/mach-bf538/boards/Makefile new file mode 100644 index 000000000000..6143b320d585 --- /dev/null +++ b/arch/blackfin/mach-bf538/boards/Makefile @@ -0,0 +1,5 @@ +# +# arch/blackfin/mach-bf538/boards/Makefile +# + +obj-$(CONFIG_BFIN538_EZKIT) += ezkit.o diff --git a/arch/blackfin/mach-bf538/boards/ezkit.c b/arch/blackfin/mach-bf538/boards/ezkit.c new file mode 100644 index 000000000000..0969e8145bc9 --- /dev/null +++ b/arch/blackfin/mach-bf538/boards/ezkit.c @@ -0,0 +1,538 @@ +/* + * File: arch/blackfin/mach-bf538/boards/ezkit.c + * Based on: arch/blackfin/mach-bf537/boards/ezkit.c + * Author: Aidan Williams + * + * Created: + * Description: + * + * Modified: + * Copyright 2005 National ICT Australia (NICTA) + * Copyright 2004-2008 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Name the Board for the /proc/cpuinfo + */ +const char bfin_board_name[] = "ADSP-BF538-EZKIT"; + +/* + * Driver needs to know address, irq and flag pin. + */ + + +#if defined(CONFIG_RTC_DRV_BFIN) || defined(CONFIG_RTC_DRV_BFIN_MODULE) +static struct platform_device rtc_device = { + .name = "rtc-bfin", + .id = -1, +}; +#endif + +#if defined(CONFIG_SERIAL_BFIN) || defined(CONFIG_SERIAL_BFIN_MODULE) +static struct resource bfin_uart_resources[] = { +#ifdef CONFIG_SERIAL_BFIN_UART0 + { + .start = 0xFFC00400, + .end = 0xFFC004FF, + .flags = IORESOURCE_MEM, + }, +#endif +#ifdef CONFIG_SERIAL_BFIN_UART1 + { + .start = 0xFFC02000, + .end = 0xFFC020FF, + .flags = IORESOURCE_MEM, + }, +#endif +#ifdef CONFIG_SERIAL_BFIN_UART2 + { + .start = 0xFFC02100, + .end = 0xFFC021FF, + .flags = IORESOURCE_MEM, + }, +#endif +}; + +static struct platform_device bfin_uart_device = { + .name = "bfin-uart", + .id = 1, + .num_resources = ARRAY_SIZE(bfin_uart_resources), + .resource = bfin_uart_resources, +}; +#endif + +#if defined(CONFIG_BFIN_SIR) || defined(CONFIG_BFIN_SIR_MODULE) +static struct resource bfin_sir_resources[] = { +#ifdef CONFIG_BFIN_SIR0 + { + .start = 0xFFC00400, + .end = 0xFFC004FF, + .flags = IORESOURCE_MEM, + }, +#endif +#ifdef CONFIG_BFIN_SIR1 + { + .start = 0xFFC02000, + .end = 0xFFC020FF, + .flags = IORESOURCE_MEM, + }, +#endif +#ifdef CONFIG_BFIN_SIR2 + { + .start = 0xFFC02100, + .end = 0xFFC021FF, + .flags = IORESOURCE_MEM, + }, +#endif +}; + +static struct platform_device bfin_sir_device = { + .name = "bfin_sir", + .id = 0, + .num_resources = ARRAY_SIZE(bfin_sir_resources), + .resource = bfin_sir_resources, +}; +#endif + +/* + * USB-LAN EzExtender board + * Driver needs to know address, irq and flag pin. + */ +#if defined(CONFIG_SMC91X) || defined(CONFIG_SMC91X_MODULE) +static struct resource smc91x_resources[] = { + { + .name = "smc91x-regs", + .start = 0x20310300, + .end = 0x20310300 + 16, + .flags = IORESOURCE_MEM, + }, { + .start = IRQ_PF0, + .end = IRQ_PF0, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, + }, +}; +static struct platform_device smc91x_device = { + .name = "smc91x", + .id = 0, + .num_resources = ARRAY_SIZE(smc91x_resources), + .resource = smc91x_resources, +}; +#endif + +#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE) +/* all SPI peripherals info goes here */ +#if defined(CONFIG_MTD_M25P80) \ + || defined(CONFIG_MTD_M25P80_MODULE) +/* SPI flash chip (m25p16) */ +static struct mtd_partition bfin_spi_flash_partitions[] = { + { + .name = "bootloader(spi)", + .size = 0x00040000, + .offset = 0, + .mask_flags = MTD_CAP_ROM + }, { + .name = "linux kernel(spi)", + .size = 0x1c0000, + .offset = 0x40000 + } +}; + +static struct flash_platform_data bfin_spi_flash_data = { + .name = "m25p80", + .parts = bfin_spi_flash_partitions, + .nr_parts = ARRAY_SIZE(bfin_spi_flash_partitions), + .type = "m25p16", +}; + +static struct bfin5xx_spi_chip spi_flash_chip_info = { + .enable_dma = 0, /* use dma transfer with this chip*/ + .bits_per_word = 8, + .cs_change_per_word = 0, +}; +#endif + +#if defined(CONFIG_TOUCHSCREEN_AD7879) || defined(CONFIG_TOUCHSCREEN_AD7879_MODULE) +#include +static const struct ad7879_platform_data bfin_ad7879_ts_info = { + .model = 7879, /* Model = AD7879 */ + .x_plate_ohms = 620, /* 620 Ohm from the touch datasheet */ + .pressure_max = 10000, + .pressure_min = 0, + .first_conversion_delay = 3, /* wait 512us before do a first conversion */ + .acquisition_time = 1, /* 4us acquisition time per sample */ + .median = 2, /* do 8 measurements */ + .averaging = 1, /* take the average of 4 middle samples */ + .pen_down_acc_interval = 255, /* 9.4 ms */ + .gpio_output = 1, /* configure AUX/VBAT/GPIO as GPIO output */ + .gpio_default = 1, /* During initialization set GPIO = HIGH */ +}; +#endif + +#if defined(CONFIG_TOUCHSCREEN_AD7879_SPI) || defined(CONFIG_TOUCHSCREEN_AD7879_SPI_MODULE) +static struct bfin5xx_spi_chip spi_ad7879_chip_info = { + .enable_dma = 0, + .bits_per_word = 16, +}; +#endif + +#if defined(CONFIG_FB_BFIN_LQ035Q1) || defined(CONFIG_FB_BFIN_LQ035Q1_MODULE) +#include + +static struct bfin_lq035q1fb_disp_info bfin_lq035q1_data = { + .mode = LQ035_NORM | LQ035_RGB | LQ035_RL | LQ035_TB, + .use_bl = 0, /* let something else control the LCD Blacklight */ + .gpio_bl = GPIO_PF7, +}; + +static struct resource bfin_lq035q1_resources[] = { + { + .start = IRQ_PPI_ERROR, + .end = IRQ_PPI_ERROR, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device bfin_lq035q1_device = { + .name = "bfin-lq035q1", + .id = -1, + .num_resources = ARRAY_SIZE(bfin_lq035q1_resources), + .resource = bfin_lq035q1_resources, + .dev = { + .platform_data = &bfin_lq035q1_data, + }, +}; +#endif + +#if defined(CONFIG_SPI_SPIDEV) || defined(CONFIG_SPI_SPIDEV_MODULE) +static struct bfin5xx_spi_chip spidev_chip_info = { + .enable_dma = 0, + .bits_per_word = 8, +}; +#endif + +#if defined(CONFIG_FB_BFIN_LQ035Q1) || defined(CONFIG_FB_BFIN_LQ035Q1_MODULE) +static struct bfin5xx_spi_chip lq035q1_spi_chip_info = { + .enable_dma = 0, + .bits_per_word = 8, +}; +#endif + +static struct spi_board_info bf538_spi_board_info[] __initdata = { +#if defined(CONFIG_MTD_M25P80) \ + || defined(CONFIG_MTD_M25P80_MODULE) + { + /* the modalias must be the same as spi device driver name */ + .modalias = "m25p80", /* Name of spi_driver for this device */ + .max_speed_hz = 25000000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, /* Framework bus number */ + .chip_select = 1, /* SPI_SSEL1*/ + .platform_data = &bfin_spi_flash_data, + .controller_data = &spi_flash_chip_info, + .mode = SPI_MODE_3, + }, +#endif +#if defined(CONFIG_TOUCHSCREEN_AD7879_SPI) || defined(CONFIG_TOUCHSCREEN_AD7879_SPI_MODULE) + { + .modalias = "ad7879", + .platform_data = &bfin_ad7879_ts_info, + .irq = IRQ_PF3, + .max_speed_hz = 5000000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, + .chip_select = 1, + .controller_data = &spi_ad7879_chip_info, + .mode = SPI_CPHA | SPI_CPOL, + }, +#endif +#if defined(CONFIG_FB_BFIN_LQ035Q1) || defined(CONFIG_FB_BFIN_LQ035Q1_MODULE) + { + .modalias = "bfin-lq035q1-spi", + .max_speed_hz = 20000000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, + .chip_select = 2, + .controller_data = &lq035q1_spi_chip_info, + .mode = SPI_CPHA | SPI_CPOL, + }, +#endif +#if defined(CONFIG_SPI_SPIDEV) || defined(CONFIG_SPI_SPIDEV_MODULE) + { + .modalias = "spidev", + .max_speed_hz = 3125000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, + .chip_select = 1, + .controller_data = &spidev_chip_info, + }, +#endif +}; + +/* SPI (0) */ +static struct resource bfin_spi0_resource[] = { + [0] = { + .start = SPI0_REGBASE, + .end = SPI0_REGBASE + 0xFF, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = CH_SPI0, + .end = CH_SPI0, + .flags = IORESOURCE_IRQ, + } +}; + +/* SPI (1) */ +static struct resource bfin_spi1_resource[] = { + [0] = { + .start = SPI1_REGBASE, + .end = SPI1_REGBASE + 0xFF, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = CH_SPI1, + .end = CH_SPI1, + .flags = IORESOURCE_IRQ, + } +}; + +/* SPI (2) */ +static struct resource bfin_spi2_resource[] = { + [0] = { + .start = SPI2_REGBASE, + .end = SPI2_REGBASE + 0xFF, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = CH_SPI2, + .end = CH_SPI2, + .flags = IORESOURCE_IRQ, + } +}; + +/* SPI controller data */ +static struct bfin5xx_spi_master bf538_spi_master_info0 = { + .num_chipselect = 8, + .enable_dma = 1, /* master has the ability to do dma transfer */ + .pin_req = {P_SPI0_SCK, P_SPI0_MISO, P_SPI0_MOSI, 0}, +}; + +static struct platform_device bf538_spi_master0 = { + .name = "bfin-spi", + .id = 0, /* Bus number */ + .num_resources = ARRAY_SIZE(bfin_spi0_resource), + .resource = bfin_spi0_resource, + .dev = { + .platform_data = &bf538_spi_master_info0, /* Passed to driver */ + }, +}; + +static struct bfin5xx_spi_master bf538_spi_master_info1 = { + .num_chipselect = 8, + .enable_dma = 1, /* master has the ability to do dma transfer */ + .pin_req = {P_SPI1_SCK, P_SPI1_MISO, P_SPI1_MOSI, 0}, +}; + +static struct platform_device bf538_spi_master1 = { + .name = "bfin-spi", + .id = 1, /* Bus number */ + .num_resources = ARRAY_SIZE(bfin_spi1_resource), + .resource = bfin_spi1_resource, + .dev = { + .platform_data = &bf538_spi_master_info1, /* Passed to driver */ + }, +}; + +static struct bfin5xx_spi_master bf538_spi_master_info2 = { + .num_chipselect = 8, + .enable_dma = 1, /* master has the ability to do dma transfer */ + .pin_req = {P_SPI2_SCK, P_SPI2_MISO, P_SPI2_MOSI, 0}, +}; + +static struct platform_device bf538_spi_master2 = { + .name = "bfin-spi", + .id = 2, /* Bus number */ + .num_resources = ARRAY_SIZE(bfin_spi2_resource), + .resource = bfin_spi2_resource, + .dev = { + .platform_data = &bf538_spi_master_info2, /* Passed to driver */ + }, +}; + +#endif /* spi master and devices */ + +#if defined(CONFIG_I2C_BLACKFIN_TWI) || defined(CONFIG_I2C_BLACKFIN_TWI_MODULE) +static struct resource bfin_twi0_resource[] = { + [0] = { + .start = TWI0_REGBASE, + .end = TWI0_REGBASE + 0xFF, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = IRQ_TWI0, + .end = IRQ_TWI0, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device i2c_bfin_twi0_device = { + .name = "i2c-bfin-twi", + .id = 0, + .num_resources = ARRAY_SIZE(bfin_twi0_resource), + .resource = bfin_twi0_resource, +}; + +#if !defined(CONFIG_BF542) /* The BF542 only has 1 TWI */ +static struct resource bfin_twi1_resource[] = { + [0] = { + .start = TWI1_REGBASE, + .end = TWI1_REGBASE + 0xFF, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = IRQ_TWI1, + .end = IRQ_TWI1, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device i2c_bfin_twi1_device = { + .name = "i2c-bfin-twi", + .id = 1, + .num_resources = ARRAY_SIZE(bfin_twi1_resource), + .resource = bfin_twi1_resource, +}; +#endif +#endif + +#if defined(CONFIG_KEYBOARD_GPIO) || defined(CONFIG_KEYBOARD_GPIO_MODULE) +#include + +static struct gpio_keys_button bfin_gpio_keys_table[] = { + {BTN_0, GPIO_PC7, 1, "gpio-keys: BTN0"}, +}; + +static struct gpio_keys_platform_data bfin_gpio_keys_data = { + .buttons = bfin_gpio_keys_table, + .nbuttons = ARRAY_SIZE(bfin_gpio_keys_table), +}; + +static struct platform_device bfin_device_gpiokeys = { + .name = "gpio-keys", + .dev = { + .platform_data = &bfin_gpio_keys_data, + }, +}; +#endif + +static const unsigned int cclk_vlev_datasheet[] = +{ +/* + * Internal VLEV BF538SBBC1533 + ****temporarily using these values until data sheet is updated + */ + VRPAIR(VLEV_100, 150000000), + VRPAIR(VLEV_100, 250000000), + VRPAIR(VLEV_110, 276000000), + VRPAIR(VLEV_115, 301000000), + VRPAIR(VLEV_120, 525000000), + VRPAIR(VLEV_125, 550000000), + VRPAIR(VLEV_130, 600000000), +}; + +static struct bfin_dpmc_platform_data bfin_dmpc_vreg_data = { + .tuple_tab = cclk_vlev_datasheet, + .tabsize = ARRAY_SIZE(cclk_vlev_datasheet), + .vr_settling_time = 25 /* us */, +}; + +static struct platform_device bfin_dpmc = { + .name = "bfin dpmc", + .dev = { + .platform_data = &bfin_dmpc_vreg_data, + }, +}; + +static struct platform_device *cm_bf538_devices[] __initdata = { + + &bfin_dpmc, + +#if defined(CONFIG_RTC_DRV_BFIN) || defined(CONFIG_RTC_DRV_BFIN_MODULE) + &rtc_device, +#endif + +#if defined(CONFIG_SERIAL_BFIN) || defined(CONFIG_SERIAL_BFIN_MODULE) + &bfin_uart_device, +#endif + +#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE) + &bf538_spi_master0, + &bf538_spi_master1, + &bf538_spi_master2, +#endif + +#if defined(CONFIG_I2C_BLACKFIN_TWI) || defined(CONFIG_I2C_BLACKFIN_TWI_MODULE) + &i2c_bfin_twi0_device, + &i2c_bfin_twi1_device, +#endif + +#if defined(CONFIG_BFIN_SIR) || defined(CONFIG_BFIN_SIR_MODULE) + &bfin_sir_device, +#endif + +#if defined(CONFIG_SMC91X) || defined(CONFIG_SMC91X_MODULE) + &smc91x_device, +#endif + +#if defined(CONFIG_FB_BFIN_LQ035Q1) || defined(CONFIG_FB_BFIN_LQ035Q1_MODULE) + &bfin_lq035q1_device, +#endif + +#if defined(CONFIG_KEYBOARD_GPIO) || defined(CONFIG_KEYBOARD_GPIO_MODULE) + &bfin_device_gpiokeys, +#endif +}; + +static int __init ezkit_init(void) +{ + printk(KERN_INFO "%s(): registering device resources\n", __func__); + platform_add_devices(cm_bf538_devices, ARRAY_SIZE(cm_bf538_devices)); + +#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE) + spi_register_board_info(bf538_spi_board_info, + ARRAY_SIZE(bf538_spi_board_info)); +#endif + + return 0; +} + +arch_initcall(ezkit_init); diff --git a/arch/blackfin/mach-bf538/dma.c b/arch/blackfin/mach-bf538/dma.c new file mode 100644 index 000000000000..359fdaa12b8f --- /dev/null +++ b/arch/blackfin/mach-bf538/dma.c @@ -0,0 +1,161 @@ +/* + * File: arch/blackfin/mach-bf538/dma.c + * Based on: + * Author: + * + * Created: + * Description: This file contains the simple DMA Implementation for Blackfin + * + * Modified: + * Copyright 2008 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include + +#include +#include + +struct dma_register *dma_io_base_addr[MAX_BLACKFIN_DMA_CHANNEL] = { + (struct dma_register *) DMA0_NEXT_DESC_PTR, + (struct dma_register *) DMA1_NEXT_DESC_PTR, + (struct dma_register *) DMA2_NEXT_DESC_PTR, + (struct dma_register *) DMA3_NEXT_DESC_PTR, + (struct dma_register *) DMA4_NEXT_DESC_PTR, + (struct dma_register *) DMA5_NEXT_DESC_PTR, + (struct dma_register *) DMA6_NEXT_DESC_PTR, + (struct dma_register *) DMA7_NEXT_DESC_PTR, + (struct dma_register *) DMA8_NEXT_DESC_PTR, + (struct dma_register *) DMA9_NEXT_DESC_PTR, + (struct dma_register *) DMA10_NEXT_DESC_PTR, + (struct dma_register *) DMA11_NEXT_DESC_PTR, + (struct dma_register *) DMA12_NEXT_DESC_PTR, + (struct dma_register *) DMA13_NEXT_DESC_PTR, + (struct dma_register *) DMA14_NEXT_DESC_PTR, + (struct dma_register *) DMA15_NEXT_DESC_PTR, + (struct dma_register *) DMA16_NEXT_DESC_PTR, + (struct dma_register *) DMA17_NEXT_DESC_PTR, + (struct dma_register *) DMA18_NEXT_DESC_PTR, + (struct dma_register *) DMA19_NEXT_DESC_PTR, + (struct dma_register *) MDMA0_D0_NEXT_DESC_PTR, + (struct dma_register *) MDMA0_S0_NEXT_DESC_PTR, + (struct dma_register *) MDMA0_D1_NEXT_DESC_PTR, + (struct dma_register *) MDMA0_S1_NEXT_DESC_PTR, + (struct dma_register *) MDMA1_D0_NEXT_DESC_PTR, + (struct dma_register *) MDMA1_S0_NEXT_DESC_PTR, + (struct dma_register *) MDMA1_D1_NEXT_DESC_PTR, + (struct dma_register *) MDMA1_S1_NEXT_DESC_PTR, +}; +EXPORT_SYMBOL(dma_io_base_addr); + +int channel2irq(unsigned int channel) +{ + int ret_irq = -1; + + switch (channel) { + case CH_PPI: + ret_irq = IRQ_PPI; + break; + + case CH_UART0_RX: + ret_irq = IRQ_UART0_RX; + break; + + case CH_UART0_TX: + ret_irq = IRQ_UART0_TX; + break; + + case CH_UART1_RX: + ret_irq = IRQ_UART1_RX; + break; + + case CH_UART1_TX: + ret_irq = IRQ_UART1_TX; + break; + + case CH_UART2_RX: + ret_irq = IRQ_UART2_RX; + break; + + case CH_UART2_TX: + ret_irq = IRQ_UART2_TX; + break; + + case CH_SPORT0_RX: + ret_irq = IRQ_SPORT0_RX; + break; + + case CH_SPORT0_TX: + ret_irq = IRQ_SPORT0_TX; + break; + + case CH_SPORT1_RX: + ret_irq = IRQ_SPORT1_RX; + break; + + case CH_SPORT1_TX: + ret_irq = IRQ_SPORT1_TX; + break; + + case CH_SPORT2_RX: + ret_irq = IRQ_SPORT2_RX; + break; + + case CH_SPORT2_TX: + ret_irq = IRQ_SPORT2_TX; + break; + + case CH_SPORT3_RX: + ret_irq = IRQ_SPORT3_RX; + break; + + case CH_SPORT3_TX: + ret_irq = IRQ_SPORT3_TX; + break; + + case CH_SPI0: + ret_irq = IRQ_SPI0; + break; + + case CH_SPI1: + ret_irq = IRQ_SPI1; + break; + + case CH_SPI2: + ret_irq = IRQ_SPI2; + break; + + case CH_MEM_STREAM0_SRC: + case CH_MEM_STREAM0_DEST: + ret_irq = IRQ_MEM0_DMA0; + break; + case CH_MEM_STREAM1_SRC: + case CH_MEM_STREAM1_DEST: + ret_irq = IRQ_MEM0_DMA1; + break; + case CH_MEM_STREAM2_SRC: + case CH_MEM_STREAM2_DEST: + ret_irq = IRQ_MEM1_DMA0; + break; + case CH_MEM_STREAM3_SRC: + case CH_MEM_STREAM3_DEST: + ret_irq = IRQ_MEM1_DMA1; + break; + } + return ret_irq; +} diff --git a/arch/blackfin/mach-bf538/head.S b/arch/blackfin/mach-bf538/head.S new file mode 100644 index 000000000000..39013ec97008 --- /dev/null +++ b/arch/blackfin/mach-bf538/head.S @@ -0,0 +1,137 @@ +/* + * File: arch/blackfin/mach-bf538/head.S + * Based on: + * Author: Jeff Dionne COPYRIGHT 1998 D. Jeff Dionne + * + * Created: 1998 + * Description: bf533 startup file + * + * Modified: + * Copyright 2004-2006 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#ifdef CONFIG_BFIN_KERNEL_CLOCK +#include +#include +#endif + +.section .l1.text +#ifdef CONFIG_BFIN_KERNEL_CLOCK +ENTRY(_start_dma_code) + p0.h = hi(SIC_IWR0); + p0.l = lo(SIC_IWR0); + r0.l = 0x1; + r0.h = 0x0; + [p0] = r0; + SSYNC; + + /* + * Set PLL_CTL + * - [14:09] = MSEL[5:0] : CLKIN / VCO multiplication factors + * - [8] = BYPASS : BYPASS the PLL, run CLKIN into CCLK/SCLK + * - [7] = output delay (add 200ps of delay to mem signals) + * - [6] = input delay (add 200ps of input delay to mem signals) + * - [5] = PDWN : 1=All Clocks off + * - [3] = STOPCK : 1=Core Clock off + * - [1] = PLL_OFF : 1=Disable Power to PLL + * - [0] = DF : 1=Pass CLKIN/2 to PLL / 0=Pass CLKIN to PLL + * all other bits set to zero + */ + + p0.h = hi(PLL_LOCKCNT); + p0.l = lo(PLL_LOCKCNT); + r0 = 0x300(Z); + w[p0] = r0.l; + ssync; + + P2.H = hi(EBIU_SDGCTL); + P2.L = lo(EBIU_SDGCTL); + R0 = [P2]; + BITSET (R0, 24); + [P2] = R0; + SSYNC; + + r0 = CONFIG_VCO_MULT & 63; /* Load the VCO multiplier */ + r0 = r0 << 9; /* Shift it over, */ + r1 = CLKIN_HALF; /* Do we need to divide CLKIN by 2?*/ + r0 = r1 | r0; + r1 = PLL_BYPASS; /* Bypass the PLL? */ + r1 = r1 << 8; /* Shift it over */ + r0 = r1 | r0; /* add them all together */ +#ifdef ANOMALY_05000265 + BITSET(r0, 15); /* Add 250 mV of hysteresis to SPORT input pins */ +#endif + + p0.h = hi(PLL_CTL); + p0.l = lo(PLL_CTL); /* Load the address */ + cli r2; /* Disable interrupts */ + ssync; + w[p0] = r0.l; /* Set the value */ + idle; /* Wait for the PLL to stablize */ + sti r2; /* Enable interrupts */ + +.Lcheck_again: + p0.h = hi(PLL_STAT); + p0.l = lo(PLL_STAT); + R0 = W[P0](Z); + CC = BITTST(R0,5); + if ! CC jump .Lcheck_again; + + /* Configure SCLK & CCLK Dividers */ + r0 = (CONFIG_CCLK_ACT_DIV | CONFIG_SCLK_DIV); + p0.h = hi(PLL_DIV); + p0.l = lo(PLL_DIV); + w[p0] = r0.l; + ssync; + + p0.l = lo(EBIU_SDRRC); + p0.h = hi(EBIU_SDRRC); + r0 = mem_SDRRC; + w[p0] = r0.l; + ssync; + + P2.H = hi(EBIU_SDGCTL); + P2.L = lo(EBIU_SDGCTL); + R0 = [P2]; + BITCLR (R0, 24); + p0.h = hi(EBIU_SDSTAT); + p0.l = lo(EBIU_SDSTAT); + r2.l = w[p0]; + cc = bittst(r2,3); + if !cc jump .Lskip; + NOP; + BITSET (R0, 23); +.Lskip: + [P2] = R0; + SSYNC; + + R0.L = lo(mem_SDGCTL); + R0.H = hi(mem_SDGCTL); + R1 = [p2]; + R1 = R1 | R0; + [P2] = R1; + SSYNC; + + RTS; +ENDPROC(_start_dma_code) +#endif /* CONFIG_BFIN_KERNEL_CLOCK */ diff --git a/arch/blackfin/mach-bf538/include/mach/anomaly.h b/arch/blackfin/mach-bf538/include/mach/anomaly.h new file mode 100644 index 000000000000..4df618ce2a6d --- /dev/null +++ b/arch/blackfin/mach-bf538/include/mach/anomaly.h @@ -0,0 +1,121 @@ +/* + * File: include/asm-blackfin/mach-bf538/anomaly.h + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * Copyright (C) 2004-2008 Analog Devices Inc. + * Licensed under the GPL-2 or later. + */ + +/* This file shoule be up to date with: + * - Revision F, 06/18/2008; ADSP-BF538/BF538F Blackfin Processor Anomaly List + * - Revision K, 06/18/2008; ADSP-BF539/BF539F Blackfin Processor Anomaly List + */ + +#ifndef _MACH_ANOMALY_H_ +#define _MACH_ANOMALY_H_ + +#if __SILICON_REVISION__ < 4 +# error will not work on BF538 silicon version 0.0, 0.1, 0.2 or 0.3 +#endif + +/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot2 Not Supported */ +#define ANOMALY_05000074 (1) +/* DMA_RUN Bit Is Not Valid after a Peripheral Receive Channel DMA Stops */ +#define ANOMALY_05000119 (1) +/* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */ +#define ANOMALY_05000122 (1) +/* PPI Data Lengths between 8 and 16 Do Not Zero Out Upper Bits */ +#define ANOMALY_05000166 (1) +/* PPI_COUNT Cannot Be Programmed to 0 in General Purpose TX or RX Modes */ +#define ANOMALY_05000179 (1) +/* PPI_DELAY Not Functional in PPI Modes with 0 Frame Syncs */ +#define ANOMALY_05000180 (1) +/* False I/O Pin Interrupts on Edge-Sensitive Inputs When Polarity Setting Is Changed */ +#define ANOMALY_05000193 (1) +/* Current DMA Address Shows Wrong Value During Carry Fix */ +#define ANOMALY_05000199 (__SILICON_REVISION__ < 4) +/* NMI Event at Boot Time Results in Unpredictable State */ +#define ANOMALY_05000219 (1) +/* SPI Slave Boot Mode Modifies Registers from Reset Value */ +#define ANOMALY_05000229 (1) +/* PPI_FS3 Is Not Driven in 2 or 3 Internal Frame Sync Transmit Modes */ +#define ANOMALY_05000233 (1) +/* If i-cache is on, CSYNC/SSYNC/IDLE around Change of Control causes failures */ +#define ANOMALY_05000244 (__SILICON_REVISION__ < 3) +/* Spurious Hardware Error from an Access in the Shadow of a Conditional Branch */ +#define ANOMALY_05000245 (1) +/* Maximum External Clock Speed for Timers */ +#define ANOMALY_05000253 (1) +/* DCPLB_FAULT_ADDR MMR register may be corrupted */ +#define ANOMALY_05000261 (__SILICON_REVISION__ < 3) +/* High I/O Activity Causes Output Voltage of Internal Voltage Regulator (Vddint) to Decrease */ +#define ANOMALY_05000270 (__SILICON_REVISION__ < 4) +/* Certain Data Cache Writethrough Modes Fail for Vddint <= 0.9V */ +#define ANOMALY_05000272 (1) +/* Writes to Synchronous SDRAM Memory May Be Lost */ +#define ANOMALY_05000273 (__SILICON_REVISION__ < 4) +/* Writes to an I/O Data Register One SCLK Cycle after an Edge Is Detected May Clear Interrupt */ +#define ANOMALY_05000277 (__SILICON_REVISION__ < 4) +/* Disabling Peripherals with DMA Running May Cause DMA System Instability */ +#define ANOMALY_05000278 (__SILICON_REVISION__ < 4) +/* False Hardware Error Exception when ISR Context Is Not Restored */ +#define ANOMALY_05000281 (__SILICON_REVISION__ < 4) +/* Memory DMA Corruption with 32-Bit Data and Traffic Control */ +#define ANOMALY_05000282 (__SILICON_REVISION__ < 4) +/* System MMR Write Is Stalled Indefinitely when Killed in a Particular Stage */ +#define ANOMALY_05000283 (__SILICON_REVISION__ < 4) +/* SPORTs May Receive Bad Data If FIFOs Fill Up */ +#define ANOMALY_05000288 (__SILICON_REVISION__ < 4) +/* Reads from CAN Mailbox and Acceptance Mask Area Can Fail */ +#define ANOMALY_05000291 (__SILICON_REVISION__ < 4) +/* Hibernate Leakage Current Is Higher Than Specified */ +#define ANOMALY_05000293 (__SILICON_REVISION__ < 4) +/* Timer Pin Limitations for PPI TX Modes with External Frame Syncs */ +#define ANOMALY_05000294 (1) +/* Memory-To-Memory DMA Source/Destination Descriptors Must Be in Same Memory Space */ +#define ANOMALY_05000301 (__SILICON_REVISION__ < 4) +/* SSYNCs After Writes To CAN/DMA MMR Registers Are Not Always Handled Correctly */ +#define ANOMALY_05000304 (__SILICON_REVISION__ < 4) +/* SCKELOW Bit Does Not Maintain State Through Hibernate */ +#define ANOMALY_05000307 (__SILICON_REVISION__ < 4) +/* False Hardware Errors Caused by Fetches at the Boundary of Reserved Memory */ +#define ANOMALY_05000310 (1) +/* Errors when SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted */ +#define ANOMALY_05000312 (__SILICON_REVISION__ < 5) +/* PPI Is Level-Sensitive on First Transfer */ +#define ANOMALY_05000313 (__SILICON_REVISION__ < 4) +/* Killed System MMR Write Completes Erroneously on Next System MMR Access */ +#define ANOMALY_05000315 (__SILICON_REVISION__ < 4) +/* PFx Glitch on Write to FIO_FLAG_D or FIO_FLAG_T */ +#define ANOMALY_05000318 (__SILICON_REVISION__ < 4) +/* Regulator Programming Blocked when Hibernate Wakeup Source Remains Active */ +#define ANOMALY_05000355 (__SILICON_REVISION__ < 5) +/* Serial Port (SPORT) Multichannel Transmit Failure when Channel 0 Is Disabled */ +#define ANOMALY_05000357 (__SILICON_REVISION__ < 5) +/* PPI Underflow Error Goes Undetected in ITU-R 656 Mode */ +#define ANOMALY_05000366 (1) +/* Possible RETS Register Corruption when Subroutine Is under 5 Cycles in Duration */ +#define ANOMALY_05000371 (__SILICON_REVISION__ < 5) +/* Entering Hibernate State with Peripheral Wakeups Enabled Draws Excess Current */ +#define ANOMALY_05000374 (__SILICON_REVISION__ == 4) +/* New Feature: Open-Drain GPIO Outputs on PC1 and PC4 (Not Available on Older Silicon) */ +#define ANOMALY_05000375 (__SILICON_REVISION__ < 4) +/* SSYNC Stalls Processor when Executed from Non-Cacheable Memory */ +#define ANOMALY_05000402 (__SILICON_REVISION__ < 4) +/* Level-Sensitive External GPIO Wakeups May Cause Indefinite Stall */ +#define ANOMALY_05000403 (1) +/* Speculative Fetches Can Cause Undesired External FIFO Operations */ +#define ANOMALY_05000416 (1) + +/* Anomalies that don't exist on this proc */ +#define ANOMALY_05000230 (0) +#define ANOMALY_05000353 (1) +#define ANOMALY_05000386 (1) +#define ANOMALY_05000198 (0) +#define ANOMALY_05000158 (0) +#define ANOMALY_05000311 (0) +#define ANOMALY_05000323 (0) +#define ANOMALY_05000263 (0) +#define ANOMALY_05000363 (0) + +#endif diff --git a/arch/blackfin/mach-bf538/include/mach/bf538.h b/arch/blackfin/mach-bf538/include/mach/bf538.h new file mode 100644 index 000000000000..c9e8197a29fe --- /dev/null +++ b/arch/blackfin/mach-bf538/include/mach/bf538.h @@ -0,0 +1,124 @@ +/* + * File: include/asm-blackfin/mach-bf538/bf538.h + * Based on: include/asm-blackfin/mach-bf537/bf537.h + * Author: Michael Hennerich (michael.hennerich@analog.com) + * + * Created: + * Description: SYSTEM MMR REGISTER AND MEMORY MAP FOR ADSP-BF527 + * + * Modified: + * Copyright 2004-2007 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef __MACH_BF538_H__ +#define __MACH_BF538_H__ + +#define OFFSET_(x) ((x) & 0x0000FFFF) + +/*some misc defines*/ +#define IMASK_IVG15 0x8000 +#define IMASK_IVG14 0x4000 +#define IMASK_IVG13 0x2000 +#define IMASK_IVG12 0x1000 + +#define IMASK_IVG11 0x0800 +#define IMASK_IVG10 0x0400 +#define IMASK_IVG9 0x0200 +#define IMASK_IVG8 0x0100 + +#define IMASK_IVG7 0x0080 +#define IMASK_IVGTMR 0x0040 +#define IMASK_IVGHW 0x0020 + +/***************************/ + +#define BFIN_DSUBBANKS 4 +#define BFIN_DWAYS 2 +#define BFIN_DLINES 64 +#define BFIN_ISUBBANKS 4 +#define BFIN_IWAYS 4 +#define BFIN_ILINES 32 + +#define WAY0_L 0x1 +#define WAY1_L 0x2 +#define WAY01_L 0x3 +#define WAY2_L 0x4 +#define WAY02_L 0x5 +#define WAY12_L 0x6 +#define WAY012_L 0x7 + +#define WAY3_L 0x8 +#define WAY03_L 0x9 +#define WAY13_L 0xA +#define WAY013_L 0xB + +#define WAY32_L 0xC +#define WAY320_L 0xD +#define WAY321_L 0xE +#define WAYALL_L 0xF + +#define DMC_ENABLE (2<<2) /*yes, 2, not 1 */ + +/********************************* EBIU Settings ************************************/ +#define AMBCTL0VAL ((CONFIG_BANK_1 << 16) | CONFIG_BANK_0) +#define AMBCTL1VAL ((CONFIG_BANK_3 << 16) | CONFIG_BANK_2) + +#ifdef CONFIG_C_AMBEN_ALL +#define V_AMBEN AMBEN_ALL +#endif +#ifdef CONFIG_C_AMBEN +#define V_AMBEN 0x0 +#endif +#ifdef CONFIG_C_AMBEN_B0 +#define V_AMBEN AMBEN_B0 +#endif +#ifdef CONFIG_C_AMBEN_B0_B1 +#define V_AMBEN AMBEN_B0_B1 +#endif +#ifdef CONFIG_C_AMBEN_B0_B1_B2 +#define V_AMBEN AMBEN_B0_B1_B2 +#endif +#ifdef CONFIG_C_AMCKEN +#define V_AMCKEN AMCKEN +#else +#define V_AMCKEN 0x0 +#endif +#ifdef CONFIG_C_CDPRIO +#define V_CDPRIO 0x100 +#else +#define V_CDPRIO 0x0 +#endif + +#define AMGCTLVAL (V_AMBEN | V_AMCKEN | V_CDPRIO) + +#ifdef CONFIG_BF538 +#define CPU "BF538" +#define CPUID 0x27C4 +#endif +#ifdef CONFIG_BF539 +#define CPU "BF539" +#define CPUID 0x27C4 /* FXIME:? */ +#endif + +#ifndef CPU +#error Unknown CPU type - This kernel doesn't seem to be configured properly +#endif + +#endif /* __MACH_BF538_H__ */ diff --git a/arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h new file mode 100644 index 000000000000..40503b6b89a3 --- /dev/null +++ b/arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h @@ -0,0 +1,183 @@ +/* + * file: include/asm-blackfin/mach-bf538/bfin_serial_5xx.h + * based on: + * author: + * + * created: + * description: + * blackfin serial driver header files + * rev: + * + * modified: + * + * + * bugs: enter bugs at http://blackfin.uclinux.org/ + * + * this program is free software; you can redistribute it and/or modify + * it under the terms of the gnu general public license as published by + * the free software foundation; either version 2, or (at your option) + * any later version. + * + * this program is distributed in the hope that it will be useful, + * but without any warranty; without even the implied warranty of + * merchantability or fitness for a particular purpose. see the + * gnu general public license for more details. + * + * you should have received a copy of the gnu general public license + * along with this program; see the file copying. + * if not, write to the free software foundation, + * 59 temple place - suite 330, boston, ma 02111-1307, usa. + */ + +#include +#include +#include + +#define UART_GET_CHAR(uart) bfin_read16(((uart)->port.membase + OFFSET_RBR)) +#define UART_GET_DLL(uart) bfin_read16(((uart)->port.membase + OFFSET_DLL)) +#define UART_GET_IER(uart) bfin_read16(((uart)->port.membase + OFFSET_IER)) +#define UART_GET_DLH(uart) bfin_read16(((uart)->port.membase + OFFSET_DLH)) +#define UART_GET_IIR(uart) bfin_read16(((uart)->port.membase + OFFSET_IIR)) +#define UART_GET_LCR(uart) bfin_read16(((uart)->port.membase + OFFSET_LCR)) +#define UART_GET_GCTL(uart) bfin_read16(((uart)->port.membase + OFFSET_GCTL)) + +#define UART_PUT_CHAR(uart, v) bfin_write16(((uart)->port.membase + OFFSET_THR), v) +#define UART_PUT_DLL(uart, v) bfin_write16(((uart)->port.membase + OFFSET_DLL), v) +#define UART_PUT_IER(uart, v) bfin_write16(((uart)->port.membase + OFFSET_IER), v) +#define UART_SET_IER(uart, v) UART_PUT_IER(uart, UART_GET_IER(uart) | (v)) +#define UART_CLEAR_IER(uart, v) UART_PUT_IER(uart, UART_GET_IER(uart) & ~(v)) +#define UART_PUT_DLH(uart, v) bfin_write16(((uart)->port.membase + OFFSET_DLH), v) +#define UART_PUT_LCR(uart, v) bfin_write16(((uart)->port.membase + OFFSET_LCR), v) +#define UART_PUT_GCTL(uart, v) bfin_write16(((uart)->port.membase + OFFSET_GCTL), v) + +#define UART_SET_DLAB(uart) do { UART_PUT_LCR(uart, UART_GET_LCR(uart) | DLAB); SSYNC(); } while (0) +#define UART_CLEAR_DLAB(uart) do { UART_PUT_LCR(uart, UART_GET_LCR(uart) & ~DLAB); SSYNC(); } while (0) + +#define UART_GET_CTS(x) gpio_get_value(x->cts_pin) +#define UART_SET_RTS(x) gpio_set_value(x->rts_pin, 1) +#define UART_CLEAR_RTS(x) gpio_set_value(x->rts_pin, 0) +#define UART_ENABLE_INTS(x, v) UART_PUT_IER(x, v) +#define UART_DISABLE_INTS(x) UART_PUT_IER(x, 0) + +#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) +# define CONFIG_SERIAL_BFIN_CTSRTS + +# ifndef CONFIG_UART0_CTS_PIN +# define CONFIG_UART0_CTS_PIN -1 +# endif + +# ifndef CONFIG_UART0_RTS_PIN +# define CONFIG_UART0_RTS_PIN -1 +# endif + +# ifndef CONFIG_UART1_CTS_PIN +# define CONFIG_UART1_CTS_PIN -1 +# endif + +# ifndef CONFIG_UART1_RTS_PIN +# define CONFIG_UART1_RTS_PIN -1 +# endif +#endif + +#define BFIN_UART_TX_FIFO_SIZE 2 + +/* + * The pin configuration is different from schematic + */ +struct bfin_serial_port { + struct uart_port port; + unsigned int old_status; + unsigned int lsr; +#ifdef CONFIG_SERIAL_BFIN_DMA + int tx_done; + int tx_count; + struct circ_buf rx_dma_buf; + struct timer_list rx_dma_timer; + int rx_dma_nrows; + unsigned int tx_dma_channel; + unsigned int rx_dma_channel; + struct work_struct tx_dma_workqueue; +#endif +#ifdef CONFIG_SERIAL_BFIN_CTSRTS + struct timer_list cts_timer; + int cts_pin; + int rts_pin; +#endif +}; + +/* The hardware clears the LSR bits upon read, so we need to cache + * some of the more fun bits in software so they don't get lost + * when checking the LSR in other code paths (TX). + */ +static inline unsigned int UART_GET_LSR(struct bfin_serial_port *uart) +{ + unsigned int lsr = bfin_read16(uart->port.membase + OFFSET_LSR); + uart->lsr |= (lsr & (BI|FE|PE|OE)); + return lsr | uart->lsr; +} + +static inline void UART_CLEAR_LSR(struct bfin_serial_port *uart) +{ + uart->lsr = 0; + bfin_write16(uart->port.membase + OFFSET_LSR, -1); +} + +struct bfin_serial_res { + unsigned long uart_base_addr; + int uart_irq; +#ifdef CONFIG_SERIAL_BFIN_DMA + unsigned int uart_tx_dma_channel; + unsigned int uart_rx_dma_channel; +#endif +#ifdef CONFIG_SERIAL_BFIN_CTSRTS + int uart_cts_pin; + int uart_rts_pin; +#endif +}; + +struct bfin_serial_res bfin_serial_resource[] = { +#ifdef CONFIG_SERIAL_BFIN_UART0 + { + 0xFFC00400, + IRQ_UART0_RX, +#ifdef CONFIG_SERIAL_BFIN_DMA + CH_UART0_TX, + CH_UART0_RX, +#endif +#ifdef CONFIG_BFIN_UART0_CTSRTS + CONFIG_UART0_CTS_PIN, + CONFIG_UART0_RTS_PIN, +#endif + }, +#endif +#ifdef CONFIG_SERIAL_BFIN_UART1 + { + 0xFFC02000, + IRQ_UART1_RX, +#ifdef CONFIG_SERIAL_BFIN_DMA + CH_UART1_TX, + CH_UART1_RX, +#endif +#ifdef CONFIG_BFIN_UART1_CTSRTS + CONFIG_UART1_CTS_PIN, + CONFIG_UART1_RTS_PIN, +#endif + }, +#endif +#ifdef CONFIG_SERIAL_BFIN_UART2 + { + 0xFFC02100, + IRQ_UART2_RX, +#ifdef CONFIG_SERIAL_BFIN_DMA + CH_UART2_TX, + CH_UART2_RX, +#endif +#ifdef CONFIG_BFIN_UART2_CTSRTS + CONFIG_UART2_CTS_PIN, + CONFIG_UART2_RTS_PIN, +#endif + }, +#endif +}; + +#define DRIVER_NAME "bfin-uart" diff --git a/arch/blackfin/mach-bf538/include/mach/bfin_sir.h b/arch/blackfin/mach-bf538/include/mach/bfin_sir.h new file mode 100644 index 000000000000..b2b546d0b9d3 --- /dev/null +++ b/arch/blackfin/mach-bf538/include/mach/bfin_sir.h @@ -0,0 +1,159 @@ +/* + * Blackfin Infra-red Driver + * + * Copyright 2006-2008 Analog Devices Inc. + * + * Enter bugs at http://blackfin.uclinux.org/ + * + * Licensed under the GPL-2 or later. + * + */ + +#include +#include +#include + +#define SIR_UART_GET_CHAR(port) bfin_read16((port)->membase + OFFSET_RBR) +#define SIR_UART_GET_DLL(port) bfin_read16((port)->membase + OFFSET_DLL) +#define SIR_UART_GET_IER(port) bfin_read16((port)->membase + OFFSET_IER) +#define SIR_UART_GET_DLH(port) bfin_read16((port)->membase + OFFSET_DLH) +#define SIR_UART_GET_IIR(port) bfin_read16((port)->membase + OFFSET_IIR) +#define SIR_UART_GET_LCR(port) bfin_read16((port)->membase + OFFSET_LCR) +#define SIR_UART_GET_GCTL(port) bfin_read16((port)->membase + OFFSET_GCTL) + +#define SIR_UART_PUT_CHAR(port, v) bfin_write16(((port)->membase + OFFSET_THR), v) +#define SIR_UART_PUT_DLL(port, v) bfin_write16(((port)->membase + OFFSET_DLL), v) +#define SIR_UART_PUT_IER(port, v) bfin_write16(((port)->membase + OFFSET_IER), v) +#define SIR_UART_PUT_DLH(port, v) bfin_write16(((port)->membase + OFFSET_DLH), v) +#define SIR_UART_PUT_LCR(port, v) bfin_write16(((port)->membase + OFFSET_LCR), v) +#define SIR_UART_PUT_GCTL(port, v) bfin_write16(((port)->membase + OFFSET_GCTL), v) + +#ifdef CONFIG_SIR_BFIN_DMA +struct dma_rx_buf { + char *buf; + int head; + int tail; + }; +#endif /* CONFIG_SIR_BFIN_DMA */ + +struct bfin_sir_port { + unsigned char __iomem *membase; + unsigned int irq; + unsigned int lsr; + unsigned long clk; + struct net_device *dev; +#ifdef CONFIG_SIR_BFIN_DMA + int tx_done; + struct dma_rx_buf rx_dma_buf; + struct timer_list rx_dma_timer; + int rx_dma_nrows; +#endif /* CONFIG_SIR_BFIN_DMA */ + unsigned int tx_dma_channel; + unsigned int rx_dma_channel; +}; + +struct bfin_sir_port sir_ports[BFIN_UART_NR_PORTS]; + +struct bfin_sir_port_res { + unsigned long base_addr; + int irq; + unsigned int rx_dma_channel; + unsigned int tx_dma_channel; +}; + +struct bfin_sir_port_res bfin_sir_port_resource[] = { +#ifdef CONFIG_BFIN_SIR0 + { + 0xFFC00400, + IRQ_UART0_RX, + CH_UART0_RX, + CH_UART0_TX, + }, +#endif +#ifdef CONFIG_BFIN_SIR1 + { + 0xFFC02000, + IRQ_UART1_RX, + CH_UART1_RX, + CH_UART1_TX, + }, +#endif +#ifdef CONFIG_BFIN_SIR2 + { + 0xFFC02100, + IRQ_UART2_RX, + CH_UART2_RX, + CH_UART2_TX, + }, +#endif +}; + +int nr_sirs = ARRAY_SIZE(bfin_sir_port_resource); + +struct bfin_sir_self { + struct bfin_sir_port *sir_port; + spinlock_t lock; + unsigned int open; + int speed; + int newspeed; + + struct sk_buff *txskb; + struct sk_buff *rxskb; + struct net_device_stats stats; + struct device *dev; + struct irlap_cb *irlap; + struct qos_info qos; + + iobuff_t tx_buff; + iobuff_t rx_buff; + + struct work_struct work; + int mtt; +}; + +static inline unsigned int SIR_UART_GET_LSR(struct bfin_sir_port *port) +{ + unsigned int lsr = bfin_read16(port->membase + OFFSET_LSR); + port->lsr |= (lsr & (BI|FE|PE|OE)); + return lsr | port->lsr; +} + +static inline void SIR_UART_CLEAR_LSR(struct bfin_sir_port *port) +{ + port->lsr = 0; + bfin_read16(port->membase + OFFSET_LSR); +} + +#define DRIVER_NAME "bfin_sir" + +static int bfin_sir_hw_init(void) +{ + int ret = -ENODEV; +#ifdef CONFIG_BFIN_SIR0 + ret = peripheral_request(P_UART0_TX, DRIVER_NAME); + if (ret) + return ret; + ret = peripheral_request(P_UART0_RX, DRIVER_NAME); + if (ret) + return ret; +#endif + +#ifdef CONFIG_BFIN_SIR1 + ret = peripheral_request(P_UART1_TX, DRIVER_NAME); + if (ret) + return ret; + ret = peripheral_request(P_UART1_RX, DRIVER_NAME); + if (ret) + return ret; +#endif + +#ifdef CONFIG_BFIN_SIR2 + ret = peripheral_request(P_UART2_TX, DRIVER_NAME); + if (ret) + return ret; + ret = peripheral_request(P_UART2_RX, DRIVER_NAME); + if (ret) + return ret; +#endif + return ret; +} diff --git a/arch/blackfin/mach-bf538/include/mach/blackfin.h b/arch/blackfin/mach-bf538/include/mach/blackfin.h new file mode 100644 index 000000000000..d10366f6847f --- /dev/null +++ b/arch/blackfin/mach-bf538/include/mach/blackfin.h @@ -0,0 +1,100 @@ +/* + * File: include/asm-blackfin/mach-bf538/blackfin.h + * Based on: + * Author: + * + * Created: + * Description: + * + * Rev: + * + * Modified: + * + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _MACH_BLACKFIN_H_ +#define _MACH_BLACKFIN_H_ + +#define BF538_FAMILY + +#include "bf538.h" +#include "mem_map.h" +#include "defBF539.h" +#include "anomaly.h" + + +#if !defined(__ASSEMBLY__) +#include "cdefBF538.h" + +#if defined(CONFIG_BF539) +#include "cdefBF539.h" +#endif +#endif + +/* UART_IIR Register */ +#define STATUS(x) ((x << 1) & 0x06) +#define STATUS_P1 0x02 +#define STATUS_P0 0x01 + +#define BFIN_UART_NR_PORTS 3 + +#define OFFSET_THR 0x00 /* Transmit Holding register */ +#define OFFSET_RBR 0x00 /* Receive Buffer register */ +#define OFFSET_DLL 0x00 /* Divisor Latch (Low-Byte) */ +#define OFFSET_IER 0x04 /* Interrupt Enable Register */ +#define OFFSET_DLH 0x04 /* Divisor Latch (High-Byte) */ +#define OFFSET_IIR 0x08 /* Interrupt Identification Register */ +#define OFFSET_LCR 0x0C /* Line Control Register */ +#define OFFSET_MCR 0x10 /* Modem Control Register */ +#define OFFSET_LSR 0x14 /* Line Status Register */ +#define OFFSET_MSR 0x18 /* Modem Status Register */ +#define OFFSET_SCR 0x1C /* SCR Scratch Register */ +#define OFFSET_GCTL 0x24 /* Global Control Register */ + + +#define bfin_write_MDMA_D0_IRQ_STATUS bfin_write_MDMA0_D0_IRQ_STATUS +#define bfin_write_MDMA_D0_START_ADDR bfin_write_MDMA0_D0_START_ADDR +#define bfin_write_MDMA_S0_START_ADDR bfin_write_MDMA0_S0_START_ADDR +#define bfin_write_MDMA_D0_X_COUNT bfin_write_MDMA0_D0_X_COUNT +#define bfin_write_MDMA_S0_X_COUNT bfin_write_MDMA0_S0_X_COUNT +#define bfin_write_MDMA_D0_Y_COUNT bfin_write_MDMA0_D0_Y_COUNT +#define bfin_write_MDMA_S0_Y_COUNT bfin_write_MDMA0_S0_Y_COUNT +#define bfin_write_MDMA_D0_X_MODIFY bfin_write_MDMA0_D0_X_MODIFY +#define bfin_write_MDMA_S0_X_MODIFY bfin_write_MDMA0_S0_X_MODIFY +#define bfin_write_MDMA_D0_Y_MODIFY bfin_write_MDMA0_D0_Y_MODIFY +#define bfin_write_MDMA_S0_Y_MODIFY bfin_write_MDMA0_S0_Y_MODIFY +#define bfin_write_MDMA_S0_CONFIG bfin_write_MDMA0_S0_CONFIG +#define bfin_write_MDMA_D0_CONFIG bfin_write_MDMA0_D0_CONFIG +#define bfin_read_MDMA_D0_IRQ_STATUS bfin_read_MDMA0_D0_IRQ_STATUS +#define bfin_write_MDMA_S0_IRQ_STATUS bfin_write_MDMA0_S0_IRQ_STATUS + + +/* DPMC*/ +#define bfin_read_STOPCK_OFF() bfin_read_STOPCK() +#define bfin_write_STOPCK_OFF(val) bfin_write_STOPCK(val) +#define STOPCK_OFF STOPCK + +/* PLL_DIV Masks */ +#define CCLK_DIV1 CSEL_DIV1 /* CCLK = VCO / 1 */ +#define CCLK_DIV2 CSEL_DIV2 /* CCLK = VCO / 2 */ +#define CCLK_DIV4 CSEL_DIV4 /* CCLK = VCO / 4 */ +#define CCLK_DIV8 CSEL_DIV8 /* CCLK = VCO / 8 */ + +#endif diff --git a/arch/blackfin/mach-bf538/include/mach/cdefBF538.h b/arch/blackfin/mach-bf538/include/mach/cdefBF538.h new file mode 100644 index 000000000000..f92e7c3932f3 --- /dev/null +++ b/arch/blackfin/mach-bf538/include/mach/cdefBF538.h @@ -0,0 +1,2105 @@ +/* + * File: include/asm-blackfin/mach-bf538/cdefBF538.h + * Based on: + * Author: + * + * Created: + * Description: + * + * Rev: + * + * Modified: + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _CDEF_BF538_H +#define _CDEF_BF538_H + +#include + +/*include all Core registers and bit definitions*/ +#include "defBF539.h" + +/*include core specific register pointer definitions*/ +#include + +#include + +#define bfin_writePTR(addr, val) bfin_write32(addr, val) + +#define bfin_read_PLL_CTL() bfin_read16(PLL_CTL) +/* Writing to PLL_CTL initiates a PLL relock sequence. */ +static __inline__ void bfin_write_PLL_CTL(unsigned int val) +{ + unsigned long flags, iwr0, iwr1; + + if (val == bfin_read_PLL_CTL()) + return; + + local_irq_save(flags); + /* Enable the PLL Wakeup bit in SIC IWR */ + iwr0 = bfin_read32(SIC_IWR0); + iwr1 = bfin_read32(SIC_IWR1); + /* Only allow PPL Wakeup) */ + bfin_write32(SIC_IWR0, IWR_ENABLE(0)); + bfin_write32(SIC_IWR1, 0); + + bfin_write16(PLL_CTL, val); + SSYNC(); + asm("IDLE;"); + + bfin_write32(SIC_IWR0, iwr0); + bfin_write32(SIC_IWR1, iwr1); + local_irq_restore(flags); +} +#define bfin_read_PLL_DIV() bfin_read16(PLL_DIV) +#define bfin_write_PLL_DIV(val) bfin_write16(PLL_DIV, val) +#define bfin_read_VR_CTL() bfin_read16(VR_CTL) +/* Writing to VR_CTL initiates a PLL relock sequence. */ +static __inline__ void bfin_write_VR_CTL(unsigned int val) +{ + unsigned long flags, iwr0, iwr1; + + if (val == bfin_read_VR_CTL()) + return; + + local_irq_save(flags); + /* Enable the PLL Wakeup bit in SIC IWR */ + iwr0 = bfin_read32(SIC_IWR0); + iwr1 = bfin_read32(SIC_IWR1); + /* Only allow PPL Wakeup) */ + bfin_write32(SIC_IWR0, IWR_ENABLE(0)); + bfin_write32(SIC_IWR1, 0); + + bfin_write16(VR_CTL, val); + SSYNC(); + asm("IDLE;"); + + bfin_write32(SIC_IWR0, iwr0); + bfin_write32(SIC_IWR1, iwr1); + local_irq_restore(flags); +} +#define bfin_read_PLL_STAT() bfin_read16(PLL_STAT) +#define bfin_write_PLL_STAT(val) bfin_write16(PLL_STAT, val) +#define bfin_read_PLL_LOCKCNT() bfin_read16(PLL_LOCKCNT) +#define bfin_write_PLL_LOCKCNT(val) bfin_write16(PLL_LOCKCNT, val) +#define bfin_read_CHIPID() bfin_read32(CHIPID) +#define bfin_write_CHIPID(val) bfin_write32(CHIPID, val) +#define bfin_read_SWRST() bfin_read16(SWRST) +#define bfin_write_SWRST(val) bfin_write16(SWRST, val) +#define bfin_read_SYSCR() bfin_read16(SYSCR) +#define bfin_write_SYSCR(val) bfin_write16(SYSCR, val) +#define bfin_read_SIC_RVECT() bfin_readPTR(SIC_RVECT) +#define bfin_write_SIC_RVECT(val) bfin_writePTR(SIC_RVECT, val) +#define bfin_read_SIC_IMASK0() bfin_read32(SIC_IMASK0) +#define bfin_write_SIC_IMASK0(val) bfin_write32(SIC_IMASK0, val) +#define bfin_read_SIC_IMASK1() bfin_read32(SIC_IMASK1) +#define bfin_write_SIC_IMASK1(val) bfin_write32(SIC_IMASK1, val) +#define bfin_read_SIC_IMASK(x) bfin_read32(SIC_IMASK0 + x * (SIC_IMASK1 - SIC_IMASK0)) +#define bfin_write_SIC_IMASK(x, val) bfin_write32(SIC_IMASK0 + x * (SIC_IMASK1 - SIC_IMASK0), val) +#define bfin_read_SIC_ISR0() bfin_read32(SIC_ISR0) +#define bfin_write_SIC_ISR0(val) bfin_write32(SIC_ISR0, val) +#define bfin_read_SIC_ISR1() bfin_read32(SIC_ISR1) +#define bfin_write_SIC_ISR1(val) bfin_write32(SIC_ISR1, val) +#define bfin_read_SIC_ISR(x) bfin_read32(SIC_ISR0 + x * (SIC_ISR1 - SIC_ISR0)) +#define bfin_write_SIC_ISR(x, val) bfin_write32(SIC_ISR0 + x * (SIC_ISR1 - SIC_ISR0), val) +#define bfin_read_SIC_IWR0() bfin_read32(SIC_IWR0) +#define bfin_write_SIC_IWR0(val) bfin_write32(SIC_IWR0, val) +#define bfin_read_SIC_IWR1() bfin_read32(SIC_IWR1) +#define bfin_write_SIC_IWR1(val) bfin_write32(SIC_IWR1, val) +#define bfin_read_SIC_IWR(x) bfin_read32(SIC_IWR0 + x * (SIC_IWR1 - SIC_IWR0)) +#define bfin_write_SIC_IWR(x, val) bfin_write32((SIC_IWR0 + x * (SIC_IWR1 - SIC_IWR0), val) +#define bfin_read_SIC_IAR0() bfin_read32(SIC_IAR0) +#define bfin_write_SIC_IAR0(val) bfin_write32(SIC_IAR0, val) +#define bfin_read_SIC_IAR1() bfin_read32(SIC_IAR1) +#define bfin_write_SIC_IAR1(val) bfin_write32(SIC_IAR1, val) +#define bfin_read_SIC_IAR2() bfin_read32(SIC_IAR2) +#define bfin_write_SIC_IAR2(val) bfin_write32(SIC_IAR2, val) +#define bfin_read_SIC_IAR3() bfin_read32(SIC_IAR3) +#define bfin_write_SIC_IAR3(val) bfin_write32(SIC_IAR3, val) +#define bfin_read_SIC_IAR4() bfin_read32(SIC_IAR4) +#define bfin_write_SIC_IAR4(val) bfin_write32(SIC_IAR4, val) +#define bfin_read_SIC_IAR5() bfin_read32(SIC_IAR5) +#define bfin_write_SIC_IAR5(val) bfin_write32(SIC_IAR5, val) +#define bfin_read_SIC_IAR6() bfin_read32(SIC_IAR6) +#define bfin_write_SIC_IAR6(val) bfin_write32(SIC_IAR6, val) +#define bfin_read_WDOG_CTL() bfin_read16(WDOG_CTL) +#define bfin_write_WDOG_CTL(val) bfin_write16(WDOG_CTL, val) +#define bfin_read_WDOG_CNT() bfin_read32(WDOG_CNT) +#define bfin_write_WDOG_CNT(val) bfin_write32(WDOG_CNT, val) +#define bfin_read_WDOG_STAT() bfin_read32(WDOG_STAT) +#define bfin_write_WDOG_STAT(val) bfin_write32(WDOG_STAT, val) +#define bfin_read_RTC_STAT() bfin_read32(RTC_STAT) +#define bfin_write_RTC_STAT(val) bfin_write32(RTC_STAT, val) +#define bfin_read_RTC_ICTL() bfin_read16(RTC_ICTL) +#define bfin_write_RTC_ICTL(val) bfin_write16(RTC_ICTL, val) +#define bfin_read_RTC_ISTAT() bfin_read16(RTC_ISTAT) +#define bfin_write_RTC_ISTAT(val) bfin_write16(RTC_ISTAT, val) +#define bfin_read_RTC_SWCNT() bfin_read16(RTC_SWCNT) +#define bfin_write_RTC_SWCNT(val) bfin_write16(RTC_SWCNT, val) +#define bfin_read_RTC_ALARM() bfin_read32(RTC_ALARM) +#define bfin_write_RTC_ALARM(val) bfin_write32(RTC_ALARM, val) +#define bfin_read_RTC_PREN() bfin_read16(RTC_PREN) +#define bfin_write_RTC_PREN(val) bfin_write16(RTC_PREN, val) +#define bfin_read_UART0_THR() bfin_read16(UART0_THR) +#define bfin_write_UART0_THR(val) bfin_write16(UART0_THR, val) +#define bfin_read_UART0_RBR() bfin_read16(UART0_RBR) +#define bfin_write_UART0_RBR(val) bfin_write16(UART0_RBR, val) +#define bfin_read_UART0_DLL() bfin_read16(UART0_DLL) +#define bfin_write_UART0_DLL(val) bfin_write16(UART0_DLL, val) +#define bfin_read_UART0_DLH() bfin_read16(UART0_DLH) +#define bfin_write_UART0_DLH(val) bfin_write16(UART0_DLH, val) +#define bfin_read_UART0_IER() bfin_read16(UART0_IER) +#define bfin_write_UART0_IER(val) bfin_write16(UART0_IER, val) +#define bfin_read_UART0_IIR() bfin_read16(UART0_IIR) +#define bfin_write_UART0_IIR(val) bfin_write16(UART0_IIR, val) +#define bfin_read_UART0_LCR() bfin_read16(UART0_LCR) +#define bfin_write_UART0_LCR(val) bfin_write16(UART0_LCR, val) +#define bfin_read_UART0_MCR() bfin_read16(UART0_MCR) +#define bfin_write_UART0_MCR(val) bfin_write16(UART0_MCR, val) +#define bfin_read_UART0_LSR() bfin_read16(UART0_LSR) +#define bfin_write_UART0_LSR(val) bfin_write16(UART0_LSR, val) +#define bfin_read_UART0_SCR() bfin_read16(UART0_SCR) +#define bfin_write_UART0_SCR(val) bfin_write16(UART0_SCR, val) +#define bfin_read_UART0_GCTL() bfin_read16(UART0_GCTL) +#define bfin_write_UART0_GCTL(val) bfin_write16(UART0_GCTL, val) +#define bfin_read_UART1_THR() bfin_read16(UART1_THR) +#define bfin_write_UART1_THR(val) bfin_write16(UART1_THR, val) +#define bfin_read_UART1_RBR() bfin_read16(UART1_RBR) +#define bfin_write_UART1_RBR(val) bfin_write16(UART1_RBR, val) +#define bfin_read_UART1_DLL() bfin_read16(UART1_DLL) +#define bfin_write_UART1_DLL(val) bfin_write16(UART1_DLL, val) +#define bfin_read_UART1_DLH() bfin_read16(UART1_DLH) +#define bfin_write_UART1_DLH(val) bfin_write16(UART1_DLH, val) +#define bfin_read_UART1_IER() bfin_read16(UART1_IER) +#define bfin_write_UART1_IER(val) bfin_write16(UART1_IER, val) +#define bfin_read_UART1_IIR() bfin_read16(UART1_IIR) +#define bfin_write_UART1_IIR(val) bfin_write16(UART1_IIR, val) +#define bfin_read_UART1_LCR() bfin_read16(UART1_LCR) +#define bfin_write_UART1_LCR(val) bfin_write16(UART1_LCR, val) +#define bfin_read_UART1_MCR() bfin_read16(UART1_MCR) +#define bfin_write_UART1_MCR(val) bfin_write16(UART1_MCR, val) +#define bfin_read_UART1_LSR() bfin_read16(UART1_LSR) +#define bfin_write_UART1_LSR(val) bfin_write16(UART1_LSR, val) +#define bfin_read_UART1_SCR() bfin_read16(UART1_SCR) +#define bfin_write_UART1_SCR(val) bfin_write16(UART1_SCR, val) +#define bfin_read_UART1_GCTL() bfin_read16(UART1_GCTL) +#define bfin_write_UART1_GCTL(val) bfin_write16(UART1_GCTL, val) +#define bfin_read_UART2_THR() bfin_read16(UART2_THR) +#define bfin_write_UART2_THR(val) bfin_write16(UART2_THR, val) +#define bfin_read_UART2_RBR() bfin_read16(UART2_RBR) +#define bfin_write_UART2_RBR(val) bfin_write16(UART2_RBR, val) +#define bfin_read_UART2_DLL() bfin_read16(UART2_DLL) +#define bfin_write_UART2_DLL(val) bfin_write16(UART2_DLL, val) +#define bfin_read_UART2_DLH() bfin_read16(UART2_DLH) +#define bfin_write_UART2_DLH(val) bfin_write16(UART2_DLH, val) +#define bfin_read_UART2_IER() bfin_read16(UART2_IER) +#define bfin_write_UART2_IER(val) bfin_write16(UART2_IER, val) +#define bfin_read_UART2_IIR() bfin_read16(UART2_IIR) +#define bfin_write_UART2_IIR(val) bfin_write16(UART2_IIR, val) +#define bfin_read_UART2_LCR() bfin_read16(UART2_LCR) +#define bfin_write_UART2_LCR(val) bfin_write16(UART2_LCR, val) +#define bfin_read_UART2_MCR() bfin_read16(UART2_MCR) +#define bfin_write_UART2_MCR(val) bfin_write16(UART2_MCR, val) +#define bfin_read_UART2_LSR() bfin_read16(UART2_LSR) +#define bfin_write_UART2_LSR(val) bfin_write16(UART2_LSR, val) +#define bfin_read_UART2_SCR() bfin_read16(UART2_SCR) +#define bfin_write_UART2_SCR(val) bfin_write16(UART2_SCR, val) +#define bfin_read_UART2_GCTL() bfin_read16(UART2_GCTL) +#define bfin_write_UART2_GCTL(val) bfin_write16(UART2_GCTL, val) +#define bfin_read_SPI0_CTL() bfin_read16(SPI0_CTL) +#define bfin_write_SPI0_CTL(val) bfin_write16(SPI0_CTL, val) +#define bfin_read_SPI0_FLG() bfin_read16(SPI0_FLG) +#define bfin_write_SPI0_FLG(val) bfin_write16(SPI0_FLG, val) +#define bfin_read_SPI0_STAT() bfin_read16(SPI0_STAT) +#define bfin_write_SPI0_STAT(val) bfin_write16(SPI0_STAT, val) +#define bfin_read_SPI0_TDBR() bfin_read16(SPI0_TDBR) +#define bfin_write_SPI0_TDBR(val) bfin_write16(SPI0_TDBR, val) +#define bfin_read_SPI0_RDBR() bfin_read16(SPI0_RDBR) +#define bfin_write_SPI0_RDBR(val) bfin_write16(SPI0_RDBR, val) +#define bfin_read_SPI0_BAUD() bfin_read16(SPI0_BAUD) +#define bfin_write_SPI0_BAUD(val) bfin_write16(SPI0_BAUD, val) +#define bfin_read_SPI0_SHADOW() bfin_read16(SPI0_SHADOW) +#define bfin_write_SPI0_SHADOW(val) bfin_write16(SPI0_SHADOW, val) +#define bfin_read_SPI1_CTL() bfin_read16(SPI1_CTL) +#define bfin_write_SPI1_CTL(val) bfin_write16(SPI1_CTL, val) +#define bfin_read_SPI1_FLG() bfin_read16(SPI1_FLG) +#define bfin_write_SPI1_FLG(val) bfin_write16(SPI1_FLG, val) +#define bfin_read_SPI1_STAT() bfin_read16(SPI1_STAT) +#define bfin_write_SPI1_STAT(val) bfin_write16(SPI1_STAT, val) +#define bfin_read_SPI1_TDBR() bfin_read16(SPI1_TDBR) +#define bfin_write_SPI1_TDBR(val) bfin_write16(SPI1_TDBR, val) +#define bfin_read_SPI1_RDBR() bfin_read16(SPI1_RDBR) +#define bfin_write_SPI1_RDBR(val) bfin_write16(SPI1_RDBR, val) +#define bfin_read_SPI1_BAUD() bfin_read16(SPI1_BAUD) +#define bfin_write_SPI1_BAUD(val) bfin_write16(SPI1_BAUD, val) +#define bfin_read_SPI1_SHADOW() bfin_read16(SPI1_SHADOW) +#define bfin_write_SPI1_SHADOW(val) bfin_write16(SPI1_SHADOW, val) +#define bfin_read_SPI2_CTL() bfin_read16(SPI2_CTL) +#define bfin_write_SPI2_CTL(val) bfin_write16(SPI2_CTL, val) +#define bfin_read_SPI2_FLG() bfin_read16(SPI2_FLG) +#define bfin_write_SPI2_FLG(val) bfin_write16(SPI2_FLG, val) +#define bfin_read_SPI2_STAT() bfin_read16(SPI2_STAT) +#define bfin_write_SPI2_STAT(val) bfin_write16(SPI2_STAT, val) +#define bfin_read_SPI2_TDBR() bfin_read16(SPI2_TDBR) +#define bfin_write_SPI2_TDBR(val) bfin_write16(SPI2_TDBR, val) +#define bfin_read_SPI2_RDBR() bfin_read16(SPI2_RDBR) +#define bfin_write_SPI2_RDBR(val) bfin_write16(SPI2_RDBR, val) +#define bfin_read_SPI2_BAUD() bfin_read16(SPI2_BAUD) +#define bfin_write_SPI2_BAUD(val) bfin_write16(SPI2_BAUD, val) +#define bfin_read_SPI2_SHADOW() bfin_read16(SPI2_SHADOW) +#define bfin_write_SPI2_SHADOW(val) bfin_write16(SPI2_SHADOW, val) +#define bfin_read_TIMER0_CONFIG() bfin_read16(TIMER0_CONFIG) +#define bfin_write_TIMER0_CONFIG(val) bfin_write16(TIMER0_CONFIG, val) +#define bfin_read_TIMER0_COUNTER() bfin_read32(TIMER0_COUNTER) +#define bfin_write_TIMER0_COUNTER(val) bfin_write32(TIMER0_COUNTER, val) +#define bfin_read_TIMER0_PERIOD() bfin_read32(TIMER0_PERIOD) +#define bfin_write_TIMER0_PERIOD(val) bfin_write32(TIMER0_PERIOD, val) +#define bfin_read_TIMER0_WIDTH() bfin_read32(TIMER0_WIDTH) +#define bfin_write_TIMER0_WIDTH(val) bfin_write32(TIMER0_WIDTH, val) +#define bfin_read_TIMER1_CONFIG() bfin_read16(TIMER1_CONFIG) +#define bfin_write_TIMER1_CONFIG(val) bfin_write16(TIMER1_CONFIG, val) +#define bfin_read_TIMER1_COUNTER() bfin_read32(TIMER1_COUNTER) +#define bfin_write_TIMER1_COUNTER(val) bfin_write32(TIMER1_COUNTER, val) +#define bfin_read_TIMER1_PERIOD() bfin_read32(TIMER1_PERIOD) +#define bfin_write_TIMER1_PERIOD(val) bfin_write32(TIMER1_PERIOD, val) +#define bfin_read_TIMER1_WIDTH() bfin_read32(TIMER1_WIDTH) +#define bfin_write_TIMER1_WIDTH(val) bfin_write32(TIMER1_WIDTH, val) +#define bfin_read_TIMER2_CONFIG() bfin_read16(TIMER2_CONFIG) +#define bfin_write_TIMER2_CONFIG(val) bfin_write16(TIMER2_CONFIG, val) +#define bfin_read_TIMER2_COUNTER() bfin_read32(TIMER2_COUNTER) +#define bfin_write_TIMER2_COUNTER(val) bfin_write32(TIMER2_COUNTER, val) +#define bfin_read_TIMER2_PERIOD() bfin_read32(TIMER2_PERIOD) +#define bfin_write_TIMER2_PERIOD(val) bfin_write32(TIMER2_PERIOD, val) +#define bfin_read_TIMER2_WIDTH() bfin_read32(TIMER2_WIDTH) +#define bfin_write_TIMER2_WIDTH(val) bfin_write32(TIMER2_WIDTH, val) +#define bfin_read_TIMER_ENABLE() bfin_read16(TIMER_ENABLE) +#define bfin_write_TIMER_ENABLE(val) bfin_write16(TIMER_ENABLE, val) +#define bfin_read_TIMER_DISABLE() bfin_read16(TIMER_DISABLE) +#define bfin_write_TIMER_DISABLE(val) bfin_write16(TIMER_DISABLE, val) +#define bfin_read_TIMER_STATUS() bfin_read16(TIMER_STATUS) +#define bfin_write_TIMER_STATUS(val) bfin_write16(TIMER_STATUS, val) +#define bfin_read_SPORT0_TCR1() bfin_read16(SPORT0_TCR1) +#define bfin_write_SPORT0_TCR1(val) bfin_write16(SPORT0_TCR1, val) +#define bfin_read_SPORT0_TCR2() bfin_read16(SPORT0_TCR2) +#define bfin_write_SPORT0_TCR2(val) bfin_write16(SPORT0_TCR2, val) +#define bfin_read_SPORT0_TCLKDIV() bfin_read16(SPORT0_TCLKDIV) +#define bfin_write_SPORT0_TCLKDIV(val) bfin_write16(SPORT0_TCLKDIV, val) +#define bfin_read_SPORT0_TFSDIV() bfin_read16(SPORT0_TFSDIV) +#define bfin_write_SPORT0_TFSDIV(val) bfin_write16(SPORT0_TFSDIV, val) +#define bfin_read_SPORT0_TX() bfin_read32(SPORT0_TX) +#define bfin_write_SPORT0_TX(val) bfin_write32(SPORT0_TX, val) +#define bfin_read_SPORT0_RX() bfin_read32(SPORT0_RX) +#define bfin_write_SPORT0_RX(val) bfin_write32(SPORT0_RX, val) +#define bfin_read_SPORT0_RCR1() bfin_read16(SPORT0_RCR1) +#define bfin_write_SPORT0_RCR1(val) bfin_write16(SPORT0_RCR1, val) +#define bfin_read_SPORT0_RCR2() bfin_read16(SPORT0_RCR2) +#define bfin_write_SPORT0_RCR2(val) bfin_write16(SPORT0_RCR2, val) +#define bfin_read_SPORT0_RCLKDIV() bfin_read16(SPORT0_RCLKDIV) +#define bfin_write_SPORT0_RCLKDIV(val) bfin_write16(SPORT0_RCLKDIV, val) +#define bfin_read_SPORT0_RFSDIV() bfin_read16(SPORT0_RFSDIV) +#define bfin_write_SPORT0_RFSDIV(val) bfin_write16(SPORT0_RFSDIV, val) +#define bfin_read_SPORT0_STAT() bfin_read16(SPORT0_STAT) +#define bfin_write_SPORT0_STAT(val) bfin_write16(SPORT0_STAT, val) +#define bfin_read_SPORT0_CHNL() bfin_read16(SPORT0_CHNL) +#define bfin_write_SPORT0_CHNL(val) bfin_write16(SPORT0_CHNL, val) +#define bfin_read_SPORT0_MCMC1() bfin_read16(SPORT0_MCMC1) +#define bfin_write_SPORT0_MCMC1(val) bfin_write16(SPORT0_MCMC1, val) +#define bfin_read_SPORT0_MCMC2() bfin_read16(SPORT0_MCMC2) +#define bfin_write_SPORT0_MCMC2(val) bfin_write16(SPORT0_MCMC2, val) +#define bfin_read_SPORT0_MTCS0() bfin_read32(SPORT0_MTCS0) +#define bfin_write_SPORT0_MTCS0(val) bfin_write32(SPORT0_MTCS0, val) +#define bfin_read_SPORT0_MTCS1() bfin_read32(SPORT0_MTCS1) +#define bfin_write_SPORT0_MTCS1(val) bfin_write32(SPORT0_MTCS1, val) +#define bfin_read_SPORT0_MTCS2() bfin_read32(SPORT0_MTCS2) +#define bfin_write_SPORT0_MTCS2(val) bfin_write32(SPORT0_MTCS2, val) +#define bfin_read_SPORT0_MTCS3() bfin_read32(SPORT0_MTCS3) +#define bfin_write_SPORT0_MTCS3(val) bfin_write32(SPORT0_MTCS3, val) +#define bfin_read_SPORT0_MRCS0() bfin_read32(SPORT0_MRCS0) +#define bfin_write_SPORT0_MRCS0(val) bfin_write32(SPORT0_MRCS0, val) +#define bfin_read_SPORT0_MRCS1() bfin_read32(SPORT0_MRCS1) +#define bfin_write_SPORT0_MRCS1(val) bfin_write32(SPORT0_MRCS1, val) +#define bfin_read_SPORT0_MRCS2() bfin_read32(SPORT0_MRCS2) +#define bfin_write_SPORT0_MRCS2(val) bfin_write32(SPORT0_MRCS2, val) +#define bfin_read_SPORT0_MRCS3() bfin_read32(SPORT0_MRCS3) +#define bfin_write_SPORT0_MRCS3(val) bfin_write32(SPORT0_MRCS3, val) +#define bfin_read_SPORT1_TCR1() bfin_read16(SPORT1_TCR1) +#define bfin_write_SPORT1_TCR1(val) bfin_write16(SPORT1_TCR1, val) +#define bfin_read_SPORT1_TCR2() bfin_read16(SPORT1_TCR2) +#define bfin_write_SPORT1_TCR2(val) bfin_write16(SPORT1_TCR2, val) +#define bfin_read_SPORT1_TCLKDIV() bfin_read16(SPORT1_TCLKDIV) +#define bfin_write_SPORT1_TCLKDIV(val) bfin_write16(SPORT1_TCLKDIV, val) +#define bfin_read_SPORT1_TFSDIV() bfin_read16(SPORT1_TFSDIV) +#define bfin_write_SPORT1_TFSDIV(val) bfin_write16(SPORT1_TFSDIV, val) +#define bfin_read_SPORT1_TX() bfin_read32(SPORT1_TX) +#define bfin_write_SPORT1_TX(val) bfin_write32(SPORT1_TX, val) +#define bfin_read_SPORT1_RX() bfin_read32(SPORT1_RX) +#define bfin_write_SPORT1_RX(val) bfin_write32(SPORT1_RX, val) +#define bfin_read_SPORT1_RCR1() bfin_read16(SPORT1_RCR1) +#define bfin_write_SPORT1_RCR1(val) bfin_write16(SPORT1_RCR1, val) +#define bfin_read_SPORT1_RCR2() bfin_read16(SPORT1_RCR2) +#define bfin_write_SPORT1_RCR2(val) bfin_write16(SPORT1_RCR2, val) +#define bfin_read_SPORT1_RCLKDIV() bfin_read16(SPORT1_RCLKDIV) +#define bfin_write_SPORT1_RCLKDIV(val) bfin_write16(SPORT1_RCLKDIV, val) +#define bfin_read_SPORT1_RFSDIV() bfin_read16(SPORT1_RFSDIV) +#define bfin_write_SPORT1_RFSDIV(val) bfin_write16(SPORT1_RFSDIV, val) +#define bfin_read_SPORT1_STAT() bfin_read16(SPORT1_STAT) +#define bfin_write_SPORT1_STAT(val) bfin_write16(SPORT1_STAT, val) +#define bfin_read_SPORT1_CHNL() bfin_read16(SPORT1_CHNL) +#define bfin_write_SPORT1_CHNL(val) bfin_write16(SPORT1_CHNL, val) +#define bfin_read_SPORT1_MCMC1() bfin_read16(SPORT1_MCMC1) +#define bfin_write_SPORT1_MCMC1(val) bfin_write16(SPORT1_MCMC1, val) +#define bfin_read_SPORT1_MCMC2() bfin_read16(SPORT1_MCMC2) +#define bfin_write_SPORT1_MCMC2(val) bfin_write16(SPORT1_MCMC2, val) +#define bfin_read_SPORT1_MTCS0() bfin_read32(SPORT1_MTCS0) +#define bfin_write_SPORT1_MTCS0(val) bfin_write32(SPORT1_MTCS0, val) +#define bfin_read_SPORT1_MTCS1() bfin_read32(SPORT1_MTCS1) +#define bfin_write_SPORT1_MTCS1(val) bfin_write32(SPORT1_MTCS1, val) +#define bfin_read_SPORT1_MTCS2() bfin_read32(SPORT1_MTCS2) +#define bfin_write_SPORT1_MTCS2(val) bfin_write32(SPORT1_MTCS2, val) +#define bfin_read_SPORT1_MTCS3() bfin_read32(SPORT1_MTCS3) +#define bfin_write_SPORT1_MTCS3(val) bfin_write32(SPORT1_MTCS3, val) +#define bfin_read_SPORT1_MRCS0() bfin_read32(SPORT1_MRCS0) +#define bfin_write_SPORT1_MRCS0(val) bfin_write32(SPORT1_MRCS0, val) +#define bfin_read_SPORT1_MRCS1() bfin_read32(SPORT1_MRCS1) +#define bfin_write_SPORT1_MRCS1(val) bfin_write32(SPORT1_MRCS1, val) +#define bfin_read_SPORT1_MRCS2() bfin_read32(SPORT1_MRCS2) +#define bfin_write_SPORT1_MRCS2(val) bfin_write32(SPORT1_MRCS2, val) +#define bfin_read_SPORT1_MRCS3() bfin_read32(SPORT1_MRCS3) +#define bfin_write_SPORT1_MRCS3(val) bfin_write32(SPORT1_MRCS3, val) +#define bfin_read_SPORT2_TCR1() bfin_read16(SPORT2_TCR1) +#define bfin_write_SPORT2_TCR1(val) bfin_write16(SPORT2_TCR1, val) +#define bfin_read_SPORT2_TCR2() bfin_read16(SPORT2_TCR2) +#define bfin_write_SPORT2_TCR2(val) bfin_write16(SPORT2_TCR2, val) +#define bfin_read_SPORT2_TCLKDIV() bfin_read16(SPORT2_TCLKDIV) +#define bfin_write_SPORT2_TCLKDIV(val) bfin_write16(SPORT2_TCLKDIV, val) +#define bfin_read_SPORT2_TFSDIV() bfin_read16(SPORT2_TFSDIV) +#define bfin_write_SPORT2_TFSDIV(val) bfin_write16(SPORT2_TFSDIV, val) +#define bfin_read_SPORT2_TX() bfin_read32(SPORT2_TX) +#define bfin_write_SPORT2_TX(val) bfin_write32(SPORT2_TX, val) +#define bfin_read_SPORT2_RX() bfin_read32(SPORT2_RX) +#define bfin_write_SPORT2_RX(val) bfin_write32(SPORT2_RX, val) +#define bfin_read_SPORT2_RCR1() bfin_read16(SPORT2_RCR1) +#define bfin_write_SPORT2_RCR1(val) bfin_write16(SPORT2_RCR1, val) +#define bfin_read_SPORT2_RCR2() bfin_read16(SPORT2_RCR2) +#define bfin_write_SPORT2_RCR2(val) bfin_write16(SPORT2_RCR2, val) +#define bfin_read_SPORT2_RCLKDIV() bfin_read16(SPORT2_RCLKDIV) +#define bfin_write_SPORT2_RCLKDIV(val) bfin_write16(SPORT2_RCLKDIV, val) +#define bfin_read_SPORT2_RFSDIV() bfin_read16(SPORT2_RFSDIV) +#define bfin_write_SPORT2_RFSDIV(val) bfin_write16(SPORT2_RFSDIV, val) +#define bfin_read_SPORT2_STAT() bfin_read16(SPORT2_STAT) +#define bfin_write_SPORT2_STAT(val) bfin_write16(SPORT2_STAT, val) +#define bfin_read_SPORT2_CHNL() bfin_read16(SPORT2_CHNL) +#define bfin_write_SPORT2_CHNL(val) bfin_write16(SPORT2_CHNL, val) +#define bfin_read_SPORT2_MCMC1() bfin_read16(SPORT2_MCMC1) +#define bfin_write_SPORT2_MCMC1(val) bfin_write16(SPORT2_MCMC1, val) +#define bfin_read_SPORT2_MCMC2() bfin_read16(SPORT2_MCMC2) +#define bfin_write_SPORT2_MCMC2(val) bfin_write16(SPORT2_MCMC2, val) +#define bfin_read_SPORT2_MTCS0() bfin_read32(SPORT2_MTCS0) +#define bfin_write_SPORT2_MTCS0(val) bfin_write32(SPORT2_MTCS0, val) +#define bfin_read_SPORT2_MTCS1() bfin_read32(SPORT2_MTCS1) +#define bfin_write_SPORT2_MTCS1(val) bfin_write32(SPORT2_MTCS1, val) +#define bfin_read_SPORT2_MTCS2() bfin_read32(SPORT2_MTCS2) +#define bfin_write_SPORT2_MTCS2(val) bfin_write32(SPORT2_MTCS2, val) +#define bfin_read_SPORT2_MTCS3() bfin_read32(SPORT2_MTCS3) +#define bfin_write_SPORT2_MTCS3(val) bfin_write32(SPORT2_MTCS3, val) +#define bfin_read_SPORT2_MRCS0() bfin_read32(SPORT2_MRCS0) +#define bfin_write_SPORT2_MRCS0(val) bfin_write32(SPORT2_MRCS0, val) +#define bfin_read_SPORT2_MRCS1() bfin_read32(SPORT2_MRCS1) +#define bfin_write_SPORT2_MRCS1(val) bfin_write32(SPORT2_MRCS1, val) +#define bfin_read_SPORT2_MRCS2() bfin_read32(SPORT2_MRCS2) +#define bfin_write_SPORT2_MRCS2(val) bfin_write32(SPORT2_MRCS2, val) +#define bfin_read_SPORT2_MRCS3() bfin_read32(SPORT2_MRCS3) +#define bfin_write_SPORT2_MRCS3(val) bfin_write32(SPORT2_MRCS3, val) +#define bfin_read_SPORT3_TCR1() bfin_read16(SPORT3_TCR1) +#define bfin_write_SPORT3_TCR1(val) bfin_write16(SPORT3_TCR1, val) +#define bfin_read_SPORT3_TCR2() bfin_read16(SPORT3_TCR2) +#define bfin_write_SPORT3_TCR2(val) bfin_write16(SPORT3_TCR2, val) +#define bfin_read_SPORT3_TCLKDIV() bfin_read16(SPORT3_TCLKDIV) +#define bfin_write_SPORT3_TCLKDIV(val) bfin_write16(SPORT3_TCLKDIV, val) +#define bfin_read_SPORT3_TFSDIV() bfin_read16(SPORT3_TFSDIV) +#define bfin_write_SPORT3_TFSDIV(val) bfin_write16(SPORT3_TFSDIV, val) +#define bfin_read_SPORT3_TX() bfin_read32(SPORT3_TX) +#define bfin_write_SPORT3_TX(val) bfin_write32(SPORT3_TX, val) +#define bfin_read_SPORT3_RX() bfin_read32(SPORT3_RX) +#define bfin_write_SPORT3_RX(val) bfin_write32(SPORT3_RX, val) +#define bfin_read_SPORT3_RCR1() bfin_read16(SPORT3_RCR1) +#define bfin_write_SPORT3_RCR1(val) bfin_write16(SPORT3_RCR1, val) +#define bfin_read_SPORT3_RCR2() bfin_read16(SPORT3_RCR2) +#define bfin_write_SPORT3_RCR2(val) bfin_write16(SPORT3_RCR2, val) +#define bfin_read_SPORT3_RCLKDIV() bfin_read16(SPORT3_RCLKDIV) +#define bfin_write_SPORT3_RCLKDIV(val) bfin_write16(SPORT3_RCLKDIV, val) +#define bfin_read_SPORT3_RFSDIV() bfin_read16(SPORT3_RFSDIV) +#define bfin_write_SPORT3_RFSDIV(val) bfin_write16(SPORT3_RFSDIV, val) +#define bfin_read_SPORT3_STAT() bfin_read16(SPORT3_STAT) +#define bfin_write_SPORT3_STAT(val) bfin_write16(SPORT3_STAT, val) +#define bfin_read_SPORT3_CHNL() bfin_read16(SPORT3_CHNL) +#define bfin_write_SPORT3_CHNL(val) bfin_write16(SPORT3_CHNL, val) +#define bfin_read_SPORT3_MCMC1() bfin_read16(SPORT3_MCMC1) +#define bfin_write_SPORT3_MCMC1(val) bfin_write16(SPORT3_MCMC1, val) +#define bfin_read_SPORT3_MCMC2() bfin_read16(SPORT3_MCMC2) +#define bfin_write_SPORT3_MCMC2(val) bfin_write16(SPORT3_MCMC2, val) +#define bfin_read_SPORT3_MTCS0() bfin_read32(SPORT3_MTCS0) +#define bfin_write_SPORT3_MTCS0(val) bfin_write32(SPORT3_MTCS0, val) +#define bfin_read_SPORT3_MTCS1() bfin_read32(SPORT3_MTCS1) +#define bfin_write_SPORT3_MTCS1(val) bfin_write32(SPORT3_MTCS1, val) +#define bfin_read_SPORT3_MTCS2() bfin_read32(SPORT3_MTCS2) +#define bfin_write_SPORT3_MTCS2(val) bfin_write32(SPORT3_MTCS2, val) +#define bfin_read_SPORT3_MTCS3() bfin_read32(SPORT3_MTCS3) +#define bfin_write_SPORT3_MTCS3(val) bfin_write32(SPORT3_MTCS3, val) +#define bfin_read_SPORT3_MRCS0() bfin_read32(SPORT3_MRCS0) +#define bfin_write_SPORT3_MRCS0(val) bfin_write32(SPORT3_MRCS0, val) +#define bfin_read_SPORT3_MRCS1() bfin_read32(SPORT3_MRCS1) +#define bfin_write_SPORT3_MRCS1(val) bfin_write32(SPORT3_MRCS1, val) +#define bfin_read_SPORT3_MRCS2() bfin_read32(SPORT3_MRCS2) +#define bfin_write_SPORT3_MRCS2(val) bfin_write32(SPORT3_MRCS2, val) +#define bfin_read_SPORT3_MRCS3() bfin_read32(SPORT3_MRCS3) +#define bfin_write_SPORT3_MRCS3(val) bfin_write32(SPORT3_MRCS3, val) +#define bfin_read_PORTFIO() bfin_read16(PORTFIO) +#define bfin_write_PORTFIO(val) bfin_write16(PORTFIO, val) +#define bfin_read_PORTFIO_CLEAR() bfin_read16(PORTFIO_CLEAR) +#define bfin_write_PORTFIO_CLEAR(val) bfin_write16(PORTFIO_CLEAR, val) +#define bfin_read_PORTFIO_SET() bfin_read16(PORTFIO_SET) +#define bfin_write_PORTFIO_SET(val) bfin_write16(PORTFIO_SET, val) +#define bfin_read_PORTFIO_TOGGLE() bfin_read16(PORTFIO_TOGGLE) +#define bfin_write_PORTFIO_TOGGLE(val) bfin_write16(PORTFIO_TOGGLE, val) +#define bfin_read_PORTFIO_MASKA() bfin_read16(PORTFIO_MASKA) +#define bfin_write_PORTFIO_MASKA(val) bfin_write16(PORTFIO_MASKA, val) +#define bfin_read_PORTFIO_MASKA_CLEAR() bfin_read16(PORTFIO_MASKA_CLEAR) +#define bfin_write_PORTFIO_MASKA_CLEAR(val) bfin_write16(PORTFIO_MASKA_CLEAR, val) +#define bfin_read_PORTFIO_MASKA_SET() bfin_read16(PORTFIO_MASKA_SET) +#define bfin_write_PORTFIO_MASKA_SET(val) bfin_write16(PORTFIO_MASKA_SET, val) +#define bfin_read_PORTFIO_MASKA_TOGGLE() bfin_read16(PORTFIO_MASKA_TOGGLE) +#define bfin_write_PORTFIO_MASKA_TOGGLE(val) bfin_write16(PORTFIO_MASKA_TOGGLE, val) +#define bfin_read_PORTFIO_MASKB() bfin_read16(PORTFIO_MASKB) +#define bfin_write_PORTFIO_MASKB(val) bfin_write16(PORTFIO_MASKB, val) +#define bfin_read_PORTFIO_MASKB_CLEAR() bfin_read16(PORTFIO_MASKB_CLEAR) +#define bfin_write_PORTFIO_MASKB_CLEAR(val) bfin_write16(PORTFIO_MASKB_CLEAR, val) +#define bfin_read_PORTFIO_MASKB_SET() bfin_read16(PORTFIO_MASKB_SET) +#define bfin_write_PORTFIO_MASKB_SET(val) bfin_write16(PORTFIO_MASKB_SET, val) +#define bfin_read_PORTFIO_MASKB_TOGGLE() bfin_read16(PORTFIO_MASKB_TOGGLE) +#define bfin_write_PORTFIO_MASKB_TOGGLE(val) bfin_write16(PORTFIO_MASKB_TOGGLE, val) +#define bfin_read_PORTFIO_DIR() bfin_read16(PORTFIO_DIR) +#define bfin_write_PORTFIO_DIR(val) bfin_write16(PORTFIO_DIR, val) +#define bfin_read_PORTFIO_POLAR() bfin_read16(PORTFIO_POLAR) +#define bfin_write_PORTFIO_POLAR(val) bfin_write16(PORTFIO_POLAR, val) +#define bfin_read_PORTFIO_EDGE() bfin_read16(PORTFIO_EDGE) +#define bfin_write_PORTFIO_EDGE(val) bfin_write16(PORTFIO_EDGE, val) +#define bfin_read_PORTFIO_BOTH() bfin_read16(PORTFIO_BOTH) +#define bfin_write_PORTFIO_BOTH(val) bfin_write16(PORTFIO_BOTH, val) +#define bfin_read_PORTFIO_INEN() bfin_read16(PORTFIO_INEN) +#define bfin_write_PORTFIO_INEN(val) bfin_write16(PORTFIO_INEN, val) +#define bfin_read_PORTCIO_FER() bfin_read16(PORTCIO_FER) +#define bfin_write_PORTCIO_FER(val) bfin_write16(PORTCIO_FER, val) +#define bfin_read_PORTCIO() bfin_read16(PORTCIO) +#define bfin_write_PORTCIO(val) bfin_write16(PORTCIO, val) +#define bfin_read_PORTCIO_CLEAR() bfin_read16(PORTCIO_CLEAR) +#define bfin_write_PORTCIO_CLEAR(val) bfin_write16(PORTCIO_CLEAR, val) +#define bfin_read_PORTCIO_SET() bfin_read16(PORTCIO_SET) +#define bfin_write_PORTCIO_SET(val) bfin_write16(PORTCIO_SET, val) +#define bfin_read_PORTCIO_TOGGLE() bfin_read16(PORTCIO_TOGGLE) +#define bfin_write_PORTCIO_TOGGLE(val) bfin_write16(PORTCIO_TOGGLE, val) +#define bfin_read_PORTCIO_DIR() bfin_read16(PORTCIO_DIR) +#define bfin_write_PORTCIO_DIR(val) bfin_write16(PORTCIO_DIR, val) +#define bfin_read_PORTCIO_INEN() bfin_read16(PORTCIO_INEN) +#define bfin_write_PORTCIO_INEN(val) bfin_write16(PORTCIO_INEN, val) +#define bfin_read_PORTDIO_FER() bfin_read16(PORTDIO_FER) +#define bfin_write_PORTDIO_FER(val) bfin_write16(PORTDIO_FER, val) +#define bfin_read_PORTDIO() bfin_read16(PORTDIO) +#define bfin_write_PORTDIO(val) bfin_write16(PORTDIO, val) +#define bfin_read_PORTDIO_CLEAR() bfin_read16(PORTDIO_CLEAR) +#define bfin_write_PORTDIO_CLEAR(val) bfin_write16(PORTDIO_CLEAR, val) +#define bfin_read_PORTDIO_SET() bfin_read16(PORTDIO_SET) +#define bfin_write_PORTDIO_SET(val) bfin_write16(PORTDIO_SET, val) +#define bfin_read_PORTDIO_TOGGLE() bfin_read16(PORTDIO_TOGGLE) +#define bfin_write_PORTDIO_TOGGLE(val) bfin_write16(PORTDIO_TOGGLE, val) +#define bfin_read_PORTDIO_DIR() bfin_read16(PORTDIO_DIR) +#define bfin_write_PORTDIO_DIR(val) bfin_write16(PORTDIO_DIR, val) +#define bfin_read_PORTDIO_INEN() bfin_read16(PORTDIO_INEN) +#define bfin_write_PORTDIO_INEN(val) bfin_write16(PORTDIO_INEN, val) +#define bfin_read_PORTEIO_FER() bfin_read16(PORTEIO_FER) +#define bfin_write_PORTEIO_FER(val) bfin_write16(PORTEIO_FER, val) +#define bfin_read_PORTEIO() bfin_read16(PORTEIO) +#define bfin_write_PORTEIO(val) bfin_write16(PORTEIO, val) +#define bfin_read_PORTEIO_CLEAR() bfin_read16(PORTEIO_CLEAR) +#define bfin_write_PORTEIO_CLEAR(val) bfin_write16(PORTEIO_CLEAR, val) +#define bfin_read_PORTEIO_SET() bfin_read16(PORTEIO_SET) +#define bfin_write_PORTEIO_SET(val) bfin_write16(PORTEIO_SET, val) +#define bfin_read_PORTEIO_TOGGLE() bfin_read16(PORTEIO_TOGGLE) +#define bfin_write_PORTEIO_TOGGLE(val) bfin_write16(PORTEIO_TOGGLE, val) +#define bfin_read_PORTEIO_DIR() bfin_read16(PORTEIO_DIR) +#define bfin_write_PORTEIO_DIR(val) bfin_write16(PORTEIO_DIR, val) +#define bfin_read_PORTEIO_INEN() bfin_read16(PORTEIO_INEN) +#define bfin_write_PORTEIO_INEN(val) bfin_write16(PORTEIO_INEN, val) +#define bfin_read_EBIU_AMGCTL() bfin_read16(EBIU_AMGCTL) +#define bfin_write_EBIU_AMGCTL(val) bfin_write16(EBIU_AMGCTL, val) +#define bfin_read_EBIU_AMBCTL0() bfin_read32(EBIU_AMBCTL0) +#define bfin_write_EBIU_AMBCTL0(val) bfin_write32(EBIU_AMBCTL0, val) +#define bfin_read_EBIU_AMBCTL1() bfin_read32(EBIU_AMBCTL1) +#define bfin_write_EBIU_AMBCTL1(val) bfin_write32(EBIU_AMBCTL1, val) +#define bfin_read_EBIU_SDGCTL() bfin_read32(EBIU_SDGCTL) +#define bfin_write_EBIU_SDGCTL(val) bfin_write32(EBIU_SDGCTL, val) +#define bfin_read_EBIU_SDBCTL() bfin_read16(EBIU_SDBCTL) +#define bfin_write_EBIU_SDBCTL(val) bfin_write16(EBIU_SDBCTL, val) +#define bfin_read_EBIU_SDRRC() bfin_read16(EBIU_SDRRC) +#define bfin_write_EBIU_SDRRC(val) bfin_write16(EBIU_SDRRC, val) +#define bfin_read_EBIU_SDSTAT() bfin_read16(EBIU_SDSTAT) +#define bfin_write_EBIU_SDSTAT(val) bfin_write16(EBIU_SDSTAT, val) +#define bfin_read_DMA0_TC_PER() bfin_read16(DMA0_TC_PER) +#define bfin_write_DMA0_TC_PER(val) bfin_write16(DMA0_TC_PER, val) +#define bfin_read_DMA0_TC_CNT() bfin_read16(DMA0_TC_CNT) +#define bfin_write_DMA0_TC_CNT(val) bfin_write16(DMA0_TC_CNT, val) +#define bfin_read_DMA0_NEXT_DESC_PTR() bfin_readPTR(DMA0_NEXT_DESC_PTR) +#define bfin_write_DMA0_NEXT_DESC_PTR(val) bfin_writePTR(DMA0_NEXT_DESC_PTR, val) +#define bfin_read_DMA0_START_ADDR() bfin_readPTR(DMA0_START_ADDR) +#define bfin_write_DMA0_START_ADDR(val) bfin_writePTR(DMA0_START_ADDR, val) +#define bfin_read_DMA0_CONFIG() bfin_read16(DMA0_CONFIG) +#define bfin_write_DMA0_CONFIG(val) bfin_write16(DMA0_CONFIG, val) +#define bfin_read_DMA0_X_COUNT() bfin_read16(DMA0_X_COUNT) +#define bfin_write_DMA0_X_COUNT(val) bfin_write16(DMA0_X_COUNT, val) +#define bfin_read_DMA0_X_MODIFY() bfin_read16(DMA0_X_MODIFY) +#define bfin_write_DMA0_X_MODIFY(val) bfin_write16(DMA0_X_MODIFY, val) +#define bfin_read_DMA0_Y_COUNT() bfin_read16(DMA0_Y_COUNT) +#define bfin_write_DMA0_Y_COUNT(val) bfin_write16(DMA0_Y_COUNT, val) +#define bfin_read_DMA0_Y_MODIFY() bfin_read16(DMA0_Y_MODIFY) +#define bfin_write_DMA0_Y_MODIFY(val) bfin_write16(DMA0_Y_MODIFY, val) +#define bfin_read_DMA0_CURR_DESC_PTR() bfin_readPTR(DMA0_CURR_DESC_PTR) +#define bfin_write_DMA0_CURR_DESC_PTR(val) bfin_writePTR(DMA0_CURR_DESC_PTR, val) +#define bfin_read_DMA0_CURR_ADDR() bfin_readPTR(DMA0_CURR_ADDR) +#define bfin_write_DMA0_CURR_ADDR(val) bfin_writePTR(DMA0_CURR_ADDR, val) +#define bfin_read_DMA0_IRQ_STATUS() bfin_read16(DMA0_IRQ_STATUS) +#define bfin_write_DMA0_IRQ_STATUS(val) bfin_write16(DMA0_IRQ_STATUS, val) +#define bfin_read_DMA0_PERIPHERAL_MAP() bfin_read16(DMA0_PERIPHERAL_MAP) +#define bfin_write_DMA0_PERIPHERAL_MAP(val) bfin_write16(DMA0_PERIPHERAL_MAP, val) +#define bfin_read_DMA0_CURR_X_COUNT() bfin_read16(DMA0_CURR_X_COUNT) +#define bfin_write_DMA0_CURR_X_COUNT(val) bfin_write16(DMA0_CURR_X_COUNT, val) +#define bfin_read_DMA0_CURR_Y_COUNT() bfin_read16(DMA0_CURR_Y_COUNT) +#define bfin_write_DMA0_CURR_Y_COUNT(val) bfin_write16(DMA0_CURR_Y_COUNT, val) +#define bfin_read_DMA1_NEXT_DESC_PTR() bfin_readPTR(DMA1_NEXT_DESC_PTR) +#define bfin_write_DMA1_NEXT_DESC_PTR(val) bfin_writePTR(DMA1_NEXT_DESC_PTR, val) +#define bfin_read_DMA1_START_ADDR() bfin_readPTR(DMA1_START_ADDR) +#define bfin_write_DMA1_START_ADDR(val) bfin_writePTR(DMA1_START_ADDR, val) +#define bfin_read_DMA1_CONFIG() bfin_read16(DMA1_CONFIG) +#define bfin_write_DMA1_CONFIG(val) bfin_write16(DMA1_CONFIG, val) +#define bfin_read_DMA1_X_COUNT() bfin_read16(DMA1_X_COUNT) +#define bfin_write_DMA1_X_COUNT(val) bfin_write16(DMA1_X_COUNT, val) +#define bfin_read_DMA1_X_MODIFY() bfin_read16(DMA1_X_MODIFY) +#define bfin_write_DMA1_X_MODIFY(val) bfin_write16(DMA1_X_MODIFY, val) +#define bfin_read_DMA1_Y_COUNT() bfin_read16(DMA1_Y_COUNT) +#define bfin_write_DMA1_Y_COUNT(val) bfin_write16(DMA1_Y_COUNT, val) +#define bfin_read_DMA1_Y_MODIFY() bfin_read16(DMA1_Y_MODIFY) +#define bfin_write_DMA1_Y_MODIFY(val) bfin_write16(DMA1_Y_MODIFY, val) +#define bfin_read_DMA1_CURR_DESC_PTR() bfin_readPTR(DMA1_CURR_DESC_PTR) +#define bfin_write_DMA1_CURR_DESC_PTR(val) bfin_writePTR(DMA1_CURR_DESC_PTR, val) +#define bfin_read_DMA1_CURR_ADDR() bfin_readPTR(DMA1_CURR_ADDR) +#define bfin_write_DMA1_CURR_ADDR(val) bfin_writePTR(DMA1_CURR_ADDR, val) +#define bfin_read_DMA1_IRQ_STATUS() bfin_read16(DMA1_IRQ_STATUS) +#define bfin_write_DMA1_IRQ_STATUS(val) bfin_write16(DMA1_IRQ_STATUS, val) +#define bfin_read_DMA1_PERIPHERAL_MAP() bfin_read16(DMA1_PERIPHERAL_MAP) +#define bfin_write_DMA1_PERIPHERAL_MAP(val) bfin_write16(DMA1_PERIPHERAL_MAP, val) +#define bfin_read_DMA1_CURR_X_COUNT() bfin_read16(DMA1_CURR_X_COUNT) +#define bfin_write_DMA1_CURR_X_COUNT(val) bfin_write16(DMA1_CURR_X_COUNT, val) +#define bfin_read_DMA1_CURR_Y_COUNT() bfin_read16(DMA1_CURR_Y_COUNT) +#define bfin_write_DMA1_CURR_Y_COUNT(val) bfin_write16(DMA1_CURR_Y_COUNT, val) +#define bfin_read_DMA2_NEXT_DESC_PTR() bfin_readPTR(DMA2_NEXT_DESC_PTR) +#define bfin_write_DMA2_NEXT_DESC_PTR(val) bfin_writePTR(DMA2_NEXT_DESC_PTR, val) +#define bfin_read_DMA2_START_ADDR() bfin_readPTR(DMA2_START_ADDR) +#define bfin_write_DMA2_START_ADDR(val) bfin_writePTR(DMA2_START_ADDR, val) +#define bfin_read_DMA2_CONFIG() bfin_read16(DMA2_CONFIG) +#define bfin_write_DMA2_CONFIG(val) bfin_write16(DMA2_CONFIG, val) +#define bfin_read_DMA2_X_COUNT() bfin_read16(DMA2_X_COUNT) +#define bfin_write_DMA2_X_COUNT(val) bfin_write16(DMA2_X_COUNT, val) +#define bfin_read_DMA2_X_MODIFY() bfin_read16(DMA2_X_MODIFY) +#define bfin_write_DMA2_X_MODIFY(val) bfin_write16(DMA2_X_MODIFY, val) +#define bfin_read_DMA2_Y_COUNT() bfin_read16(DMA2_Y_COUNT) +#define bfin_write_DMA2_Y_COUNT(val) bfin_write16(DMA2_Y_COUNT, val) +#define bfin_read_DMA2_Y_MODIFY() bfin_read16(DMA2_Y_MODIFY) +#define bfin_write_DMA2_Y_MODIFY(val) bfin_write16(DMA2_Y_MODIFY, val) +#define bfin_read_DMA2_CURR_DESC_PTR() bfin_readPTR(DMA2_CURR_DESC_PTR) +#define bfin_write_DMA2_CURR_DESC_PTR(val) bfin_writePTR(DMA2_CURR_DESC_PTR, val) +#define bfin_read_DMA2_CURR_ADDR() bfin_readPTR(DMA2_CURR_ADDR) +#define bfin_write_DMA2_CURR_ADDR(val) bfin_writePTR(DMA2_CURR_ADDR, val) +#define bfin_read_DMA2_IRQ_STATUS() bfin_read16(DMA2_IRQ_STATUS) +#define bfin_write_DMA2_IRQ_STATUS(val) bfin_write16(DMA2_IRQ_STATUS, val) +#define bfin_read_DMA2_PERIPHERAL_MAP() bfin_read16(DMA2_PERIPHERAL_MAP) +#define bfin_write_DMA2_PERIPHERAL_MAP(val) bfin_write16(DMA2_PERIPHERAL_MAP, val) +#define bfin_read_DMA2_CURR_X_COUNT() bfin_read16(DMA2_CURR_X_COUNT) +#define bfin_write_DMA2_CURR_X_COUNT(val) bfin_write16(DMA2_CURR_X_COUNT, val) +#define bfin_read_DMA2_CURR_Y_COUNT() bfin_read16(DMA2_CURR_Y_COUNT) +#define bfin_write_DMA2_CURR_Y_COUNT(val) bfin_write16(DMA2_CURR_Y_COUNT, val) +#define bfin_read_DMA3_NEXT_DESC_PTR() bfin_readPTR(DMA3_NEXT_DESC_PTR) +#define bfin_write_DMA3_NEXT_DESC_PTR(val) bfin_writePTR(DMA3_NEXT_DESC_PTR, val) +#define bfin_read_DMA3_START_ADDR() bfin_readPTR(DMA3_START_ADDR) +#define bfin_write_DMA3_START_ADDR(val) bfin_writePTR(DMA3_START_ADDR, val) +#define bfin_read_DMA3_CONFIG() bfin_read16(DMA3_CONFIG) +#define bfin_write_DMA3_CONFIG(val) bfin_write16(DMA3_CONFIG, val) +#define bfin_read_DMA3_X_COUNT() bfin_read16(DMA3_X_COUNT) +#define bfin_write_DMA3_X_COUNT(val) bfin_write16(DMA3_X_COUNT, val) +#define bfin_read_DMA3_X_MODIFY() bfin_read16(DMA3_X_MODIFY) +#define bfin_write_DMA3_X_MODIFY(val) bfin_write16(DMA3_X_MODIFY, val) +#define bfin_read_DMA3_Y_COUNT() bfin_read16(DMA3_Y_COUNT) +#define bfin_write_DMA3_Y_COUNT(val) bfin_write16(DMA3_Y_COUNT, val) +#define bfin_read_DMA3_Y_MODIFY() bfin_read16(DMA3_Y_MODIFY) +#define bfin_write_DMA3_Y_MODIFY(val) bfin_write16(DMA3_Y_MODIFY, val) +#define bfin_read_DMA3_CURR_DESC_PTR() bfin_readPTR(DMA3_CURR_DESC_PTR) +#define bfin_write_DMA3_CURR_DESC_PTR(val) bfin_writePTR(DMA3_CURR_DESC_PTR, val) +#define bfin_read_DMA3_CURR_ADDR() bfin_readPTR(DMA3_CURR_ADDR) +#define bfin_write_DMA3_CURR_ADDR(val) bfin_writePTR(DMA3_CURR_ADDR, val) +#define bfin_read_DMA3_IRQ_STATUS() bfin_read16(DMA3_IRQ_STATUS) +#define bfin_write_DMA3_IRQ_STATUS(val) bfin_write16(DMA3_IRQ_STATUS, val) +#define bfin_read_DMA3_PERIPHERAL_MAP() bfin_read16(DMA3_PERIPHERAL_MAP) +#define bfin_write_DMA3_PERIPHERAL_MAP(val) bfin_write16(DMA3_PERIPHERAL_MAP, val) +#define bfin_read_DMA3_CURR_X_COUNT() bfin_read16(DMA3_CURR_X_COUNT) +#define bfin_write_DMA3_CURR_X_COUNT(val) bfin_write16(DMA3_CURR_X_COUNT, val) +#define bfin_read_DMA3_CURR_Y_COUNT() bfin_read16(DMA3_CURR_Y_COUNT) +#define bfin_write_DMA3_CURR_Y_COUNT(val) bfin_write16(DMA3_CURR_Y_COUNT, val) +#define bfin_read_DMA4_NEXT_DESC_PTR() bfin_readPTR(DMA4_NEXT_DESC_PTR) +#define bfin_write_DMA4_NEXT_DESC_PTR(val) bfin_writePTR(DMA4_NEXT_DESC_PTR, val) +#define bfin_read_DMA4_START_ADDR() bfin_readPTR(DMA4_START_ADDR) +#define bfin_write_DMA4_START_ADDR(val) bfin_writePTR(DMA4_START_ADDR, val) +#define bfin_read_DMA4_CONFIG() bfin_read16(DMA4_CONFIG) +#define bfin_write_DMA4_CONFIG(val) bfin_write16(DMA4_CONFIG, val) +#define bfin_read_DMA4_X_COUNT() bfin_read16(DMA4_X_COUNT) +#define bfin_write_DMA4_X_COUNT(val) bfin_write16(DMA4_X_COUNT, val) +#define bfin_read_DMA4_X_MODIFY() bfin_read16(DMA4_X_MODIFY) +#define bfin_write_DMA4_X_MODIFY(val) bfin_write16(DMA4_X_MODIFY, val) +#define bfin_read_DMA4_Y_COUNT() bfin_read16(DMA4_Y_COUNT) +#define bfin_write_DMA4_Y_COUNT(val) bfin_write16(DMA4_Y_COUNT, val) +#define bfin_read_DMA4_Y_MODIFY() bfin_read16(DMA4_Y_MODIFY) +#define bfin_write_DMA4_Y_MODIFY(val) bfin_write16(DMA4_Y_MODIFY, val) +#define bfin_read_DMA4_CURR_DESC_PTR() bfin_readPTR(DMA4_CURR_DESC_PTR) +#define bfin_write_DMA4_CURR_DESC_PTR(val) bfin_writePTR(DMA4_CURR_DESC_PTR, val) +#define bfin_read_DMA4_CURR_ADDR() bfin_readPTR(DMA4_CURR_ADDR) +#define bfin_write_DMA4_CURR_ADDR(val) bfin_writePTR(DMA4_CURR_ADDR, val) +#define bfin_read_DMA4_IRQ_STATUS() bfin_read16(DMA4_IRQ_STATUS) +#define bfin_write_DMA4_IRQ_STATUS(val) bfin_write16(DMA4_IRQ_STATUS, val) +#define bfin_read_DMA4_PERIPHERAL_MAP() bfin_read16(DMA4_PERIPHERAL_MAP) +#define bfin_write_DMA4_PERIPHERAL_MAP(val) bfin_write16(DMA4_PERIPHERAL_MAP, val) +#define bfin_read_DMA4_CURR_X_COUNT() bfin_read16(DMA4_CURR_X_COUNT) +#define bfin_write_DMA4_CURR_X_COUNT(val) bfin_write16(DMA4_CURR_X_COUNT, val) +#define bfin_read_DMA4_CURR_Y_COUNT() bfin_read16(DMA4_CURR_Y_COUNT) +#define bfin_write_DMA4_CURR_Y_COUNT(val) bfin_write16(DMA4_CURR_Y_COUNT, val) +#define bfin_read_DMA5_NEXT_DESC_PTR() bfin_readPTR(DMA5_NEXT_DESC_PTR) +#define bfin_write_DMA5_NEXT_DESC_PTR(val) bfin_writePTR(DMA5_NEXT_DESC_PTR, val) +#define bfin_read_DMA5_START_ADDR() bfin_readPTR(DMA5_START_ADDR) +#define bfin_write_DMA5_START_ADDR(val) bfin_writePTR(DMA5_START_ADDR, val) +#define bfin_read_DMA5_CONFIG() bfin_read16(DMA5_CONFIG) +#define bfin_write_DMA5_CONFIG(val) bfin_write16(DMA5_CONFIG, val) +#define bfin_read_DMA5_X_COUNT() bfin_read16(DMA5_X_COUNT) +#define bfin_write_DMA5_X_COUNT(val) bfin_write16(DMA5_X_COUNT, val) +#define bfin_read_DMA5_X_MODIFY() bfin_read16(DMA5_X_MODIFY) +#define bfin_write_DMA5_X_MODIFY(val) bfin_write16(DMA5_X_MODIFY, val) +#define bfin_read_DMA5_Y_COUNT() bfin_read16(DMA5_Y_COUNT) +#define bfin_write_DMA5_Y_COUNT(val) bfin_write16(DMA5_Y_COUNT, val) +#define bfin_read_DMA5_Y_MODIFY() bfin_read16(DMA5_Y_MODIFY) +#define bfin_write_DMA5_Y_MODIFY(val) bfin_write16(DMA5_Y_MODIFY, val) +#define bfin_read_DMA5_CURR_DESC_PTR() bfin_readPTR(DMA5_CURR_DESC_PTR) +#define bfin_write_DMA5_CURR_DESC_PTR(val) bfin_writePTR(DMA5_CURR_DESC_PTR, val) +#define bfin_read_DMA5_CURR_ADDR() bfin_readPTR(DMA5_CURR_ADDR) +#define bfin_write_DMA5_CURR_ADDR(val) bfin_writePTR(DMA5_CURR_ADDR, val) +#define bfin_read_DMA5_IRQ_STATUS() bfin_read16(DMA5_IRQ_STATUS) +#define bfin_write_DMA5_IRQ_STATUS(val) bfin_write16(DMA5_IRQ_STATUS, val) +#define bfin_read_DMA5_PERIPHERAL_MAP() bfin_read16(DMA5_PERIPHERAL_MAP) +#define bfin_write_DMA5_PERIPHERAL_MAP(val) bfin_write16(DMA5_PERIPHERAL_MAP, val) +#define bfin_read_DMA5_CURR_X_COUNT() bfin_read16(DMA5_CURR_X_COUNT) +#define bfin_write_DMA5_CURR_X_COUNT(val) bfin_write16(DMA5_CURR_X_COUNT, val) +#define bfin_read_DMA5_CURR_Y_COUNT() bfin_read16(DMA5_CURR_Y_COUNT) +#define bfin_write_DMA5_CURR_Y_COUNT(val) bfin_write16(DMA5_CURR_Y_COUNT, val) +#define bfin_read_DMA6_NEXT_DESC_PTR() bfin_readPTR(DMA6_NEXT_DESC_PTR) +#define bfin_write_DMA6_NEXT_DESC_PTR(val) bfin_writePTR(DMA6_NEXT_DESC_PTR, val) +#define bfin_read_DMA6_START_ADDR() bfin_readPTR(DMA6_START_ADDR) +#define bfin_write_DMA6_START_ADDR(val) bfin_writePTR(DMA6_START_ADDR, val) +#define bfin_read_DMA6_CONFIG() bfin_read16(DMA6_CONFIG) +#define bfin_write_DMA6_CONFIG(val) bfin_write16(DMA6_CONFIG, val) +#define bfin_read_DMA6_X_COUNT() bfin_read16(DMA6_X_COUNT) +#define bfin_write_DMA6_X_COUNT(val) bfin_write16(DMA6_X_COUNT, val) +#define bfin_read_DMA6_X_MODIFY() bfin_read16(DMA6_X_MODIFY) +#define bfin_write_DMA6_X_MODIFY(val) bfin_write16(DMA6_X_MODIFY, val) +#define bfin_read_DMA6_Y_COUNT() bfin_read16(DMA6_Y_COUNT) +#define bfin_write_DMA6_Y_COUNT(val) bfin_write16(DMA6_Y_COUNT, val) +#define bfin_read_DMA6_Y_MODIFY() bfin_read16(DMA6_Y_MODIFY) +#define bfin_write_DMA6_Y_MODIFY(val) bfin_write16(DMA6_Y_MODIFY, val) +#define bfin_read_DMA6_CURR_DESC_PTR() bfin_readPTR(DMA6_CURR_DESC_PTR) +#define bfin_write_DMA6_CURR_DESC_PTR(val) bfin_writePTR(DMA6_CURR_DESC_PTR, val) +#define bfin_read_DMA6_CURR_ADDR() bfin_readPTR(DMA6_CURR_ADDR) +#define bfin_write_DMA6_CURR_ADDR(val) bfin_writePTR(DMA6_CURR_ADDR, val) +#define bfin_read_DMA6_IRQ_STATUS() bfin_read16(DMA6_IRQ_STATUS) +#define bfin_write_DMA6_IRQ_STATUS(val) bfin_write16(DMA6_IRQ_STATUS, val) +#define bfin_read_DMA6_PERIPHERAL_MAP() bfin_read16(DMA6_PERIPHERAL_MAP) +#define bfin_write_DMA6_PERIPHERAL_MAP(val) bfin_write16(DMA6_PERIPHERAL_MAP, val) +#define bfin_read_DMA6_CURR_X_COUNT() bfin_read16(DMA6_CURR_X_COUNT) +#define bfin_write_DMA6_CURR_X_COUNT(val) bfin_write16(DMA6_CURR_X_COUNT, val) +#define bfin_read_DMA6_CURR_Y_COUNT() bfin_read16(DMA6_CURR_Y_COUNT) +#define bfin_write_DMA6_CURR_Y_COUNT(val) bfin_write16(DMA6_CURR_Y_COUNT, val) +#define bfin_read_DMA7_NEXT_DESC_PTR() bfin_readPTR(DMA7_NEXT_DESC_PTR) +#define bfin_write_DMA7_NEXT_DESC_PTR(val) bfin_writePTR(DMA7_NEXT_DESC_PTR, val) +#define bfin_read_DMA7_START_ADDR() bfin_readPTR(DMA7_START_ADDR) +#define bfin_write_DMA7_START_ADDR(val) bfin_writePTR(DMA7_START_ADDR, val) +#define bfin_read_DMA7_CONFIG() bfin_read16(DMA7_CONFIG) +#define bfin_write_DMA7_CONFIG(val) bfin_write16(DMA7_CONFIG, val) +#define bfin_read_DMA7_X_COUNT() bfin_read16(DMA7_X_COUNT) +#define bfin_write_DMA7_X_COUNT(val) bfin_write16(DMA7_X_COUNT, val) +#define bfin_read_DMA7_X_MODIFY() bfin_read16(DMA7_X_MODIFY) +#define bfin_write_DMA7_X_MODIFY(val) bfin_write16(DMA7_X_MODIFY, val) +#define bfin_read_DMA7_Y_COUNT() bfin_read16(DMA7_Y_COUNT) +#define bfin_write_DMA7_Y_COUNT(val) bfin_write16(DMA7_Y_COUNT, val) +#define bfin_read_DMA7_Y_MODIFY() bfin_read16(DMA7_Y_MODIFY) +#define bfin_write_DMA7_Y_MODIFY(val) bfin_write16(DMA7_Y_MODIFY, val) +#define bfin_read_DMA7_CURR_DESC_PTR() bfin_readPTR(DMA7_CURR_DESC_PTR) +#define bfin_write_DMA7_CURR_DESC_PTR(val) bfin_writePTR(DMA7_CURR_DESC_PTR, val) +#define bfin_read_DMA7_CURR_ADDR() bfin_readPTR(DMA7_CURR_ADDR) +#define bfin_write_DMA7_CURR_ADDR(val) bfin_writePTR(DMA7_CURR_ADDR, val) +#define bfin_read_DMA7_IRQ_STATUS() bfin_read16(DMA7_IRQ_STATUS) +#define bfin_write_DMA7_IRQ_STATUS(val) bfin_write16(DMA7_IRQ_STATUS, val) +#define bfin_read_DMA7_PERIPHERAL_MAP() bfin_read16(DMA7_PERIPHERAL_MAP) +#define bfin_write_DMA7_PERIPHERAL_MAP(val) bfin_write16(DMA7_PERIPHERAL_MAP, val) +#define bfin_read_DMA7_CURR_X_COUNT() bfin_read16(DMA7_CURR_X_COUNT) +#define bfin_write_DMA7_CURR_X_COUNT(val) bfin_write16(DMA7_CURR_X_COUNT, val) +#define bfin_read_DMA7_CURR_Y_COUNT() bfin_read16(DMA7_CURR_Y_COUNT) +#define bfin_write_DMA7_CURR_Y_COUNT(val) bfin_write16(DMA7_CURR_Y_COUNT, val) +#define bfin_read_DMA1_TC_PER() bfin_read16(DMA1_TC_PER) +#define bfin_write_DMA1_TC_PER(val) bfin_write16(DMA1_TC_PER, val) +#define bfin_read_DMA1_TC_CNT() bfin_read16(DMA1_TC_CNT) +#define bfin_write_DMA1_TC_CNT(val) bfin_write16(DMA1_TC_CNT, val) +#define bfin_read_DMA8_NEXT_DESC_PTR() bfin_readPTR(DMA8_NEXT_DESC_PTR) +#define bfin_write_DMA8_NEXT_DESC_PTR(val) bfin_writePTR(DMA8_NEXT_DESC_PTR, val) +#define bfin_read_DMA8_START_ADDR() bfin_readPTR(DMA8_START_ADDR) +#define bfin_write_DMA8_START_ADDR(val) bfin_writePTR(DMA8_START_ADDR, val) +#define bfin_read_DMA8_CONFIG() bfin_read16(DMA8_CONFIG) +#define bfin_write_DMA8_CONFIG(val) bfin_write16(DMA8_CONFIG, val) +#define bfin_read_DMA8_X_COUNT() bfin_read16(DMA8_X_COUNT) +#define bfin_write_DMA8_X_COUNT(val) bfin_write16(DMA8_X_COUNT, val) +#define bfin_read_DMA8_X_MODIFY() bfin_read16(DMA8_X_MODIFY) +#define bfin_write_DMA8_X_MODIFY(val) bfin_write16(DMA8_X_MODIFY, val) +#define bfin_read_DMA8_Y_COUNT() bfin_read16(DMA8_Y_COUNT) +#define bfin_write_DMA8_Y_COUNT(val) bfin_write16(DMA8_Y_COUNT, val) +#define bfin_read_DMA8_Y_MODIFY() bfin_read16(DMA8_Y_MODIFY) +#define bfin_write_DMA8_Y_MODIFY(val) bfin_write16(DMA8_Y_MODIFY, val) +#define bfin_read_DMA8_CURR_DESC_PTR() bfin_readPTR(DMA8_CURR_DESC_PTR) +#define bfin_write_DMA8_CURR_DESC_PTR(val) bfin_writePTR(DMA8_CURR_DESC_PTR, val) +#define bfin_read_DMA8_CURR_ADDR() bfin_readPTR(DMA8_CURR_ADDR) +#define bfin_write_DMA8_CURR_ADDR(val) bfin_writePTR(DMA8_CURR_ADDR, val) +#define bfin_read_DMA8_IRQ_STATUS() bfin_read16(DMA8_IRQ_STATUS) +#define bfin_write_DMA8_IRQ_STATUS(val) bfin_write16(DMA8_IRQ_STATUS, val) +#define bfin_read_DMA8_PERIPHERAL_MAP() bfin_read16(DMA8_PERIPHERAL_MAP) +#define bfin_write_DMA8_PERIPHERAL_MAP(val) bfin_write16(DMA8_PERIPHERAL_MAP, val) +#define bfin_read_DMA8_CURR_X_COUNT() bfin_read16(DMA8_CURR_X_COUNT) +#define bfin_write_DMA8_CURR_X_COUNT(val) bfin_write16(DMA8_CURR_X_COUNT, val) +#define bfin_read_DMA8_CURR_Y_COUNT() bfin_read16(DMA8_CURR_Y_COUNT) +#define bfin_write_DMA8_CURR_Y_COUNT(val) bfin_write16(DMA8_CURR_Y_COUNT, val) +#define bfin_read_DMA9_NEXT_DESC_PTR() bfin_readPTR(DMA9_NEXT_DESC_PTR) +#define bfin_write_DMA9_NEXT_DESC_PTR(val) bfin_writePTR(DMA9_NEXT_DESC_PTR, val) +#define bfin_read_DMA9_START_ADDR() bfin_readPTR(DMA9_START_ADDR) +#define bfin_write_DMA9_START_ADDR(val) bfin_writePTR(DMA9_START_ADDR, val) +#define bfin_read_DMA9_CONFIG() bfin_read16(DMA9_CONFIG) +#define bfin_write_DMA9_CONFIG(val) bfin_write16(DMA9_CONFIG, val) +#define bfin_read_DMA9_X_COUNT() bfin_read16(DMA9_X_COUNT) +#define bfin_write_DMA9_X_COUNT(val) bfin_write16(DMA9_X_COUNT, val) +#define bfin_read_DMA9_X_MODIFY() bfin_read16(DMA9_X_MODIFY) +#define bfin_write_DMA9_X_MODIFY(val) bfin_write16(DMA9_X_MODIFY, val) +#define bfin_read_DMA9_Y_COUNT() bfin_read16(DMA9_Y_COUNT) +#define bfin_write_DMA9_Y_COUNT(val) bfin_write16(DMA9_Y_COUNT, val) +#define bfin_read_DMA9_Y_MODIFY() bfin_read16(DMA9_Y_MODIFY) +#define bfin_write_DMA9_Y_MODIFY(val) bfin_write16(DMA9_Y_MODIFY, val) +#define bfin_read_DMA9_CURR_DESC_PTR() bfin_readPTR(DMA9_CURR_DESC_PTR) +#define bfin_write_DMA9_CURR_DESC_PTR(val) bfin_writePTR(DMA9_CURR_DESC_PTR, val) +#define bfin_read_DMA9_CURR_ADDR() bfin_readPTR(DMA9_CURR_ADDR) +#define bfin_write_DMA9_CURR_ADDR(val) bfin_writePTR(DMA9_CURR_ADDR, val) +#define bfin_read_DMA9_IRQ_STATUS() bfin_read16(DMA9_IRQ_STATUS) +#define bfin_write_DMA9_IRQ_STATUS(val) bfin_write16(DMA9_IRQ_STATUS, val) +#define bfin_read_DMA9_PERIPHERAL_MAP() bfin_read16(DMA9_PERIPHERAL_MAP) +#define bfin_write_DMA9_PERIPHERAL_MAP(val) bfin_write16(DMA9_PERIPHERAL_MAP, val) +#define bfin_read_DMA9_CURR_X_COUNT() bfin_read16(DMA9_CURR_X_COUNT) +#define bfin_write_DMA9_CURR_X_COUNT(val) bfin_write16(DMA9_CURR_X_COUNT, val) +#define bfin_read_DMA9_CURR_Y_COUNT() bfin_read16(DMA9_CURR_Y_COUNT) +#define bfin_write_DMA9_CURR_Y_COUNT(val) bfin_write16(DMA9_CURR_Y_COUNT, val) +#define bfin_read_DMA10_NEXT_DESC_PTR() bfin_readPTR(DMA10_NEXT_DESC_PTR) +#define bfin_write_DMA10_NEXT_DESC_PTR(val) bfin_writePTR(DMA10_NEXT_DESC_PTR, val) +#define bfin_read_DMA10_START_ADDR() bfin_readPTR(DMA10_START_ADDR) +#define bfin_write_DMA10_START_ADDR(val) bfin_writePTR(DMA10_START_ADDR, val) +#define bfin_read_DMA10_CONFIG() bfin_read16(DMA10_CONFIG) +#define bfin_write_DMA10_CONFIG(val) bfin_write16(DMA10_CONFIG, val) +#define bfin_read_DMA10_X_COUNT() bfin_read16(DMA10_X_COUNT) +#define bfin_write_DMA10_X_COUNT(val) bfin_write16(DMA10_X_COUNT, val) +#define bfin_read_DMA10_X_MODIFY() bfin_read16(DMA10_X_MODIFY) +#define bfin_write_DMA10_X_MODIFY(val) bfin_write16(DMA10_X_MODIFY, val) +#define bfin_read_DMA10_Y_COUNT() bfin_read16(DMA10_Y_COUNT) +#define bfin_write_DMA10_Y_COUNT(val) bfin_write16(DMA10_Y_COUNT, val) +#define bfin_read_DMA10_Y_MODIFY() bfin_read16(DMA10_Y_MODIFY) +#define bfin_write_DMA10_Y_MODIFY(val) bfin_write16(DMA10_Y_MODIFY, val) +#define bfin_read_DMA10_CURR_DESC_PTR() bfin_readPTR(DMA10_CURR_DESC_PTR) +#define bfin_write_DMA10_CURR_DESC_PTR(val) bfin_writePTR(DMA10_CURR_DESC_PTR, val) +#define bfin_read_DMA10_CURR_ADDR() bfin_readPTR(DMA10_CURR_ADDR) +#define bfin_write_DMA10_CURR_ADDR(val) bfin_writePTR(DMA10_CURR_ADDR, val) +#define bfin_read_DMA10_IRQ_STATUS() bfin_read16(DMA10_IRQ_STATUS) +#define bfin_write_DMA10_IRQ_STATUS(val) bfin_write16(DMA10_IRQ_STATUS, val) +#define bfin_read_DMA10_PERIPHERAL_MAP() bfin_read16(DMA10_PERIPHERAL_MAP) +#define bfin_write_DMA10_PERIPHERAL_MAP(val) bfin_write16(DMA10_PERIPHERAL_MAP, val) +#define bfin_read_DMA10_CURR_X_COUNT() bfin_read16(DMA10_CURR_X_COUNT) +#define bfin_write_DMA10_CURR_X_COUNT(val) bfin_write16(DMA10_CURR_X_COUNT, val) +#define bfin_read_DMA10_CURR_Y_COUNT() bfin_read16(DMA10_CURR_Y_COUNT) +#define bfin_write_DMA10_CURR_Y_COUNT(val) bfin_write16(DMA10_CURR_Y_COUNT, val) +#define bfin_read_DMA11_NEXT_DESC_PTR() bfin_readPTR(DMA11_NEXT_DESC_PTR) +#define bfin_write_DMA11_NEXT_DESC_PTR(val) bfin_writePTR(DMA11_NEXT_DESC_PTR, val) +#define bfin_read_DMA11_START_ADDR() bfin_readPTR(DMA11_START_ADDR) +#define bfin_write_DMA11_START_ADDR(val) bfin_writePTR(DMA11_START_ADDR, val) +#define bfin_read_DMA11_CONFIG() bfin_read16(DMA11_CONFIG) +#define bfin_write_DMA11_CONFIG(val) bfin_write16(DMA11_CONFIG, val) +#define bfin_read_DMA11_X_COUNT() bfin_read16(DMA11_X_COUNT) +#define bfin_write_DMA11_X_COUNT(val) bfin_write16(DMA11_X_COUNT, val) +#define bfin_read_DMA11_X_MODIFY() bfin_read16(DMA11_X_MODIFY) +#define bfin_write_DMA11_X_MODIFY(val) bfin_write16(DMA11_X_MODIFY, val) +#define bfin_read_DMA11_Y_COUNT() bfin_read16(DMA11_Y_COUNT) +#define bfin_write_DMA11_Y_COUNT(val) bfin_write16(DMA11_Y_COUNT, val) +#define bfin_read_DMA11_Y_MODIFY() bfin_read16(DMA11_Y_MODIFY) +#define bfin_write_DMA11_Y_MODIFY(val) bfin_write16(DMA11_Y_MODIFY, val) +#define bfin_read_DMA11_CURR_DESC_PTR() bfin_readPTR(DMA11_CURR_DESC_PTR) +#define bfin_write_DMA11_CURR_DESC_PTR(val) bfin_writePTR(DMA11_CURR_DESC_PTR, val) +#define bfin_read_DMA11_CURR_ADDR() bfin_readPTR(DMA11_CURR_ADDR) +#define bfin_write_DMA11_CURR_ADDR(val) bfin_writePTR(DMA11_CURR_ADDR, val) +#define bfin_read_DMA11_IRQ_STATUS() bfin_read16(DMA11_IRQ_STATUS) +#define bfin_write_DMA11_IRQ_STATUS(val) bfin_write16(DMA11_IRQ_STATUS, val) +#define bfin_read_DMA11_PERIPHERAL_MAP() bfin_read16(DMA11_PERIPHERAL_MAP) +#define bfin_write_DMA11_PERIPHERAL_MAP(val) bfin_write16(DMA11_PERIPHERAL_MAP, val) +#define bfin_read_DMA11_CURR_X_COUNT() bfin_read16(DMA11_CURR_X_COUNT) +#define bfin_write_DMA11_CURR_X_COUNT(val) bfin_write16(DMA11_CURR_X_COUNT, val) +#define bfin_read_DMA11_CURR_Y_COUNT() bfin_read16(DMA11_CURR_Y_COUNT) +#define bfin_write_DMA11_CURR_Y_COUNT(val) bfin_write16(DMA11_CURR_Y_COUNT, val) +#define bfin_read_DMA12_NEXT_DESC_PTR() bfin_readPTR(DMA12_NEXT_DESC_PTR) +#define bfin_write_DMA12_NEXT_DESC_PTR(val) bfin_writePTR(DMA12_NEXT_DESC_PTR, val) +#define bfin_read_DMA12_START_ADDR() bfin_readPTR(DMA12_START_ADDR) +#define bfin_write_DMA12_START_ADDR(val) bfin_writePTR(DMA12_START_ADDR, val) +#define bfin_read_DMA12_CONFIG() bfin_read16(DMA12_CONFIG) +#define bfin_write_DMA12_CONFIG(val) bfin_write16(DMA12_CONFIG, val) +#define bfin_read_DMA12_X_COUNT() bfin_read16(DMA12_X_COUNT) +#define bfin_write_DMA12_X_COUNT(val) bfin_write16(DMA12_X_COUNT, val) +#define bfin_read_DMA12_X_MODIFY() bfin_read16(DMA12_X_MODIFY) +#define bfin_write_DMA12_X_MODIFY(val) bfin_write16(DMA12_X_MODIFY, val) +#define bfin_read_DMA12_Y_COUNT() bfin_read16(DMA12_Y_COUNT) +#define bfin_write_DMA12_Y_COUNT(val) bfin_write16(DMA12_Y_COUNT, val) +#define bfin_read_DMA12_Y_MODIFY() bfin_read16(DMA12_Y_MODIFY) +#define bfin_write_DMA12_Y_MODIFY(val) bfin_write16(DMA12_Y_MODIFY, val) +#define bfin_read_DMA12_CURR_DESC_PTR() bfin_readPTR(DMA12_CURR_DESC_PTR) +#define bfin_write_DMA12_CURR_DESC_PTR(val) bfin_writePTR(DMA12_CURR_DESC_PTR, val) +#define bfin_read_DMA12_CURR_ADDR() bfin_readPTR(DMA12_CURR_ADDR) +#define bfin_write_DMA12_CURR_ADDR(val) bfin_writePTR(DMA12_CURR_ADDR, val) +#define bfin_read_DMA12_IRQ_STATUS() bfin_read16(DMA12_IRQ_STATUS) +#define bfin_write_DMA12_IRQ_STATUS(val) bfin_write16(DMA12_IRQ_STATUS, val) +#define bfin_read_DMA12_PERIPHERAL_MAP() bfin_read16(DMA12_PERIPHERAL_MAP) +#define bfin_write_DMA12_PERIPHERAL_MAP(val) bfin_write16(DMA12_PERIPHERAL_MAP, val) +#define bfin_read_DMA12_CURR_X_COUNT() bfin_read16(DMA12_CURR_X_COUNT) +#define bfin_write_DMA12_CURR_X_COUNT(val) bfin_write16(DMA12_CURR_X_COUNT, val) +#define bfin_read_DMA12_CURR_Y_COUNT() bfin_read16(DMA12_CURR_Y_COUNT) +#define bfin_write_DMA12_CURR_Y_COUNT(val) bfin_write16(DMA12_CURR_Y_COUNT, val) +#define bfin_read_DMA13_NEXT_DESC_PTR() bfin_readPTR(DMA13_NEXT_DESC_PTR) +#define bfin_write_DMA13_NEXT_DESC_PTR(val) bfin_writePTR(DMA13_NEXT_DESC_PTR, val) +#define bfin_read_DMA13_START_ADDR() bfin_readPTR(DMA13_START_ADDR) +#define bfin_write_DMA13_START_ADDR(val) bfin_writePTR(DMA13_START_ADDR, val) +#define bfin_read_DMA13_CONFIG() bfin_read16(DMA13_CONFIG) +#define bfin_write_DMA13_CONFIG(val) bfin_write16(DMA13_CONFIG, val) +#define bfin_read_DMA13_X_COUNT() bfin_read16(DMA13_X_COUNT) +#define bfin_write_DMA13_X_COUNT(val) bfin_write16(DMA13_X_COUNT, val) +#define bfin_read_DMA13_X_MODIFY() bfin_read16(DMA13_X_MODIFY) +#define bfin_write_DMA13_X_MODIFY(val) bfin_write16(DMA13_X_MODIFY, val) +#define bfin_read_DMA13_Y_COUNT() bfin_read16(DMA13_Y_COUNT) +#define bfin_write_DMA13_Y_COUNT(val) bfin_write16(DMA13_Y_COUNT, val) +#define bfin_read_DMA13_Y_MODIFY() bfin_read16(DMA13_Y_MODIFY) +#define bfin_write_DMA13_Y_MODIFY(val) bfin_write16(DMA13_Y_MODIFY, val) +#define bfin_read_DMA13_CURR_DESC_PTR() bfin_readPTR(DMA13_CURR_DESC_PTR) +#define bfin_write_DMA13_CURR_DESC_PTR(val) bfin_writePTR(DMA13_CURR_DESC_PTR, val) +#define bfin_read_DMA13_CURR_ADDR() bfin_readPTR(DMA13_CURR_ADDR) +#define bfin_write_DMA13_CURR_ADDR(val) bfin_writePTR(DMA13_CURR_ADDR, val) +#define bfin_read_DMA13_IRQ_STATUS() bfin_read16(DMA13_IRQ_STATUS) +#define bfin_write_DMA13_IRQ_STATUS(val) bfin_write16(DMA13_IRQ_STATUS, val) +#define bfin_read_DMA13_PERIPHERAL_MAP() bfin_read16(DMA13_PERIPHERAL_MAP) +#define bfin_write_DMA13_PERIPHERAL_MAP(val) bfin_write16(DMA13_PERIPHERAL_MAP, val) +#define bfin_read_DMA13_CURR_X_COUNT() bfin_read16(DMA13_CURR_X_COUNT) +#define bfin_write_DMA13_CURR_X_COUNT(val) bfin_write16(DMA13_CURR_X_COUNT, val) +#define bfin_read_DMA13_CURR_Y_COUNT() bfin_read16(DMA13_CURR_Y_COUNT) +#define bfin_write_DMA13_CURR_Y_COUNT(val) bfin_write16(DMA13_CURR_Y_COUNT, val) +#define bfin_read_DMA14_NEXT_DESC_PTR() bfin_readPTR(DMA14_NEXT_DESC_PTR) +#define bfin_write_DMA14_NEXT_DESC_PTR(val) bfin_writePTR(DMA14_NEXT_DESC_PTR, val) +#define bfin_read_DMA14_START_ADDR() bfin_readPTR(DMA14_START_ADDR) +#define bfin_write_DMA14_START_ADDR(val) bfin_writePTR(DMA14_START_ADDR, val) +#define bfin_read_DMA14_CONFIG() bfin_read16(DMA14_CONFIG) +#define bfin_write_DMA14_CONFIG(val) bfin_write16(DMA14_CONFIG, val) +#define bfin_read_DMA14_X_COUNT() bfin_read16(DMA14_X_COUNT) +#define bfin_write_DMA14_X_COUNT(val) bfin_write16(DMA14_X_COUNT, val) +#define bfin_read_DMA14_X_MODIFY() bfin_read16(DMA14_X_MODIFY) +#define bfin_write_DMA14_X_MODIFY(val) bfin_write16(DMA14_X_MODIFY, val) +#define bfin_read_DMA14_Y_COUNT() bfin_read16(DMA14_Y_COUNT) +#define bfin_write_DMA14_Y_COUNT(val) bfin_write16(DMA14_Y_COUNT, val) +#define bfin_read_DMA14_Y_MODIFY() bfin_read16(DMA14_Y_MODIFY) +#define bfin_write_DMA14_Y_MODIFY(val) bfin_write16(DMA14_Y_MODIFY, val) +#define bfin_read_DMA14_CURR_DESC_PTR() bfin_readPTR(DMA14_CURR_DESC_PTR) +#define bfin_write_DMA14_CURR_DESC_PTR(val) bfin_writePTR(DMA14_CURR_DESC_PTR, val) +#define bfin_read_DMA14_CURR_ADDR() bfin_readPTR(DMA14_CURR_ADDR) +#define bfin_write_DMA14_CURR_ADDR(val) bfin_writePTR(DMA14_CURR_ADDR, val) +#define bfin_read_DMA14_IRQ_STATUS() bfin_read16(DMA14_IRQ_STATUS) +#define bfin_write_DMA14_IRQ_STATUS(val) bfin_write16(DMA14_IRQ_STATUS, val) +#define bfin_read_DMA14_PERIPHERAL_MAP() bfin_read16(DMA14_PERIPHERAL_MAP) +#define bfin_write_DMA14_PERIPHERAL_MAP(val) bfin_write16(DMA14_PERIPHERAL_MAP, val) +#define bfin_read_DMA14_CURR_X_COUNT() bfin_read16(DMA14_CURR_X_COUNT) +#define bfin_write_DMA14_CURR_X_COUNT(val) bfin_write16(DMA14_CURR_X_COUNT, val) +#define bfin_read_DMA14_CURR_Y_COUNT() bfin_read16(DMA14_CURR_Y_COUNT) +#define bfin_write_DMA14_CURR_Y_COUNT(val) bfin_write16(DMA14_CURR_Y_COUNT, val) +#define bfin_read_DMA15_NEXT_DESC_PTR() bfin_readPTR(DMA15_NEXT_DESC_PTR) +#define bfin_write_DMA15_NEXT_DESC_PTR(val) bfin_writePTR(DMA15_NEXT_DESC_PTR, val) +#define bfin_read_DMA15_START_ADDR() bfin_readPTR(DMA15_START_ADDR) +#define bfin_write_DMA15_START_ADDR(val) bfin_writePTR(DMA15_START_ADDR, val) +#define bfin_read_DMA15_CONFIG() bfin_read16(DMA15_CONFIG) +#define bfin_write_DMA15_CONFIG(val) bfin_write16(DMA15_CONFIG, val) +#define bfin_read_DMA15_X_COUNT() bfin_read16(DMA15_X_COUNT) +#define bfin_write_DMA15_X_COUNT(val) bfin_write16(DMA15_X_COUNT, val) +#define bfin_read_DMA15_X_MODIFY() bfin_read16(DMA15_X_MODIFY) +#define bfin_write_DMA15_X_MODIFY(val) bfin_write16(DMA15_X_MODIFY, val) +#define bfin_read_DMA15_Y_COUNT() bfin_read16(DMA15_Y_COUNT) +#define bfin_write_DMA15_Y_COUNT(val) bfin_write16(DMA15_Y_COUNT, val) +#define bfin_read_DMA15_Y_MODIFY() bfin_read16(DMA15_Y_MODIFY) +#define bfin_write_DMA15_Y_MODIFY(val) bfin_write16(DMA15_Y_MODIFY, val) +#define bfin_read_DMA15_CURR_DESC_PTR() bfin_readPTR(DMA15_CURR_DESC_PTR) +#define bfin_write_DMA15_CURR_DESC_PTR(val) bfin_writePTR(DMA15_CURR_DESC_PTR, val) +#define bfin_read_DMA15_CURR_ADDR() bfin_readPTR(DMA15_CURR_ADDR) +#define bfin_write_DMA15_CURR_ADDR(val) bfin_writePTR(DMA15_CURR_ADDR, val) +#define bfin_read_DMA15_IRQ_STATUS() bfin_read16(DMA15_IRQ_STATUS) +#define bfin_write_DMA15_IRQ_STATUS(val) bfin_write16(DMA15_IRQ_STATUS, val) +#define bfin_read_DMA15_PERIPHERAL_MAP() bfin_read16(DMA15_PERIPHERAL_MAP) +#define bfin_write_DMA15_PERIPHERAL_MAP(val) bfin_write16(DMA15_PERIPHERAL_MAP, val) +#define bfin_read_DMA15_CURR_X_COUNT() bfin_read16(DMA15_CURR_X_COUNT) +#define bfin_write_DMA15_CURR_X_COUNT(val) bfin_write16(DMA15_CURR_X_COUNT, val) +#define bfin_read_DMA15_CURR_Y_COUNT() bfin_read16(DMA15_CURR_Y_COUNT) +#define bfin_write_DMA15_CURR_Y_COUNT(val) bfin_write16(DMA15_CURR_Y_COUNT, val) +#define bfin_read_DMA16_NEXT_DESC_PTR() bfin_readPTR(DMA16_NEXT_DESC_PTR) +#define bfin_write_DMA16_NEXT_DESC_PTR(val) bfin_writePTR(DMA16_NEXT_DESC_PTR, val) +#define bfin_read_DMA16_START_ADDR() bfin_readPTR(DMA16_START_ADDR) +#define bfin_write_DMA16_START_ADDR(val) bfin_writePTR(DMA16_START_ADDR, val) +#define bfin_read_DMA16_CONFIG() bfin_read16(DMA16_CONFIG) +#define bfin_write_DMA16_CONFIG(val) bfin_write16(DMA16_CONFIG, val) +#define bfin_read_DMA16_X_COUNT() bfin_read16(DMA16_X_COUNT) +#define bfin_write_DMA16_X_COUNT(val) bfin_write16(DMA16_X_COUNT, val) +#define bfin_read_DMA16_X_MODIFY() bfin_read16(DMA16_X_MODIFY) +#define bfin_write_DMA16_X_MODIFY(val) bfin_write16(DMA16_X_MODIFY, val) +#define bfin_read_DMA16_Y_COUNT() bfin_read16(DMA16_Y_COUNT) +#define bfin_write_DMA16_Y_COUNT(val) bfin_write16(DMA16_Y_COUNT, val) +#define bfin_read_DMA16_Y_MODIFY() bfin_read16(DMA16_Y_MODIFY) +#define bfin_write_DMA16_Y_MODIFY(val) bfin_write16(DMA16_Y_MODIFY, val) +#define bfin_read_DMA16_CURR_DESC_PTR() bfin_readPTR(DMA16_CURR_DESC_PTR) +#define bfin_write_DMA16_CURR_DESC_PTR(val) bfin_writePTR(DMA16_CURR_DESC_PTR, val) +#define bfin_read_DMA16_CURR_ADDR() bfin_readPTR(DMA16_CURR_ADDR) +#define bfin_write_DMA16_CURR_ADDR(val) bfin_writePTR(DMA16_CURR_ADDR, val) +#define bfin_read_DMA16_IRQ_STATUS() bfin_read16(DMA16_IRQ_STATUS) +#define bfin_write_DMA16_IRQ_STATUS(val) bfin_write16(DMA16_IRQ_STATUS, val) +#define bfin_read_DMA16_PERIPHERAL_MAP() bfin_read16(DMA16_PERIPHERAL_MAP) +#define bfin_write_DMA16_PERIPHERAL_MAP(val) bfin_write16(DMA16_PERIPHERAL_MAP, val) +#define bfin_read_DMA16_CURR_X_COUNT() bfin_read16(DMA16_CURR_X_COUNT) +#define bfin_write_DMA16_CURR_X_COUNT(val) bfin_write16(DMA16_CURR_X_COUNT, val) +#define bfin_read_DMA16_CURR_Y_COUNT() bfin_read16(DMA16_CURR_Y_COUNT) +#define bfin_write_DMA16_CURR_Y_COUNT(val) bfin_write16(DMA16_CURR_Y_COUNT, val) +#define bfin_read_DMA17_NEXT_DESC_PTR() bfin_readPTR(DMA17_NEXT_DESC_PTR) +#define bfin_write_DMA17_NEXT_DESC_PTR(val) bfin_writePTR(DMA17_NEXT_DESC_PTR, val) +#define bfin_read_DMA17_START_ADDR() bfin_readPTR(DMA17_START_ADDR) +#define bfin_write_DMA17_START_ADDR(val) bfin_writePTR(DMA17_START_ADDR, val) +#define bfin_read_DMA17_CONFIG() bfin_read16(DMA17_CONFIG) +#define bfin_write_DMA17_CONFIG(val) bfin_write16(DMA17_CONFIG, val) +#define bfin_read_DMA17_X_COUNT() bfin_read16(DMA17_X_COUNT) +#define bfin_write_DMA17_X_COUNT(val) bfin_write16(DMA17_X_COUNT, val) +#define bfin_read_DMA17_X_MODIFY() bfin_read16(DMA17_X_MODIFY) +#define bfin_write_DMA17_X_MODIFY(val) bfin_write16(DMA17_X_MODIFY, val) +#define bfin_read_DMA17_Y_COUNT() bfin_read16(DMA17_Y_COUNT) +#define bfin_write_DMA17_Y_COUNT(val) bfin_write16(DMA17_Y_COUNT, val) +#define bfin_read_DMA17_Y_MODIFY() bfin_read16(DMA17_Y_MODIFY) +#define bfin_write_DMA17_Y_MODIFY(val) bfin_write16(DMA17_Y_MODIFY, val) +#define bfin_read_DMA17_CURR_DESC_PTR() bfin_readPTR(DMA17_CURR_DESC_PTR) +#define bfin_write_DMA17_CURR_DESC_PTR(val) bfin_writePTR(DMA17_CURR_DESC_PTR, val) +#define bfin_read_DMA17_CURR_ADDR() bfin_readPTR(DMA17_CURR_ADDR) +#define bfin_write_DMA17_CURR_ADDR(val) bfin_writePTR(DMA17_CURR_ADDR, val) +#define bfin_read_DMA17_IRQ_STATUS() bfin_read16(DMA17_IRQ_STATUS) +#define bfin_write_DMA17_IRQ_STATUS(val) bfin_write16(DMA17_IRQ_STATUS, val) +#define bfin_read_DMA17_PERIPHERAL_MAP() bfin_read16(DMA17_PERIPHERAL_MAP) +#define bfin_write_DMA17_PERIPHERAL_MAP(val) bfin_write16(DMA17_PERIPHERAL_MAP, val) +#define bfin_read_DMA17_CURR_X_COUNT() bfin_read16(DMA17_CURR_X_COUNT) +#define bfin_write_DMA17_CURR_X_COUNT(val) bfin_write16(DMA17_CURR_X_COUNT, val) +#define bfin_read_DMA17_CURR_Y_COUNT() bfin_read16(DMA17_CURR_Y_COUNT) +#define bfin_write_DMA17_CURR_Y_COUNT(val) bfin_write16(DMA17_CURR_Y_COUNT, val) +#define bfin_read_DMA18_NEXT_DESC_PTR() bfin_readPTR(DMA18_NEXT_DESC_PTR) +#define bfin_write_DMA18_NEXT_DESC_PTR(val) bfin_writePTR(DMA18_NEXT_DESC_PTR, val) +#define bfin_read_DMA18_START_ADDR() bfin_readPTR(DMA18_START_ADDR) +#define bfin_write_DMA18_START_ADDR(val) bfin_writePTR(DMA18_START_ADDR, val) +#define bfin_read_DMA18_CONFIG() bfin_read16(DMA18_CONFIG) +#define bfin_write_DMA18_CONFIG(val) bfin_write16(DMA18_CONFIG, val) +#define bfin_read_DMA18_X_COUNT() bfin_read16(DMA18_X_COUNT) +#define bfin_write_DMA18_X_COUNT(val) bfin_write16(DMA18_X_COUNT, val) +#define bfin_read_DMA18_X_MODIFY() bfin_read16(DMA18_X_MODIFY) +#define bfin_write_DMA18_X_MODIFY(val) bfin_write16(DMA18_X_MODIFY, val) +#define bfin_read_DMA18_Y_COUNT() bfin_read16(DMA18_Y_COUNT) +#define bfin_write_DMA18_Y_COUNT(val) bfin_write16(DMA18_Y_COUNT, val) +#define bfin_read_DMA18_Y_MODIFY() bfin_read16(DMA18_Y_MODIFY) +#define bfin_write_DMA18_Y_MODIFY(val) bfin_write16(DMA18_Y_MODIFY, val) +#define bfin_read_DMA18_CURR_DESC_PTR() bfin_readPTR(DMA18_CURR_DESC_PTR) +#define bfin_write_DMA18_CURR_DESC_PTR(val) bfin_writePTR(DMA18_CURR_DESC_PTR, val) +#define bfin_read_DMA18_CURR_ADDR() bfin_readPTR(DMA18_CURR_ADDR) +#define bfin_write_DMA18_CURR_ADDR(val) bfin_writePTR(DMA18_CURR_ADDR, val) +#define bfin_read_DMA18_IRQ_STATUS() bfin_read16(DMA18_IRQ_STATUS) +#define bfin_write_DMA18_IRQ_STATUS(val) bfin_write16(DMA18_IRQ_STATUS, val) +#define bfin_read_DMA18_PERIPHERAL_MAP() bfin_read16(DMA18_PERIPHERAL_MAP) +#define bfin_write_DMA18_PERIPHERAL_MAP(val) bfin_write16(DMA18_PERIPHERAL_MAP, val) +#define bfin_read_DMA18_CURR_X_COUNT() bfin_read16(DMA18_CURR_X_COUNT) +#define bfin_write_DMA18_CURR_X_COUNT(val) bfin_write16(DMA18_CURR_X_COUNT, val) +#define bfin_read_DMA18_CURR_Y_COUNT() bfin_read16(DMA18_CURR_Y_COUNT) +#define bfin_write_DMA18_CURR_Y_COUNT(val) bfin_write16(DMA18_CURR_Y_COUNT, val) +#define bfin_read_DMA19_NEXT_DESC_PTR() bfin_readPTR(DMA19_NEXT_DESC_PTR) +#define bfin_write_DMA19_NEXT_DESC_PTR(val) bfin_writePTR(DMA19_NEXT_DESC_PTR, val) +#define bfin_read_DMA19_START_ADDR() bfin_readPTR(DMA19_START_ADDR) +#define bfin_write_DMA19_START_ADDR(val) bfin_writePTR(DMA19_START_ADDR, val) +#define bfin_read_DMA19_CONFIG() bfin_read16(DMA19_CONFIG) +#define bfin_write_DMA19_CONFIG(val) bfin_write16(DMA19_CONFIG, val) +#define bfin_read_DMA19_X_COUNT() bfin_read16(DMA19_X_COUNT) +#define bfin_write_DMA19_X_COUNT(val) bfin_write16(DMA19_X_COUNT, val) +#define bfin_read_DMA19_X_MODIFY() bfin_read16(DMA19_X_MODIFY) +#define bfin_write_DMA19_X_MODIFY(val) bfin_write16(DMA19_X_MODIFY, val) +#define bfin_read_DMA19_Y_COUNT() bfin_read16(DMA19_Y_COUNT) +#define bfin_write_DMA19_Y_COUNT(val) bfin_write16(DMA19_Y_COUNT, val) +#define bfin_read_DMA19_Y_MODIFY() bfin_read16(DMA19_Y_MODIFY) +#define bfin_write_DMA19_Y_MODIFY(val) bfin_write16(DMA19_Y_MODIFY, val) +#define bfin_read_DMA19_CURR_DESC_PTR() bfin_readPTR(DMA19_CURR_DESC_PTR) +#define bfin_write_DMA19_CURR_DESC_PTR(val) bfin_writePTR(DMA19_CURR_DESC_PTR, val) +#define bfin_read_DMA19_CURR_ADDR() bfin_readPTR(DMA19_CURR_ADDR) +#define bfin_write_DMA19_CURR_ADDR(val) bfin_writePTR(DMA19_CURR_ADDR, val) +#define bfin_read_DMA19_IRQ_STATUS() bfin_read16(DMA19_IRQ_STATUS) +#define bfin_write_DMA19_IRQ_STATUS(val) bfin_write16(DMA19_IRQ_STATUS, val) +#define bfin_read_DMA19_PERIPHERAL_MAP() bfin_read16(DMA19_PERIPHERAL_MAP) +#define bfin_write_DMA19_PERIPHERAL_MAP(val) bfin_write16(DMA19_PERIPHERAL_MAP, val) +#define bfin_read_DMA19_CURR_X_COUNT() bfin_read16(DMA19_CURR_X_COUNT) +#define bfin_write_DMA19_CURR_X_COUNT(val) bfin_write16(DMA19_CURR_X_COUNT, val) +#define bfin_read_DMA19_CURR_Y_COUNT() bfin_read16(DMA19_CURR_Y_COUNT) +#define bfin_write_DMA19_CURR_Y_COUNT(val) bfin_write16(DMA19_CURR_Y_COUNT, val) +#define bfin_read_MDMA0_D0_NEXT_DESC_PTR() bfin_readPTR(MDMA0_D0_NEXT_DESC_PTR) +#define bfin_write_MDMA0_D0_NEXT_DESC_PTR(val) bfin_writePTR(MDMA0_D0_NEXT_DESC_PTR, val) +#define bfin_read_MDMA0_D0_START_ADDR() bfin_readPTR(MDMA0_D0_START_ADDR) +#define bfin_write_MDMA0_D0_START_ADDR(val) bfin_writePTR(MDMA0_D0_START_ADDR, val) +#define bfin_read_MDMA0_D0_CONFIG() bfin_read16(MDMA0_D0_CONFIG) +#define bfin_write_MDMA0_D0_CONFIG(val) bfin_write16(MDMA0_D0_CONFIG, val) +#define bfin_read_MDMA0_D0_X_COUNT() bfin_read16(MDMA0_D0_X_COUNT) +#define bfin_write_MDMA0_D0_X_COUNT(val) bfin_write16(MDMA0_D0_X_COUNT, val) +#define bfin_read_MDMA0_D0_X_MODIFY() bfin_read16(MDMA0_D0_X_MODIFY) +#define bfin_write_MDMA0_D0_X_MODIFY(val) bfin_write16(MDMA0_D0_X_MODIFY, val) +#define bfin_read_MDMA0_D0_Y_COUNT() bfin_read16(MDMA0_D0_Y_COUNT) +#define bfin_write_MDMA0_D0_Y_COUNT(val) bfin_write16(MDMA0_D0_Y_COUNT, val) +#define bfin_read_MDMA0_D0_Y_MODIFY() bfin_read16(MDMA0_D0_Y_MODIFY) +#define bfin_write_MDMA0_D0_Y_MODIFY(val) bfin_write16(MDMA0_D0_Y_MODIFY, val) +#define bfin_read_MDMA0_D0_CURR_DESC_PTR() bfin_readPTR(MDMA0_D0_CURR_DESC_PTR) +#define bfin_write_MDMA0_D0_CURR_DESC_PTR(val) bfin_writePTR(MDMA0_D0_CURR_DESC_PTR, val) +#define bfin_read_MDMA0_D0_CURR_ADDR() bfin_readPTR(MDMA0_D0_CURR_ADDR) +#define bfin_write_MDMA0_D0_CURR_ADDR(val) bfin_writePTR(MDMA0_D0_CURR_ADDR, val) +#define bfin_read_MDMA0_D0_IRQ_STATUS() bfin_read16(MDMA0_D0_IRQ_STATUS) +#define bfin_write_MDMA0_D0_IRQ_STATUS(val) bfin_write16(MDMA0_D0_IRQ_STATUS, val) +#define bfin_read_MDMA0_D0_PERIPHERAL_MAP() bfin_read16(MDMA0_D0_PERIPHERAL_MAP) +#define bfin_write_MDMA0_D0_PERIPHERAL_MAP(val) bfin_write16(MDMA0_D0_PERIPHERAL_MAP, val) +#define bfin_read_MDMA0_D0_CURR_X_COUNT() bfin_read16(MDMA0_D0_CURR_X_COUNT) +#define bfin_write_MDMA0_D0_CURR_X_COUNT(val) bfin_write16(MDMA0_D0_CURR_X_COUNT, val) +#define bfin_read_MDMA0_D0_CURR_Y_COUNT() bfin_read16(MDMA0_D0_CURR_Y_COUNT) +#define bfin_write_MDMA0_D0_CURR_Y_COUNT(val) bfin_write16(MDMA0_D0_CURR_Y_COUNT, val) +#define bfin_read_MDMA0_S0_NEXT_DESC_PTR() bfin_readPTR(MDMA0_S0_NEXT_DESC_PTR) +#define bfin_write_MDMA0_S0_NEXT_DESC_PTR(val) bfin_writePTR(MDMA0_S0_NEXT_DESC_PTR, val) +#define bfin_read_MDMA0_S0_START_ADDR() bfin_readPTR(MDMA0_S0_START_ADDR) +#define bfin_write_MDMA0_S0_START_ADDR(val) bfin_writePTR(MDMA0_S0_START_ADDR, val) +#define bfin_read_MDMA0_S0_CONFIG() bfin_read16(MDMA0_S0_CONFIG) +#define bfin_write_MDMA0_S0_CONFIG(val) bfin_write16(MDMA0_S0_CONFIG, val) +#define bfin_read_MDMA0_S0_X_COUNT() bfin_read16(MDMA0_S0_X_COUNT) +#define bfin_write_MDMA0_S0_X_COUNT(val) bfin_write16(MDMA0_S0_X_COUNT, val) +#define bfin_read_MDMA0_S0_X_MODIFY() bfin_read16(MDMA0_S0_X_MODIFY) +#define bfin_write_MDMA0_S0_X_MODIFY(val) bfin_write16(MDMA0_S0_X_MODIFY, val) +#define bfin_read_MDMA0_S0_Y_COUNT() bfin_read16(MDMA0_S0_Y_COUNT) +#define bfin_write_MDMA0_S0_Y_COUNT(val) bfin_write16(MDMA0_S0_Y_COUNT, val) +#define bfin_read_MDMA0_S0_Y_MODIFY() bfin_read16(MDMA0_S0_Y_MODIFY) +#define bfin_write_MDMA0_S0_Y_MODIFY(val) bfin_write16(MDMA0_S0_Y_MODIFY, val) +#define bfin_read_MDMA0_S0_CURR_DESC_PTR() bfin_readPTR(MDMA0_S0_CURR_DESC_PTR) +#define bfin_write_MDMA0_S0_CURR_DESC_PTR(val) bfin_writePTR(MDMA0_S0_CURR_DESC_PTR, val) +#define bfin_read_MDMA0_S0_CURR_ADDR() bfin_readPTR(MDMA0_S0_CURR_ADDR) +#define bfin_write_MDMA0_S0_CURR_ADDR(val) bfin_writePTR(MDMA0_S0_CURR_ADDR, val) +#define bfin_read_MDMA0_S0_IRQ_STATUS() bfin_read16(MDMA0_S0_IRQ_STATUS) +#define bfin_write_MDMA0_S0_IRQ_STATUS(val) bfin_write16(MDMA0_S0_IRQ_STATUS, val) +#define bfin_read_MDMA0_S0_PERIPHERAL_MAP() bfin_read16(MDMA0_S0_PERIPHERAL_MAP) +#define bfin_write_MDMA0_S0_PERIPHERAL_MAP(val) bfin_write16(MDMA0_S0_PERIPHERAL_MAP, val) +#define bfin_read_MDMA0_S0_CURR_X_COUNT() bfin_read16(MDMA0_S0_CURR_X_COUNT) +#define bfin_write_MDMA0_S0_CURR_X_COUNT(val) bfin_write16(MDMA0_S0_CURR_X_COUNT, val) +#define bfin_read_MDMA0_S0_CURR_Y_COUNT() bfin_read16(MDMA0_S0_CURR_Y_COUNT) +#define bfin_write_MDMA0_S0_CURR_Y_COUNT(val) bfin_write16(MDMA0_S0_CURR_Y_COUNT, val) +#define bfin_read_MDMA0_D1_NEXT_DESC_PTR() bfin_readPTR(MDMA0_D1_NEXT_DESC_PTR) +#define bfin_write_MDMA0_D1_NEXT_DESC_PTR(val) bfin_writePTR(MDMA0_D1_NEXT_DESC_PTR, val) +#define bfin_read_MDMA0_D1_START_ADDR() bfin_readPTR(MDMA0_D1_START_ADDR) +#define bfin_write_MDMA0_D1_START_ADDR(val) bfin_writePTR(MDMA0_D1_START_ADDR, val) +#define bfin_read_MDMA0_D1_CONFIG() bfin_read16(MDMA0_D1_CONFIG) +#define bfin_write_MDMA0_D1_CONFIG(val) bfin_write16(MDMA0_D1_CONFIG, val) +#define bfin_read_MDMA0_D1_X_COUNT() bfin_read16(MDMA0_D1_X_COUNT) +#define bfin_write_MDMA0_D1_X_COUNT(val) bfin_write16(MDMA0_D1_X_COUNT, val) +#define bfin_read_MDMA0_D1_X_MODIFY() bfin_read16(MDMA0_D1_X_MODIFY) +#define bfin_write_MDMA0_D1_X_MODIFY(val) bfin_write16(MDMA0_D1_X_MODIFY, val) +#define bfin_read_MDMA0_D1_Y_COUNT() bfin_read16(MDMA0_D1_Y_COUNT) +#define bfin_write_MDMA0_D1_Y_COUNT(val) bfin_write16(MDMA0_D1_Y_COUNT, val) +#define bfin_read_MDMA0_D1_Y_MODIFY() bfin_read16(MDMA0_D1_Y_MODIFY) +#define bfin_write_MDMA0_D1_Y_MODIFY(val) bfin_write16(MDMA0_D1_Y_MODIFY, val) +#define bfin_read_MDMA0_D1_CURR_DESC_PTR() bfin_readPTR(MDMA0_D1_CURR_DESC_PTR) +#define bfin_write_MDMA0_D1_CURR_DESC_PTR(val) bfin_writePTR(MDMA0_D1_CURR_DESC_PTR, val) +#define bfin_read_MDMA0_D1_CURR_ADDR() bfin_readPTR(MDMA0_D1_CURR_ADDR) +#define bfin_write_MDMA0_D1_CURR_ADDR(val) bfin_writePTR(MDMA0_D1_CURR_ADDR, val) +#define bfin_read_MDMA0_D1_IRQ_STATUS() bfin_read16(MDMA0_D1_IRQ_STATUS) +#define bfin_write_MDMA0_D1_IRQ_STATUS(val) bfin_write16(MDMA0_D1_IRQ_STATUS, val) +#define bfin_read_MDMA0_D1_PERIPHERAL_MAP() bfin_read16(MDMA0_D1_PERIPHERAL_MAP) +#define bfin_write_MDMA0_D1_PERIPHERAL_MAP(val) bfin_write16(MDMA0_D1_PERIPHERAL_MAP, val) +#define bfin_read_MDMA0_D1_CURR_X_COUNT() bfin_read16(MDMA0_D1_CURR_X_COUNT) +#define bfin_write_MDMA0_D1_CURR_X_COUNT(val) bfin_write16(MDMA0_D1_CURR_X_COUNT, val) +#define bfin_read_MDMA0_D1_CURR_Y_COUNT() bfin_read16(MDMA0_D1_CURR_Y_COUNT) +#define bfin_write_MDMA0_D1_CURR_Y_COUNT(val) bfin_write16(MDMA0_D1_CURR_Y_COUNT, val) +#define bfin_read_MDMA0_S1_NEXT_DESC_PTR() bfin_readPTR(MDMA0_S1_NEXT_DESC_PTR) +#define bfin_write_MDMA0_S1_NEXT_DESC_PTR(val) bfin_writePTR(MDMA0_S1_NEXT_DESC_PTR, val) +#define bfin_read_MDMA0_S1_START_ADDR() bfin_readPTR(MDMA0_S1_START_ADDR) +#define bfin_write_MDMA0_S1_START_ADDR(val) bfin_writePTR(MDMA0_S1_START_ADDR, val) +#define bfin_read_MDMA0_S1_CONFIG() bfin_read16(MDMA0_S1_CONFIG) +#define bfin_write_MDMA0_S1_CONFIG(val) bfin_write16(MDMA0_S1_CONFIG, val) +#define bfin_read_MDMA0_S1_X_COUNT() bfin_read16(MDMA0_S1_X_COUNT) +#define bfin_write_MDMA0_S1_X_COUNT(val) bfin_write16(MDMA0_S1_X_COUNT, val) +#define bfin_read_MDMA0_S1_X_MODIFY() bfin_read16(MDMA0_S1_X_MODIFY) +#define bfin_write_MDMA0_S1_X_MODIFY(val) bfin_write16(MDMA0_S1_X_MODIFY, val) +#define bfin_read_MDMA0_S1_Y_COUNT() bfin_read16(MDMA0_S1_Y_COUNT) +#define bfin_write_MDMA0_S1_Y_COUNT(val) bfin_write16(MDMA0_S1_Y_COUNT, val) +#define bfin_read_MDMA0_S1_Y_MODIFY() bfin_read16(MDMA0_S1_Y_MODIFY) +#define bfin_write_MDMA0_S1_Y_MODIFY(val) bfin_write16(MDMA0_S1_Y_MODIFY, val) +#define bfin_read_MDMA0_S1_CURR_DESC_PTR() bfin_readPTR(MDMA0_S1_CURR_DESC_PTR) +#define bfin_write_MDMA0_S1_CURR_DESC_PTR(val) bfin_writePTR(MDMA0_S1_CURR_DESC_PTR, val) +#define bfin_read_MDMA0_S1_CURR_ADDR() bfin_readPTR(MDMA0_S1_CURR_ADDR) +#define bfin_write_MDMA0_S1_CURR_ADDR(val) bfin_writePTR(MDMA0_S1_CURR_ADDR, val) +#define bfin_read_MDMA0_S1_IRQ_STATUS() bfin_read16(MDMA0_S1_IRQ_STATUS) +#define bfin_write_MDMA0_S1_IRQ_STATUS(val) bfin_write16(MDMA0_S1_IRQ_STATUS, val) +#define bfin_read_MDMA0_S1_PERIPHERAL_MAP() bfin_read16(MDMA0_S1_PERIPHERAL_MAP) +#define bfin_write_MDMA0_S1_PERIPHERAL_MAP(val) bfin_write16(MDMA0_S1_PERIPHERAL_MAP, val) +#define bfin_read_MDMA0_S1_CURR_X_COUNT() bfin_read16(MDMA0_S1_CURR_X_COUNT) +#define bfin_write_MDMA0_S1_CURR_X_COUNT(val) bfin_write16(MDMA0_S1_CURR_X_COUNT, val) +#define bfin_read_MDMA0_S1_CURR_Y_COUNT() bfin_read16(MDMA0_S1_CURR_Y_COUNT) +#define bfin_write_MDMA0_S1_CURR_Y_COUNT(val) bfin_write16(MDMA0_S1_CURR_Y_COUNT, val) +#define bfin_read_MDMA1_D0_NEXT_DESC_PTR() bfin_readPTR(MDMA1_D0_NEXT_DESC_PTR) +#define bfin_write_MDMA1_D0_NEXT_DESC_PTR(val) bfin_writePTR(MDMA1_D0_NEXT_DESC_PTR, val) +#define bfin_read_MDMA1_D0_START_ADDR() bfin_readPTR(MDMA1_D0_START_ADDR) +#define bfin_write_MDMA1_D0_START_ADDR(val) bfin_writePTR(MDMA1_D0_START_ADDR, val) +#define bfin_read_MDMA1_D0_CONFIG() bfin_read16(MDMA1_D0_CONFIG) +#define bfin_write_MDMA1_D0_CONFIG(val) bfin_write16(MDMA1_D0_CONFIG, val) +#define bfin_read_MDMA1_D0_X_COUNT() bfin_read16(MDMA1_D0_X_COUNT) +#define bfin_write_MDMA1_D0_X_COUNT(val) bfin_write16(MDMA1_D0_X_COUNT, val) +#define bfin_read_MDMA1_D0_X_MODIFY() bfin_read16(MDMA1_D0_X_MODIFY) +#define bfin_write_MDMA1_D0_X_MODIFY(val) bfin_write16(MDMA1_D0_X_MODIFY, val) +#define bfin_read_MDMA1_D0_Y_COUNT() bfin_read16(MDMA1_D0_Y_COUNT) +#define bfin_write_MDMA1_D0_Y_COUNT(val) bfin_write16(MDMA1_D0_Y_COUNT, val) +#define bfin_read_MDMA1_D0_Y_MODIFY() bfin_read16(MDMA1_D0_Y_MODIFY) +#define bfin_write_MDMA1_D0_Y_MODIFY(val) bfin_write16(MDMA1_D0_Y_MODIFY, val) +#define bfin_read_MDMA1_D0_CURR_DESC_PTR() bfin_readPTR(MDMA1_D0_CURR_DESC_PTR) +#define bfin_write_MDMA1_D0_CURR_DESC_PTR(val) bfin_writePTR(MDMA1_D0_CURR_DESC_PTR, val) +#define bfin_read_MDMA1_D0_CURR_ADDR() bfin_readPTR(MDMA1_D0_CURR_ADDR) +#define bfin_write_MDMA1_D0_CURR_ADDR(val) bfin_writePTR(MDMA1_D0_CURR_ADDR, val) +#define bfin_read_MDMA1_D0_IRQ_STATUS() bfin_read16(MDMA1_D0_IRQ_STATUS) +#define bfin_write_MDMA1_D0_IRQ_STATUS(val) bfin_write16(MDMA1_D0_IRQ_STATUS, val) +#define bfin_read_MDMA1_D0_PERIPHERAL_MAP() bfin_read16(MDMA1_D0_PERIPHERAL_MAP) +#define bfin_write_MDMA1_D0_PERIPHERAL_MAP(val) bfin_write16(MDMA1_D0_PERIPHERAL_MAP, val) +#define bfin_read_MDMA1_D0_CURR_X_COUNT() bfin_read16(MDMA1_D0_CURR_X_COUNT) +#define bfin_write_MDMA1_D0_CURR_X_COUNT(val) bfin_write16(MDMA1_D0_CURR_X_COUNT, val) +#define bfin_read_MDMA1_D0_CURR_Y_COUNT() bfin_read16(MDMA1_D0_CURR_Y_COUNT) +#define bfin_write_MDMA1_D0_CURR_Y_COUNT(val) bfin_write16(MDMA1_D0_CURR_Y_COUNT, val) +#define bfin_read_MDMA1_S0_NEXT_DESC_PTR() bfin_readPTR(MDMA1_S0_NEXT_DESC_PTR) +#define bfin_write_MDMA1_S0_NEXT_DESC_PTR(val) bfin_writePTR(MDMA1_S0_NEXT_DESC_PTR, val) +#define bfin_read_MDMA1_S0_START_ADDR() bfin_readPTR(MDMA1_S0_START_ADDR) +#define bfin_write_MDMA1_S0_START_ADDR(val) bfin_writePTR(MDMA1_S0_START_ADDR, val) +#define bfin_read_MDMA1_S0_CONFIG() bfin_read16(MDMA1_S0_CONFIG) +#define bfin_write_MDMA1_S0_CONFIG(val) bfin_write16(MDMA1_S0_CONFIG, val) +#define bfin_read_MDMA1_S0_X_COUNT() bfin_read16(MDMA1_S0_X_COUNT) +#define bfin_write_MDMA1_S0_X_COUNT(val) bfin_write16(MDMA1_S0_X_COUNT, val) +#define bfin_read_MDMA1_S0_X_MODIFY() bfin_read16(MDMA1_S0_X_MODIFY) +#define bfin_write_MDMA1_S0_X_MODIFY(val) bfin_write16(MDMA1_S0_X_MODIFY, val) +#define bfin_read_MDMA1_S0_Y_COUNT() bfin_read16(MDMA1_S0_Y_COUNT) +#define bfin_write_MDMA1_S0_Y_COUNT(val) bfin_write16(MDMA1_S0_Y_COUNT, val) +#define bfin_read_MDMA1_S0_Y_MODIFY() bfin_read16(MDMA1_S0_Y_MODIFY) +#define bfin_write_MDMA1_S0_Y_MODIFY(val) bfin_write16(MDMA1_S0_Y_MODIFY, val) +#define bfin_read_MDMA1_S0_CURR_DESC_PTR() bfin_readPTR(MDMA1_S0_CURR_DESC_PTR) +#define bfin_write_MDMA1_S0_CURR_DESC_PTR(val) bfin_writePTR(MDMA1_S0_CURR_DESC_PTR, val) +#define bfin_read_MDMA1_S0_CURR_ADDR() bfin_readPTR(MDMA1_S0_CURR_ADDR) +#define bfin_write_MDMA1_S0_CURR_ADDR(val) bfin_writePTR(MDMA1_S0_CURR_ADDR, val) +#define bfin_read_MDMA1_S0_IRQ_STATUS() bfin_read16(MDMA1_S0_IRQ_STATUS) +#define bfin_write_MDMA1_S0_IRQ_STATUS(val) bfin_write16(MDMA1_S0_IRQ_STATUS, val) +#define bfin_read_MDMA1_S0_PERIPHERAL_MAP() bfin_read16(MDMA1_S0_PERIPHERAL_MAP) +#define bfin_write_MDMA1_S0_PERIPHERAL_MAP(val) bfin_write16(MDMA1_S0_PERIPHERAL_MAP, val) +#define bfin_read_MDMA1_S0_CURR_X_COUNT() bfin_read16(MDMA1_S0_CURR_X_COUNT) +#define bfin_write_MDMA1_S0_CURR_X_COUNT(val) bfin_write16(MDMA1_S0_CURR_X_COUNT, val) +#define bfin_read_MDMA1_S0_CURR_Y_COUNT() bfin_read16(MDMA1_S0_CURR_Y_COUNT) +#define bfin_write_MDMA1_S0_CURR_Y_COUNT(val) bfin_write16(MDMA1_S0_CURR_Y_COUNT, val) +#define bfin_read_MDMA1_D1_NEXT_DESC_PTR() bfin_readPTR(MDMA1_D1_NEXT_DESC_PTR) +#define bfin_write_MDMA1_D1_NEXT_DESC_PTR(val) bfin_writePTR(MDMA1_D1_NEXT_DESC_PTR, val) +#define bfin_read_MDMA1_D1_START_ADDR() bfin_readPTR(MDMA1_D1_START_ADDR) +#define bfin_write_MDMA1_D1_START_ADDR(val) bfin_writePTR(MDMA1_D1_START_ADDR, val) +#define bfin_read_MDMA1_D1_CONFIG() bfin_read16(MDMA1_D1_CONFIG) +#define bfin_write_MDMA1_D1_CONFIG(val) bfin_write16(MDMA1_D1_CONFIG, val) +#define bfin_read_MDMA1_D1_X_COUNT() bfin_read16(MDMA1_D1_X_COUNT) +#define bfin_write_MDMA1_D1_X_COUNT(val) bfin_write16(MDMA1_D1_X_COUNT, val) +#define bfin_read_MDMA1_D1_X_MODIFY() bfin_read16(MDMA1_D1_X_MODIFY) +#define bfin_write_MDMA1_D1_X_MODIFY(val) bfin_write16(MDMA1_D1_X_MODIFY, val) +#define bfin_read_MDMA1_D1_Y_COUNT() bfin_read16(MDMA1_D1_Y_COUNT) +#define bfin_write_MDMA1_D1_Y_COUNT(val) bfin_write16(MDMA1_D1_Y_COUNT, val) +#define bfin_read_MDMA1_D1_Y_MODIFY() bfin_read16(MDMA1_D1_Y_MODIFY) +#define bfin_write_MDMA1_D1_Y_MODIFY(val) bfin_write16(MDMA1_D1_Y_MODIFY, val) +#define bfin_read_MDMA1_D1_CURR_DESC_PTR() bfin_readPTR(MDMA1_D1_CURR_DESC_PTR) +#define bfin_write_MDMA1_D1_CURR_DESC_PTR(val) bfin_writePTR(MDMA1_D1_CURR_DESC_PTR, val) +#define bfin_read_MDMA1_D1_CURR_ADDR() bfin_readPTR(MDMA1_D1_CURR_ADDR) +#define bfin_write_MDMA1_D1_CURR_ADDR(val) bfin_writePTR(MDMA1_D1_CURR_ADDR, val) +#define bfin_read_MDMA1_D1_IRQ_STATUS() bfin_read16(MDMA1_D1_IRQ_STATUS) +#define bfin_write_MDMA1_D1_IRQ_STATUS(val) bfin_write16(MDMA1_D1_IRQ_STATUS, val) +#define bfin_read_MDMA1_D1_PERIPHERAL_MAP() bfin_read16(MDMA1_D1_PERIPHERAL_MAP) +#define bfin_write_MDMA1_D1_PERIPHERAL_MAP(val) bfin_write16(MDMA1_D1_PERIPHERAL_MAP, val) +#define bfin_read_MDMA1_D1_CURR_X_COUNT() bfin_read16(MDMA1_D1_CURR_X_COUNT) +#define bfin_write_MDMA1_D1_CURR_X_COUNT(val) bfin_write16(MDMA1_D1_CURR_X_COUNT, val) +#define bfin_read_MDMA1_D1_CURR_Y_COUNT() bfin_read16(MDMA1_D1_CURR_Y_COUNT) +#define bfin_write_MDMA1_D1_CURR_Y_COUNT(val) bfin_write16(MDMA1_D1_CURR_Y_COUNT, val) +#define bfin_read_MDMA1_S1_NEXT_DESC_PTR() bfin_readPTR(MDMA1_S1_NEXT_DESC_PTR) +#define bfin_write_MDMA1_S1_NEXT_DESC_PTR(val) bfin_writePTR(MDMA1_S1_NEXT_DESC_PTR, val) +#define bfin_read_MDMA1_S1_START_ADDR() bfin_readPTR(MDMA1_S1_START_ADDR) +#define bfin_write_MDMA1_S1_START_ADDR(val) bfin_writePTR(MDMA1_S1_START_ADDR, val) +#define bfin_read_MDMA1_S1_CONFIG() bfin_read16(MDMA1_S1_CONFIG) +#define bfin_write_MDMA1_S1_CONFIG(val) bfin_write16(MDMA1_S1_CONFIG, val) +#define bfin_read_MDMA1_S1_X_COUNT() bfin_read16(MDMA1_S1_X_COUNT) +#define bfin_write_MDMA1_S1_X_COUNT(val) bfin_write16(MDMA1_S1_X_COUNT, val) +#define bfin_read_MDMA1_S1_X_MODIFY() bfin_read16(MDMA1_S1_X_MODIFY) +#define bfin_write_MDMA1_S1_X_MODIFY(val) bfin_write16(MDMA1_S1_X_MODIFY, val) +#define bfin_read_MDMA1_S1_Y_COUNT() bfin_read16(MDMA1_S1_Y_COUNT) +#define bfin_write_MDMA1_S1_Y_COUNT(val) bfin_write16(MDMA1_S1_Y_COUNT, val) +#define bfin_read_MDMA1_S1_Y_MODIFY() bfin_read16(MDMA1_S1_Y_MODIFY) +#define bfin_write_MDMA1_S1_Y_MODIFY(val) bfin_write16(MDMA1_S1_Y_MODIFY, val) +#define bfin_read_MDMA1_S1_CURR_DESC_PTR() bfin_readPTR(MDMA1_S1_CURR_DESC_PTR) +#define bfin_write_MDMA1_S1_CURR_DESC_PTR(val) bfin_writePTR(MDMA1_S1_CURR_DESC_PTR, val) +#define bfin_read_MDMA1_S1_CURR_ADDR() bfin_readPTR(MDMA1_S1_CURR_ADDR) +#define bfin_write_MDMA1_S1_CURR_ADDR(val) bfin_writePTR(MDMA1_S1_CURR_ADDR, val) +#define bfin_read_MDMA1_S1_IRQ_STATUS() bfin_read16(MDMA1_S1_IRQ_STATUS) +#define bfin_write_MDMA1_S1_IRQ_STATUS(val) bfin_write16(MDMA1_S1_IRQ_STATUS, val) +#define bfin_read_MDMA1_S1_PERIPHERAL_MAP() bfin_read16(MDMA1_S1_PERIPHERAL_MAP) +#define bfin_write_MDMA1_S1_PERIPHERAL_MAP(val) bfin_write16(MDMA1_S1_PERIPHERAL_MAP, val) +#define bfin_read_MDMA1_S1_CURR_X_COUNT() bfin_read16(MDMA1_S1_CURR_X_COUNT) +#define bfin_write_MDMA1_S1_CURR_X_COUNT(val) bfin_write16(MDMA1_S1_CURR_X_COUNT, val) +#define bfin_read_MDMA1_S1_CURR_Y_COUNT() bfin_read16(MDMA1_S1_CURR_Y_COUNT) +#define bfin_write_MDMA1_S1_CURR_Y_COUNT(val) bfin_write16(MDMA1_S1_CURR_Y_COUNT, val) +#define bfin_read_PPI_CONTROL() bfin_read16(PPI_CONTROL) +#define bfin_write_PPI_CONTROL(val) bfin_write16(PPI_CONTROL, val) +#define bfin_read_PPI_STATUS() bfin_read16(PPI_STATUS) +#define bfin_write_PPI_STATUS(val) bfin_write16(PPI_STATUS, val) +#define bfin_read_PPI_DELAY() bfin_read16(PPI_DELAY) +#define bfin_write_PPI_DELAY(val) bfin_write16(PPI_DELAY, val) +#define bfin_read_PPI_COUNT() bfin_read16(PPI_COUNT) +#define bfin_write_PPI_COUNT(val) bfin_write16(PPI_COUNT, val) +#define bfin_read_PPI_FRAME() bfin_read16(PPI_FRAME) +#define bfin_write_PPI_FRAME(val) bfin_write16(PPI_FRAME, val) +#define bfin_read_TWI0_CLKDIV() bfin_read16(TWI0_CLKDIV) +#define bfin_write_TWI0_CLKDIV(val) bfin_write16(TWI0_CLKDIV, val) +#define bfin_read_TWI0_CONTROL() bfin_read16(TWI0_CONTROL) +#define bfin_write_TWI0_CONTROL(val) bfin_write16(TWI0_CONTROL, val) +#define bfin_read_TWI0_SLAVE_CTRL() bfin_read16(TWI0_SLAVE_CTRL) +#define bfin_write_TWI0_SLAVE_CTRL(val) bfin_write16(TWI0_SLAVE_CTRL, val) +#define bfin_read_TWI0_SLAVE_STAT() bfin_read16(TWI0_SLAVE_STAT) +#define bfin_write_TWI0_SLAVE_STAT(val) bfin_write16(TWI0_SLAVE_STAT, val) +#define bfin_read_TWI0_SLAVE_ADDR() bfin_read16(TWI0_SLAVE_ADDR) +#define bfin_write_TWI0_SLAVE_ADDR(val) bfin_write16(TWI0_SLAVE_ADDR, val) +#define bfin_read_TWI0_MASTER_CTL() bfin_read16(TWI0_MASTER_CTL) +#define bfin_write_TWI0_MASTER_CTL(val) bfin_write16(TWI0_MASTER_CTL, val) +#define bfin_read_TWI0_MASTER_STAT() bfin_read16(TWI0_MASTER_STAT) +#define bfin_write_TWI0_MASTER_STAT(val) bfin_write16(TWI0_MASTER_STAT, val) +#define bfin_read_TWI0_MASTER_ADDR() bfin_read16(TWI0_MASTER_ADDR) +#define bfin_write_TWI0_MASTER_ADDR(val) bfin_write16(TWI0_MASTER_ADDR, val) +#define bfin_read_TWI0_INT_STAT() bfin_read16(TWI0_INT_STAT) +#define bfin_write_TWI0_INT_STAT(val) bfin_write16(TWI0_INT_STAT, val) +#define bfin_read_TWI0_INT_MASK() bfin_read16(TWI0_INT_MASK) +#define bfin_write_TWI0_INT_MASK(val) bfin_write16(TWI0_INT_MASK, val) +#define bfin_read_TWI0_FIFO_CTL() bfin_read16(TWI0_FIFO_CTL) +#define bfin_write_TWI0_FIFO_CTL(val) bfin_write16(TWI0_FIFO_CTL, val) +#define bfin_read_TWI0_FIFO_STAT() bfin_read16(TWI0_FIFO_STAT) +#define bfin_write_TWI0_FIFO_STAT(val) bfin_write16(TWI0_FIFO_STAT, val) +#define bfin_read_TWI0_XMT_DATA8() bfin_read16(TWI0_XMT_DATA8) +#define bfin_write_TWI0_XMT_DATA8(val) bfin_write16(TWI0_XMT_DATA8, val) +#define bfin_read_TWI0_XMT_DATA16() bfin_read16(TWI0_XMT_DATA16) +#define bfin_write_TWI0_XMT_DATA16(val) bfin_write16(TWI0_XMT_DATA16, val) +#define bfin_read_TWI0_RCV_DATA8() bfin_read16(TWI0_RCV_DATA8) +#define bfin_write_TWI0_RCV_DATA8(val) bfin_write16(TWI0_RCV_DATA8, val) +#define bfin_read_TWI0_RCV_DATA16() bfin_read16(TWI0_RCV_DATA16) +#define bfin_write_TWI0_RCV_DATA16(val) bfin_write16(TWI0_RCV_DATA16, val) +#define bfin_read_TWI1_CLKDIV() bfin_read16(TWI1_CLKDIV) +#define bfin_write_TWI1_CLKDIV(val) bfin_write16(TWI1_CLKDIV, val) +#define bfin_read_TWI1_CONTROL() bfin_read16(TWI1_CONTROL) +#define bfin_write_TWI1_CONTROL(val) bfin_write16(TWI1_CONTROL, val) +#define bfin_read_TWI1_SLAVE_CTRL() bfin_read16(TWI1_SLAVE_CTRL) +#define bfin_write_TWI1_SLAVE_CTRL(val) bfin_write16(TWI1_SLAVE_CTRL, val) +#define bfin_read_TWI1_SLAVE_STAT() bfin_read16(TWI1_SLAVE_STAT) +#define bfin_write_TWI1_SLAVE_STAT(val) bfin_write16(TWI1_SLAVE_STAT, val) +#define bfin_read_TWI1_SLAVE_ADDR() bfin_read16(TWI1_SLAVE_ADDR) +#define bfin_write_TWI1_SLAVE_ADDR(val) bfin_write16(TWI1_SLAVE_ADDR, val) +#define bfin_read_TWI1_MASTER_CTL() bfin_read16(TWI1_MASTER_CTL) +#define bfin_write_TWI1_MASTER_CTL(val) bfin_write16(TWI1_MASTER_CTL, val) +#define bfin_read_TWI1_MASTER_STAT() bfin_read16(TWI1_MASTER_STAT) +#define bfin_write_TWI1_MASTER_STAT(val) bfin_write16(TWI1_MASTER_STAT, val) +#define bfin_read_TWI1_MASTER_ADDR() bfin_read16(TWI1_MASTER_ADDR) +#define bfin_write_TWI1_MASTER_ADDR(val) bfin_write16(TWI1_MASTER_ADDR, val) +#define bfin_read_TWI1_INT_STAT() bfin_read16(TWI1_INT_STAT) +#define bfin_write_TWI1_INT_STAT(val) bfin_write16(TWI1_INT_STAT, val) +#define bfin_read_TWI1_INT_MASK() bfin_read16(TWI1_INT_MASK) +#define bfin_write_TWI1_INT_MASK(val) bfin_write16(TWI1_INT_MASK, val) +#define bfin_read_TWI1_FIFO_CTL() bfin_read16(TWI1_FIFO_CTL) +#define bfin_write_TWI1_FIFO_CTL(val) bfin_write16(TWI1_FIFO_CTL, val) +#define bfin_read_TWI1_FIFO_STAT() bfin_read16(TWI1_FIFO_STAT) +#define bfin_write_TWI1_FIFO_STAT(val) bfin_write16(TWI1_FIFO_STAT, val) +#define bfin_read_TWI1_XMT_DATA8() bfin_read16(TWI1_XMT_DATA8) +#define bfin_write_TWI1_XMT_DATA8(val) bfin_write16(TWI1_XMT_DATA8, val) +#define bfin_read_TWI1_XMT_DATA16() bfin_read16(TWI1_XMT_DATA16) +#define bfin_write_TWI1_XMT_DATA16(val) bfin_write16(TWI1_XMT_DATA16, val) +#define bfin_read_TWI1_RCV_DATA8() bfin_read16(TWI1_RCV_DATA8) +#define bfin_write_TWI1_RCV_DATA8(val) bfin_write16(TWI1_RCV_DATA8, val) +#define bfin_read_TWI1_RCV_DATA16() bfin_read16(TWI1_RCV_DATA16) +#define bfin_write_TWI1_RCV_DATA16(val) bfin_write16(TWI1_RCV_DATA16, val) +#define bfin_read_CAN_MC1() bfin_read16(CAN_MC1) +#define bfin_write_CAN_MC1(val) bfin_write16(CAN_MC1, val) +#define bfin_read_CAN_MD1() bfin_read16(CAN_MD1) +#define bfin_write_CAN_MD1(val) bfin_write16(CAN_MD1, val) +#define bfin_read_CAN_TRS1() bfin_read16(CAN_TRS1) +#define bfin_write_CAN_TRS1(val) bfin_write16(CAN_TRS1, val) +#define bfin_read_CAN_TRR1() bfin_read16(CAN_TRR1) +#define bfin_write_CAN_TRR1(val) bfin_write16(CAN_TRR1, val) +#define bfin_read_CAN_TA1() bfin_read16(CAN_TA1) +#define bfin_write_CAN_TA1(val) bfin_write16(CAN_TA1, val) +#define bfin_read_CAN_AA1() bfin_read16(CAN_AA1) +#define bfin_write_CAN_AA1(val) bfin_write16(CAN_AA1, val) +#define bfin_read_CAN_RMP1() bfin_read16(CAN_RMP1) +#define bfin_write_CAN_RMP1(val) bfin_write16(CAN_RMP1, val) +#define bfin_read_CAN_RML1() bfin_read16(CAN_RML1) +#define bfin_write_CAN_RML1(val) bfin_write16(CAN_RML1, val) +#define bfin_read_CAN_MBTIF1() bfin_read16(CAN_MBTIF1) +#define bfin_write_CAN_MBTIF1(val) bfin_write16(CAN_MBTIF1, val) +#define bfin_read_CAN_MBRIF1() bfin_read16(CAN_MBRIF1) +#define bfin_write_CAN_MBRIF1(val) bfin_write16(CAN_MBRIF1, val) +#define bfin_read_CAN_MBIM1() bfin_read16(CAN_MBIM1) +#define bfin_write_CAN_MBIM1(val) bfin_write16(CAN_MBIM1, val) +#define bfin_read_CAN_RFH1() bfin_read16(CAN_RFH1) +#define bfin_write_CAN_RFH1(val) bfin_write16(CAN_RFH1, val) +#define bfin_read_CAN_OPSS1() bfin_read16(CAN_OPSS1) +#define bfin_write_CAN_OPSS1(val) bfin_write16(CAN_OPSS1, val) +#define bfin_read_CAN_MC2() bfin_read16(CAN_MC2) +#define bfin_write_CAN_MC2(val) bfin_write16(CAN_MC2, val) +#define bfin_read_CAN_MD2() bfin_read16(CAN_MD2) +#define bfin_write_CAN_MD2(val) bfin_write16(CAN_MD2, val) +#define bfin_read_CAN_TRS2() bfin_read16(CAN_TRS2) +#define bfin_write_CAN_TRS2(val) bfin_write16(CAN_TRS2, val) +#define bfin_read_CAN_TRR2() bfin_read16(CAN_TRR2) +#define bfin_write_CAN_TRR2(val) bfin_write16(CAN_TRR2, val) +#define bfin_read_CAN_TA2() bfin_read16(CAN_TA2) +#define bfin_write_CAN_TA2(val) bfin_write16(CAN_TA2, val) +#define bfin_read_CAN_AA2() bfin_read16(CAN_AA2) +#define bfin_write_CAN_AA2(val) bfin_write16(CAN_AA2, val) +#define bfin_read_CAN_RMP2() bfin_read16(CAN_RMP2) +#define bfin_write_CAN_RMP2(val) bfin_write16(CAN_RMP2, val) +#define bfin_read_CAN_RML2() bfin_read16(CAN_RML2) +#define bfin_write_CAN_RML2(val) bfin_write16(CAN_RML2, val) +#define bfin_read_CAN_MBTIF2() bfin_read16(CAN_MBTIF2) +#define bfin_write_CAN_MBTIF2(val) bfin_write16(CAN_MBTIF2, val) +#define bfin_read_CAN_MBRIF2() bfin_read16(CAN_MBRIF2) +#define bfin_write_CAN_MBRIF2(val) bfin_write16(CAN_MBRIF2, val) +#define bfin_read_CAN_MBIM2() bfin_read16(CAN_MBIM2) +#define bfin_write_CAN_MBIM2(val) bfin_write16(CAN_MBIM2, val) +#define bfin_read_CAN_RFH2() bfin_read16(CAN_RFH2) +#define bfin_write_CAN_RFH2(val) bfin_write16(CAN_RFH2, val) +#define bfin_read_CAN_OPSS2() bfin_read16(CAN_OPSS2) +#define bfin_write_CAN_OPSS2(val) bfin_write16(CAN_OPSS2, val) +#define bfin_read_CAN_CLOCK() bfin_read16(CAN_CLOCK) +#define bfin_write_CAN_CLOCK(val) bfin_write16(CAN_CLOCK, val) +#define bfin_read_CAN_TIMING() bfin_read16(CAN_TIMING) +#define bfin_write_CAN_TIMING(val) bfin_write16(CAN_TIMING, val) +#define bfin_read_CAN_DEBUG() bfin_read16(CAN_DEBUG) +#define bfin_write_CAN_DEBUG(val) bfin_write16(CAN_DEBUG, val) +#define bfin_read_CAN_STATUS() bfin_read16(CAN_STATUS) +#define bfin_write_CAN_STATUS(val) bfin_write16(CAN_STATUS, val) +#define bfin_read_CAN_CEC() bfin_read16(CAN_CEC) +#define bfin_write_CAN_CEC(val) bfin_write16(CAN_CEC, val) +#define bfin_read_CAN_GIS() bfin_read16(CAN_GIS) +#define bfin_write_CAN_GIS(val) bfin_write16(CAN_GIS, val) +#define bfin_read_CAN_GIM() bfin_read16(CAN_GIM) +#define bfin_write_CAN_GIM(val) bfin_write16(CAN_GIM, val) +#define bfin_read_CAN_GIF() bfin_read16(CAN_GIF) +#define bfin_write_CAN_GIF(val) bfin_write16(CAN_GIF, val) +#define bfin_read_CAN_CONTROL() bfin_read16(CAN_CONTROL) +#define bfin_write_CAN_CONTROL(val) bfin_write16(CAN_CONTROL, val) +#define bfin_read_CAN_INTR() bfin_read16(CAN_INTR) +#define bfin_write_CAN_INTR(val) bfin_write16(CAN_INTR, val) +#define bfin_read_CAN_VERSION() bfin_read16(CAN_VERSION) +#define bfin_write_CAN_VERSION(val) bfin_write16(CAN_VERSION, val) +#define bfin_read_CAN_MBTD() bfin_read16(CAN_MBTD) +#define bfin_write_CAN_MBTD(val) bfin_write16(CAN_MBTD, val) +#define bfin_read_CAN_EWR() bfin_read16(CAN_EWR) +#define bfin_write_CAN_EWR(val) bfin_write16(CAN_EWR, val) +#define bfin_read_CAN_ESR() bfin_read16(CAN_ESR) +#define bfin_write_CAN_ESR(val) bfin_write16(CAN_ESR, val) +#define bfin_read_CAN_UCREG() bfin_read16(CAN_UCREG) +#define bfin_write_CAN_UCREG(val) bfin_write16(CAN_UCREG, val) +#define bfin_read_CAN_UCCNT() bfin_read16(CAN_UCCNT) +#define bfin_write_CAN_UCCNT(val) bfin_write16(CAN_UCCNT, val) +#define bfin_read_CAN_UCRC() bfin_read16(CAN_UCRC) +#define bfin_write_CAN_UCRC(val) bfin_write16(CAN_UCRC, val) +#define bfin_read_CAN_UCCNF() bfin_read16(CAN_UCCNF) +#define bfin_write_CAN_UCCNF(val) bfin_write16(CAN_UCCNF, val) +#define bfin_read_CAN_VERSION2() bfin_read16(CAN_VERSION2) +#define bfin_write_CAN_VERSION2(val) bfin_write16(CAN_VERSION2, val) +#define bfin_read_CAN_AM00L() bfin_read16(CAN_AM00L) +#define bfin_write_CAN_AM00L(val) bfin_write16(CAN_AM00L, val) +#define bfin_read_CAN_AM00H() bfin_read16(CAN_AM00H) +#define bfin_write_CAN_AM00H(val) bfin_write16(CAN_AM00H, val) +#define bfin_read_CAN_AM01L() bfin_read16(CAN_AM01L) +#define bfin_write_CAN_AM01L(val) bfin_write16(CAN_AM01L, val) +#define bfin_read_CAN_AM01H() bfin_read16(CAN_AM01H) +#define bfin_write_CAN_AM01H(val) bfin_write16(CAN_AM01H, val) +#define bfin_read_CAN_AM02L() bfin_read16(CAN_AM02L) +#define bfin_write_CAN_AM02L(val) bfin_write16(CAN_AM02L, val) +#define bfin_read_CAN_AM02H() bfin_read16(CAN_AM02H) +#define bfin_write_CAN_AM02H(val) bfin_write16(CAN_AM02H, val) +#define bfin_read_CAN_AM03L() bfin_read16(CAN_AM03L) +#define bfin_write_CAN_AM03L(val) bfin_write16(CAN_AM03L, val) +#define bfin_read_CAN_AM03H() bfin_read16(CAN_AM03H) +#define bfin_write_CAN_AM03H(val) bfin_write16(CAN_AM03H, val) +#define bfin_read_CAN_AM04L() bfin_read16(CAN_AM04L) +#define bfin_write_CAN_AM04L(val) bfin_write16(CAN_AM04L, val) +#define bfin_read_CAN_AM04H() bfin_read16(CAN_AM04H) +#define bfin_write_CAN_AM04H(val) bfin_write16(CAN_AM04H, val) +#define bfin_read_CAN_AM05L() bfin_read16(CAN_AM05L) +#define bfin_write_CAN_AM05L(val) bfin_write16(CAN_AM05L, val) +#define bfin_read_CAN_AM05H() bfin_read16(CAN_AM05H) +#define bfin_write_CAN_AM05H(val) bfin_write16(CAN_AM05H, val) +#define bfin_read_CAN_AM06L() bfin_read16(CAN_AM06L) +#define bfin_write_CAN_AM06L(val) bfin_write16(CAN_AM06L, val) +#define bfin_read_CAN_AM06H() bfin_read16(CAN_AM06H) +#define bfin_write_CAN_AM06H(val) bfin_write16(CAN_AM06H, val) +#define bfin_read_CAN_AM07L() bfin_read16(CAN_AM07L) +#define bfin_write_CAN_AM07L(val) bfin_write16(CAN_AM07L, val) +#define bfin_read_CAN_AM07H() bfin_read16(CAN_AM07H) +#define bfin_write_CAN_AM07H(val) bfin_write16(CAN_AM07H, val) +#define bfin_read_CAN_AM08L() bfin_read16(CAN_AM08L) +#define bfin_write_CAN_AM08L(val) bfin_write16(CAN_AM08L, val) +#define bfin_read_CAN_AM08H() bfin_read16(CAN_AM08H) +#define bfin_write_CAN_AM08H(val) bfin_write16(CAN_AM08H, val) +#define bfin_read_CAN_AM09L() bfin_read16(CAN_AM09L) +#define bfin_write_CAN_AM09L(val) bfin_write16(CAN_AM09L, val) +#define bfin_read_CAN_AM09H() bfin_read16(CAN_AM09H) +#define bfin_write_CAN_AM09H(val) bfin_write16(CAN_AM09H, val) +#define bfin_read_CAN_AM10L() bfin_read16(CAN_AM10L) +#define bfin_write_CAN_AM10L(val) bfin_write16(CAN_AM10L, val) +#define bfin_read_CAN_AM10H() bfin_read16(CAN_AM10H) +#define bfin_write_CAN_AM10H(val) bfin_write16(CAN_AM10H, val) +#define bfin_read_CAN_AM11L() bfin_read16(CAN_AM11L) +#define bfin_write_CAN_AM11L(val) bfin_write16(CAN_AM11L, val) +#define bfin_read_CAN_AM11H() bfin_read16(CAN_AM11H) +#define bfin_write_CAN_AM11H(val) bfin_write16(CAN_AM11H, val) +#define bfin_read_CAN_AM12L() bfin_read16(CAN_AM12L) +#define bfin_write_CAN_AM12L(val) bfin_write16(CAN_AM12L, val) +#define bfin_read_CAN_AM12H() bfin_read16(CAN_AM12H) +#define bfin_write_CAN_AM12H(val) bfin_write16(CAN_AM12H, val) +#define bfin_read_CAN_AM13L() bfin_read16(CAN_AM13L) +#define bfin_write_CAN_AM13L(val) bfin_write16(CAN_AM13L, val) +#define bfin_read_CAN_AM13H() bfin_read16(CAN_AM13H) +#define bfin_write_CAN_AM13H(val) bfin_write16(CAN_AM13H, val) +#define bfin_read_CAN_AM14L() bfin_read16(CAN_AM14L) +#define bfin_write_CAN_AM14L(val) bfin_write16(CAN_AM14L, val) +#define bfin_read_CAN_AM14H() bfin_read16(CAN_AM14H) +#define bfin_write_CAN_AM14H(val) bfin_write16(CAN_AM14H, val) +#define bfin_read_CAN_AM15L() bfin_read16(CAN_AM15L) +#define bfin_write_CAN_AM15L(val) bfin_write16(CAN_AM15L, val) +#define bfin_read_CAN_AM15H() bfin_read16(CAN_AM15H) +#define bfin_write_CAN_AM15H(val) bfin_write16(CAN_AM15H, val) +#define bfin_read_CAN_AM16L() bfin_read16(CAN_AM16L) +#define bfin_write_CAN_AM16L(val) bfin_write16(CAN_AM16L, val) +#define bfin_read_CAN_AM16H() bfin_read16(CAN_AM16H) +#define bfin_write_CAN_AM16H(val) bfin_write16(CAN_AM16H, val) +#define bfin_read_CAN_AM17L() bfin_read16(CAN_AM17L) +#define bfin_write_CAN_AM17L(val) bfin_write16(CAN_AM17L, val) +#define bfin_read_CAN_AM17H() bfin_read16(CAN_AM17H) +#define bfin_write_CAN_AM17H(val) bfin_write16(CAN_AM17H, val) +#define bfin_read_CAN_AM18L() bfin_read16(CAN_AM18L) +#define bfin_write_CAN_AM18L(val) bfin_write16(CAN_AM18L, val) +#define bfin_read_CAN_AM18H() bfin_read16(CAN_AM18H) +#define bfin_write_CAN_AM18H(val) bfin_write16(CAN_AM18H, val) +#define bfin_read_CAN_AM19L() bfin_read16(CAN_AM19L) +#define bfin_write_CAN_AM19L(val) bfin_write16(CAN_AM19L, val) +#define bfin_read_CAN_AM19H() bfin_read16(CAN_AM19H) +#define bfin_write_CAN_AM19H(val) bfin_write16(CAN_AM19H, val) +#define bfin_read_CAN_AM20L() bfin_read16(CAN_AM20L) +#define bfin_write_CAN_AM20L(val) bfin_write16(CAN_AM20L, val) +#define bfin_read_CAN_AM20H() bfin_read16(CAN_AM20H) +#define bfin_write_CAN_AM20H(val) bfin_write16(CAN_AM20H, val) +#define bfin_read_CAN_AM21L() bfin_read16(CAN_AM21L) +#define bfin_write_CAN_AM21L(val) bfin_write16(CAN_AM21L, val) +#define bfin_read_CAN_AM21H() bfin_read16(CAN_AM21H) +#define bfin_write_CAN_AM21H(val) bfin_write16(CAN_AM21H, val) +#define bfin_read_CAN_AM22L() bfin_read16(CAN_AM22L) +#define bfin_write_CAN_AM22L(val) bfin_write16(CAN_AM22L, val) +#define bfin_read_CAN_AM22H() bfin_read16(CAN_AM22H) +#define bfin_write_CAN_AM22H(val) bfin_write16(CAN_AM22H, val) +#define bfin_read_CAN_AM23L() bfin_read16(CAN_AM23L) +#define bfin_write_CAN_AM23L(val) bfin_write16(CAN_AM23L, val) +#define bfin_read_CAN_AM23H() bfin_read16(CAN_AM23H) +#define bfin_write_CAN_AM23H(val) bfin_write16(CAN_AM23H, val) +#define bfin_read_CAN_AM24L() bfin_read16(CAN_AM24L) +#define bfin_write_CAN_AM24L(val) bfin_write16(CAN_AM24L, val) +#define bfin_read_CAN_AM24H() bfin_read16(CAN_AM24H) +#define bfin_write_CAN_AM24H(val) bfin_write16(CAN_AM24H, val) +#define bfin_read_CAN_AM25L() bfin_read16(CAN_AM25L) +#define bfin_write_CAN_AM25L(val) bfin_write16(CAN_AM25L, val) +#define bfin_read_CAN_AM25H() bfin_read16(CAN_AM25H) +#define bfin_write_CAN_AM25H(val) bfin_write16(CAN_AM25H, val) +#define bfin_read_CAN_AM26L() bfin_read16(CAN_AM26L) +#define bfin_write_CAN_AM26L(val) bfin_write16(CAN_AM26L, val) +#define bfin_read_CAN_AM26H() bfin_read16(CAN_AM26H) +#define bfin_write_CAN_AM26H(val) bfin_write16(CAN_AM26H, val) +#define bfin_read_CAN_AM27L() bfin_read16(CAN_AM27L) +#define bfin_write_CAN_AM27L(val) bfin_write16(CAN_AM27L, val) +#define bfin_read_CAN_AM27H() bfin_read16(CAN_AM27H) +#define bfin_write_CAN_AM27H(val) bfin_write16(CAN_AM27H, val) +#define bfin_read_CAN_AM28L() bfin_read16(CAN_AM28L) +#define bfin_write_CAN_AM28L(val) bfin_write16(CAN_AM28L, val) +#define bfin_read_CAN_AM28H() bfin_read16(CAN_AM28H) +#define bfin_write_CAN_AM28H(val) bfin_write16(CAN_AM28H, val) +#define bfin_read_CAN_AM29L() bfin_read16(CAN_AM29L) +#define bfin_write_CAN_AM29L(val) bfin_write16(CAN_AM29L, val) +#define bfin_read_CAN_AM29H() bfin_read16(CAN_AM29H) +#define bfin_write_CAN_AM29H(val) bfin_write16(CAN_AM29H, val) +#define bfin_read_CAN_AM30L() bfin_read16(CAN_AM30L) +#define bfin_write_CAN_AM30L(val) bfin_write16(CAN_AM30L, val) +#define bfin_read_CAN_AM30H() bfin_read16(CAN_AM30H) +#define bfin_write_CAN_AM30H(val) bfin_write16(CAN_AM30H, val) +#define bfin_read_CAN_AM31L() bfin_read16(CAN_AM31L) +#define bfin_write_CAN_AM31L(val) bfin_write16(CAN_AM31L, val) +#define bfin_read_CAN_AM31H() bfin_read16(CAN_AM31H) +#define bfin_write_CAN_AM31H(val) bfin_write16(CAN_AM31H, val) +#define bfin_read_CAN_MB00_DATA0() bfin_read16(CAN_MB00_DATA0) +#define bfin_write_CAN_MB00_DATA0(val) bfin_write16(CAN_MB00_DATA0, val) +#define bfin_read_CAN_MB00_DATA1() bfin_read16(CAN_MB00_DATA1) +#define bfin_write_CAN_MB00_DATA1(val) bfin_write16(CAN_MB00_DATA1, val) +#define bfin_read_CAN_MB00_DATA2() bfin_read16(CAN_MB00_DATA2) +#define bfin_write_CAN_MB00_DATA2(val) bfin_write16(CAN_MB00_DATA2, val) +#define bfin_read_CAN_MB00_DATA3() bfin_read16(CAN_MB00_DATA3) +#define bfin_write_CAN_MB00_DATA3(val) bfin_write16(CAN_MB00_DATA3, val) +#define bfin_read_CAN_MB00_LENGTH() bfin_read16(CAN_MB00_LENGTH) +#define bfin_write_CAN_MB00_LENGTH(val) bfin_write16(CAN_MB00_LENGTH, val) +#define bfin_read_CAN_MB00_TIMESTAMP() bfin_read16(CAN_MB00_TIMESTAMP) +#define bfin_write_CAN_MB00_TIMESTAMP(val) bfin_write16(CAN_MB00_TIMESTAMP, val) +#define bfin_read_CAN_MB00_ID0() bfin_read16(CAN_MB00_ID0) +#define bfin_write_CAN_MB00_ID0(val) bfin_write16(CAN_MB00_ID0, val) +#define bfin_read_CAN_MB00_ID1() bfin_read16(CAN_MB00_ID1) +#define bfin_write_CAN_MB00_ID1(val) bfin_write16(CAN_MB00_ID1, val) +#define bfin_read_CAN_MB01_DATA0() bfin_read16(CAN_MB01_DATA0) +#define bfin_write_CAN_MB01_DATA0(val) bfin_write16(CAN_MB01_DATA0, val) +#define bfin_read_CAN_MB01_DATA1() bfin_read16(CAN_MB01_DATA1) +#define bfin_write_CAN_MB01_DATA1(val) bfin_write16(CAN_MB01_DATA1, val) +#define bfin_read_CAN_MB01_DATA2() bfin_read16(CAN_MB01_DATA2) +#define bfin_write_CAN_MB01_DATA2(val) bfin_write16(CAN_MB01_DATA2, val) +#define bfin_read_CAN_MB01_DATA3() bfin_read16(CAN_MB01_DATA3) +#define bfin_write_CAN_MB01_DATA3(val) bfin_write16(CAN_MB01_DATA3, val) +#define bfin_read_CAN_MB01_LENGTH() bfin_read16(CAN_MB01_LENGTH) +#define bfin_write_CAN_MB01_LENGTH(val) bfin_write16(CAN_MB01_LENGTH, val) +#define bfin_read_CAN_MB01_TIMESTAMP() bfin_read16(CAN_MB01_TIMESTAMP) +#define bfin_write_CAN_MB01_TIMESTAMP(val) bfin_write16(CAN_MB01_TIMESTAMP, val) +#define bfin_read_CAN_MB01_ID0() bfin_read16(CAN_MB01_ID0) +#define bfin_write_CAN_MB01_ID0(val) bfin_write16(CAN_MB01_ID0, val) +#define bfin_read_CAN_MB01_ID1() bfin_read16(CAN_MB01_ID1) +#define bfin_write_CAN_MB01_ID1(val) bfin_write16(CAN_MB01_ID1, val) +#define bfin_read_CAN_MB02_DATA0() bfin_read16(CAN_MB02_DATA0) +#define bfin_write_CAN_MB02_DATA0(val) bfin_write16(CAN_MB02_DATA0, val) +#define bfin_read_CAN_MB02_DATA1() bfin_read16(CAN_MB02_DATA1) +#define bfin_write_CAN_MB02_DATA1(val) bfin_write16(CAN_MB02_DATA1, val) +#define bfin_read_CAN_MB02_DATA2() bfin_read16(CAN_MB02_DATA2) +#define bfin_write_CAN_MB02_DATA2(val) bfin_write16(CAN_MB02_DATA2, val) +#define bfin_read_CAN_MB02_DATA3() bfin_read16(CAN_MB02_DATA3) +#define bfin_write_CAN_MB02_DATA3(val) bfin_write16(CAN_MB02_DATA3, val) +#define bfin_read_CAN_MB02_LENGTH() bfin_read16(CAN_MB02_LENGTH) +#define bfin_write_CAN_MB02_LENGTH(val) bfin_write16(CAN_MB02_LENGTH, val) +#define bfin_read_CAN_MB02_TIMESTAMP() bfin_read16(CAN_MB02_TIMESTAMP) +#define bfin_write_CAN_MB02_TIMESTAMP(val) bfin_write16(CAN_MB02_TIMESTAMP, val) +#define bfin_read_CAN_MB02_ID0() bfin_read16(CAN_MB02_ID0) +#define bfin_write_CAN_MB02_ID0(val) bfin_write16(CAN_MB02_ID0, val) +#define bfin_read_CAN_MB02_ID1() bfin_read16(CAN_MB02_ID1) +#define bfin_write_CAN_MB02_ID1(val) bfin_write16(CAN_MB02_ID1, val) +#define bfin_read_CAN_MB03_DATA0() bfin_read16(CAN_MB03_DATA0) +#define bfin_write_CAN_MB03_DATA0(val) bfin_write16(CAN_MB03_DATA0, val) +#define bfin_read_CAN_MB03_DATA1() bfin_read16(CAN_MB03_DATA1) +#define bfin_write_CAN_MB03_DATA1(val) bfin_write16(CAN_MB03_DATA1, val) +#define bfin_read_CAN_MB03_DATA2() bfin_read16(CAN_MB03_DATA2) +#define bfin_write_CAN_MB03_DATA2(val) bfin_write16(CAN_MB03_DATA2, val) +#define bfin_read_CAN_MB03_DATA3() bfin_read16(CAN_MB03_DATA3) +#define bfin_write_CAN_MB03_DATA3(val) bfin_write16(CAN_MB03_DATA3, val) +#define bfin_read_CAN_MB03_LENGTH() bfin_read16(CAN_MB03_LENGTH) +#define bfin_write_CAN_MB03_LENGTH(val) bfin_write16(CAN_MB03_LENGTH, val) +#define bfin_read_CAN_MB03_TIMESTAMP() bfin_read16(CAN_MB03_TIMESTAMP) +#define bfin_write_CAN_MB03_TIMESTAMP(val) bfin_write16(CAN_MB03_TIMESTAMP, val) +#define bfin_read_CAN_MB03_ID0() bfin_read16(CAN_MB03_ID0) +#define bfin_write_CAN_MB03_ID0(val) bfin_write16(CAN_MB03_ID0, val) +#define bfin_read_CAN_MB03_ID1() bfin_read16(CAN_MB03_ID1) +#define bfin_write_CAN_MB03_ID1(val) bfin_write16(CAN_MB03_ID1, val) +#define bfin_read_CAN_MB04_DATA0() bfin_read16(CAN_MB04_DATA0) +#define bfin_write_CAN_MB04_DATA0(val) bfin_write16(CAN_MB04_DATA0, val) +#define bfin_read_CAN_MB04_DATA1() bfin_read16(CAN_MB04_DATA1) +#define bfin_write_CAN_MB04_DATA1(val) bfin_write16(CAN_MB04_DATA1, val) +#define bfin_read_CAN_MB04_DATA2() bfin_read16(CAN_MB04_DATA2) +#define bfin_write_CAN_MB04_DATA2(val) bfin_write16(CAN_MB04_DATA2, val) +#define bfin_read_CAN_MB04_DATA3() bfin_read16(CAN_MB04_DATA3) +#define bfin_write_CAN_MB04_DATA3(val) bfin_write16(CAN_MB04_DATA3, val) +#define bfin_read_CAN_MB04_LENGTH() bfin_read16(CAN_MB04_LENGTH) +#define bfin_write_CAN_MB04_LENGTH(val) bfin_write16(CAN_MB04_LENGTH, val) +#define bfin_read_CAN_MB04_TIMESTAMP() bfin_read16(CAN_MB04_TIMESTAMP) +#define bfin_write_CAN_MB04_TIMESTAMP(val) bfin_write16(CAN_MB04_TIMESTAMP, val) +#define bfin_read_CAN_MB04_ID0() bfin_read16(CAN_MB04_ID0) +#define bfin_write_CAN_MB04_ID0(val) bfin_write16(CAN_MB04_ID0, val) +#define bfin_read_CAN_MB04_ID1() bfin_read16(CAN_MB04_ID1) +#define bfin_write_CAN_MB04_ID1(val) bfin_write16(CAN_MB04_ID1, val) +#define bfin_read_CAN_MB05_DATA0() bfin_read16(CAN_MB05_DATA0) +#define bfin_write_CAN_MB05_DATA0(val) bfin_write16(CAN_MB05_DATA0, val) +#define bfin_read_CAN_MB05_DATA1() bfin_read16(CAN_MB05_DATA1) +#define bfin_write_CAN_MB05_DATA1(val) bfin_write16(CAN_MB05_DATA1, val) +#define bfin_read_CAN_MB05_DATA2() bfin_read16(CAN_MB05_DATA2) +#define bfin_write_CAN_MB05_DATA2(val) bfin_write16(CAN_MB05_DATA2, val) +#define bfin_read_CAN_MB05_DATA3() bfin_read16(CAN_MB05_DATA3) +#define bfin_write_CAN_MB05_DATA3(val) bfin_write16(CAN_MB05_DATA3, val) +#define bfin_read_CAN_MB05_LENGTH() bfin_read16(CAN_MB05_LENGTH) +#define bfin_write_CAN_MB05_LENGTH(val) bfin_write16(CAN_MB05_LENGTH, val) +#define bfin_read_CAN_MB05_TIMESTAMP() bfin_read16(CAN_MB05_TIMESTAMP) +#define bfin_write_CAN_MB05_TIMESTAMP(val) bfin_write16(CAN_MB05_TIMESTAMP, val) +#define bfin_read_CAN_MB05_ID0() bfin_read16(CAN_MB05_ID0) +#define bfin_write_CAN_MB05_ID0(val) bfin_write16(CAN_MB05_ID0, val) +#define bfin_read_CAN_MB05_ID1() bfin_read16(CAN_MB05_ID1) +#define bfin_write_CAN_MB05_ID1(val) bfin_write16(CAN_MB05_ID1, val) +#define bfin_read_CAN_MB06_DATA0() bfin_read16(CAN_MB06_DATA0) +#define bfin_write_CAN_MB06_DATA0(val) bfin_write16(CAN_MB06_DATA0, val) +#define bfin_read_CAN_MB06_DATA1() bfin_read16(CAN_MB06_DATA1) +#define bfin_write_CAN_MB06_DATA1(val) bfin_write16(CAN_MB06_DATA1, val) +#define bfin_read_CAN_MB06_DATA2() bfin_read16(CAN_MB06_DATA2) +#define bfin_write_CAN_MB06_DATA2(val) bfin_write16(CAN_MB06_DATA2, val) +#define bfin_read_CAN_MB06_DATA3() bfin_read16(CAN_MB06_DATA3) +#define bfin_write_CAN_MB06_DATA3(val) bfin_write16(CAN_MB06_DATA3, val) +#define bfin_read_CAN_MB06_LENGTH() bfin_read16(CAN_MB06_LENGTH) +#define bfin_write_CAN_MB06_LENGTH(val) bfin_write16(CAN_MB06_LENGTH, val) +#define bfin_read_CAN_MB06_TIMESTAMP() bfin_read16(CAN_MB06_TIMESTAMP) +#define bfin_write_CAN_MB06_TIMESTAMP(val) bfin_write16(CAN_MB06_TIMESTAMP, val) +#define bfin_read_CAN_MB06_ID0() bfin_read16(CAN_MB06_ID0) +#define bfin_write_CAN_MB06_ID0(val) bfin_write16(CAN_MB06_ID0, val) +#define bfin_read_CAN_MB06_ID1() bfin_read16(CAN_MB06_ID1) +#define bfin_write_CAN_MB06_ID1(val) bfin_write16(CAN_MB06_ID1, val) +#define bfin_read_CAN_MB07_DATA0() bfin_read16(CAN_MB07_DATA0) +#define bfin_write_CAN_MB07_DATA0(val) bfin_write16(CAN_MB07_DATA0, val) +#define bfin_read_CAN_MB07_DATA1() bfin_read16(CAN_MB07_DATA1) +#define bfin_write_CAN_MB07_DATA1(val) bfin_write16(CAN_MB07_DATA1, val) +#define bfin_read_CAN_MB07_DATA2() bfin_read16(CAN_MB07_DATA2) +#define bfin_write_CAN_MB07_DATA2(val) bfin_write16(CAN_MB07_DATA2, val) +#define bfin_read_CAN_MB07_DATA3() bfin_read16(CAN_MB07_DATA3) +#define bfin_write_CAN_MB07_DATA3(val) bfin_write16(CAN_MB07_DATA3, val) +#define bfin_read_CAN_MB07_LENGTH() bfin_read16(CAN_MB07_LENGTH) +#define bfin_write_CAN_MB07_LENGTH(val) bfin_write16(CAN_MB07_LENGTH, val) +#define bfin_read_CAN_MB07_TIMESTAMP() bfin_read16(CAN_MB07_TIMESTAMP) +#define bfin_write_CAN_MB07_TIMESTAMP(val) bfin_write16(CAN_MB07_TIMESTAMP, val) +#define bfin_read_CAN_MB07_ID0() bfin_read16(CAN_MB07_ID0) +#define bfin_write_CAN_MB07_ID0(val) bfin_write16(CAN_MB07_ID0, val) +#define bfin_read_CAN_MB07_ID1() bfin_read16(CAN_MB07_ID1) +#define bfin_write_CAN_MB07_ID1(val) bfin_write16(CAN_MB07_ID1, val) +#define bfin_read_CAN_MB08_DATA0() bfin_read16(CAN_MB08_DATA0) +#define bfin_write_CAN_MB08_DATA0(val) bfin_write16(CAN_MB08_DATA0, val) +#define bfin_read_CAN_MB08_DATA1() bfin_read16(CAN_MB08_DATA1) +#define bfin_write_CAN_MB08_DATA1(val) bfin_write16(CAN_MB08_DATA1, val) +#define bfin_read_CAN_MB08_DATA2() bfin_read16(CAN_MB08_DATA2) +#define bfin_write_CAN_MB08_DATA2(val) bfin_write16(CAN_MB08_DATA2, val) +#define bfin_read_CAN_MB08_DATA3() bfin_read16(CAN_MB08_DATA3) +#define bfin_write_CAN_MB08_DATA3(val) bfin_write16(CAN_MB08_DATA3, val) +#define bfin_read_CAN_MB08_LENGTH() bfin_read16(CAN_MB08_LENGTH) +#define bfin_write_CAN_MB08_LENGTH(val) bfin_write16(CAN_MB08_LENGTH, val) +#define bfin_read_CAN_MB08_TIMESTAMP() bfin_read16(CAN_MB08_TIMESTAMP) +#define bfin_write_CAN_MB08_TIMESTAMP(val) bfin_write16(CAN_MB08_TIMESTAMP, val) +#define bfin_read_CAN_MB08_ID0() bfin_read16(CAN_MB08_ID0) +#define bfin_write_CAN_MB08_ID0(val) bfin_write16(CAN_MB08_ID0, val) +#define bfin_read_CAN_MB08_ID1() bfin_read16(CAN_MB08_ID1) +#define bfin_write_CAN_MB08_ID1(val) bfin_write16(CAN_MB08_ID1, val) +#define bfin_read_CAN_MB09_DATA0() bfin_read16(CAN_MB09_DATA0) +#define bfin_write_CAN_MB09_DATA0(val) bfin_write16(CAN_MB09_DATA0, val) +#define bfin_read_CAN_MB09_DATA1() bfin_read16(CAN_MB09_DATA1) +#define bfin_write_CAN_MB09_DATA1(val) bfin_write16(CAN_MB09_DATA1, val) +#define bfin_read_CAN_MB09_DATA2() bfin_read16(CAN_MB09_DATA2) +#define bfin_write_CAN_MB09_DATA2(val) bfin_write16(CAN_MB09_DATA2, val) +#define bfin_read_CAN_MB09_DATA3() bfin_read16(CAN_MB09_DATA3) +#define bfin_write_CAN_MB09_DATA3(val) bfin_write16(CAN_MB09_DATA3, val) +#define bfin_read_CAN_MB09_LENGTH() bfin_read16(CAN_MB09_LENGTH) +#define bfin_write_CAN_MB09_LENGTH(val) bfin_write16(CAN_MB09_LENGTH, val) +#define bfin_read_CAN_MB09_TIMESTAMP() bfin_read16(CAN_MB09_TIMESTAMP) +#define bfin_write_CAN_MB09_TIMESTAMP(val) bfin_write16(CAN_MB09_TIMESTAMP, val) +#define bfin_read_CAN_MB09_ID0() bfin_read16(CAN_MB09_ID0) +#define bfin_write_CAN_MB09_ID0(val) bfin_write16(CAN_MB09_ID0, val) +#define bfin_read_CAN_MB09_ID1() bfin_read16(CAN_MB09_ID1) +#define bfin_write_CAN_MB09_ID1(val) bfin_write16(CAN_MB09_ID1, val) +#define bfin_read_CAN_MB10_DATA0() bfin_read16(CAN_MB10_DATA0) +#define bfin_write_CAN_MB10_DATA0(val) bfin_write16(CAN_MB10_DATA0, val) +#define bfin_read_CAN_MB10_DATA1() bfin_read16(CAN_MB10_DATA1) +#define bfin_write_CAN_MB10_DATA1(val) bfin_write16(CAN_MB10_DATA1, val) +#define bfin_read_CAN_MB10_DATA2() bfin_read16(CAN_MB10_DATA2) +#define bfin_write_CAN_MB10_DATA2(val) bfin_write16(CAN_MB10_DATA2, val) +#define bfin_read_CAN_MB10_DATA3() bfin_read16(CAN_MB10_DATA3) +#define bfin_write_CAN_MB10_DATA3(val) bfin_write16(CAN_MB10_DATA3, val) +#define bfin_read_CAN_MB10_LENGTH() bfin_read16(CAN_MB10_LENGTH) +#define bfin_write_CAN_MB10_LENGTH(val) bfin_write16(CAN_MB10_LENGTH, val) +#define bfin_read_CAN_MB10_TIMESTAMP() bfin_read16(CAN_MB10_TIMESTAMP) +#define bfin_write_CAN_MB10_TIMESTAMP(val) bfin_write16(CAN_MB10_TIMESTAMP, val) +#define bfin_read_CAN_MB10_ID0() bfin_read16(CAN_MB10_ID0) +#define bfin_write_CAN_MB10_ID0(val) bfin_write16(CAN_MB10_ID0, val) +#define bfin_read_CAN_MB10_ID1() bfin_read16(CAN_MB10_ID1) +#define bfin_write_CAN_MB10_ID1(val) bfin_write16(CAN_MB10_ID1, val) +#define bfin_read_CAN_MB11_DATA0() bfin_read16(CAN_MB11_DATA0) +#define bfin_write_CAN_MB11_DATA0(val) bfin_write16(CAN_MB11_DATA0, val) +#define bfin_read_CAN_MB11_DATA1() bfin_read16(CAN_MB11_DATA1) +#define bfin_write_CAN_MB11_DATA1(val) bfin_write16(CAN_MB11_DATA1, val) +#define bfin_read_CAN_MB11_DATA2() bfin_read16(CAN_MB11_DATA2) +#define bfin_write_CAN_MB11_DATA2(val) bfin_write16(CAN_MB11_DATA2, val) +#define bfin_read_CAN_MB11_DATA3() bfin_read16(CAN_MB11_DATA3) +#define bfin_write_CAN_MB11_DATA3(val) bfin_write16(CAN_MB11_DATA3, val) +#define bfin_read_CAN_MB11_LENGTH() bfin_read16(CAN_MB11_LENGTH) +#define bfin_write_CAN_MB11_LENGTH(val) bfin_write16(CAN_MB11_LENGTH, val) +#define bfin_read_CAN_MB11_TIMESTAMP() bfin_read16(CAN_MB11_TIMESTAMP) +#define bfin_write_CAN_MB11_TIMESTAMP(val) bfin_write16(CAN_MB11_TIMESTAMP, val) +#define bfin_read_CAN_MB11_ID0() bfin_read16(CAN_MB11_ID0) +#define bfin_write_CAN_MB11_ID0(val) bfin_write16(CAN_MB11_ID0, val) +#define bfin_read_CAN_MB11_ID1() bfin_read16(CAN_MB11_ID1) +#define bfin_write_CAN_MB11_ID1(val) bfin_write16(CAN_MB11_ID1, val) +#define bfin_read_CAN_MB12_DATA0() bfin_read16(CAN_MB12_DATA0) +#define bfin_write_CAN_MB12_DATA0(val) bfin_write16(CAN_MB12_DATA0, val) +#define bfin_read_CAN_MB12_DATA1() bfin_read16(CAN_MB12_DATA1) +#define bfin_write_CAN_MB12_DATA1(val) bfin_write16(CAN_MB12_DATA1, val) +#define bfin_read_CAN_MB12_DATA2() bfin_read16(CAN_MB12_DATA2) +#define bfin_write_CAN_MB12_DATA2(val) bfin_write16(CAN_MB12_DATA2, val) +#define bfin_read_CAN_MB12_DATA3() bfin_read16(CAN_MB12_DATA3) +#define bfin_write_CAN_MB12_DATA3(val) bfin_write16(CAN_MB12_DATA3, val) +#define bfin_read_CAN_MB12_LENGTH() bfin_read16(CAN_MB12_LENGTH) +#define bfin_write_CAN_MB12_LENGTH(val) bfin_write16(CAN_MB12_LENGTH, val) +#define bfin_read_CAN_MB12_TIMESTAMP() bfin_read16(CAN_MB12_TIMESTAMP) +#define bfin_write_CAN_MB12_TIMESTAMP(val) bfin_write16(CAN_MB12_TIMESTAMP, val) +#define bfin_read_CAN_MB12_ID0() bfin_read16(CAN_MB12_ID0) +#define bfin_write_CAN_MB12_ID0(val) bfin_write16(CAN_MB12_ID0, val) +#define bfin_read_CAN_MB12_ID1() bfin_read16(CAN_MB12_ID1) +#define bfin_write_CAN_MB12_ID1(val) bfin_write16(CAN_MB12_ID1, val) +#define bfin_read_CAN_MB13_DATA0() bfin_read16(CAN_MB13_DATA0) +#define bfin_write_CAN_MB13_DATA0(val) bfin_write16(CAN_MB13_DATA0, val) +#define bfin_read_CAN_MB13_DATA1() bfin_read16(CAN_MB13_DATA1) +#define bfin_write_CAN_MB13_DATA1(val) bfin_write16(CAN_MB13_DATA1, val) +#define bfin_read_CAN_MB13_DATA2() bfin_read16(CAN_MB13_DATA2) +#define bfin_write_CAN_MB13_DATA2(val) bfin_write16(CAN_MB13_DATA2, val) +#define bfin_read_CAN_MB13_DATA3() bfin_read16(CAN_MB13_DATA3) +#define bfin_write_CAN_MB13_DATA3(val) bfin_write16(CAN_MB13_DATA3, val) +#define bfin_read_CAN_MB13_LENGTH() bfin_read16(CAN_MB13_LENGTH) +#define bfin_write_CAN_MB13_LENGTH(val) bfin_write16(CAN_MB13_LENGTH, val) +#define bfin_read_CAN_MB13_TIMESTAMP() bfin_read16(CAN_MB13_TIMESTAMP) +#define bfin_write_CAN_MB13_TIMESTAMP(val) bfin_write16(CAN_MB13_TIMESTAMP, val) +#define bfin_read_CAN_MB13_ID0() bfin_read16(CAN_MB13_ID0) +#define bfin_write_CAN_MB13_ID0(val) bfin_write16(CAN_MB13_ID0, val) +#define bfin_read_CAN_MB13_ID1() bfin_read16(CAN_MB13_ID1) +#define bfin_write_CAN_MB13_ID1(val) bfin_write16(CAN_MB13_ID1, val) +#define bfin_read_CAN_MB14_DATA0() bfin_read16(CAN_MB14_DATA0) +#define bfin_write_CAN_MB14_DATA0(val) bfin_write16(CAN_MB14_DATA0, val) +#define bfin_read_CAN_MB14_DATA1() bfin_read16(CAN_MB14_DATA1) +#define bfin_write_CAN_MB14_DATA1(val) bfin_write16(CAN_MB14_DATA1, val) +#define bfin_read_CAN_MB14_DATA2() bfin_read16(CAN_MB14_DATA2) +#define bfin_write_CAN_MB14_DATA2(val) bfin_write16(CAN_MB14_DATA2, val) +#define bfin_read_CAN_MB14_DATA3() bfin_read16(CAN_MB14_DATA3) +#define bfin_write_CAN_MB14_DATA3(val) bfin_write16(CAN_MB14_DATA3, val) +#define bfin_read_CAN_MB14_LENGTH() bfin_read16(CAN_MB14_LENGTH) +#define bfin_write_CAN_MB14_LENGTH(val) bfin_write16(CAN_MB14_LENGTH, val) +#define bfin_read_CAN_MB14_TIMESTAMP() bfin_read16(CAN_MB14_TIMESTAMP) +#define bfin_write_CAN_MB14_TIMESTAMP(val) bfin_write16(CAN_MB14_TIMESTAMP, val) +#define bfin_read_CAN_MB14_ID0() bfin_read16(CAN_MB14_ID0) +#define bfin_write_CAN_MB14_ID0(val) bfin_write16(CAN_MB14_ID0, val) +#define bfin_read_CAN_MB14_ID1() bfin_read16(CAN_MB14_ID1) +#define bfin_write_CAN_MB14_ID1(val) bfin_write16(CAN_MB14_ID1, val) +#define bfin_read_CAN_MB15_DATA0() bfin_read16(CAN_MB15_DATA0) +#define bfin_write_CAN_MB15_DATA0(val) bfin_write16(CAN_MB15_DATA0, val) +#define bfin_read_CAN_MB15_DATA1() bfin_read16(CAN_MB15_DATA1) +#define bfin_write_CAN_MB15_DATA1(val) bfin_write16(CAN_MB15_DATA1, val) +#define bfin_read_CAN_MB15_DATA2() bfin_read16(CAN_MB15_DATA2) +#define bfin_write_CAN_MB15_DATA2(val) bfin_write16(CAN_MB15_DATA2, val) +#define bfin_read_CAN_MB15_DATA3() bfin_read16(CAN_MB15_DATA3) +#define bfin_write_CAN_MB15_DATA3(val) bfin_write16(CAN_MB15_DATA3, val) +#define bfin_read_CAN_MB15_LENGTH() bfin_read16(CAN_MB15_LENGTH) +#define bfin_write_CAN_MB15_LENGTH(val) bfin_write16(CAN_MB15_LENGTH, val) +#define bfin_read_CAN_MB15_TIMESTAMP() bfin_read16(CAN_MB15_TIMESTAMP) +#define bfin_write_CAN_MB15_TIMESTAMP(val) bfin_write16(CAN_MB15_TIMESTAMP, val) +#define bfin_read_CAN_MB15_ID0() bfin_read16(CAN_MB15_ID0) +#define bfin_write_CAN_MB15_ID0(val) bfin_write16(CAN_MB15_ID0, val) +#define bfin_read_CAN_MB15_ID1() bfin_read16(CAN_MB15_ID1) +#define bfin_write_CAN_MB15_ID1(val) bfin_write16(CAN_MB15_ID1, val) +#define bfin_read_CAN_MB16_DATA0() bfin_read16(CAN_MB16_DATA0) +#define bfin_write_CAN_MB16_DATA0(val) bfin_write16(CAN_MB16_DATA0, val) +#define bfin_read_CAN_MB16_DATA1() bfin_read16(CAN_MB16_DATA1) +#define bfin_write_CAN_MB16_DATA1(val) bfin_write16(CAN_MB16_DATA1, val) +#define bfin_read_CAN_MB16_DATA2() bfin_read16(CAN_MB16_DATA2) +#define bfin_write_CAN_MB16_DATA2(val) bfin_write16(CAN_MB16_DATA2, val) +#define bfin_read_CAN_MB16_DATA3() bfin_read16(CAN_MB16_DATA3) +#define bfin_write_CAN_MB16_DATA3(val) bfin_write16(CAN_MB16_DATA3, val) +#define bfin_read_CAN_MB16_LENGTH() bfin_read16(CAN_MB16_LENGTH) +#define bfin_write_CAN_MB16_LENGTH(val) bfin_write16(CAN_MB16_LENGTH, val) +#define bfin_read_CAN_MB16_TIMESTAMP() bfin_read16(CAN_MB16_TIMESTAMP) +#define bfin_write_CAN_MB16_TIMESTAMP(val) bfin_write16(CAN_MB16_TIMESTAMP, val) +#define bfin_read_CAN_MB16_ID0() bfin_read16(CAN_MB16_ID0) +#define bfin_write_CAN_MB16_ID0(val) bfin_write16(CAN_MB16_ID0, val) +#define bfin_read_CAN_MB16_ID1() bfin_read16(CAN_MB16_ID1) +#define bfin_write_CAN_MB16_ID1(val) bfin_write16(CAN_MB16_ID1, val) +#define bfin_read_CAN_MB17_DATA0() bfin_read16(CAN_MB17_DATA0) +#define bfin_write_CAN_MB17_DATA0(val) bfin_write16(CAN_MB17_DATA0, val) +#define bfin_read_CAN_MB17_DATA1() bfin_read16(CAN_MB17_DATA1) +#define bfin_write_CAN_MB17_DATA1(val) bfin_write16(CAN_MB17_DATA1, val) +#define bfin_read_CAN_MB17_DATA2() bfin_read16(CAN_MB17_DATA2) +#define bfin_write_CAN_MB17_DATA2(val) bfin_write16(CAN_MB17_DATA2, val) +#define bfin_read_CAN_MB17_DATA3() bfin_read16(CAN_MB17_DATA3) +#define bfin_write_CAN_MB17_DATA3(val) bfin_write16(CAN_MB17_DATA3, val) +#define bfin_read_CAN_MB17_LENGTH() bfin_read16(CAN_MB17_LENGTH) +#define bfin_write_CAN_MB17_LENGTH(val) bfin_write16(CAN_MB17_LENGTH, val) +#define bfin_read_CAN_MB17_TIMESTAMP() bfin_read16(CAN_MB17_TIMESTAMP) +#define bfin_write_CAN_MB17_TIMESTAMP(val) bfin_write16(CAN_MB17_TIMESTAMP, val) +#define bfin_read_CAN_MB17_ID0() bfin_read16(CAN_MB17_ID0) +#define bfin_write_CAN_MB17_ID0(val) bfin_write16(CAN_MB17_ID0, val) +#define bfin_read_CAN_MB17_ID1() bfin_read16(CAN_MB17_ID1) +#define bfin_write_CAN_MB17_ID1(val) bfin_write16(CAN_MB17_ID1, val) +#define bfin_read_CAN_MB18_DATA0() bfin_read16(CAN_MB18_DATA0) +#define bfin_write_CAN_MB18_DATA0(val) bfin_write16(CAN_MB18_DATA0, val) +#define bfin_read_CAN_MB18_DATA1() bfin_read16(CAN_MB18_DATA1) +#define bfin_write_CAN_MB18_DATA1(val) bfin_write16(CAN_MB18_DATA1, val) +#define bfin_read_CAN_MB18_DATA2() bfin_read16(CAN_MB18_DATA2) +#define bfin_write_CAN_MB18_DATA2(val) bfin_write16(CAN_MB18_DATA2, val) +#define bfin_read_CAN_MB18_DATA3() bfin_read16(CAN_MB18_DATA3) +#define bfin_write_CAN_MB18_DATA3(val) bfin_write16(CAN_MB18_DATA3, val) +#define bfin_read_CAN_MB18_LENGTH() bfin_read16(CAN_MB18_LENGTH) +#define bfin_write_CAN_MB18_LENGTH(val) bfin_write16(CAN_MB18_LENGTH, val) +#define bfin_read_CAN_MB18_TIMESTAMP() bfin_read16(CAN_MB18_TIMESTAMP) +#define bfin_write_CAN_MB18_TIMESTAMP(val) bfin_write16(CAN_MB18_TIMESTAMP, val) +#define bfin_read_CAN_MB18_ID0() bfin_read16(CAN_MB18_ID0) +#define bfin_write_CAN_MB18_ID0(val) bfin_write16(CAN_MB18_ID0, val) +#define bfin_read_CAN_MB18_ID1() bfin_read16(CAN_MB18_ID1) +#define bfin_write_CAN_MB18_ID1(val) bfin_write16(CAN_MB18_ID1, val) +#define bfin_read_CAN_MB19_DATA0() bfin_read16(CAN_MB19_DATA0) +#define bfin_write_CAN_MB19_DATA0(val) bfin_write16(CAN_MB19_DATA0, val) +#define bfin_read_CAN_MB19_DATA1() bfin_read16(CAN_MB19_DATA1) +#define bfin_write_CAN_MB19_DATA1(val) bfin_write16(CAN_MB19_DATA1, val) +#define bfin_read_CAN_MB19_DATA2() bfin_read16(CAN_MB19_DATA2) +#define bfin_write_CAN_MB19_DATA2(val) bfin_write16(CAN_MB19_DATA2, val) +#define bfin_read_CAN_MB19_DATA3() bfin_read16(CAN_MB19_DATA3) +#define bfin_write_CAN_MB19_DATA3(val) bfin_write16(CAN_MB19_DATA3, val) +#define bfin_read_CAN_MB19_LENGTH() bfin_read16(CAN_MB19_LENGTH) +#define bfin_write_CAN_MB19_LENGTH(val) bfin_write16(CAN_MB19_LENGTH, val) +#define bfin_read_CAN_MB19_TIMESTAMP() bfin_read16(CAN_MB19_TIMESTAMP) +#define bfin_write_CAN_MB19_TIMESTAMP(val) bfin_write16(CAN_MB19_TIMESTAMP, val) +#define bfin_read_CAN_MB19_ID0() bfin_read16(CAN_MB19_ID0) +#define bfin_write_CAN_MB19_ID0(val) bfin_write16(CAN_MB19_ID0, val) +#define bfin_read_CAN_MB19_ID1() bfin_read16(CAN_MB19_ID1) +#define bfin_write_CAN_MB19_ID1(val) bfin_write16(CAN_MB19_ID1, val) +#define bfin_read_CAN_MB20_DATA0() bfin_read16(CAN_MB20_DATA0) +#define bfin_write_CAN_MB20_DATA0(val) bfin_write16(CAN_MB20_DATA0, val) +#define bfin_read_CAN_MB20_DATA1() bfin_read16(CAN_MB20_DATA1) +#define bfin_write_CAN_MB20_DATA1(val) bfin_write16(CAN_MB20_DATA1, val) +#define bfin_read_CAN_MB20_DATA2() bfin_read16(CAN_MB20_DATA2) +#define bfin_write_CAN_MB20_DATA2(val) bfin_write16(CAN_MB20_DATA2, val) +#define bfin_read_CAN_MB20_DATA3() bfin_read16(CAN_MB20_DATA3) +#define bfin_write_CAN_MB20_DATA3(val) bfin_write16(CAN_MB20_DATA3, val) +#define bfin_read_CAN_MB20_LENGTH() bfin_read16(CAN_MB20_LENGTH) +#define bfin_write_CAN_MB20_LENGTH(val) bfin_write16(CAN_MB20_LENGTH, val) +#define bfin_read_CAN_MB20_TIMESTAMP() bfin_read16(CAN_MB20_TIMESTAMP) +#define bfin_write_CAN_MB20_TIMESTAMP(val) bfin_write16(CAN_MB20_TIMESTAMP, val) +#define bfin_read_CAN_MB20_ID0() bfin_read16(CAN_MB20_ID0) +#define bfin_write_CAN_MB20_ID0(val) bfin_write16(CAN_MB20_ID0, val) +#define bfin_read_CAN_MB20_ID1() bfin_read16(CAN_MB20_ID1) +#define bfin_write_CAN_MB20_ID1(val) bfin_write16(CAN_MB20_ID1, val) +#define bfin_read_CAN_MB21_DATA0() bfin_read16(CAN_MB21_DATA0) +#define bfin_write_CAN_MB21_DATA0(val) bfin_write16(CAN_MB21_DATA0, val) +#define bfin_read_CAN_MB21_DATA1() bfin_read16(CAN_MB21_DATA1) +#define bfin_write_CAN_MB21_DATA1(val) bfin_write16(CAN_MB21_DATA1, val) +#define bfin_read_CAN_MB21_DATA2() bfin_read16(CAN_MB21_DATA2) +#define bfin_write_CAN_MB21_DATA2(val) bfin_write16(CAN_MB21_DATA2, val) +#define bfin_read_CAN_MB21_DATA3() bfin_read16(CAN_MB21_DATA3) +#define bfin_write_CAN_MB21_DATA3(val) bfin_write16(CAN_MB21_DATA3, val) +#define bfin_read_CAN_MB21_LENGTH() bfin_read16(CAN_MB21_LENGTH) +#define bfin_write_CAN_MB21_LENGTH(val) bfin_write16(CAN_MB21_LENGTH, val) +#define bfin_read_CAN_MB21_TIMESTAMP() bfin_read16(CAN_MB21_TIMESTAMP) +#define bfin_write_CAN_MB21_TIMESTAMP(val) bfin_write16(CAN_MB21_TIMESTAMP, val) +#define bfin_read_CAN_MB21_ID0() bfin_read16(CAN_MB21_ID0) +#define bfin_write_CAN_MB21_ID0(val) bfin_write16(CAN_MB21_ID0, val) +#define bfin_read_CAN_MB21_ID1() bfin_read16(CAN_MB21_ID1) +#define bfin_write_CAN_MB21_ID1(val) bfin_write16(CAN_MB21_ID1, val) +#define bfin_read_CAN_MB22_DATA0() bfin_read16(CAN_MB22_DATA0) +#define bfin_write_CAN_MB22_DATA0(val) bfin_write16(CAN_MB22_DATA0, val) +#define bfin_read_CAN_MB22_DATA1() bfin_read16(CAN_MB22_DATA1) +#define bfin_write_CAN_MB22_DATA1(val) bfin_write16(CAN_MB22_DATA1, val) +#define bfin_read_CAN_MB22_DATA2() bfin_read16(CAN_MB22_DATA2) +#define bfin_write_CAN_MB22_DATA2(val) bfin_write16(CAN_MB22_DATA2, val) +#define bfin_read_CAN_MB22_DATA3() bfin_read16(CAN_MB22_DATA3) +#define bfin_write_CAN_MB22_DATA3(val) bfin_write16(CAN_MB22_DATA3, val) +#define bfin_read_CAN_MB22_LENGTH() bfin_read16(CAN_MB22_LENGTH) +#define bfin_write_CAN_MB22_LENGTH(val) bfin_write16(CAN_MB22_LENGTH, val) +#define bfin_read_CAN_MB22_TIMESTAMP() bfin_read16(CAN_MB22_TIMESTAMP) +#define bfin_write_CAN_MB22_TIMESTAMP(val) bfin_write16(CAN_MB22_TIMESTAMP, val) +#define bfin_read_CAN_MB22_ID0() bfin_read16(CAN_MB22_ID0) +#define bfin_write_CAN_MB22_ID0(val) bfin_write16(CAN_MB22_ID0, val) +#define bfin_read_CAN_MB22_ID1() bfin_read16(CAN_MB22_ID1) +#define bfin_write_CAN_MB22_ID1(val) bfin_write16(CAN_MB22_ID1, val) +#define bfin_read_CAN_MB23_DATA0() bfin_read16(CAN_MB23_DATA0) +#define bfin_write_CAN_MB23_DATA0(val) bfin_write16(CAN_MB23_DATA0, val) +#define bfin_read_CAN_MB23_DATA1() bfin_read16(CAN_MB23_DATA1) +#define bfin_write_CAN_MB23_DATA1(val) bfin_write16(CAN_MB23_DATA1, val) +#define bfin_read_CAN_MB23_DATA2() bfin_read16(CAN_MB23_DATA2) +#define bfin_write_CAN_MB23_DATA2(val) bfin_write16(CAN_MB23_DATA2, val) +#define bfin_read_CAN_MB23_DATA3() bfin_read16(CAN_MB23_DATA3) +#define bfin_write_CAN_MB23_DATA3(val) bfin_write16(CAN_MB23_DATA3, val) +#define bfin_read_CAN_MB23_LENGTH() bfin_read16(CAN_MB23_LENGTH) +#define bfin_write_CAN_MB23_LENGTH(val) bfin_write16(CAN_MB23_LENGTH, val) +#define bfin_read_CAN_MB23_TIMESTAMP() bfin_read16(CAN_MB23_TIMESTAMP) +#define bfin_write_CAN_MB23_TIMESTAMP(val) bfin_write16(CAN_MB23_TIMESTAMP, val) +#define bfin_read_CAN_MB23_ID0() bfin_read16(CAN_MB23_ID0) +#define bfin_write_CAN_MB23_ID0(val) bfin_write16(CAN_MB23_ID0, val) +#define bfin_read_CAN_MB23_ID1() bfin_read16(CAN_MB23_ID1) +#define bfin_write_CAN_MB23_ID1(val) bfin_write16(CAN_MB23_ID1, val) +#define bfin_read_CAN_MB24_DATA0() bfin_read16(CAN_MB24_DATA0) +#define bfin_write_CAN_MB24_DATA0(val) bfin_write16(CAN_MB24_DATA0, val) +#define bfin_read_CAN_MB24_DATA1() bfin_read16(CAN_MB24_DATA1) +#define bfin_write_CAN_MB24_DATA1(val) bfin_write16(CAN_MB24_DATA1, val) +#define bfin_read_CAN_MB24_DATA2() bfin_read16(CAN_MB24_DATA2) +#define bfin_write_CAN_MB24_DATA2(val) bfin_write16(CAN_MB24_DATA2, val) +#define bfin_read_CAN_MB24_DATA3() bfin_read16(CAN_MB24_DATA3) +#define bfin_write_CAN_MB24_DATA3(val) bfin_write16(CAN_MB24_DATA3, val) +#define bfin_read_CAN_MB24_LENGTH() bfin_read16(CAN_MB24_LENGTH) +#define bfin_write_CAN_MB24_LENGTH(val) bfin_write16(CAN_MB24_LENGTH, val) +#define bfin_read_CAN_MB24_TIMESTAMP() bfin_read16(CAN_MB24_TIMESTAMP) +#define bfin_write_CAN_MB24_TIMESTAMP(val) bfin_write16(CAN_MB24_TIMESTAMP, val) +#define bfin_read_CAN_MB24_ID0() bfin_read16(CAN_MB24_ID0) +#define bfin_write_CAN_MB24_ID0(val) bfin_write16(CAN_MB24_ID0, val) +#define bfin_read_CAN_MB24_ID1() bfin_read16(CAN_MB24_ID1) +#define bfin_write_CAN_MB24_ID1(val) bfin_write16(CAN_MB24_ID1, val) +#define bfin_read_CAN_MB25_DATA0() bfin_read16(CAN_MB25_DATA0) +#define bfin_write_CAN_MB25_DATA0(val) bfin_write16(CAN_MB25_DATA0, val) +#define bfin_read_CAN_MB25_DATA1() bfin_read16(CAN_MB25_DATA1) +#define bfin_write_CAN_MB25_DATA1(val) bfin_write16(CAN_MB25_DATA1, val) +#define bfin_read_CAN_MB25_DATA2() bfin_read16(CAN_MB25_DATA2) +#define bfin_write_CAN_MB25_DATA2(val) bfin_write16(CAN_MB25_DATA2, val) +#define bfin_read_CAN_MB25_DATA3() bfin_read16(CAN_MB25_DATA3) +#define bfin_write_CAN_MB25_DATA3(val) bfin_write16(CAN_MB25_DATA3, val) +#define bfin_read_CAN_MB25_LENGTH() bfin_read16(CAN_MB25_LENGTH) +#define bfin_write_CAN_MB25_LENGTH(val) bfin_write16(CAN_MB25_LENGTH, val) +#define bfin_read_CAN_MB25_TIMESTAMP() bfin_read16(CAN_MB25_TIMESTAMP) +#define bfin_write_CAN_MB25_TIMESTAMP(val) bfin_write16(CAN_MB25_TIMESTAMP, val) +#define bfin_read_CAN_MB25_ID0() bfin_read16(CAN_MB25_ID0) +#define bfin_write_CAN_MB25_ID0(val) bfin_write16(CAN_MB25_ID0, val) +#define bfin_read_CAN_MB25_ID1() bfin_read16(CAN_MB25_ID1) +#define bfin_write_CAN_MB25_ID1(val) bfin_write16(CAN_MB25_ID1, val) +#define bfin_read_CAN_MB26_DATA0() bfin_read16(CAN_MB26_DATA0) +#define bfin_write_CAN_MB26_DATA0(val) bfin_write16(CAN_MB26_DATA0, val) +#define bfin_read_CAN_MB26_DATA1() bfin_read16(CAN_MB26_DATA1) +#define bfin_write_CAN_MB26_DATA1(val) bfin_write16(CAN_MB26_DATA1, val) +#define bfin_read_CAN_MB26_DATA2() bfin_read16(CAN_MB26_DATA2) +#define bfin_write_CAN_MB26_DATA2(val) bfin_write16(CAN_MB26_DATA2, val) +#define bfin_read_CAN_MB26_DATA3() bfin_read16(CAN_MB26_DATA3) +#define bfin_write_CAN_MB26_DATA3(val) bfin_write16(CAN_MB26_DATA3, val) +#define bfin_read_CAN_MB26_LENGTH() bfin_read16(CAN_MB26_LENGTH) +#define bfin_write_CAN_MB26_LENGTH(val) bfin_write16(CAN_MB26_LENGTH, val) +#define bfin_read_CAN_MB26_TIMESTAMP() bfin_read16(CAN_MB26_TIMESTAMP) +#define bfin_write_CAN_MB26_TIMESTAMP(val) bfin_write16(CAN_MB26_TIMESTAMP, val) +#define bfin_read_CAN_MB26_ID0() bfin_read16(CAN_MB26_ID0) +#define bfin_write_CAN_MB26_ID0(val) bfin_write16(CAN_MB26_ID0, val) +#define bfin_read_CAN_MB26_ID1() bfin_read16(CAN_MB26_ID1) +#define bfin_write_CAN_MB26_ID1(val) bfin_write16(CAN_MB26_ID1, val) +#define bfin_read_CAN_MB27_DATA0() bfin_read16(CAN_MB27_DATA0) +#define bfin_write_CAN_MB27_DATA0(val) bfin_write16(CAN_MB27_DATA0, val) +#define bfin_read_CAN_MB27_DATA1() bfin_read16(CAN_MB27_DATA1) +#define bfin_write_CAN_MB27_DATA1(val) bfin_write16(CAN_MB27_DATA1, val) +#define bfin_read_CAN_MB27_DATA2() bfin_read16(CAN_MB27_DATA2) +#define bfin_write_CAN_MB27_DATA2(val) bfin_write16(CAN_MB27_DATA2, val) +#define bfin_read_CAN_MB27_DATA3() bfin_read16(CAN_MB27_DATA3) +#define bfin_write_CAN_MB27_DATA3(val) bfin_write16(CAN_MB27_DATA3, val) +#define bfin_read_CAN_MB27_LENGTH() bfin_read16(CAN_MB27_LENGTH) +#define bfin_write_CAN_MB27_LENGTH(val) bfin_write16(CAN_MB27_LENGTH, val) +#define bfin_read_CAN_MB27_TIMESTAMP() bfin_read16(CAN_MB27_TIMESTAMP) +#define bfin_write_CAN_MB27_TIMESTAMP(val) bfin_write16(CAN_MB27_TIMESTAMP, val) +#define bfin_read_CAN_MB27_ID0() bfin_read16(CAN_MB27_ID0) +#define bfin_write_CAN_MB27_ID0(val) bfin_write16(CAN_MB27_ID0, val) +#define bfin_read_CAN_MB27_ID1() bfin_read16(CAN_MB27_ID1) +#define bfin_write_CAN_MB27_ID1(val) bfin_write16(CAN_MB27_ID1, val) +#define bfin_read_CAN_MB28_DATA0() bfin_read16(CAN_MB28_DATA0) +#define bfin_write_CAN_MB28_DATA0(val) bfin_write16(CAN_MB28_DATA0, val) +#define bfin_read_CAN_MB28_DATA1() bfin_read16(CAN_MB28_DATA1) +#define bfin_write_CAN_MB28_DATA1(val) bfin_write16(CAN_MB28_DATA1, val) +#define bfin_read_CAN_MB28_DATA2() bfin_read16(CAN_MB28_DATA2) +#define bfin_write_CAN_MB28_DATA2(val) bfin_write16(CAN_MB28_DATA2, val) +#define bfin_read_CAN_MB28_DATA3() bfin_read16(CAN_MB28_DATA3) +#define bfin_write_CAN_MB28_DATA3(val) bfin_write16(CAN_MB28_DATA3, val) +#define bfin_read_CAN_MB28_LENGTH() bfin_read16(CAN_MB28_LENGTH) +#define bfin_write_CAN_MB28_LENGTH(val) bfin_write16(CAN_MB28_LENGTH, val) +#define bfin_read_CAN_MB28_TIMESTAMP() bfin_read16(CAN_MB28_TIMESTAMP) +#define bfin_write_CAN_MB28_TIMESTAMP(val) bfin_write16(CAN_MB28_TIMESTAMP, val) +#define bfin_read_CAN_MB28_ID0() bfin_read16(CAN_MB28_ID0) +#define bfin_write_CAN_MB28_ID0(val) bfin_write16(CAN_MB28_ID0, val) +#define bfin_read_CAN_MB28_ID1() bfin_read16(CAN_MB28_ID1) +#define bfin_write_CAN_MB28_ID1(val) bfin_write16(CAN_MB28_ID1, val) +#define bfin_read_CAN_MB29_DATA0() bfin_read16(CAN_MB29_DATA0) +#define bfin_write_CAN_MB29_DATA0(val) bfin_write16(CAN_MB29_DATA0, val) +#define bfin_read_CAN_MB29_DATA1() bfin_read16(CAN_MB29_DATA1) +#define bfin_write_CAN_MB29_DATA1(val) bfin_write16(CAN_MB29_DATA1, val) +#define bfin_read_CAN_MB29_DATA2() bfin_read16(CAN_MB29_DATA2) +#define bfin_write_CAN_MB29_DATA2(val) bfin_write16(CAN_MB29_DATA2, val) +#define bfin_read_CAN_MB29_DATA3() bfin_read16(CAN_MB29_DATA3) +#define bfin_write_CAN_MB29_DATA3(val) bfin_write16(CAN_MB29_DATA3, val) +#define bfin_read_CAN_MB29_LENGTH() bfin_read16(CAN_MB29_LENGTH) +#define bfin_write_CAN_MB29_LENGTH(val) bfin_write16(CAN_MB29_LENGTH, val) +#define bfin_read_CAN_MB29_TIMESTAMP() bfin_read16(CAN_MB29_TIMESTAMP) +#define bfin_write_CAN_MB29_TIMESTAMP(val) bfin_write16(CAN_MB29_TIMESTAMP, val) +#define bfin_read_CAN_MB29_ID0() bfin_read16(CAN_MB29_ID0) +#define bfin_write_CAN_MB29_ID0(val) bfin_write16(CAN_MB29_ID0, val) +#define bfin_read_CAN_MB29_ID1() bfin_read16(CAN_MB29_ID1) +#define bfin_write_CAN_MB29_ID1(val) bfin_write16(CAN_MB29_ID1, val) +#define bfin_read_CAN_MB30_DATA0() bfin_read16(CAN_MB30_DATA0) +#define bfin_write_CAN_MB30_DATA0(val) bfin_write16(CAN_MB30_DATA0, val) +#define bfin_read_CAN_MB30_DATA1() bfin_read16(CAN_MB30_DATA1) +#define bfin_write_CAN_MB30_DATA1(val) bfin_write16(CAN_MB30_DATA1, val) +#define bfin_read_CAN_MB30_DATA2() bfin_read16(CAN_MB30_DATA2) +#define bfin_write_CAN_MB30_DATA2(val) bfin_write16(CAN_MB30_DATA2, val) +#define bfin_read_CAN_MB30_DATA3() bfin_read16(CAN_MB30_DATA3) +#define bfin_write_CAN_MB30_DATA3(val) bfin_write16(CAN_MB30_DATA3, val) +#define bfin_read_CAN_MB30_LENGTH() bfin_read16(CAN_MB30_LENGTH) +#define bfin_write_CAN_MB30_LENGTH(val) bfin_write16(CAN_MB30_LENGTH, val) +#define bfin_read_CAN_MB30_TIMESTAMP() bfin_read16(CAN_MB30_TIMESTAMP) +#define bfin_write_CAN_MB30_TIMESTAMP(val) bfin_write16(CAN_MB30_TIMESTAMP, val) +#define bfin_read_CAN_MB30_ID0() bfin_read16(CAN_MB30_ID0) +#define bfin_write_CAN_MB30_ID0(val) bfin_write16(CAN_MB30_ID0, val) +#define bfin_read_CAN_MB30_ID1() bfin_read16(CAN_MB30_ID1) +#define bfin_write_CAN_MB30_ID1(val) bfin_write16(CAN_MB30_ID1, val) +#define bfin_read_CAN_MB31_DATA0() bfin_read16(CAN_MB31_DATA0) +#define bfin_write_CAN_MB31_DATA0(val) bfin_write16(CAN_MB31_DATA0, val) +#define bfin_read_CAN_MB31_DATA1() bfin_read16(CAN_MB31_DATA1) +#define bfin_write_CAN_MB31_DATA1(val) bfin_write16(CAN_MB31_DATA1, val) +#define bfin_read_CAN_MB31_DATA2() bfin_read16(CAN_MB31_DATA2) +#define bfin_write_CAN_MB31_DATA2(val) bfin_write16(CAN_MB31_DATA2, val) +#define bfin_read_CAN_MB31_DATA3() bfin_read16(CAN_MB31_DATA3) +#define bfin_write_CAN_MB31_DATA3(val) bfin_write16(CAN_MB31_DATA3, val) +#define bfin_read_CAN_MB31_LENGTH() bfin_read16(CAN_MB31_LENGTH) +#define bfin_write_CAN_MB31_LENGTH(val) bfin_write16(CAN_MB31_LENGTH, val) +#define bfin_read_CAN_MB31_TIMESTAMP() bfin_read16(CAN_MB31_TIMESTAMP) +#define bfin_write_CAN_MB31_TIMESTAMP(val) bfin_write16(CAN_MB31_TIMESTAMP, val) +#define bfin_read_CAN_MB31_ID0() bfin_read16(CAN_MB31_ID0) +#define bfin_write_CAN_MB31_ID0(val) bfin_write16(CAN_MB31_ID0, val) +#define bfin_read_CAN_MB31_ID1() bfin_read16(CAN_MB31_ID1) +#define bfin_write_CAN_MB31_ID1(val) bfin_write16(CAN_MB31_ID1, val) + +#endif diff --git a/arch/blackfin/mach-bf538/include/mach/cdefBF539.h b/arch/blackfin/mach-bf538/include/mach/cdefBF539.h new file mode 100644 index 000000000000..198c4bbc8e5d --- /dev/null +++ b/arch/blackfin/mach-bf538/include/mach/cdefBF539.h @@ -0,0 +1,240 @@ +/* DO NOT EDIT THIS FILE + * Automatically generated by generate-cdef-headers.xsl + * DO NOT EDIT THIS FILE + */ + +#ifndef _CDEF_BF539_H +#define _CDEF_BF539_H + +/* Include MMRs Common to BF538 */ +#include "cdefBF538.h" + + +#define bfin_read_MXVR_CONFIG() bfin_read16(MXVR_CONFIG) +#define bfin_write_MXVR_CONFIG(val) bfin_write16(MXVR_CONFIG, val) +#define bfin_read_MXVR_PLL_CTL_0() bfin_read32(MXVR_PLL_CTL_0) +#define bfin_write_MXVR_PLL_CTL_0(val) bfin_write32(MXVR_PLL_CTL_0, val) +#define bfin_read_MXVR_STATE_0() bfin_read32(MXVR_STATE_0) +#define bfin_write_MXVR_STATE_0(val) bfin_write32(MXVR_STATE_0, val) +#define bfin_read_MXVR_STATE_1() bfin_read32(MXVR_STATE_1) +#define bfin_write_MXVR_STATE_1(val) bfin_write32(MXVR_STATE_1, val) +#define bfin_read_MXVR_INT_STAT_0() bfin_read32(MXVR_INT_STAT_0) +#define bfin_write_MXVR_INT_STAT_0(val) bfin_write32(MXVR_INT_STAT_0, val) +#define bfin_read_MXVR_INT_STAT_1() bfin_read32(MXVR_INT_STAT_1) +#define bfin_write_MXVR_INT_STAT_1(val) bfin_write32(MXVR_INT_STAT_1, val) +#define bfin_read_MXVR_INT_EN_0() bfin_read32(MXVR_INT_EN_0) +#define bfin_write_MXVR_INT_EN_0(val) bfin_write32(MXVR_INT_EN_0, val) +#define bfin_read_MXVR_INT_EN_1() bfin_read32(MXVR_INT_EN_1) +#define bfin_write_MXVR_INT_EN_1(val) bfin_write32(MXVR_INT_EN_1, val) +#define bfin_read_MXVR_POSITION() bfin_read16(MXVR_POSITION) +#define bfin_write_MXVR_POSITION(val) bfin_write16(MXVR_POSITION, val) +#define bfin_read_MXVR_MAX_POSITION() bfin_read16(MXVR_MAX_POSITION) +#define bfin_write_MXVR_MAX_POSITION(val) bfin_write16(MXVR_MAX_POSITION, val) +#define bfin_read_MXVR_DELAY() bfin_read16(MXVR_DELAY) +#define bfin_write_MXVR_DELAY(val) bfin_write16(MXVR_DELAY, val) +#define bfin_read_MXVR_MAX_DELAY() bfin_read16(MXVR_MAX_DELAY) +#define bfin_write_MXVR_MAX_DELAY(val) bfin_write16(MXVR_MAX_DELAY, val) +#define bfin_read_MXVR_LADDR() bfin_read32(MXVR_LADDR) +#define bfin_write_MXVR_LADDR(val) bfin_write32(MXVR_LADDR, val) +#define bfin_read_MXVR_GADDR() bfin_read16(MXVR_GADDR) +#define bfin_write_MXVR_GADDR(val) bfin_write16(MXVR_GADDR, val) +#define bfin_read_MXVR_AADDR() bfin_read32(MXVR_AADDR) +#define bfin_write_MXVR_AADDR(val) bfin_write32(MXVR_AADDR, val) +#define bfin_read_MXVR_ALLOC_0() bfin_read32(MXVR_ALLOC_0) +#define bfin_write_MXVR_ALLOC_0(val) bfin_write32(MXVR_ALLOC_0, val) +#define bfin_read_MXVR_ALLOC_1() bfin_read32(MXVR_ALLOC_1) +#define bfin_write_MXVR_ALLOC_1(val) bfin_write32(MXVR_ALLOC_1, val) +#define bfin_read_MXVR_ALLOC_2() bfin_read32(MXVR_ALLOC_2) +#define bfin_write_MXVR_ALLOC_2(val) bfin_write32(MXVR_ALLOC_2, val) +#define bfin_read_MXVR_ALLOC_3() bfin_read32(MXVR_ALLOC_3) +#define bfin_write_MXVR_ALLOC_3(val) bfin_write32(MXVR_ALLOC_3, val) +#define bfin_read_MXVR_ALLOC_4() bfin_read32(MXVR_ALLOC_4) +#define bfin_write_MXVR_ALLOC_4(val) bfin_write32(MXVR_ALLOC_4, val) +#define bfin_read_MXVR_ALLOC_5() bfin_read32(MXVR_ALLOC_5) +#define bfin_write_MXVR_ALLOC_5(val) bfin_write32(MXVR_ALLOC_5, val) +#define bfin_read_MXVR_ALLOC_6() bfin_read32(MXVR_ALLOC_6) +#define bfin_write_MXVR_ALLOC_6(val) bfin_write32(MXVR_ALLOC_6, val) +#define bfin_read_MXVR_ALLOC_7() bfin_read32(MXVR_ALLOC_7) +#define bfin_write_MXVR_ALLOC_7(val) bfin_write32(MXVR_ALLOC_7, val) +#define bfin_read_MXVR_ALLOC_8() bfin_read32(MXVR_ALLOC_8) +#define bfin_write_MXVR_ALLOC_8(val) bfin_write32(MXVR_ALLOC_8, val) +#define bfin_read_MXVR_ALLOC_9() bfin_read32(MXVR_ALLOC_9) +#define bfin_write_MXVR_ALLOC_9(val) bfin_write32(MXVR_ALLOC_9, val) +#define bfin_read_MXVR_ALLOC_10() bfin_read32(MXVR_ALLOC_10) +#define bfin_write_MXVR_ALLOC_10(val) bfin_write32(MXVR_ALLOC_10, val) +#define bfin_read_MXVR_ALLOC_11() bfin_read32(MXVR_ALLOC_11) +#define bfin_write_MXVR_ALLOC_11(val) bfin_write32(MXVR_ALLOC_11, val) +#define bfin_read_MXVR_ALLOC_12() bfin_read32(MXVR_ALLOC_12) +#define bfin_write_MXVR_ALLOC_12(val) bfin_write32(MXVR_ALLOC_12, val) +#define bfin_read_MXVR_ALLOC_13() bfin_read32(MXVR_ALLOC_13) +#define bfin_write_MXVR_ALLOC_13(val) bfin_write32(MXVR_ALLOC_13, val) +#define bfin_read_MXVR_ALLOC_14() bfin_read32(MXVR_ALLOC_14) +#define bfin_write_MXVR_ALLOC_14(val) bfin_write32(MXVR_ALLOC_14, val) +#define bfin_read_MXVR_SYNC_LCHAN_0() bfin_read32(MXVR_SYNC_LCHAN_0) +#define bfin_write_MXVR_SYNC_LCHAN_0(val) bfin_write32(MXVR_SYNC_LCHAN_0, val) +#define bfin_read_MXVR_SYNC_LCHAN_1() bfin_read32(MXVR_SYNC_LCHAN_1) +#define bfin_write_MXVR_SYNC_LCHAN_1(val) bfin_write32(MXVR_SYNC_LCHAN_1, val) +#define bfin_read_MXVR_SYNC_LCHAN_2() bfin_read32(MXVR_SYNC_LCHAN_2) +#define bfin_write_MXVR_SYNC_LCHAN_2(val) bfin_write32(MXVR_SYNC_LCHAN_2, val) +#define bfin_read_MXVR_SYNC_LCHAN_3() bfin_read32(MXVR_SYNC_LCHAN_3) +#define bfin_write_MXVR_SYNC_LCHAN_3(val) bfin_write32(MXVR_SYNC_LCHAN_3, val) +#define bfin_read_MXVR_SYNC_LCHAN_4() bfin_read32(MXVR_SYNC_LCHAN_4) +#define bfin_write_MXVR_SYNC_LCHAN_4(val) bfin_write32(MXVR_SYNC_LCHAN_4, val) +#define bfin_read_MXVR_SYNC_LCHAN_5() bfin_read32(MXVR_SYNC_LCHAN_5) +#define bfin_write_MXVR_SYNC_LCHAN_5(val) bfin_write32(MXVR_SYNC_LCHAN_5, val) +#define bfin_read_MXVR_SYNC_LCHAN_6() bfin_read32(MXVR_SYNC_LCHAN_6) +#define bfin_write_MXVR_SYNC_LCHAN_6(val) bfin_write32(MXVR_SYNC_LCHAN_6, val) +#define bfin_read_MXVR_SYNC_LCHAN_7() bfin_read32(MXVR_SYNC_LCHAN_7) +#define bfin_write_MXVR_SYNC_LCHAN_7(val) bfin_write32(MXVR_SYNC_LCHAN_7, val) +#define bfin_read_MXVR_DMA0_CONFIG() bfin_read32(MXVR_DMA0_CONFIG) +#define bfin_write_MXVR_DMA0_CONFIG(val) bfin_write32(MXVR_DMA0_CONFIG, val) +#define bfin_read_MXVR_DMA0_START_ADDR() bfin_readPTR(MXVR_DMA0_START_ADDR) +#define bfin_write_MXVR_DMA0_START_ADDR(val) bfin_writePTR(MXVR_DMA0_START_ADDR, val) +#define bfin_read_MXVR_DMA0_COUNT() bfin_read16(MXVR_DMA0_COUNT) +#define bfin_write_MXVR_DMA0_COUNT(val) bfin_write16(MXVR_DMA0_COUNT, val) +#define bfin_read_MXVR_DMA0_CURR_ADDR() bfin_readPTR(MXVR_DMA0_CURR_ADDR) +#define bfin_write_MXVR_DMA0_CURR_ADDR(val) bfin_writePTR(MXVR_DMA0_CURR_ADDR, val) +#define bfin_read_MXVR_DMA0_CURR_COUNT() bfin_read16(MXVR_DMA0_CURR_COUNT) +#define bfin_write_MXVR_DMA0_CURR_COUNT(val) bfin_write16(MXVR_DMA0_CURR_COUNT, val) +#define bfin_read_MXVR_DMA1_CONFIG() bfin_read32(MXVR_DMA1_CONFIG) +#define bfin_write_MXVR_DMA1_CONFIG(val) bfin_write32(MXVR_DMA1_CONFIG, val) +#define bfin_read_MXVR_DMA1_START_ADDR() bfin_readPTR(MXVR_DMA1_START_ADDR) +#define bfin_write_MXVR_DMA1_START_ADDR(val) bfin_writePTR(MXVR_DMA1_START_ADDR, val) +#define bfin_read_MXVR_DMA1_COUNT() bfin_read16(MXVR_DMA1_COUNT) +#define bfin_write_MXVR_DMA1_COUNT(val) bfin_write16(MXVR_DMA1_COUNT, val) +#define bfin_read_MXVR_DMA1_CURR_ADDR() bfin_readPTR(MXVR_DMA1_CURR_ADDR) +#define bfin_write_MXVR_DMA1_CURR_ADDR(val) bfin_writePTR(MXVR_DMA1_CURR_ADDR, val) +#define bfin_read_MXVR_DMA1_CURR_COUNT() bfin_read16(MXVR_DMA1_CURR_COUNT) +#define bfin_write_MXVR_DMA1_CURR_COUNT(val) bfin_write16(MXVR_DMA1_CURR_COUNT, val) +#define bfin_read_MXVR_DMA2_CONFIG() bfin_read32(MXVR_DMA2_CONFIG) +#define bfin_write_MXVR_DMA2_CONFIG(val) bfin_write32(MXVR_DMA2_CONFIG, val) +#define bfin_read_MXVR_DMA2_START_ADDR() bfin_readPTR(MXVR_DMA2_START_ADDR) +#define bfin_write_MXVR_DMA2_START_ADDR(val) bfin_writePTR(MXVR_DMA2_START_ADDR, val) +#define bfin_read_MXVR_DMA2_COUNT() bfin_read16(MXVR_DMA2_COUNT) +#define bfin_write_MXVR_DMA2_COUNT(val) bfin_write16(MXVR_DMA2_COUNT, val) +#define bfin_read_MXVR_DMA2_CURR_ADDR() bfin_readPTR(MXVR_DMA2_CURR_ADDR) +#define bfin_write_MXVR_DMA2_CURR_ADDR(val) bfin_writePTR(MXVR_DMA2_CURR_ADDR, val) +#define bfin_read_MXVR_DMA2_CURR_COUNT() bfin_read16(MXVR_DMA2_CURR_COUNT) +#define bfin_write_MXVR_DMA2_CURR_COUNT(val) bfin_write16(MXVR_DMA2_CURR_COUNT, val) +#define bfin_read_MXVR_DMA3_CONFIG() bfin_read32(MXVR_DMA3_CONFIG) +#define bfin_write_MXVR_DMA3_CONFIG(val) bfin_write32(MXVR_DMA3_CONFIG, val) +#define bfin_read_MXVR_DMA3_START_ADDR() bfin_readPTR(MXVR_DMA3_START_ADDR) +#define bfin_write_MXVR_DMA3_START_ADDR(val) bfin_writePTR(MXVR_DMA3_START_ADDR, val) +#define bfin_read_MXVR_DMA3_COUNT() bfin_read16(MXVR_DMA3_COUNT) +#define bfin_write_MXVR_DMA3_COUNT(val) bfin_write16(MXVR_DMA3_COUNT, val) +#define bfin_read_MXVR_DMA3_CURR_ADDR() bfin_readPTR(MXVR_DMA3_CURR_ADDR) +#define bfin_write_MXVR_DMA3_CURR_ADDR(val) bfin_writePTR(MXVR_DMA3_CURR_ADDR, val) +#define bfin_read_MXVR_DMA3_CURR_COUNT() bfin_read16(MXVR_DMA3_CURR_COUNT) +#define bfin_write_MXVR_DMA3_CURR_COUNT(val) bfin_write16(MXVR_DMA3_CURR_COUNT, val) +#define bfin_read_MXVR_DMA4_CONFIG() bfin_read32(MXVR_DMA4_CONFIG) +#define bfin_write_MXVR_DMA4_CONFIG(val) bfin_write32(MXVR_DMA4_CONFIG, val) +#define bfin_read_MXVR_DMA4_START_ADDR() bfin_readPTR(MXVR_DMA4_START_ADDR) +#define bfin_write_MXVR_DMA4_START_ADDR(val) bfin_writePTR(MXVR_DMA4_START_ADDR, val) +#define bfin_read_MXVR_DMA4_COUNT() bfin_read16(MXVR_DMA4_COUNT) +#define bfin_write_MXVR_DMA4_COUNT(val) bfin_write16(MXVR_DMA4_COUNT, val) +#define bfin_read_MXVR_DMA4_CURR_ADDR() bfin_readPTR(MXVR_DMA4_CURR_ADDR) +#define bfin_write_MXVR_DMA4_CURR_ADDR(val) bfin_writePTR(MXVR_DMA4_CURR_ADDR, val) +#define bfin_read_MXVR_DMA4_CURR_COUNT() bfin_read16(MXVR_DMA4_CURR_COUNT) +#define bfin_write_MXVR_DMA4_CURR_COUNT(val) bfin_write16(MXVR_DMA4_CURR_COUNT, val) +#define bfin_read_MXVR_DMA5_CONFIG() bfin_read32(MXVR_DMA5_CONFIG) +#define bfin_write_MXVR_DMA5_CONFIG(val) bfin_write32(MXVR_DMA5_CONFIG, val) +#define bfin_read_MXVR_DMA5_START_ADDR() bfin_readPTR(MXVR_DMA5_START_ADDR) +#define bfin_write_MXVR_DMA5_START_ADDR(val) bfin_writePTR(MXVR_DMA5_START_ADDR, val) +#define bfin_read_MXVR_DMA5_COUNT() bfin_read16(MXVR_DMA5_COUNT) +#define bfin_write_MXVR_DMA5_COUNT(val) bfin_write16(MXVR_DMA5_COUNT, val) +#define bfin_read_MXVR_DMA5_CURR_ADDR() bfin_readPTR(MXVR_DMA5_CURR_ADDR) +#define bfin_write_MXVR_DMA5_CURR_ADDR(val) bfin_writePTR(MXVR_DMA5_CURR_ADDR, val) +#define bfin_read_MXVR_DMA5_CURR_COUNT() bfin_read16(MXVR_DMA5_CURR_COUNT) +#define bfin_write_MXVR_DMA5_CURR_COUNT(val) bfin_write16(MXVR_DMA5_CURR_COUNT, val) +#define bfin_read_MXVR_DMA6_CONFIG() bfin_read32(MXVR_DMA6_CONFIG) +#define bfin_write_MXVR_DMA6_CONFIG(val) bfin_write32(MXVR_DMA6_CONFIG, val) +#define bfin_read_MXVR_DMA6_START_ADDR() bfin_readPTR(MXVR_DMA6_START_ADDR) +#define bfin_write_MXVR_DMA6_START_ADDR(val) bfin_writePTR(MXVR_DMA6_START_ADDR, val) +#define bfin_read_MXVR_DMA6_COUNT() bfin_read16(MXVR_DMA6_COUNT) +#define bfin_write_MXVR_DMA6_COUNT(val) bfin_write16(MXVR_DMA6_COUNT, val) +#define bfin_read_MXVR_DMA6_CURR_ADDR() bfin_readPTR(MXVR_DMA6_CURR_ADDR) +#define bfin_write_MXVR_DMA6_CURR_ADDR(val) bfin_writePTR(MXVR_DMA6_CURR_ADDR, val) +#define bfin_read_MXVR_DMA6_CURR_COUNT() bfin_read16(MXVR_DMA6_CURR_COUNT) +#define bfin_write_MXVR_DMA6_CURR_COUNT(val) bfin_write16(MXVR_DMA6_CURR_COUNT, val) +#define bfin_read_MXVR_DMA7_CONFIG() bfin_read32(MXVR_DMA7_CONFIG) +#define bfin_write_MXVR_DMA7_CONFIG(val) bfin_write32(MXVR_DMA7_CONFIG, val) +#define bfin_read_MXVR_DMA7_START_ADDR() bfin_readPTR(MXVR_DMA7_START_ADDR) +#define bfin_write_MXVR_DMA7_START_ADDR(val) bfin_writePTR(MXVR_DMA7_START_ADDR, val) +#define bfin_read_MXVR_DMA7_COUNT() bfin_read16(MXVR_DMA7_COUNT) +#define bfin_write_MXVR_DMA7_COUNT(val) bfin_write16(MXVR_DMA7_COUNT, val) +#define bfin_read_MXVR_DMA7_CURR_ADDR() bfin_readPTR(MXVR_DMA7_CURR_ADDR) +#define bfin_write_MXVR_DMA7_CURR_ADDR(val) bfin_writePTR(MXVR_DMA7_CURR_ADDR, val) +#define bfin_read_MXVR_DMA7_CURR_COUNT() bfin_read16(MXVR_DMA7_CURR_COUNT) +#define bfin_write_MXVR_DMA7_CURR_COUNT(val) bfin_write16(MXVR_DMA7_CURR_COUNT, val) +#define bfin_read_MXVR_AP_CTL() bfin_read16(MXVR_AP_CTL) +#define bfin_write_MXVR_AP_CTL(val) bfin_write16(MXVR_AP_CTL, val) +#define bfin_read_MXVR_APRB_START_ADDR() bfin_readPTR(MXVR_APRB_START_ADDR) +#define bfin_write_MXVR_APRB_START_ADDR(val) bfin_writePTR(MXVR_APRB_START_ADDR, val) +#define bfin_read_MXVR_APRB_CURR_ADDR() bfin_readPTR(MXVR_APRB_CURR_ADDR) +#define bfin_write_MXVR_APRB_CURR_ADDR(val) bfin_writePTR(MXVR_APRB_CURR_ADDR, val) +#define bfin_read_MXVR_APTB_START_ADDR() bfin_readPTR(MXVR_APTB_START_ADDR) +#define bfin_write_MXVR_APTB_START_ADDR(val) bfin_writePTR(MXVR_APTB_START_ADDR, val) +#define bfin_read_MXVR_APTB_CURR_ADDR() bfin_readPTR(MXVR_APTB_CURR_ADDR) +#define bfin_write_MXVR_APTB_CURR_ADDR(val) bfin_writePTR(MXVR_APTB_CURR_ADDR, val) +#define bfin_read_MXVR_CM_CTL() bfin_read32(MXVR_CM_CTL) +#define bfin_write_MXVR_CM_CTL(val) bfin_write32(MXVR_CM_CTL, val) +#define bfin_read_MXVR_CMRB_START_ADDR() bfin_readPTR(MXVR_CMRB_START_ADDR) +#define bfin_write_MXVR_CMRB_START_ADDR(val) bfin_writePTR(MXVR_CMRB_START_ADDR, val) +#define bfin_read_MXVR_CMRB_CURR_ADDR() bfin_readPTR(MXVR_CMRB_CURR_ADDR) +#define bfin_write_MXVR_CMRB_CURR_ADDR(val) bfin_writePTR(MXVR_CMRB_CURR_ADDR, val) +#define bfin_read_MXVR_CMTB_START_ADDR() bfin_readPTR(MXVR_CMTB_START_ADDR) +#define bfin_write_MXVR_CMTB_START_ADDR(val) bfin_writePTR(MXVR_CMTB_START_ADDR, val) +#define bfin_read_MXVR_CMTB_CURR_ADDR() bfin_readPTR(MXVR_CMTB_CURR_ADDR) +#define bfin_write_MXVR_CMTB_CURR_ADDR(val) bfin_writePTR(MXVR_CMTB_CURR_ADDR, val) +#define bfin_read_MXVR_RRDB_START_ADDR() bfin_readPTR(MXVR_RRDB_START_ADDR) +#define bfin_write_MXVR_RRDB_START_ADDR(val) bfin_writePTR(MXVR_RRDB_START_ADDR, val) +#define bfin_read_MXVR_RRDB_CURR_ADDR() bfin_readPTR(MXVR_RRDB_CURR_ADDR) +#define bfin_write_MXVR_RRDB_CURR_ADDR(val) bfin_writePTR(MXVR_RRDB_CURR_ADDR, val) +#define bfin_read_MXVR_PAT_DATA_0() bfin_read32(MXVR_PAT_DATA_0) +#define bfin_write_MXVR_PAT_DATA_0(val) bfin_write32(MXVR_PAT_DATA_0, val) +#define bfin_read_MXVR_PAT_EN_0() bfin_read32(MXVR_PAT_EN_0) +#define bfin_write_MXVR_PAT_EN_0(val) bfin_write32(MXVR_PAT_EN_0, val) +#define bfin_read_MXVR_PAT_DATA_1() bfin_read32(MXVR_PAT_DATA_1) +#define bfin_write_MXVR_PAT_DATA_1(val) bfin_write32(MXVR_PAT_DATA_1, val) +#define bfin_read_MXVR_PAT_EN_1() bfin_read32(MXVR_PAT_EN_1) +#define bfin_write_MXVR_PAT_EN_1(val) bfin_write32(MXVR_PAT_EN_1, val) +#define bfin_read_MXVR_FRAME_CNT_0() bfin_read16(MXVR_FRAME_CNT_0) +#define bfin_write_MXVR_FRAME_CNT_0(val) bfin_write16(MXVR_FRAME_CNT_0, val) +#define bfin_read_MXVR_FRAME_CNT_1() bfin_read16(MXVR_FRAME_CNT_1) +#define bfin_write_MXVR_FRAME_CNT_1(val) bfin_write16(MXVR_FRAME_CNT_1, val) +#define bfin_read_MXVR_ROUTING_0() bfin_read32(MXVR_ROUTING_0) +#define bfin_write_MXVR_ROUTING_0(val) bfin_write32(MXVR_ROUTING_0, val) +#define bfin_read_MXVR_ROUTING_1() bfin_read32(MXVR_ROUTING_1) +#define bfin_write_MXVR_ROUTING_1(val) bfin_write32(MXVR_ROUTING_1, val) +#define bfin_read_MXVR_ROUTING_2() bfin_read32(MXVR_ROUTING_2) +#define bfin_write_MXVR_ROUTING_2(val) bfin_write32(MXVR_ROUTING_2, val) +#define bfin_read_MXVR_ROUTING_3() bfin_read32(MXVR_ROUTING_3) +#define bfin_write_MXVR_ROUTING_3(val) bfin_write32(MXVR_ROUTING_3, val) +#define bfin_read_MXVR_ROUTING_4() bfin_read32(MXVR_ROUTING_4) +#define bfin_write_MXVR_ROUTING_4(val) bfin_write32(MXVR_ROUTING_4, val) +#define bfin_read_MXVR_ROUTING_5() bfin_read32(MXVR_ROUTING_5) +#define bfin_write_MXVR_ROUTING_5(val) bfin_write32(MXVR_ROUTING_5, val) +#define bfin_read_MXVR_ROUTING_6() bfin_read32(MXVR_ROUTING_6) +#define bfin_write_MXVR_ROUTING_6(val) bfin_write32(MXVR_ROUTING_6, val) +#define bfin_read_MXVR_ROUTING_7() bfin_read32(MXVR_ROUTING_7) +#define bfin_write_MXVR_ROUTING_7(val) bfin_write32(MXVR_ROUTING_7, val) +#define bfin_read_MXVR_ROUTING_8() bfin_read32(MXVR_ROUTING_8) +#define bfin_write_MXVR_ROUTING_8(val) bfin_write32(MXVR_ROUTING_8, val) +#define bfin_read_MXVR_ROUTING_9() bfin_read32(MXVR_ROUTING_9) +#define bfin_write_MXVR_ROUTING_9(val) bfin_write32(MXVR_ROUTING_9, val) +#define bfin_read_MXVR_ROUTING_10() bfin_read32(MXVR_ROUTING_10) +#define bfin_write_MXVR_ROUTING_10(val) bfin_write32(MXVR_ROUTING_10, val) +#define bfin_read_MXVR_ROUTING_11() bfin_read32(MXVR_ROUTING_11) +#define bfin_write_MXVR_ROUTING_11(val) bfin_write32(MXVR_ROUTING_11, val) +#define bfin_read_MXVR_ROUTING_12() bfin_read32(MXVR_ROUTING_12) +#define bfin_write_MXVR_ROUTING_12(val) bfin_write32(MXVR_ROUTING_12, val) +#define bfin_read_MXVR_ROUTING_13() bfin_read32(MXVR_ROUTING_13) +#define bfin_write_MXVR_ROUTING_13(val) bfin_write32(MXVR_ROUTING_13, val) +#define bfin_read_MXVR_ROUTING_14() bfin_read32(MXVR_ROUTING_14) +#define bfin_write_MXVR_ROUTING_14(val) bfin_write32(MXVR_ROUTING_14, val) +#define bfin_read_MXVR_PLL_CTL_1() bfin_read32(MXVR_PLL_CTL_1) +#define bfin_write_MXVR_PLL_CTL_1(val) bfin_write32(MXVR_PLL_CTL_1, val) +#define bfin_read_MXVR_BLOCK_CNT() bfin_read16(MXVR_BLOCK_CNT) +#define bfin_write_MXVR_BLOCK_CNT(val) bfin_write16(MXVR_BLOCK_CNT, val) + +#endif /* _CDEF_BF539_H */ diff --git a/arch/blackfin/mach-bf538/include/mach/defBF539.h b/arch/blackfin/mach-bf538/include/mach/defBF539.h new file mode 100644 index 000000000000..6adbfcc65a35 --- /dev/null +++ b/arch/blackfin/mach-bf538/include/mach/defBF539.h @@ -0,0 +1,4243 @@ +/************************************************************************ + * + * This file is subject to the terms and conditions of the GNU Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Non-GPL License also available as part of VisualDSP++ + * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html + * + * (c) Copyright 2001-2005 Analog Devices, Inc. All rights reserved + * + * This file under source code control, please send bugs or changes to: + * dsptools.support@analog.com + * + ************************************************************************/ +/* + * File: include/asm-blackfin/mach-bf538/defBF539.h + * Based on: + * Author: + * + * Created: + * Description: + * + * Rev: + * + * Modified: + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +/* SYSTEM & MM REGISTER BIT & ADDRESS DEFINITIONS FOR ADSP-BF538/9 */ + +#ifndef _DEF_BF539_H +#define _DEF_BF539_H + +/* include all Core registers and bit definitions */ +#include + + +/*********************************************************************************** */ +/* System MMR Register Map */ +/*********************************************************************************** */ +/* Clock/Regulator Control (0xFFC00000 - 0xFFC000FF) */ +#define PLL_CTL 0xFFC00000 /* PLL Control register (16-bit) */ +#define PLL_DIV 0xFFC00004 /* PLL Divide Register (16-bit) */ +#define VR_CTL 0xFFC00008 /* Voltage Regulator Control Register (16-bit) */ +#define PLL_STAT 0xFFC0000C /* PLL Status register (16-bit) */ +#define PLL_LOCKCNT 0xFFC00010 /* PLL Lock Count register (16-bit) */ +#define CHIPID 0xFFC00014 /* Chip ID Register */ + +/* CHIPID Masks */ +#define CHIPID_VERSION 0xF0000000 +#define CHIPID_FAMILY 0x0FFFF000 +#define CHIPID_MANUFACTURE 0x00000FFE + +/* System Interrupt Controller (0xFFC00100 - 0xFFC001FF) */ +#define SWRST 0xFFC00100 /* Software Reset Register (16-bit) */ +#define SYSCR 0xFFC00104 /* System Configuration registe */ +#define SIC_IMASK0 0xFFC0010C /* Interrupt Mask Register */ +#define SIC_IAR0 0xFFC00110 /* Interrupt Assignment Register 0 */ +#define SIC_IAR1 0xFFC00114 /* Interrupt Assignment Register 1 */ +#define SIC_IAR2 0xFFC00118 /* Interrupt Assignment Register 2 */ +#define SIC_IAR3 0xFFC0011C /* Interrupt Assignment Register 3 */ +#define SIC_ISR0 0xFFC00120 /* Interrupt Status Register */ +#define SIC_IWR0 0xFFC00124 /* Interrupt Wakeup Register */ +#define SIC_IMASK1 0xFFC00128 /* Interrupt Mask Register 1 */ +#define SIC_ISR1 0xFFC0012C /* Interrupt Status Register 1 */ +#define SIC_IWR1 0xFFC00130 /* Interrupt Wakeup Register 1 */ +#define SIC_IAR4 0xFFC00134 /* Interrupt Assignment Register 4 */ +#define SIC_IAR5 0xFFC00138 /* Interrupt Assignment Register 5 */ +#define SIC_IAR6 0xFFC0013C /* Interrupt Assignment Register 6 */ + + +/* Watchdog Timer (0xFFC00200 - 0xFFC002FF) */ +#define WDOG_CTL 0xFFC00200 /* Watchdog Control Register */ +#define WDOG_CNT 0xFFC00204 /* Watchdog Count Register */ +#define WDOG_STAT 0xFFC00208 /* Watchdog Status Register */ + + +/* Real Time Clock (0xFFC00300 - 0xFFC003FF) */ +#define RTC_STAT 0xFFC00300 /* RTC Status Register */ +#define RTC_ICTL 0xFFC00304 /* RTC Interrupt Control Register */ +#define RTC_ISTAT 0xFFC00308 /* RTC Interrupt Status Register */ +#define RTC_SWCNT 0xFFC0030C /* RTC Stopwatch Count Register */ +#define RTC_ALARM 0xFFC00310 /* RTC Alarm Time Register */ +#define RTC_FAST 0xFFC00314 /* RTC Prescaler Enable Register */ +#define RTC_PREN 0xFFC00314 /* RTC Prescaler Enable Register (alternate macro) */ + + +/* UART0 Controller (0xFFC00400 - 0xFFC004FF) */ +#define UART0_THR 0xFFC00400 /* Transmit Holding register */ +#define UART0_RBR 0xFFC00400 /* Receive Buffer register */ +#define UART0_DLL 0xFFC00400 /* Divisor Latch (Low-Byte) */ +#define UART0_IER 0xFFC00404 /* Interrupt Enable Register */ +#define UART0_DLH 0xFFC00404 /* Divisor Latch (High-Byte) */ +#define UART0_IIR 0xFFC00408 /* Interrupt Identification Register */ +#define UART0_LCR 0xFFC0040C /* Line Control Register */ +#define UART0_MCR 0xFFC00410 /* Modem Control Register */ +#define UART0_LSR 0xFFC00414 /* Line Status Register */ +#define UART0_SCR 0xFFC0041C /* SCR Scratch Register */ +#define UART0_GCTL 0xFFC00424 /* Global Control Register */ + + +/* SPI0 Controller (0xFFC00500 - 0xFFC005FF) */ + +#define SPI0_CTL 0xFFC00500 /* SPI0 Control Register */ +#define SPI0_FLG 0xFFC00504 /* SPI0 Flag register */ +#define SPI0_STAT 0xFFC00508 /* SPI0 Status register */ +#define SPI0_TDBR 0xFFC0050C /* SPI0 Transmit Data Buffer Register */ +#define SPI0_RDBR 0xFFC00510 /* SPI0 Receive Data Buffer Register */ +#define SPI0_BAUD 0xFFC00514 /* SPI0 Baud rate Register */ +#define SPI0_SHADOW 0xFFC00518 /* SPI0_RDBR Shadow Register */ +#define SPI0_REGBASE SPI0_CTL + + +/* TIMER 0, 1, 2 Registers (0xFFC00600 - 0xFFC006FF) */ +#define TIMER0_CONFIG 0xFFC00600 /* Timer 0 Configuration Register */ +#define TIMER0_COUNTER 0xFFC00604 /* Timer 0 Counter Register */ +#define TIMER0_PERIOD 0xFFC00608 /* Timer 0 Period Register */ +#define TIMER0_WIDTH 0xFFC0060C /* Timer 0 Width Register */ + +#define TIMER1_CONFIG 0xFFC00610 /* Timer 1 Configuration Register */ +#define TIMER1_COUNTER 0xFFC00614 /* Timer 1 Counter Register */ +#define TIMER1_PERIOD 0xFFC00618 /* Timer 1 Period Register */ +#define TIMER1_WIDTH 0xFFC0061C /* Timer 1 Width Register */ + +#define TIMER2_CONFIG 0xFFC00620 /* Timer 2 Configuration Register */ +#define TIMER2_COUNTER 0xFFC00624 /* Timer 2 Counter Register */ +#define TIMER2_PERIOD 0xFFC00628 /* Timer 2 Period Register */ +#define TIMER2_WIDTH 0xFFC0062C /* Timer 2 Width Register */ + +#define TIMER_ENABLE 0xFFC00640 /* Timer Enable Register */ +#define TIMER_DISABLE 0xFFC00644 /* Timer Disable Register */ +#define TIMER_STATUS 0xFFC00648 /* Timer Status Register */ + + +/* Programmable Flags (0xFFC00700 - 0xFFC007FF) */ +#define FIO_FLAG_D 0xFFC00700 /* Flag Mask to directly specify state of pins */ +#define FIO_FLAG_C 0xFFC00704 /* Peripheral Interrupt Flag Register (clear) */ +#define FIO_FLAG_S 0xFFC00708 /* Peripheral Interrupt Flag Register (set) */ +#define FIO_FLAG_T 0xFFC0070C /* Flag Mask to directly toggle state of pins */ +#define FIO_MASKA_D 0xFFC00710 /* Flag Mask Interrupt A Register (set directly) */ +#define FIO_MASKA_C 0xFFC00714 /* Flag Mask Interrupt A Register (clear) */ +#define FIO_MASKA_S 0xFFC00718 /* Flag Mask Interrupt A Register (set) */ +#define FIO_MASKA_T 0xFFC0071C /* Flag Mask Interrupt A Register (toggle) */ +#define FIO_MASKB_D 0xFFC00720 /* Flag Mask Interrupt B Register (set directly) */ +#define FIO_MASKB_C 0xFFC00724 /* Flag Mask Interrupt B Register (clear) */ +#define FIO_MASKB_S 0xFFC00728 /* Flag Mask Interrupt B Register (set) */ +#define FIO_MASKB_T 0xFFC0072C /* Flag Mask Interrupt B Register (toggle) */ +#define FIO_DIR 0xFFC00730 /* Peripheral Flag Direction Register */ +#define FIO_POLAR 0xFFC00734 /* Flag Source Polarity Register */ +#define FIO_EDGE 0xFFC00738 /* Flag Source Sensitivity Register */ +#define FIO_BOTH 0xFFC0073C /* Flag Set on BOTH Edges Register */ +#define FIO_INEN 0xFFC00740 /* Flag Input Enable Register */ + + +/* SPORT0 Controller (0xFFC00800 - 0xFFC008FF) */ +#define SPORT0_TCR1 0xFFC00800 /* SPORT0 Transmit Configuration 1 Register */ +#define SPORT0_TCR2 0xFFC00804 /* SPORT0 Transmit Configuration 2 Register */ +#define SPORT0_TCLKDIV 0xFFC00808 /* SPORT0 Transmit Clock Divider */ +#define SPORT0_TFSDIV 0xFFC0080C /* SPORT0 Transmit Frame Sync Divider */ +#define SPORT0_TX 0xFFC00810 /* SPORT0 TX Data Register */ +#define SPORT0_RX 0xFFC00818 /* SPORT0 RX Data Register */ +#define SPORT0_RCR1 0xFFC00820 /* SPORT0 Transmit Configuration 1 Register */ +#define SPORT0_RCR2 0xFFC00824 /* SPORT0 Transmit Configuration 2 Register */ +#define SPORT0_RCLKDIV 0xFFC00828 /* SPORT0 Receive Clock Divider */ +#define SPORT0_RFSDIV 0xFFC0082C /* SPORT0 Receive Frame Sync Divider */ +#define SPORT0_STAT 0xFFC00830 /* SPORT0 Status Register */ +#define SPORT0_CHNL 0xFFC00834 /* SPORT0 Current Channel Register */ +#define SPORT0_MCMC1 0xFFC00838 /* SPORT0 Multi-Channel Configuration Register 1 */ +#define SPORT0_MCMC2 0xFFC0083C /* SPORT0 Multi-Channel Configuration Register 2 */ +#define SPORT0_MTCS0 0xFFC00840 /* SPORT0 Multi-Channel Transmit Select Register 0 */ +#define SPORT0_MTCS1 0xFFC00844 /* SPORT0 Multi-Channel Transmit Select Register 1 */ +#define SPORT0_MTCS2 0xFFC00848 /* SPORT0 Multi-Channel Transmit Select Register 2 */ +#define SPORT0_MTCS3 0xFFC0084C /* SPORT0 Multi-Channel Transmit Select Register 3 */ +#define SPORT0_MRCS0 0xFFC00850 /* SPORT0 Multi-Channel Receive Select Register 0 */ +#define SPORT0_MRCS1 0xFFC00854 /* SPORT0 Multi-Channel Receive Select Register 1 */ +#define SPORT0_MRCS2 0xFFC00858 /* SPORT0 Multi-Channel Receive Select Register 2 */ +#define SPORT0_MRCS3 0xFFC0085C /* SPORT0 Multi-Channel Receive Select Register 3 */ + + +/* SPORT1 Controller (0xFFC00900 - 0xFFC009FF) */ +#define SPORT1_TCR1 0xFFC00900 /* SPORT1 Transmit Configuration 1 Register */ +#define SPORT1_TCR2 0xFFC00904 /* SPORT1 Transmit Configuration 2 Register */ +#define SPORT1_TCLKDIV 0xFFC00908 /* SPORT1 Transmit Clock Divider */ +#define SPORT1_TFSDIV 0xFFC0090C /* SPORT1 Transmit Frame Sync Divider */ +#define SPORT1_TX 0xFFC00910 /* SPORT1 TX Data Register */ +#define SPORT1_RX 0xFFC00918 /* SPORT1 RX Data Register */ +#define SPORT1_RCR1 0xFFC00920 /* SPORT1 Transmit Configuration 1 Register */ +#define SPORT1_RCR2 0xFFC00924 /* SPORT1 Transmit Configuration 2 Register */ +#define SPORT1_RCLKDIV 0xFFC00928 /* SPORT1 Receive Clock Divider */ +#define SPORT1_RFSDIV 0xFFC0092C /* SPORT1 Receive Frame Sync Divider */ +#define SPORT1_STAT 0xFFC00930 /* SPORT1 Status Register */ +#define SPORT1_CHNL 0xFFC00934 /* SPORT1 Current Channel Register */ +#define SPORT1_MCMC1 0xFFC00938 /* SPORT1 Multi-Channel Configuration Register 1 */ +#define SPORT1_MCMC2 0xFFC0093C /* SPORT1 Multi-Channel Configuration Register 2 */ +#define SPORT1_MTCS0 0xFFC00940 /* SPORT1 Multi-Channel Transmit Select Register 0 */ +#define SPORT1_MTCS1 0xFFC00944 /* SPORT1 Multi-Channel Transmit Select Register 1 */ +#define SPORT1_MTCS2 0xFFC00948 /* SPORT1 Multi-Channel Transmit Select Register 2 */ +#define SPORT1_MTCS3 0xFFC0094C /* SPORT1 Multi-Channel Transmit Select Register 3 */ +#define SPORT1_MRCS0 0xFFC00950 /* SPORT1 Multi-Channel Receive Select Register 0 */ +#define SPORT1_MRCS1 0xFFC00954 /* SPORT1 Multi-Channel Receive Select Register 1 */ +#define SPORT1_MRCS2 0xFFC00958 /* SPORT1 Multi-Channel Receive Select Register 2 */ +#define SPORT1_MRCS3 0xFFC0095C /* SPORT1 Multi-Channel Receive Select Register 3 */ + + +/* External Bus Interface Unit (0xFFC00A00 - 0xFFC00AFF) */ +/* Asynchronous Memory Controller */ +#define EBIU_AMGCTL 0xFFC00A00 /* Asynchronous Memory Global Control Register */ +#define EBIU_AMBCTL0 0xFFC00A04 /* Asynchronous Memory Bank Control Register 0 */ +#define EBIU_AMBCTL1 0xFFC00A08 /* Asynchronous Memory Bank Control Register 1 */ + +/* SDRAM Controller */ +#define EBIU_SDGCTL 0xFFC00A10 /* SDRAM Global Control Register */ +#define EBIU_SDBCTL 0xFFC00A14 /* SDRAM Bank Control Register */ +#define EBIU_SDRRC 0xFFC00A18 /* SDRAM Refresh Rate Control Register */ +#define EBIU_SDSTAT 0xFFC00A1C /* SDRAM Status Register */ + + + +/* DMA Controller 0 Traffic Control Registers (0xFFC00B00 - 0xFFC00BFF) */ + +#define DMAC0_TC_PER 0xFFC00B0C /* DMA Controller 0 Traffic Control Periods Register */ +#define DMAC0_TC_CNT 0xFFC00B10 /* DMA Controller 0 Traffic Control Current Counts Register */ + +/* Alternate deprecated register names (below) provided for backwards code compatibility */ +#define DMA0_TCPER DMAC0_TC_PER +#define DMA0_TCCNT DMAC0_TC_CNT + + +/* DMA Controller 0 (0xFFC00C00 - 0xFFC00FFF) */ + +#define DMA0_NEXT_DESC_PTR 0xFFC00C00 /* DMA Channel 0 Next Descriptor Pointer Register */ +#define DMA0_START_ADDR 0xFFC00C04 /* DMA Channel 0 Start Address Register */ +#define DMA0_CONFIG 0xFFC00C08 /* DMA Channel 0 Configuration Register */ +#define DMA0_X_COUNT 0xFFC00C10 /* DMA Channel 0 X Count Register */ +#define DMA0_X_MODIFY 0xFFC00C14 /* DMA Channel 0 X Modify Register */ +#define DMA0_Y_COUNT 0xFFC00C18 /* DMA Channel 0 Y Count Register */ +#define DMA0_Y_MODIFY 0xFFC00C1C /* DMA Channel 0 Y Modify Register */ +#define DMA0_CURR_DESC_PTR 0xFFC00C20 /* DMA Channel 0 Current Descriptor Pointer Register */ +#define DMA0_CURR_ADDR 0xFFC00C24 /* DMA Channel 0 Current Address Register */ +#define DMA0_IRQ_STATUS 0xFFC00C28 /* DMA Channel 0 Interrupt/Status Register */ +#define DMA0_PERIPHERAL_MAP 0xFFC00C2C /* DMA Channel 0 Peripheral Map Register */ +#define DMA0_CURR_X_COUNT 0xFFC00C30 /* DMA Channel 0 Current X Count Register */ +#define DMA0_CURR_Y_COUNT 0xFFC00C38 /* DMA Channel 0 Current Y Count Register */ + +#define DMA1_NEXT_DESC_PTR 0xFFC00C40 /* DMA Channel 1 Next Descriptor Pointer Register */ +#define DMA1_START_ADDR 0xFFC00C44 /* DMA Channel 1 Start Address Register */ +#define DMA1_CONFIG 0xFFC00C48 /* DMA Channel 1 Configuration Register */ +#define DMA1_X_COUNT 0xFFC00C50 /* DMA Channel 1 X Count Register */ +#define DMA1_X_MODIFY 0xFFC00C54 /* DMA Channel 1 X Modify Register */ +#define DMA1_Y_COUNT 0xFFC00C58 /* DMA Channel 1 Y Count Register */ +#define DMA1_Y_MODIFY 0xFFC00C5C /* DMA Channel 1 Y Modify Register */ +#define DMA1_CURR_DESC_PTR 0xFFC00C60 /* DMA Channel 1 Current Descriptor Pointer Register */ +#define DMA1_CURR_ADDR 0xFFC00C64 /* DMA Channel 1 Current Address Register */ +#define DMA1_IRQ_STATUS 0xFFC00C68 /* DMA Channel 1 Interrupt/Status Register */ +#define DMA1_PERIPHERAL_MAP 0xFFC00C6C /* DMA Channel 1 Peripheral Map Register */ +#define DMA1_CURR_X_COUNT 0xFFC00C70 /* DMA Channel 1 Current X Count Register */ +#define DMA1_CURR_Y_COUNT 0xFFC00C78 /* DMA Channel 1 Current Y Count Register */ + +#define DMA2_NEXT_DESC_PTR 0xFFC00C80 /* DMA Channel 2 Next Descriptor Pointer Register */ +#define DMA2_START_ADDR 0xFFC00C84 /* DMA Channel 2 Start Address Register */ +#define DMA2_CONFIG 0xFFC00C88 /* DMA Channel 2 Configuration Register */ +#define DMA2_X_COUNT 0xFFC00C90 /* DMA Channel 2 X Count Register */ +#define DMA2_X_MODIFY 0xFFC00C94 /* DMA Channel 2 X Modify Register */ +#define DMA2_Y_COUNT 0xFFC00C98 /* DMA Channel 2 Y Count Register */ +#define DMA2_Y_MODIFY 0xFFC00C9C /* DMA Channel 2 Y Modify Register */ +#define DMA2_CURR_DESC_PTR 0xFFC00CA0 /* DMA Channel 2 Current Descriptor Pointer Register */ +#define DMA2_CURR_ADDR 0xFFC00CA4 /* DMA Channel 2 Current Address Register */ +#define DMA2_IRQ_STATUS 0xFFC00CA8 /* DMA Channel 2 Interrupt/Status Register */ +#define DMA2_PERIPHERAL_MAP 0xFFC00CAC /* DMA Channel 2 Peripheral Map Register */ +#define DMA2_CURR_X_COUNT 0xFFC00CB0 /* DMA Channel 2 Current X Count Register */ +#define DMA2_CURR_Y_COUNT 0xFFC00CB8 /* DMA Channel 2 Current Y Count Register */ + +#define DMA3_NEXT_DESC_PTR 0xFFC00CC0 /* DMA Channel 3 Next Descriptor Pointer Register */ +#define DMA3_START_ADDR 0xFFC00CC4 /* DMA Channel 3 Start Address Register */ +#define DMA3_CONFIG 0xFFC00CC8 /* DMA Channel 3 Configuration Register */ +#define DMA3_X_COUNT 0xFFC00CD0 /* DMA Channel 3 X Count Register */ +#define DMA3_X_MODIFY 0xFFC00CD4 /* DMA Channel 3 X Modify Register */ +#define DMA3_Y_COUNT 0xFFC00CD8 /* DMA Channel 3 Y Count Register */ +#define DMA3_Y_MODIFY 0xFFC00CDC /* DMA Channel 3 Y Modify Register */ +#define DMA3_CURR_DESC_PTR 0xFFC00CE0 /* DMA Channel 3 Current Descriptor Pointer Register */ +#define DMA3_CURR_ADDR 0xFFC00CE4 /* DMA Channel 3 Current Address Register */ +#define DMA3_IRQ_STATUS 0xFFC00CE8 /* DMA Channel 3 Interrupt/Status Register */ +#define DMA3_PERIPHERAL_MAP 0xFFC00CEC /* DMA Channel 3 Peripheral Map Register */ +#define DMA3_CURR_X_COUNT 0xFFC00CF0 /* DMA Channel 3 Current X Count Register */ +#define DMA3_CURR_Y_COUNT 0xFFC00CF8 /* DMA Channel 3 Current Y Count Register */ + +#define DMA4_NEXT_DESC_PTR 0xFFC00D00 /* DMA Channel 4 Next Descriptor Pointer Register */ +#define DMA4_START_ADDR 0xFFC00D04 /* DMA Channel 4 Start Address Register */ +#define DMA4_CONFIG 0xFFC00D08 /* DMA Channel 4 Configuration Register */ +#define DMA4_X_COUNT 0xFFC00D10 /* DMA Channel 4 X Count Register */ +#define DMA4_X_MODIFY 0xFFC00D14 /* DMA Channel 4 X Modify Register */ +#define DMA4_Y_COUNT 0xFFC00D18 /* DMA Channel 4 Y Count Register */ +#define DMA4_Y_MODIFY 0xFFC00D1C /* DMA Channel 4 Y Modify Register */ +#define DMA4_CURR_DESC_PTR 0xFFC00D20 /* DMA Channel 4 Current Descriptor Pointer Register */ +#define DMA4_CURR_ADDR 0xFFC00D24 /* DMA Channel 4 Current Address Register */ +#define DMA4_IRQ_STATUS 0xFFC00D28 /* DMA Channel 4 Interrupt/Status Register */ +#define DMA4_PERIPHERAL_MAP 0xFFC00D2C /* DMA Channel 4 Peripheral Map Register */ +#define DMA4_CURR_X_COUNT 0xFFC00D30 /* DMA Channel 4 Current X Count Register */ +#define DMA4_CURR_Y_COUNT 0xFFC00D38 /* DMA Channel 4 Current Y Count Register */ + +#define DMA5_NEXT_DESC_PTR 0xFFC00D40 /* DMA Channel 5 Next Descriptor Pointer Register */ +#define DMA5_START_ADDR 0xFFC00D44 /* DMA Channel 5 Start Address Register */ +#define DMA5_CONFIG 0xFFC00D48 /* DMA Channel 5 Configuration Register */ +#define DMA5_X_COUNT 0xFFC00D50 /* DMA Channel 5 X Count Register */ +#define DMA5_X_MODIFY 0xFFC00D54 /* DMA Channel 5 X Modify Register */ +#define DMA5_Y_COUNT 0xFFC00D58 /* DMA Channel 5 Y Count Register */ +#define DMA5_Y_MODIFY 0xFFC00D5C /* DMA Channel 5 Y Modify Register */ +#define DMA5_CURR_DESC_PTR 0xFFC00D60 /* DMA Channel 5 Current Descriptor Pointer Register */ +#define DMA5_CURR_ADDR 0xFFC00D64 /* DMA Channel 5 Current Address Register */ +#define DMA5_IRQ_STATUS 0xFFC00D68 /* DMA Channel 5 Interrupt/Status Register */ +#define DMA5_PERIPHERAL_MAP 0xFFC00D6C /* DMA Channel 5 Peripheral Map Register */ +#define DMA5_CURR_X_COUNT 0xFFC00D70 /* DMA Channel 5 Current X Count Register */ +#define DMA5_CURR_Y_COUNT 0xFFC00D78 /* DMA Channel 5 Current Y Count Register */ + +#define DMA6_NEXT_DESC_PTR 0xFFC00D80 /* DMA Channel 6 Next Descriptor Pointer Register */ +#define DMA6_START_ADDR 0xFFC00D84 /* DMA Channel 6 Start Address Register */ +#define DMA6_CONFIG 0xFFC00D88 /* DMA Channel 6 Configuration Register */ +#define DMA6_X_COUNT 0xFFC00D90 /* DMA Channel 6 X Count Register */ +#define DMA6_X_MODIFY 0xFFC00D94 /* DMA Channel 6 X Modify Register */ +#define DMA6_Y_COUNT 0xFFC00D98 /* DMA Channel 6 Y Count Register */ +#define DMA6_Y_MODIFY 0xFFC00D9C /* DMA Channel 6 Y Modify Register */ +#define DMA6_CURR_DESC_PTR 0xFFC00DA0 /* DMA Channel 6 Current Descriptor Pointer Register */ +#define DMA6_CURR_ADDR 0xFFC00DA4 /* DMA Channel 6 Current Address Register */ +#define DMA6_IRQ_STATUS 0xFFC00DA8 /* DMA Channel 6 Interrupt/Status Register */ +#define DMA6_PERIPHERAL_MAP 0xFFC00DAC /* DMA Channel 6 Peripheral Map Register */ +#define DMA6_CURR_X_COUNT 0xFFC00DB0 /* DMA Channel 6 Current X Count Register */ +#define DMA6_CURR_Y_COUNT 0xFFC00DB8 /* DMA Channel 6 Current Y Count Register */ + +#define DMA7_NEXT_DESC_PTR 0xFFC00DC0 /* DMA Channel 7 Next Descriptor Pointer Register */ +#define DMA7_START_ADDR 0xFFC00DC4 /* DMA Channel 7 Start Address Register */ +#define DMA7_CONFIG 0xFFC00DC8 /* DMA Channel 7 Configuration Register */ +#define DMA7_X_COUNT 0xFFC00DD0 /* DMA Channel 7 X Count Register */ +#define DMA7_X_MODIFY 0xFFC00DD4 /* DMA Channel 7 X Modify Register */ +#define DMA7_Y_COUNT 0xFFC00DD8 /* DMA Channel 7 Y Count Register */ +#define DMA7_Y_MODIFY 0xFFC00DDC /* DMA Channel 7 Y Modify Register */ +#define DMA7_CURR_DESC_PTR 0xFFC00DE0 /* DMA Channel 7 Current Descriptor Pointer Register */ +#define DMA7_CURR_ADDR 0xFFC00DE4 /* DMA Channel 7 Current Address Register */ +#define DMA7_IRQ_STATUS 0xFFC00DE8 /* DMA Channel 7 Interrupt/Status Register */ +#define DMA7_PERIPHERAL_MAP 0xFFC00DEC /* DMA Channel 7 Peripheral Map Register */ +#define DMA7_CURR_X_COUNT 0xFFC00DF0 /* DMA Channel 7 Current X Count Register */ +#define DMA7_CURR_Y_COUNT 0xFFC00DF8 /* DMA Channel 7 Current Y Count Register */ + +#define MDMA0_D0_NEXT_DESC_PTR 0xFFC00E00 /* MemDMA0 Stream 0 Destination Next Descriptor Pointer Register */ +#define MDMA0_D0_START_ADDR 0xFFC00E04 /* MemDMA0 Stream 0 Destination Start Address Register */ +#define MDMA0_D0_CONFIG 0xFFC00E08 /* MemDMA0 Stream 0 Destination Configuration Register */ +#define MDMA0_D0_X_COUNT 0xFFC00E10 /* MemDMA0 Stream 0 Destination X Count Register */ +#define MDMA0_D0_X_MODIFY 0xFFC00E14 /* MemDMA0 Stream 0 Destination X Modify Register */ +#define MDMA0_D0_Y_COUNT 0xFFC00E18 /* MemDMA0 Stream 0 Destination Y Count Register */ +#define MDMA0_D0_Y_MODIFY 0xFFC00E1C /* MemDMA0 Stream 0 Destination Y Modify Register */ +#define MDMA0_D0_CURR_DESC_PTR 0xFFC00E20 /* MemDMA0 Stream 0 Destination Current Descriptor Pointer Register */ +#define MDMA0_D0_CURR_ADDR 0xFFC00E24 /* MemDMA0 Stream 0 Destination Current Address Register */ +#define MDMA0_D0_IRQ_STATUS 0xFFC00E28 /* MemDMA0 Stream 0 Destination Interrupt/Status Register */ +#define MDMA0_D0_PERIPHERAL_MAP 0xFFC00E2C /* MemDMA0 Stream 0 Destination Peripheral Map Register */ +#define MDMA0_D0_CURR_X_COUNT 0xFFC00E30 /* MemDMA0 Stream 0 Destination Current X Count Register */ +#define MDMA0_D0_CURR_Y_COUNT 0xFFC00E38 /* MemDMA0 Stream 0 Destination Current Y Count Register */ + +#define MDMA0_S0_NEXT_DESC_PTR 0xFFC00E40 /* MemDMA0 Stream 0 Source Next Descriptor Pointer Register */ +#define MDMA0_S0_START_ADDR 0xFFC00E44 /* MemDMA0 Stream 0 Source Start Address Register */ +#define MDMA0_S0_CONFIG 0xFFC00E48 /* MemDMA0 Stream 0 Source Configuration Register */ +#define MDMA0_S0_X_COUNT 0xFFC00E50 /* MemDMA0 Stream 0 Source X Count Register */ +#define MDMA0_S0_X_MODIFY 0xFFC00E54 /* MemDMA0 Stream 0 Source X Modify Register */ +#define MDMA0_S0_Y_COUNT 0xFFC00E58 /* MemDMA0 Stream 0 Source Y Count Register */ +#define MDMA0_S0_Y_MODIFY 0xFFC00E5C /* MemDMA0 Stream 0 Source Y Modify Register */ +#define MDMA0_S0_CURR_DESC_PTR 0xFFC00E60 /* MemDMA0 Stream 0 Source Current Descriptor Pointer Register */ +#define MDMA0_S0_CURR_ADDR 0xFFC00E64 /* MemDMA0 Stream 0 Source Current Address Register */ +#define MDMA0_S0_IRQ_STATUS 0xFFC00E68 /* MemDMA0 Stream 0 Source Interrupt/Status Register */ +#define MDMA0_S0_PERIPHERAL_MAP 0xFFC00E6C /* MemDMA0 Stream 0 Source Peripheral Map Register */ +#define MDMA0_S0_CURR_X_COUNT 0xFFC00E70 /* MemDMA0 Stream 0 Source Current X Count Register */ +#define MDMA0_S0_CURR_Y_COUNT 0xFFC00E78 /* MemDMA0 Stream 0 Source Current Y Count Register */ + +#define MDMA0_D1_NEXT_DESC_PTR 0xFFC00E80 /* MemDMA0 Stream 1 Destination Next Descriptor Pointer Register */ +#define MDMA0_D1_START_ADDR 0xFFC00E84 /* MemDMA0 Stream 1 Destination Start Address Register */ +#define MDMA0_D1_CONFIG 0xFFC00E88 /* MemDMA0 Stream 1 Destination Configuration Register */ +#define MDMA0_D1_X_COUNT 0xFFC00E90 /* MemDMA0 Stream 1 Destination X Count Register */ +#define MDMA0_D1_X_MODIFY 0xFFC00E94 /* MemDMA0 Stream 1 Destination X Modify Register */ +#define MDMA0_D1_Y_COUNT 0xFFC00E98 /* MemDMA0 Stream 1 Destination Y Count Register */ +#define MDMA0_D1_Y_MODIFY 0xFFC00E9C /* MemDMA0 Stream 1 Destination Y Modify Register */ +#define MDMA0_D1_CURR_DESC_PTR 0xFFC00EA0 /* MemDMA0 Stream 1 Destination Current Descriptor Pointer Register */ +#define MDMA0_D1_CURR_ADDR 0xFFC00EA4 /* MemDMA0 Stream 1 Destination Current Address Register */ +#define MDMA0_D1_IRQ_STATUS 0xFFC00EA8 /* MemDMA0 Stream 1 Destination Interrupt/Status Register */ +#define MDMA0_D1_PERIPHERAL_MAP 0xFFC00EAC /* MemDMA0 Stream 1 Destination Peripheral Map Register */ +#define MDMA0_D1_CURR_X_COUNT 0xFFC00EB0 /* MemDMA0 Stream 1 Destination Current X Count Register */ +#define MDMA0_D1_CURR_Y_COUNT 0xFFC00EB8 /* MemDMA0 Stream 1 Destination Current Y Count Register */ + +#define MDMA0_S1_NEXT_DESC_PTR 0xFFC00EC0 /* MemDMA0 Stream 1 Source Next Descriptor Pointer Register */ +#define MDMA0_S1_START_ADDR 0xFFC00EC4 /* MemDMA0 Stream 1 Source Start Address Register */ +#define MDMA0_S1_CONFIG 0xFFC00EC8 /* MemDMA0 Stream 1 Source Configuration Register */ +#define MDMA0_S1_X_COUNT 0xFFC00ED0 /* MemDMA0 Stream 1 Source X Count Register */ +#define MDMA0_S1_X_MODIFY 0xFFC00ED4 /* MemDMA0 Stream 1 Source X Modify Register */ +#define MDMA0_S1_Y_COUNT 0xFFC00ED8 /* MemDMA0 Stream 1 Source Y Count Register */ +#define MDMA0_S1_Y_MODIFY 0xFFC00EDC /* MemDMA0 Stream 1 Source Y Modify Register */ +#define MDMA0_S1_CURR_DESC_PTR 0xFFC00EE0 /* MemDMA0 Stream 1 Source Current Descriptor Pointer Register */ +#define MDMA0_S1_CURR_ADDR 0xFFC00EE4 /* MemDMA0 Stream 1 Source Current Address Register */ +#define MDMA0_S1_IRQ_STATUS 0xFFC00EE8 /* MemDMA0 Stream 1 Source Interrupt/Status Register */ +#define MDMA0_S1_PERIPHERAL_MAP 0xFFC00EEC /* MemDMA0 Stream 1 Source Peripheral Map Register */ +#define MDMA0_S1_CURR_X_COUNT 0xFFC00EF0 /* MemDMA0 Stream 1 Source Current X Count Register */ +#define MDMA0_S1_CURR_Y_COUNT 0xFFC00EF8 /* MemDMA0 Stream 1 Source Current Y Count Register */ + + +/* Parallel Peripheral Interface (PPI) (0xFFC01000 - 0xFFC010FF) */ +#define PPI_CONTROL 0xFFC01000 /* PPI Control Register */ +#define PPI_STATUS 0xFFC01004 /* PPI Status Register */ +#define PPI_COUNT 0xFFC01008 /* PPI Transfer Count Register */ +#define PPI_DELAY 0xFFC0100C /* PPI Delay Count Register */ +#define PPI_FRAME 0xFFC01010 /* PPI Frame Length Register */ + + +/* Two-Wire Interface 0 (0xFFC01400 - 0xFFC014FF) */ +#define TWI0_CLKDIV 0xFFC01400 /* Serial Clock Divider Register */ +#define TWI0_CONTROL 0xFFC01404 /* TWI0 Master Internal Time Reference Register */ +#define TWI0_SLAVE_CTRL 0xFFC01408 /* Slave Mode Control Register */ +#define TWI0_SLAVE_STAT 0xFFC0140C /* Slave Mode Status Register */ +#define TWI0_SLAVE_ADDR 0xFFC01410 /* Slave Mode Address Register */ +#define TWI0_MASTER_CTRL 0xFFC01414 /* Master Mode Control Register */ +#define TWI0_MASTER_STAT 0xFFC01418 /* Master Mode Status Register */ +#define TWI0_MASTER_ADDR 0xFFC0141C /* Master Mode Address Register */ +#define TWI0_INT_STAT 0xFFC01420 /* TWI0 Master Interrupt Register */ +#define TWI0_INT_MASK 0xFFC01424 /* TWI0 Master Interrupt Mask Register */ +#define TWI0_FIFO_CTRL 0xFFC01428 /* FIFO Control Register */ +#define TWI0_FIFO_STAT 0xFFC0142C /* FIFO Status Register */ +#define TWI0_XMT_DATA8 0xFFC01480 /* FIFO Transmit Data Single Byte Register */ +#define TWI0_XMT_DATA16 0xFFC01484 /* FIFO Transmit Data Double Byte Register */ +#define TWI0_RCV_DATA8 0xFFC01488 /* FIFO Receive Data Single Byte Register */ +#define TWI0_RCV_DATA16 0xFFC0148C /* FIFO Receive Data Double Byte Register */ + +#define TWI0_REGBASE TWI0_CLKDIV + +/* the following are for backwards compatibility */ +#define TWI0_PRESCALE TWI0_CONTROL +#define TWI0_INT_SRC TWI0_INT_STAT +#define TWI0_INT_ENABLE TWI0_INT_MASK + + +/* General-Purpose Ports (0xFFC01500 - 0xFFC015FF) */ + +/* GPIO Port C Register Names */ +#define GPIO_C_CNFG 0xFFC01500 /* GPIO Pin Port C Configuration Register */ +#define GPIO_C_D 0xFFC01510 /* GPIO Pin Port C Data Register */ +#define GPIO_C_C 0xFFC01520 /* Clear GPIO Pin Port C Register */ +#define GPIO_C_S 0xFFC01530 /* Set GPIO Pin Port C Register */ +#define GPIO_C_T 0xFFC01540 /* Toggle GPIO Pin Port C Register */ +#define GPIO_C_DIR 0xFFC01550 /* GPIO Pin Port C Direction Register */ +#define GPIO_C_INEN 0xFFC01560 /* GPIO Pin Port C Input Enable Register */ + +/* GPIO Port D Register Names */ +#define GPIO_D_CNFG 0xFFC01504 /* GPIO Pin Port D Configuration Register */ +#define GPIO_D_D 0xFFC01514 /* GPIO Pin Port D Data Register */ +#define GPIO_D_C 0xFFC01524 /* Clear GPIO Pin Port D Register */ +#define GPIO_D_S 0xFFC01534 /* Set GPIO Pin Port D Register */ +#define GPIO_D_T 0xFFC01544 /* Toggle GPIO Pin Port D Register */ +#define GPIO_D_DIR 0xFFC01554 /* GPIO Pin Port D Direction Register */ +#define GPIO_D_INEN 0xFFC01564 /* GPIO Pin Port D Input Enable Register */ + +/* GPIO Port E Register Names */ +#define GPIO_E_CNFG 0xFFC01508 /* GPIO Pin Port E Configuration Register */ +#define GPIO_E_D 0xFFC01518 /* GPIO Pin Port E Data Register */ +#define GPIO_E_C 0xFFC01528 /* Clear GPIO Pin Port E Register */ +#define GPIO_E_S 0xFFC01538 /* Set GPIO Pin Port E Register */ +#define GPIO_E_T 0xFFC01548 /* Toggle GPIO Pin Port E Register */ +#define GPIO_E_DIR 0xFFC01558 /* GPIO Pin Port E Direction Register */ +#define GPIO_E_INEN 0xFFC01568 /* GPIO Pin Port E Input Enable Register */ + +/* DMA Controller 1 Traffic Control Registers (0xFFC01B00 - 0xFFC01BFF) */ + +#define DMAC1_TC_PER 0xFFC01B0C /* DMA Controller 1 Traffic Control Periods Register */ +#define DMAC1_TC_CNT 0xFFC01B10 /* DMA Controller 1 Traffic Control Current Counts Register */ + +/* Alternate deprecated register names (below) provided for backwards code compatibility */ +#define DMA1_TCPER DMAC1_TC_PER +#define DMA1_TCCNT DMAC1_TC_CNT + + +/* DMA Controller 1 (0xFFC01C00 - 0xFFC01FFF) */ +#define DMA8_NEXT_DESC_PTR 0xFFC01C00 /* DMA Channel 8 Next Descriptor Pointer Register */ +#define DMA8_START_ADDR 0xFFC01C04 /* DMA Channel 8 Start Address Register */ +#define DMA8_CONFIG 0xFFC01C08 /* DMA Channel 8 Configuration Register */ +#define DMA8_X_COUNT 0xFFC01C10 /* DMA Channel 8 X Count Register */ +#define DMA8_X_MODIFY 0xFFC01C14 /* DMA Channel 8 X Modify Register */ +#define DMA8_Y_COUNT 0xFFC01C18 /* DMA Channel 8 Y Count Register */ +#define DMA8_Y_MODIFY 0xFFC01C1C /* DMA Channel 8 Y Modify Register */ +#define DMA8_CURR_DESC_PTR 0xFFC01C20 /* DMA Channel 8 Current Descriptor Pointer Register */ +#define DMA8_CURR_ADDR 0xFFC01C24 /* DMA Channel 8 Current Address Register */ +#define DMA8_IRQ_STATUS 0xFFC01C28 /* DMA Channel 8 Interrupt/Status Register */ +#define DMA8_PERIPHERAL_MAP 0xFFC01C2C /* DMA Channel 8 Peripheral Map Register */ +#define DMA8_CURR_X_COUNT 0xFFC01C30 /* DMA Channel 8 Current X Count Register */ +#define DMA8_CURR_Y_COUNT 0xFFC01C38 /* DMA Channel 8 Current Y Count Register */ + +#define DMA9_NEXT_DESC_PTR 0xFFC01C40 /* DMA Channel 9 Next Descriptor Pointer Register */ +#define DMA9_START_ADDR 0xFFC01C44 /* DMA Channel 9 Start Address Register */ +#define DMA9_CONFIG 0xFFC01C48 /* DMA Channel 9 Configuration Register */ +#define DMA9_X_COUNT 0xFFC01C50 /* DMA Channel 9 X Count Register */ +#define DMA9_X_MODIFY 0xFFC01C54 /* DMA Channel 9 X Modify Register */ +#define DMA9_Y_COUNT 0xFFC01C58 /* DMA Channel 9 Y Count Register */ +#define DMA9_Y_MODIFY 0xFFC01C5C /* DMA Channel 9 Y Modify Register */ +#define DMA9_CURR_DESC_PTR 0xFFC01C60 /* DMA Channel 9 Current Descriptor Pointer Register */ +#define DMA9_CURR_ADDR 0xFFC01C64 /* DMA Channel 9 Current Address Register */ +#define DMA9_IRQ_STATUS 0xFFC01C68 /* DMA Channel 9 Interrupt/Status Register */ +#define DMA9_PERIPHERAL_MAP 0xFFC01C6C /* DMA Channel 9 Peripheral Map Register */ +#define DMA9_CURR_X_COUNT 0xFFC01C70 /* DMA Channel 9 Current X Count Register */ +#define DMA9_CURR_Y_COUNT 0xFFC01C78 /* DMA Channel 9 Current Y Count Register */ + +#define DMA10_NEXT_DESC_PTR 0xFFC01C80 /* DMA Channel 10 Next Descriptor Pointer Register */ +#define DMA10_START_ADDR 0xFFC01C84 /* DMA Channel 10 Start Address Register */ +#define DMA10_CONFIG 0xFFC01C88 /* DMA Channel 10 Configuration Register */ +#define DMA10_X_COUNT 0xFFC01C90 /* DMA Channel 10 X Count Register */ +#define DMA10_X_MODIFY 0xFFC01C94 /* DMA Channel 10 X Modify Register */ +#define DMA10_Y_COUNT 0xFFC01C98 /* DMA Channel 10 Y Count Register */ +#define DMA10_Y_MODIFY 0xFFC01C9C /* DMA Channel 10 Y Modify Register */ +#define DMA10_CURR_DESC_PTR 0xFFC01CA0 /* DMA Channel 10 Current Descriptor Pointer Register */ +#define DMA10_CURR_ADDR 0xFFC01CA4 /* DMA Channel 10 Current Address Register */ +#define DMA10_IRQ_STATUS 0xFFC01CA8 /* DMA Channel 10 Interrupt/Status Register */ +#define DMA10_PERIPHERAL_MAP 0xFFC01CAC /* DMA Channel 10 Peripheral Map Register */ +#define DMA10_CURR_X_COUNT 0xFFC01CB0 /* DMA Channel 10 Current X Count Register */ +#define DMA10_CURR_Y_COUNT 0xFFC01CB8 /* DMA Channel 10 Current Y Count Register */ + +#define DMA11_NEXT_DESC_PTR 0xFFC01CC0 /* DMA Channel 11 Next Descriptor Pointer Register */ +#define DMA11_START_ADDR 0xFFC01CC4 /* DMA Channel 11 Start Address Register */ +#define DMA11_CONFIG 0xFFC01CC8 /* DMA Channel 11 Configuration Register */ +#define DMA11_X_COUNT 0xFFC01CD0 /* DMA Channel 11 X Count Register */ +#define DMA11_X_MODIFY 0xFFC01CD4 /* DMA Channel 11 X Modify Register */ +#define DMA11_Y_COUNT 0xFFC01CD8 /* DMA Channel 11 Y Count Register */ +#define DMA11_Y_MODIFY 0xFFC01CDC /* DMA Channel 11 Y Modify Register */ +#define DMA11_CURR_DESC_PTR 0xFFC01CE0 /* DMA Channel 11 Current Descriptor Pointer Register */ +#define DMA11_CURR_ADDR 0xFFC01CE4 /* DMA Channel 11 Current Address Register */ +#define DMA11_IRQ_STATUS 0xFFC01CE8 /* DMA Channel 11 Interrupt/Status Register */ +#define DMA11_PERIPHERAL_MAP 0xFFC01CEC /* DMA Channel 11 Peripheral Map Register */ +#define DMA11_CURR_X_COUNT 0xFFC01CF0 /* DMA Channel 11 Current X Count Register */ +#define DMA11_CURR_Y_COUNT 0xFFC01CF8 /* DMA Channel 11 Current Y Count Register */ + +#define DMA12_NEXT_DESC_PTR 0xFFC01D00 /* DMA Channel 12 Next Descriptor Pointer Register */ +#define DMA12_START_ADDR 0xFFC01D04 /* DMA Channel 12 Start Address Register */ +#define DMA12_CONFIG 0xFFC01D08 /* DMA Channel 12 Configuration Register */ +#define DMA12_X_COUNT 0xFFC01D10 /* DMA Channel 12 X Count Register */ +#define DMA12_X_MODIFY 0xFFC01D14 /* DMA Channel 12 X Modify Register */ +#define DMA12_Y_COUNT 0xFFC01D18 /* DMA Channel 12 Y Count Register */ +#define DMA12_Y_MODIFY 0xFFC01D1C /* DMA Channel 12 Y Modify Register */ +#define DMA12_CURR_DESC_PTR 0xFFC01D20 /* DMA Channel 12 Current Descriptor Pointer Register */ +#define DMA12_CURR_ADDR 0xFFC01D24 /* DMA Channel 12 Current Address Register */ +#define DMA12_IRQ_STATUS 0xFFC01D28 /* DMA Channel 12 Interrupt/Status Register */ +#define DMA12_PERIPHERAL_MAP 0xFFC01D2C /* DMA Channel 12 Peripheral Map Register */ +#define DMA12_CURR_X_COUNT 0xFFC01D30 /* DMA Channel 12 Current X Count Register */ +#define DMA12_CURR_Y_COUNT 0xFFC01D38 /* DMA Channel 12 Current Y Count Register */ + +#define DMA13_NEXT_DESC_PTR 0xFFC01D40 /* DMA Channel 13 Next Descriptor Pointer Register */ +#define DMA13_START_ADDR 0xFFC01D44 /* DMA Channel 13 Start Address Register */ +#define DMA13_CONFIG 0xFFC01D48 /* DMA Channel 13 Configuration Register */ +#define DMA13_X_COUNT 0xFFC01D50 /* DMA Channel 13 X Count Register */ +#define DMA13_X_MODIFY 0xFFC01D54 /* DMA Channel 13 X Modify Register */ +#define DMA13_Y_COUNT 0xFFC01D58 /* DMA Channel 13 Y Count Register */ +#define DMA13_Y_MODIFY 0xFFC01D5C /* DMA Channel 13 Y Modify Register */ +#define DMA13_CURR_DESC_PTR 0xFFC01D60 /* DMA Channel 13 Current Descriptor Pointer Register */ +#define DMA13_CURR_ADDR 0xFFC01D64 /* DMA Channel 13 Current Address Register */ +#define DMA13_IRQ_STATUS 0xFFC01D68 /* DMA Channel 13 Interrupt/Status Register */ +#define DMA13_PERIPHERAL_MAP 0xFFC01D6C /* DMA Channel 13 Peripheral Map Register */ +#define DMA13_CURR_X_COUNT 0xFFC01D70 /* DMA Channel 13 Current X Count Register */ +#define DMA13_CURR_Y_COUNT 0xFFC01D78 /* DMA Channel 13 Current Y Count Register */ + +#define DMA14_NEXT_DESC_PTR 0xFFC01D80 /* DMA Channel 14 Next Descriptor Pointer Register */ +#define DMA14_START_ADDR 0xFFC01D84 /* DMA Channel 14 Start Address Register */ +#define DMA14_CONFIG 0xFFC01D88 /* DMA Channel 14 Configuration Register */ +#define DMA14_X_COUNT 0xFFC01D90 /* DMA Channel 14 X Count Register */ +#define DMA14_X_MODIFY 0xFFC01D94 /* DMA Channel 14 X Modify Register */ +#define DMA14_Y_COUNT 0xFFC01D98 /* DMA Channel 14 Y Count Register */ +#define DMA14_Y_MODIFY 0xFFC01D9C /* DMA Channel 14 Y Modify Register */ +#define DMA14_CURR_DESC_PTR 0xFFC01DA0 /* DMA Channel 14 Current Descriptor Pointer Register */ +#define DMA14_CURR_ADDR 0xFFC01DA4 /* DMA Channel 14 Current Address Register */ +#define DMA14_IRQ_STATUS 0xFFC01DA8 /* DMA Channel 14 Interrupt/Status Register */ +#define DMA14_PERIPHERAL_MAP 0xFFC01DAC /* DMA Channel 14 Peripheral Map Register */ +#define DMA14_CURR_X_COUNT 0xFFC01DB0 /* DMA Channel 14 Current X Count Register */ +#define DMA14_CURR_Y_COUNT 0xFFC01DB8 /* DMA Channel 14 Current Y Count Register */ + +#define DMA15_NEXT_DESC_PTR 0xFFC01DC0 /* DMA Channel 15 Next Descriptor Pointer Register */ +#define DMA15_START_ADDR 0xFFC01DC4 /* DMA Channel 15 Start Address Register */ +#define DMA15_CONFIG 0xFFC01DC8 /* DMA Channel 15 Configuration Register */ +#define DMA15_X_COUNT 0xFFC01DD0 /* DMA Channel 15 X Count Register */ +#define DMA15_X_MODIFY 0xFFC01DD4 /* DMA Channel 15 X Modify Register */ +#define DMA15_Y_COUNT 0xFFC01DD8 /* DMA Channel 15 Y Count Register */ +#define DMA15_Y_MODIFY 0xFFC01DDC /* DMA Channel 15 Y Modify Register */ +#define DMA15_CURR_DESC_PTR 0xFFC01DE0 /* DMA Channel 15 Current Descriptor Pointer Register */ +#define DMA15_CURR_ADDR 0xFFC01DE4 /* DMA Channel 15 Current Address Register */ +#define DMA15_IRQ_STATUS 0xFFC01DE8 /* DMA Channel 15 Interrupt/Status Register */ +#define DMA15_PERIPHERAL_MAP 0xFFC01DEC /* DMA Channel 15 Peripheral Map Register */ +#define DMA15_CURR_X_COUNT 0xFFC01DF0 /* DMA Channel 15 Current X Count Register */ +#define DMA15_CURR_Y_COUNT 0xFFC01DF8 /* DMA Channel 15 Current Y Count Register */ + +#define DMA16_NEXT_DESC_PTR 0xFFC01E00 /* DMA Channel 16 Next Descriptor Pointer Register */ +#define DMA16_START_ADDR 0xFFC01E04 /* DMA Channel 16 Start Address Register */ +#define DMA16_CONFIG 0xFFC01E08 /* DMA Channel 16 Configuration Register */ +#define DMA16_X_COUNT 0xFFC01E10 /* DMA Channel 16 X Count Register */ +#define DMA16_X_MODIFY 0xFFC01E14 /* DMA Channel 16 X Modify Register */ +#define DMA16_Y_COUNT 0xFFC01E18 /* DMA Channel 16 Y Count Register */ +#define DMA16_Y_MODIFY 0xFFC01E1C /* DMA Channel 16 Y Modify Register */ +#define DMA16_CURR_DESC_PTR 0xFFC01E20 /* DMA Channel 16 Current Descriptor Pointer Register */ +#define DMA16_CURR_ADDR 0xFFC01E24 /* DMA Channel 16 Current Address Register */ +#define DMA16_IRQ_STATUS 0xFFC01E28 /* DMA Channel 16 Interrupt/Status Register */ +#define DMA16_PERIPHERAL_MAP 0xFFC01E2C /* DMA Channel 16 Peripheral Map Register */ +#define DMA16_CURR_X_COUNT 0xFFC01E30 /* DMA Channel 16 Current X Count Register */ +#define DMA16_CURR_Y_COUNT 0xFFC01E38 /* DMA Channel 16 Current Y Count Register */ + +#define DMA17_NEXT_DESC_PTR 0xFFC01E40 /* DMA Channel 17 Next Descriptor Pointer Register */ +#define DMA17_START_ADDR 0xFFC01E44 /* DMA Channel 17 Start Address Register */ +#define DMA17_CONFIG 0xFFC01E48 /* DMA Channel 17 Configuration Register */ +#define DMA17_X_COUNT 0xFFC01E50 /* DMA Channel 17 X Count Register */ +#define DMA17_X_MODIFY 0xFFC01E54 /* DMA Channel 17 X Modify Register */ +#define DMA17_Y_COUNT 0xFFC01E58 /* DMA Channel 17 Y Count Register */ +#define DMA17_Y_MODIFY 0xFFC01E5C /* DMA Channel 17 Y Modify Register */ +#define DMA17_CURR_DESC_PTR 0xFFC01E60 /* DMA Channel 17 Current Descriptor Pointer Register */ +#define DMA17_CURR_ADDR 0xFFC01E64 /* DMA Channel 17 Current Address Register */ +#define DMA17_IRQ_STATUS 0xFFC01E68 /* DMA Channel 17 Interrupt/Status Register */ +#define DMA17_PERIPHERAL_MAP 0xFFC01E6C /* DMA Channel 17 Peripheral Map Register */ +#define DMA17_CURR_X_COUNT 0xFFC01E70 /* DMA Channel 17 Current X Count Register */ +#define DMA17_CURR_Y_COUNT 0xFFC01E78 /* DMA Channel 17 Current Y Count Register */ + +#define DMA18_NEXT_DESC_PTR 0xFFC01E80 /* DMA Channel 18 Next Descriptor Pointer Register */ +#define DMA18_START_ADDR 0xFFC01E84 /* DMA Channel 18 Start Address Register */ +#define DMA18_CONFIG 0xFFC01E88 /* DMA Channel 18 Configuration Register */ +#define DMA18_X_COUNT 0xFFC01E90 /* DMA Channel 18 X Count Register */ +#define DMA18_X_MODIFY 0xFFC01E94 /* DMA Channel 18 X Modify Register */ +#define DMA18_Y_COUNT 0xFFC01E98 /* DMA Channel 18 Y Count Register */ +#define DMA18_Y_MODIFY 0xFFC01E9C /* DMA Channel 18 Y Modify Register */ +#define DMA18_CURR_DESC_PTR 0xFFC01EA0 /* DMA Channel 18 Current Descriptor Pointer Register */ +#define DMA18_CURR_ADDR 0xFFC01EA4 /* DMA Channel 18 Current Address Register */ +#define DMA18_IRQ_STATUS 0xFFC01EA8 /* DMA Channel 18 Interrupt/Status Register */ +#define DMA18_PERIPHERAL_MAP 0xFFC01EAC /* DMA Channel 18 Peripheral Map Register */ +#define DMA18_CURR_X_COUNT 0xFFC01EB0 /* DMA Channel 18 Current X Count Register */ +#define DMA18_CURR_Y_COUNT 0xFFC01EB8 /* DMA Channel 18 Current Y Count Register */ + +#define DMA19_NEXT_DESC_PTR 0xFFC01EC0 /* DMA Channel 19 Next Descriptor Pointer Register */ +#define DMA19_START_ADDR 0xFFC01EC4 /* DMA Channel 19 Start Address Register */ +#define DMA19_CONFIG 0xFFC01EC8 /* DMA Channel 19 Configuration Register */ +#define DMA19_X_COUNT 0xFFC01ED0 /* DMA Channel 19 X Count Register */ +#define DMA19_X_MODIFY 0xFFC01ED4 /* DMA Channel 19 X Modify Register */ +#define DMA19_Y_COUNT 0xFFC01ED8 /* DMA Channel 19 Y Count Register */ +#define DMA19_Y_MODIFY 0xFFC01EDC /* DMA Channel 19 Y Modify Register */ +#define DMA19_CURR_DESC_PTR 0xFFC01EE0 /* DMA Channel 19 Current Descriptor Pointer Register */ +#define DMA19_CURR_ADDR 0xFFC01EE4 /* DMA Channel 19 Current Address Register */ +#define DMA19_IRQ_STATUS 0xFFC01EE8 /* DMA Channel 19 Interrupt/Status Register */ +#define DMA19_PERIPHERAL_MAP 0xFFC01EEC /* DMA Channel 19 Peripheral Map Register */ +#define DMA19_CURR_X_COUNT 0xFFC01EF0 /* DMA Channel 19 Current X Count Register */ +#define DMA19_CURR_Y_COUNT 0xFFC01EF8 /* DMA Channel 19 Current Y Count Register */ + +#define MDMA1_D0_NEXT_DESC_PTR 0xFFC01F00 /* MemDMA1 Stream 0 Destination Next Descriptor Pointer Register */ +#define MDMA1_D0_START_ADDR 0xFFC01F04 /* MemDMA1 Stream 0 Destination Start Address Register */ +#define MDMA1_D0_CONFIG 0xFFC01F08 /* MemDMA1 Stream 0 Destination Configuration Register */ +#define MDMA1_D0_X_COUNT 0xFFC01F10 /* MemDMA1 Stream 0 Destination X Count Register */ +#define MDMA1_D0_X_MODIFY 0xFFC01F14 /* MemDMA1 Stream 0 Destination X Modify Register */ +#define MDMA1_D0_Y_COUNT 0xFFC01F18 /* MemDMA1 Stream 0 Destination Y Count Register */ +#define MDMA1_D0_Y_MODIFY 0xFFC01F1C /* MemDMA1 Stream 0 Destination Y Modify Register */ +#define MDMA1_D0_CURR_DESC_PTR 0xFFC01F20 /* MemDMA1 Stream 0 Destination Current Descriptor Pointer Register */ +#define MDMA1_D0_CURR_ADDR 0xFFC01F24 /* MemDMA1 Stream 0 Destination Current Address Register */ +#define MDMA1_D0_IRQ_STATUS 0xFFC01F28 /* MemDMA1 Stream 0 Destination Interrupt/Status Register */ +#define MDMA1_D0_PERIPHERAL_MAP 0xFFC01F2C /* MemDMA1 Stream 0 Destination Peripheral Map Register */ +#define MDMA1_D0_CURR_X_COUNT 0xFFC01F30 /* MemDMA1 Stream 0 Destination Current X Count Register */ +#define MDMA1_D0_CURR_Y_COUNT 0xFFC01F38 /* MemDMA1 Stream 0 Destination Current Y Count Register */ + +#define MDMA1_S0_NEXT_DESC_PTR 0xFFC01F40 /* MemDMA1 Stream 0 Source Next Descriptor Pointer Register */ +#define MDMA1_S0_START_ADDR 0xFFC01F44 /* MemDMA1 Stream 0 Source Start Address Register */ +#define MDMA1_S0_CONFIG 0xFFC01F48 /* MemDMA1 Stream 0 Source Configuration Register */ +#define MDMA1_S0_X_COUNT 0xFFC01F50 /* MemDMA1 Stream 0 Source X Count Register */ +#define MDMA1_S0_X_MODIFY 0xFFC01F54 /* MemDMA1 Stream 0 Source X Modify Register */ +#define MDMA1_S0_Y_COUNT 0xFFC01F58 /* MemDMA1 Stream 0 Source Y Count Register */ +#define MDMA1_S0_Y_MODIFY 0xFFC01F5C /* MemDMA1 Stream 0 Source Y Modify Register */ +#define MDMA1_S0_CURR_DESC_PTR 0xFFC01F60 /* MemDMA1 Stream 0 Source Current Descriptor Pointer Register */ +#define MDMA1_S0_CURR_ADDR 0xFFC01F64 /* MemDMA1 Stream 0 Source Current Address Register */ +#define MDMA1_S0_IRQ_STATUS 0xFFC01F68 /* MemDMA1 Stream 0 Source Interrupt/Status Register */ +#define MDMA1_S0_PERIPHERAL_MAP 0xFFC01F6C /* MemDMA1 Stream 0 Source Peripheral Map Register */ +#define MDMA1_S0_CURR_X_COUNT 0xFFC01F70 /* MemDMA1 Stream 0 Source Current X Count Register */ +#define MDMA1_S0_CURR_Y_COUNT 0xFFC01F78 /* MemDMA1 Stream 0 Source Current Y Count Register */ + +#define MDMA1_D1_NEXT_DESC_PTR 0xFFC01F80 /* MemDMA1 Stream 1 Destination Next Descriptor Pointer Register */ +#define MDMA1_D1_START_ADDR 0xFFC01F84 /* MemDMA1 Stream 1 Destination Start Address Register */ +#define MDMA1_D1_CONFIG 0xFFC01F88 /* MemDMA1 Stream 1 Destination Configuration Register */ +#define MDMA1_D1_X_COUNT 0xFFC01F90 /* MemDMA1 Stream 1 Destination X Count Register */ +#define MDMA1_D1_X_MODIFY 0xFFC01F94 /* MemDMA1 Stream 1 Destination X Modify Register */ +#define MDMA1_D1_Y_COUNT 0xFFC01F98 /* MemDMA1 Stream 1 Destination Y Count Register */ +#define MDMA1_D1_Y_MODIFY 0xFFC01F9C /* MemDMA1 Stream 1 Destination Y Modify Register */ +#define MDMA1_D1_CURR_DESC_PTR 0xFFC01FA0 /* MemDMA1 Stream 1 Destination Current Descriptor Pointer Register */ +#define MDMA1_D1_CURR_ADDR 0xFFC01FA4 /* MemDMA1 Stream 1 Destination Current Address Register */ +#define MDMA1_D1_IRQ_STATUS 0xFFC01FA8 /* MemDMA1 Stream 1 Destination Interrupt/Status Register */ +#define MDMA1_D1_PERIPHERAL_MAP 0xFFC01FAC /* MemDMA1 Stream 1 Destination Peripheral Map Register */ +#define MDMA1_D1_CURR_X_COUNT 0xFFC01FB0 /* MemDMA1 Stream 1 Destination Current X Count Register */ +#define MDMA1_D1_CURR_Y_COUNT 0xFFC01FB8 /* MemDMA1 Stream 1 Destination Current Y Count Register */ + +#define MDMA1_S1_NEXT_DESC_PTR 0xFFC01FC0 /* MemDMA1 Stream 1 Source Next Descriptor Pointer Register */ +#define MDMA1_S1_START_ADDR 0xFFC01FC4 /* MemDMA1 Stream 1 Source Start Address Register */ +#define MDMA1_S1_CONFIG 0xFFC01FC8 /* MemDMA1 Stream 1 Source Configuration Register */ +#define MDMA1_S1_X_COUNT 0xFFC01FD0 /* MemDMA1 Stream 1 Source X Count Register */ +#define MDMA1_S1_X_MODIFY 0xFFC01FD4 /* MemDMA1 Stream 1 Source X Modify Register */ +#define MDMA1_S1_Y_COUNT 0xFFC01FD8 /* MemDMA1 Stream 1 Source Y Count Register */ +#define MDMA1_S1_Y_MODIFY 0xFFC01FDC /* MemDMA1 Stream 1 Source Y Modify Register */ +#define MDMA1_S1_CURR_DESC_PTR 0xFFC01FE0 /* MemDMA1 Stream 1 Source Current Descriptor Pointer Register */ +#define MDMA1_S1_CURR_ADDR 0xFFC01FE4 /* MemDMA1 Stream 1 Source Current Address Register */ +#define MDMA1_S1_IRQ_STATUS 0xFFC01FE8 /* MemDMA1 Stream 1 Source Interrupt/Status Register */ +#define MDMA1_S1_PERIPHERAL_MAP 0xFFC01FEC /* MemDMA1 Stream 1 Source Peripheral Map Register */ +#define MDMA1_S1_CURR_X_COUNT 0xFFC01FF0 /* MemDMA1 Stream 1 Source Current X Count Register */ +#define MDMA1_S1_CURR_Y_COUNT 0xFFC01FF8 /* MemDMA1 Stream 1 Source Current Y Count Register */ + + +/* UART1 Controller (0xFFC02000 - 0xFFC020FF) */ +#define UART1_THR 0xFFC02000 /* Transmit Holding register */ +#define UART1_RBR 0xFFC02000 /* Receive Buffer register */ +#define UART1_DLL 0xFFC02000 /* Divisor Latch (Low-Byte) */ +#define UART1_IER 0xFFC02004 /* Interrupt Enable Register */ +#define UART1_DLH 0xFFC02004 /* Divisor Latch (High-Byte) */ +#define UART1_IIR 0xFFC02008 /* Interrupt Identification Register */ +#define UART1_LCR 0xFFC0200C /* Line Control Register */ +#define UART1_MCR 0xFFC02010 /* Modem Control Register */ +#define UART1_LSR 0xFFC02014 /* Line Status Register */ +#define UART1_SCR 0xFFC0201C /* SCR Scratch Register */ +#define UART1_GCTL 0xFFC02024 /* Global Control Register */ + + +/* UART2 Controller (0xFFC02100 - 0xFFC021FF) */ +#define UART2_THR 0xFFC02100 /* Transmit Holding register */ +#define UART2_RBR 0xFFC02100 /* Receive Buffer register */ +#define UART2_DLL 0xFFC02100 /* Divisor Latch (Low-Byte) */ +#define UART2_IER 0xFFC02104 /* Interrupt Enable Register */ +#define UART2_DLH 0xFFC02104 /* Divisor Latch (High-Byte) */ +#define UART2_IIR 0xFFC02108 /* Interrupt Identification Register */ +#define UART2_LCR 0xFFC0210C /* Line Control Register */ +#define UART2_MCR 0xFFC02110 /* Modem Control Register */ +#define UART2_LSR 0xFFC02114 /* Line Status Register */ +#define UART2_SCR 0xFFC0211C /* SCR Scratch Register */ +#define UART2_GCTL 0xFFC02124 /* Global Control Register */ + + +/* Two-Wire Interface 1 (0xFFC02200 - 0xFFC022FF) */ +#define TWI1_CLKDIV 0xFFC02200 /* Serial Clock Divider Register */ +#define TWI1_CONTROL 0xFFC02204 /* TWI1 Master Internal Time Reference Register */ +#define TWI1_SLAVE_CTRL 0xFFC02208 /* Slave Mode Control Register */ +#define TWI1_SLAVE_STAT 0xFFC0220C /* Slave Mode Status Register */ +#define TWI1_SLAVE_ADDR 0xFFC02210 /* Slave Mode Address Register */ +#define TWI1_MASTER_CTRL 0xFFC02214 /* Master Mode Control Register */ +#define TWI1_MASTER_STAT 0xFFC02218 /* Master Mode Status Register */ +#define TWI1_MASTER_ADDR 0xFFC0221C /* Master Mode Address Register */ +#define TWI1_INT_STAT 0xFFC02220 /* TWI1 Master Interrupt Register */ +#define TWI1_INT_MASK 0xFFC02224 /* TWI1 Master Interrupt Mask Register */ +#define TWI1_FIFO_CTRL 0xFFC02228 /* FIFO Control Register */ +#define TWI1_FIFO_STAT 0xFFC0222C /* FIFO Status Register */ +#define TWI1_XMT_DATA8 0xFFC02280 /* FIFO Transmit Data Single Byte Register */ +#define TWI1_XMT_DATA16 0xFFC02284 /* FIFO Transmit Data Double Byte Register */ +#define TWI1_RCV_DATA8 0xFFC02288 /* FIFO Receive Data Single Byte Register */ +#define TWI1_RCV_DATA16 0xFFC0228C /* FIFO Receive Data Double Byte Register */ +#define TWI1_REGBASE TWI1_CLKDIV + + +/* the following are for backwards compatibility */ +#define TWI1_PRESCALE TWI1_CONTROL +#define TWI1_INT_SRC TWI1_INT_STAT +#define TWI1_INT_ENABLE TWI1_INT_MASK + + +/* SPI1 Controller (0xFFC02300 - 0xFFC023FF) */ +#define SPI1_CTL 0xFFC02300 /* SPI1 Control Register */ +#define SPI1_FLG 0xFFC02304 /* SPI1 Flag register */ +#define SPI1_STAT 0xFFC02308 /* SPI1 Status register */ +#define SPI1_TDBR 0xFFC0230C /* SPI1 Transmit Data Buffer Register */ +#define SPI1_RDBR 0xFFC02310 /* SPI1 Receive Data Buffer Register */ +#define SPI1_BAUD 0xFFC02314 /* SPI1 Baud rate Register */ +#define SPI1_SHADOW 0xFFC02318 /* SPI1_RDBR Shadow Register */ +#define SPI1_REGBASE SPI1_CTL + +/* SPI2 Controller (0xFFC02400 - 0xFFC024FF) */ +#define SPI2_CTL 0xFFC02400 /* SPI2 Control Register */ +#define SPI2_FLG 0xFFC02404 /* SPI2 Flag register */ +#define SPI2_STAT 0xFFC02408 /* SPI2 Status register */ +#define SPI2_TDBR 0xFFC0240C /* SPI2 Transmit Data Buffer Register */ +#define SPI2_RDBR 0xFFC02410 /* SPI2 Receive Data Buffer Register */ +#define SPI2_BAUD 0xFFC02414 /* SPI2 Baud rate Register */ +#define SPI2_SHADOW 0xFFC02418 /* SPI2_RDBR Shadow Register */ +#define SPI2_REGBASE SPI2_CTL + +/* SPORT2 Controller (0xFFC02500 - 0xFFC025FF) */ +#define SPORT2_TCR1 0xFFC02500 /* SPORT2 Transmit Configuration 1 Register */ +#define SPORT2_TCR2 0xFFC02504 /* SPORT2 Transmit Configuration 2 Register */ +#define SPORT2_TCLKDIV 0xFFC02508 /* SPORT2 Transmit Clock Divider */ +#define SPORT2_TFSDIV 0xFFC0250C /* SPORT2 Transmit Frame Sync Divider */ +#define SPORT2_TX 0xFFC02510 /* SPORT2 TX Data Register */ +#define SPORT2_RX 0xFFC02518 /* SPORT2 RX Data Register */ +#define SPORT2_RCR1 0xFFC02520 /* SPORT2 Transmit Configuration 1 Register */ +#define SPORT2_RCR2 0xFFC02524 /* SPORT2 Transmit Configuration 2 Register */ +#define SPORT2_RCLKDIV 0xFFC02528 /* SPORT2 Receive Clock Divider */ +#define SPORT2_RFSDIV 0xFFC0252C /* SPORT2 Receive Frame Sync Divider */ +#define SPORT2_STAT 0xFFC02530 /* SPORT2 Status Register */ +#define SPORT2_CHNL 0xFFC02534 /* SPORT2 Current Channel Register */ +#define SPORT2_MCMC1 0xFFC02538 /* SPORT2 Multi-Channel Configuration Register 1 */ +#define SPORT2_MCMC2 0xFFC0253C /* SPORT2 Multi-Channel Configuration Register 2 */ +#define SPORT2_MTCS0 0xFFC02540 /* SPORT2 Multi-Channel Transmit Select Register 0 */ +#define SPORT2_MTCS1 0xFFC02544 /* SPORT2 Multi-Channel Transmit Select Register 1 */ +#define SPORT2_MTCS2 0xFFC02548 /* SPORT2 Multi-Channel Transmit Select Register 2 */ +#define SPORT2_MTCS3 0xFFC0254C /* SPORT2 Multi-Channel Transmit Select Register 3 */ +#define SPORT2_MRCS0 0xFFC02550 /* SPORT2 Multi-Channel Receive Select Register 0 */ +#define SPORT2_MRCS1 0xFFC02554 /* SPORT2 Multi-Channel Receive Select Register 1 */ +#define SPORT2_MRCS2 0xFFC02558 /* SPORT2 Multi-Channel Receive Select Register 2 */ +#define SPORT2_MRCS3 0xFFC0255C /* SPORT2 Multi-Channel Receive Select Register 3 */ + + +/* SPORT3 Controller (0xFFC02600 - 0xFFC026FF) */ +#define SPORT3_TCR1 0xFFC02600 /* SPORT3 Transmit Configuration 1 Register */ +#define SPORT3_TCR2 0xFFC02604 /* SPORT3 Transmit Configuration 2 Register */ +#define SPORT3_TCLKDIV 0xFFC02608 /* SPORT3 Transmit Clock Divider */ +#define SPORT3_TFSDIV 0xFFC0260C /* SPORT3 Transmit Frame Sync Divider */ +#define SPORT3_TX 0xFFC02610 /* SPORT3 TX Data Register */ +#define SPORT3_RX 0xFFC02618 /* SPORT3 RX Data Register */ +#define SPORT3_RCR1 0xFFC02620 /* SPORT3 Transmit Configuration 1 Register */ +#define SPORT3_RCR2 0xFFC02624 /* SPORT3 Transmit Configuration 2 Register */ +#define SPORT3_RCLKDIV 0xFFC02628 /* SPORT3 Receive Clock Divider */ +#define SPORT3_RFSDIV 0xFFC0262C /* SPORT3 Receive Frame Sync Divider */ +#define SPORT3_STAT 0xFFC02630 /* SPORT3 Status Register */ +#define SPORT3_CHNL 0xFFC02634 /* SPORT3 Current Channel Register */ +#define SPORT3_MCMC1 0xFFC02638 /* SPORT3 Multi-Channel Configuration Register 1 */ +#define SPORT3_MCMC2 0xFFC0263C /* SPORT3 Multi-Channel Configuration Register 2 */ +#define SPORT3_MTCS0 0xFFC02640 /* SPORT3 Multi-Channel Transmit Select Register 0 */ +#define SPORT3_MTCS1 0xFFC02644 /* SPORT3 Multi-Channel Transmit Select Register 1 */ +#define SPORT3_MTCS2 0xFFC02648 /* SPORT3 Multi-Channel Transmit Select Register 2 */ +#define SPORT3_MTCS3 0xFFC0264C /* SPORT3 Multi-Channel Transmit Select Register 3 */ +#define SPORT3_MRCS0 0xFFC02650 /* SPORT3 Multi-Channel Receive Select Register 0 */ +#define SPORT3_MRCS1 0xFFC02654 /* SPORT3 Multi-Channel Receive Select Register 1 */ +#define SPORT3_MRCS2 0xFFC02658 /* SPORT3 Multi-Channel Receive Select Register 2 */ +#define SPORT3_MRCS3 0xFFC0265C /* SPORT3 Multi-Channel Receive Select Register 3 */ + + +/* Media Transceiver (MXVR) (0xFFC02700 - 0xFFC028FF) */ + +#define MXVR_CONFIG 0xFFC02700 /* MXVR Configuration Register */ +#define MXVR_PLL_CTL_0 0xFFC02704 /* MXVR Phase Lock Loop Control Register 0 */ + +#define MXVR_STATE_0 0xFFC02708 /* MXVR State Register 0 */ +#define MXVR_STATE_1 0xFFC0270C /* MXVR State Register 1 */ + +#define MXVR_INT_STAT_0 0xFFC02710 /* MXVR Interrupt Status Register 0 */ +#define MXVR_INT_STAT_1 0xFFC02714 /* MXVR Interrupt Status Register 1 */ + +#define MXVR_INT_EN_0 0xFFC02718 /* MXVR Interrupt Enable Register 0 */ +#define MXVR_INT_EN_1 0xFFC0271C /* MXVR Interrupt Enable Register 1 */ + +#define MXVR_POSITION 0xFFC02720 /* MXVR Node Position Register */ +#define MXVR_MAX_POSITION 0xFFC02724 /* MXVR Maximum Node Position Register */ + +#define MXVR_DELAY 0xFFC02728 /* MXVR Node Frame Delay Register */ +#define MXVR_MAX_DELAY 0xFFC0272C /* MXVR Maximum Node Frame Delay Register */ + +#define MXVR_LADDR 0xFFC02730 /* MXVR Logical Address Register */ +#define MXVR_GADDR 0xFFC02734 /* MXVR Group Address Register */ +#define MXVR_AADDR 0xFFC02738 /* MXVR Alternate Address Register */ + +#define MXVR_ALLOC_0 0xFFC0273C /* MXVR Allocation Table Register 0 */ +#define MXVR_ALLOC_1 0xFFC02740 /* MXVR Allocation Table Register 1 */ +#define MXVR_ALLOC_2 0xFFC02744 /* MXVR Allocation Table Register 2 */ +#define MXVR_ALLOC_3 0xFFC02748 /* MXVR Allocation Table Register 3 */ +#define MXVR_ALLOC_4 0xFFC0274C /* MXVR Allocation Table Register 4 */ +#define MXVR_ALLOC_5 0xFFC02750 /* MXVR Allocation Table Register 5 */ +#define MXVR_ALLOC_6 0xFFC02754 /* MXVR Allocation Table Register 6 */ +#define MXVR_ALLOC_7 0xFFC02758 /* MXVR Allocation Table Register 7 */ +#define MXVR_ALLOC_8 0xFFC0275C /* MXVR Allocation Table Register 8 */ +#define MXVR_ALLOC_9 0xFFC02760 /* MXVR Allocation Table Register 9 */ +#define MXVR_ALLOC_10 0xFFC02764 /* MXVR Allocation Table Register 10 */ +#define MXVR_ALLOC_11 0xFFC02768 /* MXVR Allocation Table Register 11 */ +#define MXVR_ALLOC_12 0xFFC0276C /* MXVR Allocation Table Register 12 */ +#define MXVR_ALLOC_13 0xFFC02770 /* MXVR Allocation Table Register 13 */ +#define MXVR_ALLOC_14 0xFFC02774 /* MXVR Allocation Table Register 14 */ + +#define MXVR_SYNC_LCHAN_0 0xFFC02778 /* MXVR Sync Data Logical Channel Assign Register 0 */ +#define MXVR_SYNC_LCHAN_1 0xFFC0277C /* MXVR Sync Data Logical Channel Assign Register 1 */ +#define MXVR_SYNC_LCHAN_2 0xFFC02780 /* MXVR Sync Data Logical Channel Assign Register 2 */ +#define MXVR_SYNC_LCHAN_3 0xFFC02784 /* MXVR Sync Data Logical Channel Assign Register 3 */ +#define MXVR_SYNC_LCHAN_4 0xFFC02788 /* MXVR Sync Data Logical Channel Assign Register 4 */ +#define MXVR_SYNC_LCHAN_5 0xFFC0278C /* MXVR Sync Data Logical Channel Assign Register 5 */ +#define MXVR_SYNC_LCHAN_6 0xFFC02790 /* MXVR Sync Data Logical Channel Assign Register 6 */ +#define MXVR_SYNC_LCHAN_7 0xFFC02794 /* MXVR Sync Data Logical Channel Assign Register 7 */ + +#define MXVR_DMA0_CONFIG 0xFFC02798 /* MXVR Sync Data DMA0 Config Register */ +#define MXVR_DMA0_START_ADDR 0xFFC0279C /* MXVR Sync Data DMA0 Start Address Register */ +#define MXVR_DMA0_COUNT 0xFFC027A0 /* MXVR Sync Data DMA0 Loop Count Register */ +#define MXVR_DMA0_CURR_ADDR 0xFFC027A4 /* MXVR Sync Data DMA0 Current Address Register */ +#define MXVR_DMA0_CURR_COUNT 0xFFC027A8 /* MXVR Sync Data DMA0 Current Loop Count Register */ + +#define MXVR_DMA1_CONFIG 0xFFC027AC /* MXVR Sync Data DMA1 Config Register */ +#define MXVR_DMA1_START_ADDR 0xFFC027B0 /* MXVR Sync Data DMA1 Start Address Register */ +#define MXVR_DMA1_COUNT 0xFFC027B4 /* MXVR Sync Data DMA1 Loop Count Register */ +#define MXVR_DMA1_CURR_ADDR 0xFFC027B8 /* MXVR Sync Data DMA1 Current Address Register */ +#define MXVR_DMA1_CURR_COUNT 0xFFC027BC /* MXVR Sync Data DMA1 Current Loop Count Register */ + +#define MXVR_DMA2_CONFIG 0xFFC027C0 /* MXVR Sync Data DMA2 Config Register */ +#define MXVR_DMA2_START_ADDR 0xFFC027C4 /* MXVR Sync Data DMA2 Start Address Register */ +#define MXVR_DMA2_COUNT 0xFFC027C8 /* MXVR Sync Data DMA2 Loop Count Register */ +#define MXVR_DMA2_CURR_ADDR 0xFFC027CC /* MXVR Sync Data DMA2 Current Address Register */ +#define MXVR_DMA2_CURR_COUNT 0xFFC027D0 /* MXVR Sync Data DMA2 Current Loop Count Register */ + +#define MXVR_DMA3_CONFIG 0xFFC027D4 /* MXVR Sync Data DMA3 Config Register */ +#define MXVR_DMA3_START_ADDR 0xFFC027D8 /* MXVR Sync Data DMA3 Start Address Register */ +#define MXVR_DMA3_COUNT 0xFFC027DC /* MXVR Sync Data DMA3 Loop Count Register */ +#define MXVR_DMA3_CURR_ADDR 0xFFC027E0 /* MXVR Sync Data DMA3 Current Address Register */ +#define MXVR_DMA3_CURR_COUNT 0xFFC027E4 /* MXVR Sync Data DMA3 Current Loop Count Register */ + +#define MXVR_DMA4_CONFIG 0xFFC027E8 /* MXVR Sync Data DMA4 Config Register */ +#define MXVR_DMA4_START_ADDR 0xFFC027EC /* MXVR Sync Data DMA4 Start Address Register */ +#define MXVR_DMA4_COUNT 0xFFC027F0 /* MXVR Sync Data DMA4 Loop Count Register */ +#define MXVR_DMA4_CURR_ADDR 0xFFC027F4 /* MXVR Sync Data DMA4 Current Address Register */ +#define MXVR_DMA4_CURR_COUNT 0xFFC027F8 /* MXVR Sync Data DMA4 Current Loop Count Register */ + +#define MXVR_DMA5_CONFIG 0xFFC027FC /* MXVR Sync Data DMA5 Config Register */ +#define MXVR_DMA5_START_ADDR 0xFFC02800 /* MXVR Sync Data DMA5 Start Address Register */ +#define MXVR_DMA5_COUNT 0xFFC02804 /* MXVR Sync Data DMA5 Loop Count Register */ +#define MXVR_DMA5_CURR_ADDR 0xFFC02808 /* MXVR Sync Data DMA5 Current Address Register */ +#define MXVR_DMA5_CURR_COUNT 0xFFC0280C /* MXVR Sync Data DMA5 Current Loop Count Register */ + +#define MXVR_DMA6_CONFIG 0xFFC02810 /* MXVR Sync Data DMA6 Config Register */ +#define MXVR_DMA6_START_ADDR 0xFFC02814 /* MXVR Sync Data DMA6 Start Address Register */ +#define MXVR_DMA6_COUNT 0xFFC02818 /* MXVR Sync Data DMA6 Loop Count Register */ +#define MXVR_DMA6_CURR_ADDR 0xFFC0281C /* MXVR Sync Data DMA6 Current Address Register */ +#define MXVR_DMA6_CURR_COUNT 0xFFC02820 /* MXVR Sync Data DMA6 Current Loop Count Register */ + +#define MXVR_DMA7_CONFIG 0xFFC02824 /* MXVR Sync Data DMA7 Config Register */ +#define MXVR_DMA7_START_ADDR 0xFFC02828 /* MXVR Sync Data DMA7 Start Address Register */ +#define MXVR_DMA7_COUNT 0xFFC0282C /* MXVR Sync Data DMA7 Loop Count Register */ +#define MXVR_DMA7_CURR_ADDR 0xFFC02830 /* MXVR Sync Data DMA7 Current Address Register */ +#define MXVR_DMA7_CURR_COUNT 0xFFC02834 /* MXVR Sync Data DMA7 Current Loop Count Register */ + +#define MXVR_AP_CTL 0xFFC02838 /* MXVR Async Packet Control Register */ +#define MXVR_APRB_START_ADDR 0xFFC0283C /* MXVR Async Packet RX Buffer Start Addr Register */ +#define MXVR_APRB_CURR_ADDR 0xFFC02840 /* MXVR Async Packet RX Buffer Current Addr Register */ +#define MXVR_APTB_START_ADDR 0xFFC02844 /* MXVR Async Packet TX Buffer Start Addr Register */ +#define MXVR_APTB_CURR_ADDR 0xFFC02848 /* MXVR Async Packet TX Buffer Current Addr Register */ + +#define MXVR_CM_CTL 0xFFC0284C /* MXVR Control Message Control Register */ +#define MXVR_CMRB_START_ADDR 0xFFC02850 /* MXVR Control Message RX Buffer Start Addr Register */ +#define MXVR_CMRB_CURR_ADDR 0xFFC02854 /* MXVR Control Message RX Buffer Current Address */ +#define MXVR_CMTB_START_ADDR 0xFFC02858 /* MXVR Control Message TX Buffer Start Addr Register */ +#define MXVR_CMTB_CURR_ADDR 0xFFC0285C /* MXVR Control Message TX Buffer Current Address */ + +#define MXVR_RRDB_START_ADDR 0xFFC02860 /* MXVR Remote Read Buffer Start Addr Register */ +#define MXVR_RRDB_CURR_ADDR 0xFFC02864 /* MXVR Remote Read Buffer Current Addr Register */ + +#define MXVR_PAT_DATA_0 0xFFC02868 /* MXVR Pattern Data Register 0 */ +#define MXVR_PAT_EN_0 0xFFC0286C /* MXVR Pattern Enable Register 0 */ +#define MXVR_PAT_DATA_1 0xFFC02870 /* MXVR Pattern Data Register 1 */ +#define MXVR_PAT_EN_1 0xFFC02874 /* MXVR Pattern Enable Register 1 */ + +#define MXVR_FRAME_CNT_0 0xFFC02878 /* MXVR Frame Counter 0 */ +#define MXVR_FRAME_CNT_1 0xFFC0287C /* MXVR Frame Counter 1 */ + +#define MXVR_ROUTING_0 0xFFC02880 /* MXVR Routing Table Register 0 */ +#define MXVR_ROUTING_1 0xFFC02884 /* MXVR Routing Table Register 1 */ +#define MXVR_ROUTING_2 0xFFC02888 /* MXVR Routing Table Register 2 */ +#define MXVR_ROUTING_3 0xFFC0288C /* MXVR Routing Table Register 3 */ +#define MXVR_ROUTING_4 0xFFC02890 /* MXVR Routing Table Register 4 */ +#define MXVR_ROUTING_5 0xFFC02894 /* MXVR Routing Table Register 5 */ +#define MXVR_ROUTING_6 0xFFC02898 /* MXVR Routing Table Register 6 */ +#define MXVR_ROUTING_7 0xFFC0289C /* MXVR Routing Table Register 7 */ +#define MXVR_ROUTING_8 0xFFC028A0 /* MXVR Routing Table Register 8 */ +#define MXVR_ROUTING_9 0xFFC028A4 /* MXVR Routing Table Register 9 */ +#define MXVR_ROUTING_10 0xFFC028A8 /* MXVR Routing Table Register 10 */ +#define MXVR_ROUTING_11 0xFFC028AC /* MXVR Routing Table Register 11 */ +#define MXVR_ROUTING_12 0xFFC028B0 /* MXVR Routing Table Register 12 */ +#define MXVR_ROUTING_13 0xFFC028B4 /* MXVR Routing Table Register 13 */ +#define MXVR_ROUTING_14 0xFFC028B8 /* MXVR Routing Table Register 14 */ + +#define MXVR_PLL_CTL_1 0xFFC028BC /* MXVR Phase Lock Loop Control Register 1 */ +#define MXVR_BLOCK_CNT 0xFFC028C0 /* MXVR Block Counter */ +#define MXVR_PLL_CTL_2 0xFFC028C4 /* MXVR Phase Lock Loop Control Register 2 */ + + +/* CAN Controller (0xFFC02A00 - 0xFFC02FFF) */ +/* For Mailboxes 0-15 */ +#define CAN_MC1 0xFFC02A00 /* Mailbox config reg 1 */ +#define CAN_MD1 0xFFC02A04 /* Mailbox direction reg 1 */ +#define CAN_TRS1 0xFFC02A08 /* Transmit Request Set reg 1 */ +#define CAN_TRR1 0xFFC02A0C /* Transmit Request Reset reg 1 */ +#define CAN_TA1 0xFFC02A10 /* Transmit Acknowledge reg 1 */ +#define CAN_AA1 0xFFC02A14 /* Transmit Abort Acknowledge reg 1 */ +#define CAN_RMP1 0xFFC02A18 /* Receive Message Pending reg 1 */ +#define CAN_RML1 0xFFC02A1C /* Receive Message Lost reg 1 */ +#define CAN_MBTIF1 0xFFC02A20 /* Mailbox Transmit Interrupt Flag reg 1 */ +#define CAN_MBRIF1 0xFFC02A24 /* Mailbox Receive Interrupt Flag reg 1 */ +#define CAN_MBIM1 0xFFC02A28 /* Mailbox Interrupt Mask reg 1 */ +#define CAN_RFH1 0xFFC02A2C /* Remote Frame Handling reg 1 */ +#define CAN_OPSS1 0xFFC02A30 /* Overwrite Protection Single Shot Xmission reg 1 */ + +/* For Mailboxes 16-31 */ +#define CAN_MC2 0xFFC02A40 /* Mailbox config reg 2 */ +#define CAN_MD2 0xFFC02A44 /* Mailbox direction reg 2 */ +#define CAN_TRS2 0xFFC02A48 /* Transmit Request Set reg 2 */ +#define CAN_TRR2 0xFFC02A4C /* Transmit Request Reset reg 2 */ +#define CAN_TA2 0xFFC02A50 /* Transmit Acknowledge reg 2 */ +#define CAN_AA2 0xFFC02A54 /* Transmit Abort Acknowledge reg 2 */ +#define CAN_RMP2 0xFFC02A58 /* Receive Message Pending reg 2 */ +#define CAN_RML2 0xFFC02A5C /* Receive Message Lost reg 2 */ +#define CAN_MBTIF2 0xFFC02A60 /* Mailbox Transmit Interrupt Flag reg 2 */ +#define CAN_MBRIF2 0xFFC02A64 /* Mailbox Receive Interrupt Flag reg 2 */ +#define CAN_MBIM2 0xFFC02A68 /* Mailbox Interrupt Mask reg 2 */ +#define CAN_RFH2 0xFFC02A6C /* Remote Frame Handling reg 2 */ +#define CAN_OPSS2 0xFFC02A70 /* Overwrite Protection Single Shot Xmission reg 2 */ + +#define CAN_CLOCK 0xFFC02A80 /* Bit Timing Configuration register 0 */ +#define CAN_TIMING 0xFFC02A84 /* Bit Timing Configuration register 1 */ + +#define CAN_DEBUG 0xFFC02A88 /* Debug Register */ +/* the following is for backwards compatibility */ +#define CAN_CNF CAN_DEBUG + +#define CAN_STATUS 0xFFC02A8C /* Global Status Register */ +#define CAN_CEC 0xFFC02A90 /* Error Counter Register */ +#define CAN_GIS 0xFFC02A94 /* Global Interrupt Status Register */ +#define CAN_GIM 0xFFC02A98 /* Global Interrupt Mask Register */ +#define CAN_GIF 0xFFC02A9C /* Global Interrupt Flag Register */ +#define CAN_CONTROL 0xFFC02AA0 /* Master Control Register */ +#define CAN_INTR 0xFFC02AA4 /* Interrupt Pending Register */ +#define CAN_MBTD 0xFFC02AAC /* Mailbox Temporary Disable Feature */ +#define CAN_EWR 0xFFC02AB0 /* Programmable Warning Level */ +#define CAN_ESR 0xFFC02AB4 /* Error Status Register */ +#define CAN_UCCNT 0xFFC02AC4 /* Universal Counter */ +#define CAN_UCRC 0xFFC02AC8 /* Universal Counter Reload/Capture Register */ +#define CAN_UCCNF 0xFFC02ACC /* Universal Counter Configuration Register */ + +/* Mailbox Acceptance Masks */ +#define CAN_AM00L 0xFFC02B00 /* Mailbox 0 Low Acceptance Mask */ +#define CAN_AM00H 0xFFC02B04 /* Mailbox 0 High Acceptance Mask */ +#define CAN_AM01L 0xFFC02B08 /* Mailbox 1 Low Acceptance Mask */ +#define CAN_AM01H 0xFFC02B0C /* Mailbox 1 High Acceptance Mask */ +#define CAN_AM02L 0xFFC02B10 /* Mailbox 2 Low Acceptance Mask */ +#define CAN_AM02H 0xFFC02B14 /* Mailbox 2 High Acceptance Mask */ +#define CAN_AM03L 0xFFC02B18 /* Mailbox 3 Low Acceptance Mask */ +#define CAN_AM03H 0xFFC02B1C /* Mailbox 3 High Acceptance Mask */ +#define CAN_AM04L 0xFFC02B20 /* Mailbox 4 Low Acceptance Mask */ +#define CAN_AM04H 0xFFC02B24 /* Mailbox 4 High Acceptance Mask */ +#define CAN_AM05L 0xFFC02B28 /* Mailbox 5 Low Acceptance Mask */ +#define CAN_AM05H 0xFFC02B2C /* Mailbox 5 High Acceptance Mask */ +#define CAN_AM06L 0xFFC02B30 /* Mailbox 6 Low Acceptance Mask */ +#define CAN_AM06H 0xFFC02B34 /* Mailbox 6 High Acceptance Mask */ +#define CAN_AM07L 0xFFC02B38 /* Mailbox 7 Low Acceptance Mask */ +#define CAN_AM07H 0xFFC02B3C /* Mailbox 7 High Acceptance Mask */ +#define CAN_AM08L 0xFFC02B40 /* Mailbox 8 Low Acceptance Mask */ +#define CAN_AM08H 0xFFC02B44 /* Mailbox 8 High Acceptance Mask */ +#define CAN_AM09L 0xFFC02B48 /* Mailbox 9 Low Acceptance Mask */ +#define CAN_AM09H 0xFFC02B4C /* Mailbox 9 High Acceptance Mask */ +#define CAN_AM10L 0xFFC02B50 /* Mailbox 10 Low Acceptance Mask */ +#define CAN_AM10H 0xFFC02B54 /* Mailbox 10 High Acceptance Mask */ +#define CAN_AM11L 0xFFC02B58 /* Mailbox 11 Low Acceptance Mask */ +#define CAN_AM11H 0xFFC02B5C /* Mailbox 11 High Acceptance Mask */ +#define CAN_AM12L 0xFFC02B60 /* Mailbox 12 Low Acceptance Mask */ +#define CAN_AM12H 0xFFC02B64 /* Mailbox 12 High Acceptance Mask */ +#define CAN_AM13L 0xFFC02B68 /* Mailbox 13 Low Acceptance Mask */ +#define CAN_AM13H 0xFFC02B6C /* Mailbox 13 High Acceptance Mask */ +#define CAN_AM14L 0xFFC02B70 /* Mailbox 14 Low Acceptance Mask */ +#define CAN_AM14H 0xFFC02B74 /* Mailbox 14 High Acceptance Mask */ +#define CAN_AM15L 0xFFC02B78 /* Mailbox 15 Low Acceptance Mask */ +#define CAN_AM15H 0xFFC02B7C /* Mailbox 15 High Acceptance Mask */ + +#define CAN_AM16L 0xFFC02B80 /* Mailbox 16 Low Acceptance Mask */ +#define CAN_AM16H 0xFFC02B84 /* Mailbox 16 High Acceptance Mask */ +#define CAN_AM17L 0xFFC02B88 /* Mailbox 17 Low Acceptance Mask */ +#define CAN_AM17H 0xFFC02B8C /* Mailbox 17 High Acceptance Mask */ +#define CAN_AM18L 0xFFC02B90 /* Mailbox 18 Low Acceptance Mask */ +#define CAN_AM18H 0xFFC02B94 /* Mailbox 18 High Acceptance Mask */ +#define CAN_AM19L 0xFFC02B98 /* Mailbox 19 Low Acceptance Mask */ +#define CAN_AM19H 0xFFC02B9C /* Mailbox 19 High Acceptance Mask */ +#define CAN_AM20L 0xFFC02BA0 /* Mailbox 20 Low Acceptance Mask */ +#define CAN_AM20H 0xFFC02BA4 /* Mailbox 20 High Acceptance Mask */ +#define CAN_AM21L 0xFFC02BA8 /* Mailbox 21 Low Acceptance Mask */ +#define CAN_AM21H 0xFFC02BAC /* Mailbox 21 High Acceptance Mask */ +#define CAN_AM22L 0xFFC02BB0 /* Mailbox 22 Low Acceptance Mask */ +#define CAN_AM22H 0xFFC02BB4 /* Mailbox 22 High Acceptance Mask */ +#define CAN_AM23L 0xFFC02BB8 /* Mailbox 23 Low Acceptance Mask */ +#define CAN_AM23H 0xFFC02BBC /* Mailbox 23 High Acceptance Mask */ +#define CAN_AM24L 0xFFC02BC0 /* Mailbox 24 Low Acceptance Mask */ +#define CAN_AM24H 0xFFC02BC4 /* Mailbox 24 High Acceptance Mask */ +#define CAN_AM25L 0xFFC02BC8 /* Mailbox 25 Low Acceptance Mask */ +#define CAN_AM25H 0xFFC02BCC /* Mailbox 25 High Acceptance Mask */ +#define CAN_AM26L 0xFFC02BD0 /* Mailbox 26 Low Acceptance Mask */ +#define CAN_AM26H 0xFFC02BD4 /* Mailbox 26 High Acceptance Mask */ +#define CAN_AM27L 0xFFC02BD8 /* Mailbox 27 Low Acceptance Mask */ +#define CAN_AM27H 0xFFC02BDC /* Mailbox 27 High Acceptance Mask */ +#define CAN_AM28L 0xFFC02BE0 /* Mailbox 28 Low Acceptance Mask */ +#define CAN_AM28H 0xFFC02BE4 /* Mailbox 28 High Acceptance Mask */ +#define CAN_AM29L 0xFFC02BE8 /* Mailbox 29 Low Acceptance Mask */ +#define CAN_AM29H 0xFFC02BEC /* Mailbox 29 High Acceptance Mask */ +#define CAN_AM30L 0xFFC02BF0 /* Mailbox 30 Low Acceptance Mask */ +#define CAN_AM30H 0xFFC02BF4 /* Mailbox 30 High Acceptance Mask */ +#define CAN_AM31L 0xFFC02BF8 /* Mailbox 31 Low Acceptance Mask */ +#define CAN_AM31H 0xFFC02BFC /* Mailbox 31 High Acceptance Mask */ + +/* CAN Acceptance Mask Macros */ +#define CAN_AM_L(x) (CAN_AM00L+((x)*0x8)) +#define CAN_AM_H(x) (CAN_AM00H+((x)*0x8)) + +/* Mailbox Registers */ +#define CAN_MB00_DATA0 0xFFC02C00 /* Mailbox 0 Data Word 0 [15:0] Register */ +#define CAN_MB00_DATA1 0xFFC02C04 /* Mailbox 0 Data Word 1 [31:16] Register */ +#define CAN_MB00_DATA2 0xFFC02C08 /* Mailbox 0 Data Word 2 [47:32] Register */ +#define CAN_MB00_DATA3 0xFFC02C0C /* Mailbox 0 Data Word 3 [63:48] Register */ +#define CAN_MB00_LENGTH 0xFFC02C10 /* Mailbox 0 Data Length Code Register */ +#define CAN_MB00_TIMESTAMP 0xFFC02C14 /* Mailbox 0 Time Stamp Value Register */ +#define CAN_MB00_ID0 0xFFC02C18 /* Mailbox 0 Identifier Low Register */ +#define CAN_MB00_ID1 0xFFC02C1C /* Mailbox 0 Identifier High Register */ + +#define CAN_MB01_DATA0 0xFFC02C20 /* Mailbox 1 Data Word 0 [15:0] Register */ +#define CAN_MB01_DATA1 0xFFC02C24 /* Mailbox 1 Data Word 1 [31:16] Register */ +#define CAN_MB01_DATA2 0xFFC02C28 /* Mailbox 1 Data Word 2 [47:32] Register */ +#define CAN_MB01_DATA3 0xFFC02C2C /* Mailbox 1 Data Word 3 [63:48] Register */ +#define CAN_MB01_LENGTH 0xFFC02C30 /* Mailbox 1 Data Length Code Register */ +#define CAN_MB01_TIMESTAMP 0xFFC02C34 /* Mailbox 1 Time Stamp Value Register */ +#define CAN_MB01_ID0 0xFFC02C38 /* Mailbox 1 Identifier Low Register */ +#define CAN_MB01_ID1 0xFFC02C3C /* Mailbox 1 Identifier High Register */ + +#define CAN_MB02_DATA0 0xFFC02C40 /* Mailbox 2 Data Word 0 [15:0] Register */ +#define CAN_MB02_DATA1 0xFFC02C44 /* Mailbox 2 Data Word 1 [31:16] Register */ +#define CAN_MB02_DATA2 0xFFC02C48 /* Mailbox 2 Data Word 2 [47:32] Register */ +#define CAN_MB02_DATA3 0xFFC02C4C /* Mailbox 2 Data Word 3 [63:48] Register */ +#define CAN_MB02_LENGTH 0xFFC02C50 /* Mailbox 2 Data Length Code Register */ +#define CAN_MB02_TIMESTAMP 0xFFC02C54 /* Mailbox 2 Time Stamp Value Register */ +#define CAN_MB02_ID0 0xFFC02C58 /* Mailbox 2 Identifier Low Register */ +#define CAN_MB02_ID1 0xFFC02C5C /* Mailbox 2 Identifier High Register */ + +#define CAN_MB03_DATA0 0xFFC02C60 /* Mailbox 3 Data Word 0 [15:0] Register */ +#define CAN_MB03_DATA1 0xFFC02C64 /* Mailbox 3 Data Word 1 [31:16] Register */ +#define CAN_MB03_DATA2 0xFFC02C68 /* Mailbox 3 Data Word 2 [47:32] Register */ +#define CAN_MB03_DATA3 0xFFC02C6C /* Mailbox 3 Data Word 3 [63:48] Register */ +#define CAN_MB03_LENGTH 0xFFC02C70 /* Mailbox 3 Data Length Code Register */ +#define CAN_MB03_TIMESTAMP 0xFFC02C74 /* Mailbox 3 Time Stamp Value Register */ +#define CAN_MB03_ID0 0xFFC02C78 /* Mailbox 3 Identifier Low Register */ +#define CAN_MB03_ID1 0xFFC02C7C /* Mailbox 3 Identifier High Register */ + +#define CAN_MB04_DATA0 0xFFC02C80 /* Mailbox 4 Data Word 0 [15:0] Register */ +#define CAN_MB04_DATA1 0xFFC02C84 /* Mailbox 4 Data Word 1 [31:16] Register */ +#define CAN_MB04_DATA2 0xFFC02C88 /* Mailbox 4 Data Word 2 [47:32] Register */ +#define CAN_MB04_DATA3 0xFFC02C8C /* Mailbox 4 Data Word 3 [63:48] Register */ +#define CAN_MB04_LENGTH 0xFFC02C90 /* Mailbox 4 Data Length Code Register */ +#define CAN_MB04_TIMESTAMP 0xFFC02C94 /* Mailbox 4 Time Stamp Value Register */ +#define CAN_MB04_ID0 0xFFC02C98 /* Mailbox 4 Identifier Low Register */ +#define CAN_MB04_ID1 0xFFC02C9C /* Mailbox 4 Identifier High Register */ + +#define CAN_MB05_DATA0 0xFFC02CA0 /* Mailbox 5 Data Word 0 [15:0] Register */ +#define CAN_MB05_DATA1 0xFFC02CA4 /* Mailbox 5 Data Word 1 [31:16] Register */ +#define CAN_MB05_DATA2 0xFFC02CA8 /* Mailbox 5 Data Word 2 [47:32] Register */ +#define CAN_MB05_DATA3 0xFFC02CAC /* Mailbox 5 Data Word 3 [63:48] Register */ +#define CAN_MB05_LENGTH 0xFFC02CB0 /* Mailbox 5 Data Length Code Register */ +#define CAN_MB05_TIMESTAMP 0xFFC02CB4 /* Mailbox 5 Time Stamp Value Register */ +#define CAN_MB05_ID0 0xFFC02CB8 /* Mailbox 5 Identifier Low Register */ +#define CAN_MB05_ID1 0xFFC02CBC /* Mailbox 5 Identifier High Register */ + +#define CAN_MB06_DATA0 0xFFC02CC0 /* Mailbox 6 Data Word 0 [15:0] Register */ +#define CAN_MB06_DATA1 0xFFC02CC4 /* Mailbox 6 Data Word 1 [31:16] Register */ +#define CAN_MB06_DATA2 0xFFC02CC8 /* Mailbox 6 Data Word 2 [47:32] Register */ +#define CAN_MB06_DATA3 0xFFC02CCC /* Mailbox 6 Data Word 3 [63:48] Register */ +#define CAN_MB06_LENGTH 0xFFC02CD0 /* Mailbox 6 Data Length Code Register */ +#define CAN_MB06_TIMESTAMP 0xFFC02CD4 /* Mailbox 6 Time Stamp Value Register */ +#define CAN_MB06_ID0 0xFFC02CD8 /* Mailbox 6 Identifier Low Register */ +#define CAN_MB06_ID1 0xFFC02CDC /* Mailbox 6 Identifier High Register */ + +#define CAN_MB07_DATA0 0xFFC02CE0 /* Mailbox 7 Data Word 0 [15:0] Register */ +#define CAN_MB07_DATA1 0xFFC02CE4 /* Mailbox 7 Data Word 1 [31:16] Register */ +#define CAN_MB07_DATA2 0xFFC02CE8 /* Mailbox 7 Data Word 2 [47:32] Register */ +#define CAN_MB07_DATA3 0xFFC02CEC /* Mailbox 7 Data Word 3 [63:48] Register */ +#define CAN_MB07_LENGTH 0xFFC02CF0 /* Mailbox 7 Data Length Code Register */ +#define CAN_MB07_TIMESTAMP 0xFFC02CF4 /* Mailbox 7 Time Stamp Value Register */ +#define CAN_MB07_ID0 0xFFC02CF8 /* Mailbox 7 Identifier Low Register */ +#define CAN_MB07_ID1 0xFFC02CFC /* Mailbox 7 Identifier High Register */ + +#define CAN_MB08_DATA0 0xFFC02D00 /* Mailbox 8 Data Word 0 [15:0] Register */ +#define CAN_MB08_DATA1 0xFFC02D04 /* Mailbox 8 Data Word 1 [31:16] Register */ +#define CAN_MB08_DATA2 0xFFC02D08 /* Mailbox 8 Data Word 2 [47:32] Register */ +#define CAN_MB08_DATA3 0xFFC02D0C /* Mailbox 8 Data Word 3 [63:48] Register */ +#define CAN_MB08_LENGTH 0xFFC02D10 /* Mailbox 8 Data Length Code Register */ +#define CAN_MB08_TIMESTAMP 0xFFC02D14 /* Mailbox 8 Time Stamp Value Register */ +#define CAN_MB08_ID0 0xFFC02D18 /* Mailbox 8 Identifier Low Register */ +#define CAN_MB08_ID1 0xFFC02D1C /* Mailbox 8 Identifier High Register */ + +#define CAN_MB09_DATA0 0xFFC02D20 /* Mailbox 9 Data Word 0 [15:0] Register */ +#define CAN_MB09_DATA1 0xFFC02D24 /* Mailbox 9 Data Word 1 [31:16] Register */ +#define CAN_MB09_DATA2 0xFFC02D28 /* Mailbox 9 Data Word 2 [47:32] Register */ +#define CAN_MB09_DATA3 0xFFC02D2C /* Mailbox 9 Data Word 3 [63:48] Register */ +#define CAN_MB09_LENGTH 0xFFC02D30 /* Mailbox 9 Data Length Code Register */ +#define CAN_MB09_TIMESTAMP 0xFFC02D34 /* Mailbox 9 Time Stamp Value Register */ +#define CAN_MB09_ID0 0xFFC02D38 /* Mailbox 9 Identifier Low Register */ +#define CAN_MB09_ID1 0xFFC02D3C /* Mailbox 9 Identifier High Register */ + +#define CAN_MB10_DATA0 0xFFC02D40 /* Mailbox 10 Data Word 0 [15:0] Register */ +#define CAN_MB10_DATA1 0xFFC02D44 /* Mailbox 10 Data Word 1 [31:16] Register */ +#define CAN_MB10_DATA2 0xFFC02D48 /* Mailbox 10 Data Word 2 [47:32] Register */ +#define CAN_MB10_DATA3 0xFFC02D4C /* Mailbox 10 Data Word 3 [63:48] Register */ +#define CAN_MB10_LENGTH 0xFFC02D50 /* Mailbox 10 Data Length Code Register */ +#define CAN_MB10_TIMESTAMP 0xFFC02D54 /* Mailbox 10 Time Stamp Value Register */ +#define CAN_MB10_ID0 0xFFC02D58 /* Mailbox 10 Identifier Low Register */ +#define CAN_MB10_ID1 0xFFC02D5C /* Mailbox 10 Identifier High Register */ + +#define CAN_MB11_DATA0 0xFFC02D60 /* Mailbox 11 Data Word 0 [15:0] Register */ +#define CAN_MB11_DATA1 0xFFC02D64 /* Mailbox 11 Data Word 1 [31:16] Register */ +#define CAN_MB11_DATA2 0xFFC02D68 /* Mailbox 11 Data Word 2 [47:32] Register */ +#define CAN_MB11_DATA3 0xFFC02D6C /* Mailbox 11 Data Word 3 [63:48] Register */ +#define CAN_MB11_LENGTH 0xFFC02D70 /* Mailbox 11 Data Length Code Register */ +#define CAN_MB11_TIMESTAMP 0xFFC02D74 /* Mailbox 11 Time Stamp Value Register */ +#define CAN_MB11_ID0 0xFFC02D78 /* Mailbox 11 Identifier Low Register */ +#define CAN_MB11_ID1 0xFFC02D7C /* Mailbox 11 Identifier High Register */ + +#define CAN_MB12_DATA0 0xFFC02D80 /* Mailbox 12 Data Word 0 [15:0] Register */ +#define CAN_MB12_DATA1 0xFFC02D84 /* Mailbox 12 Data Word 1 [31:16] Register */ +#define CAN_MB12_DATA2 0xFFC02D88 /* Mailbox 12 Data Word 2 [47:32] Register */ +#define CAN_MB12_DATA3 0xFFC02D8C /* Mailbox 12 Data Word 3 [63:48] Register */ +#define CAN_MB12_LENGTH 0xFFC02D90 /* Mailbox 12 Data Length Code Register */ +#define CAN_MB12_TIMESTAMP 0xFFC02D94 /* Mailbox 12 Time Stamp Value Register */ +#define CAN_MB12_ID0 0xFFC02D98 /* Mailbox 12 Identifier Low Register */ +#define CAN_MB12_ID1 0xFFC02D9C /* Mailbox 12 Identifier High Register */ + +#define CAN_MB13_DATA0 0xFFC02DA0 /* Mailbox 13 Data Word 0 [15:0] Register */ +#define CAN_MB13_DATA1 0xFFC02DA4 /* Mailbox 13 Data Word 1 [31:16] Register */ +#define CAN_MB13_DATA2 0xFFC02DA8 /* Mailbox 13 Data Word 2 [47:32] Register */ +#define CAN_MB13_DATA3 0xFFC02DAC /* Mailbox 13 Data Word 3 [63:48] Register */ +#define CAN_MB13_LENGTH 0xFFC02DB0 /* Mailbox 13 Data Length Code Register */ +#define CAN_MB13_TIMESTAMP 0xFFC02DB4 /* Mailbox 13 Time Stamp Value Register */ +#define CAN_MB13_ID0 0xFFC02DB8 /* Mailbox 13 Identifier Low Register */ +#define CAN_MB13_ID1 0xFFC02DBC /* Mailbox 13 Identifier High Register */ + +#define CAN_MB14_DATA0 0xFFC02DC0 /* Mailbox 14 Data Word 0 [15:0] Register */ +#define CAN_MB14_DATA1 0xFFC02DC4 /* Mailbox 14 Data Word 1 [31:16] Register */ +#define CAN_MB14_DATA2 0xFFC02DC8 /* Mailbox 14 Data Word 2 [47:32] Register */ +#define CAN_MB14_DATA3 0xFFC02DCC /* Mailbox 14 Data Word 3 [63:48] Register */ +#define CAN_MB14_LENGTH 0xFFC02DD0 /* Mailbox 14 Data Length Code Register */ +#define CAN_MB14_TIMESTAMP 0xFFC02DD4 /* Mailbox 14 Time Stamp Value Register */ +#define CAN_MB14_ID0 0xFFC02DD8 /* Mailbox 14 Identifier Low Register */ +#define CAN_MB14_ID1 0xFFC02DDC /* Mailbox 14 Identifier High Register */ + +#define CAN_MB15_DATA0 0xFFC02DE0 /* Mailbox 15 Data Word 0 [15:0] Register */ +#define CAN_MB15_DATA1 0xFFC02DE4 /* Mailbox 15 Data Word 1 [31:16] Register */ +#define CAN_MB15_DATA2 0xFFC02DE8 /* Mailbox 15 Data Word 2 [47:32] Register */ +#define CAN_MB15_DATA3 0xFFC02DEC /* Mailbox 15 Data Word 3 [63:48] Register */ +#define CAN_MB15_LENGTH 0xFFC02DF0 /* Mailbox 15 Data Length Code Register */ +#define CAN_MB15_TIMESTAMP 0xFFC02DF4 /* Mailbox 15 Time Stamp Value Register */ +#define CAN_MB15_ID0 0xFFC02DF8 /* Mailbox 15 Identifier Low Register */ +#define CAN_MB15_ID1 0xFFC02DFC /* Mailbox 15 Identifier High Register */ + +#define CAN_MB16_DATA0 0xFFC02E00 /* Mailbox 16 Data Word 0 [15:0] Register */ +#define CAN_MB16_DATA1 0xFFC02E04 /* Mailbox 16 Data Word 1 [31:16] Register */ +#define CAN_MB16_DATA2 0xFFC02E08 /* Mailbox 16 Data Word 2 [47:32] Register */ +#define CAN_MB16_DATA3 0xFFC02E0C /* Mailbox 16 Data Word 3 [63:48] Register */ +#define CAN_MB16_LENGTH 0xFFC02E10 /* Mailbox 16 Data Length Code Register */ +#define CAN_MB16_TIMESTAMP 0xFFC02E14 /* Mailbox 16 Time Stamp Value Register */ +#define CAN_MB16_ID0 0xFFC02E18 /* Mailbox 16 Identifier Low Register */ +#define CAN_MB16_ID1 0xFFC02E1C /* Mailbox 16 Identifier High Register */ + +#define CAN_MB17_DATA0 0xFFC02E20 /* Mailbox 17 Data Word 0 [15:0] Register */ +#define CAN_MB17_DATA1 0xFFC02E24 /* Mailbox 17 Data Word 1 [31:16] Register */ +#define CAN_MB17_DATA2 0xFFC02E28 /* Mailbox 17 Data Word 2 [47:32] Register */ +#define CAN_MB17_DATA3 0xFFC02E2C /* Mailbox 17 Data Word 3 [63:48] Register */ +#define CAN_MB17_LENGTH 0xFFC02E30 /* Mailbox 17 Data Length Code Register */ +#define CAN_MB17_TIMESTAMP 0xFFC02E34 /* Mailbox 17 Time Stamp Value Register */ +#define CAN_MB17_ID0 0xFFC02E38 /* Mailbox 17 Identifier Low Register */ +#define CAN_MB17_ID1 0xFFC02E3C /* Mailbox 17 Identifier High Register */ + +#define CAN_MB18_DATA0 0xFFC02E40 /* Mailbox 18 Data Word 0 [15:0] Register */ +#define CAN_MB18_DATA1 0xFFC02E44 /* Mailbox 18 Data Word 1 [31:16] Register */ +#define CAN_MB18_DATA2 0xFFC02E48 /* Mailbox 18 Data Word 2 [47:32] Register */ +#define CAN_MB18_DATA3 0xFFC02E4C /* Mailbox 18 Data Word 3 [63:48] Register */ +#define CAN_MB18_LENGTH 0xFFC02E50 /* Mailbox 18 Data Length Code Register */ +#define CAN_MB18_TIMESTAMP 0xFFC02E54 /* Mailbox 18 Time Stamp Value Register */ +#define CAN_MB18_ID0 0xFFC02E58 /* Mailbox 18 Identifier Low Register */ +#define CAN_MB18_ID1 0xFFC02E5C /* Mailbox 18 Identifier High Register */ + +#define CAN_MB19_DATA0 0xFFC02E60 /* Mailbox 19 Data Word 0 [15:0] Register */ +#define CAN_MB19_DATA1 0xFFC02E64 /* Mailbox 19 Data Word 1 [31:16] Register */ +#define CAN_MB19_DATA2 0xFFC02E68 /* Mailbox 19 Data Word 2 [47:32] Register */ +#define CAN_MB19_DATA3 0xFFC02E6C /* Mailbox 19 Data Word 3 [63:48] Register */ +#define CAN_MB19_LENGTH 0xFFC02E70 /* Mailbox 19 Data Length Code Register */ +#define CAN_MB19_TIMESTAMP 0xFFC02E74 /* Mailbox 19 Time Stamp Value Register */ +#define CAN_MB19_ID0 0xFFC02E78 /* Mailbox 19 Identifier Low Register */ +#define CAN_MB19_ID1 0xFFC02E7C /* Mailbox 19 Identifier High Register */ + +#define CAN_MB20_DATA0 0xFFC02E80 /* Mailbox 20 Data Word 0 [15:0] Register */ +#define CAN_MB20_DATA1 0xFFC02E84 /* Mailbox 20 Data Word 1 [31:16] Register */ +#define CAN_MB20_DATA2 0xFFC02E88 /* Mailbox 20 Data Word 2 [47:32] Register */ +#define CAN_MB20_DATA3 0xFFC02E8C /* Mailbox 20 Data Word 3 [63:48] Register */ +#define CAN_MB20_LENGTH 0xFFC02E90 /* Mailbox 20 Data Length Code Register */ +#define CAN_MB20_TIMESTAMP 0xFFC02E94 /* Mailbox 20 Time Stamp Value Register */ +#define CAN_MB20_ID0 0xFFC02E98 /* Mailbox 20 Identifier Low Register */ +#define CAN_MB20_ID1 0xFFC02E9C /* Mailbox 20 Identifier High Register */ + +#define CAN_MB21_DATA0 0xFFC02EA0 /* Mailbox 21 Data Word 0 [15:0] Register */ +#define CAN_MB21_DATA1 0xFFC02EA4 /* Mailbox 21 Data Word 1 [31:16] Register */ +#define CAN_MB21_DATA2 0xFFC02EA8 /* Mailbox 21 Data Word 2 [47:32] Register */ +#define CAN_MB21_DATA3 0xFFC02EAC /* Mailbox 21 Data Word 3 [63:48] Register */ +#define CAN_MB21_LENGTH 0xFFC02EB0 /* Mailbox 21 Data Length Code Register */ +#define CAN_MB21_TIMESTAMP 0xFFC02EB4 /* Mailbox 21 Time Stamp Value Register */ +#define CAN_MB21_ID0 0xFFC02EB8 /* Mailbox 21 Identifier Low Register */ +#define CAN_MB21_ID1 0xFFC02EBC /* Mailbox 21 Identifier High Register */ + +#define CAN_MB22_DATA0 0xFFC02EC0 /* Mailbox 22 Data Word 0 [15:0] Register */ +#define CAN_MB22_DATA1 0xFFC02EC4 /* Mailbox 22 Data Word 1 [31:16] Register */ +#define CAN_MB22_DATA2 0xFFC02EC8 /* Mailbox 22 Data Word 2 [47:32] Register */ +#define CAN_MB22_DATA3 0xFFC02ECC /* Mailbox 22 Data Word 3 [63:48] Register */ +#define CAN_MB22_LENGTH 0xFFC02ED0 /* Mailbox 22 Data Length Code Register */ +#define CAN_MB22_TIMESTAMP 0xFFC02ED4 /* Mailbox 22 Time Stamp Value Register */ +#define CAN_MB22_ID0 0xFFC02ED8 /* Mailbox 22 Identifier Low Register */ +#define CAN_MB22_ID1 0xFFC02EDC /* Mailbox 22 Identifier High Register */ + +#define CAN_MB23_DATA0 0xFFC02EE0 /* Mailbox 23 Data Word 0 [15:0] Register */ +#define CAN_MB23_DATA1 0xFFC02EE4 /* Mailbox 23 Data Word 1 [31:16] Register */ +#define CAN_MB23_DATA2 0xFFC02EE8 /* Mailbox 23 Data Word 2 [47:32] Register */ +#define CAN_MB23_DATA3 0xFFC02EEC /* Mailbox 23 Data Word 3 [63:48] Register */ +#define CAN_MB23_LENGTH 0xFFC02EF0 /* Mailbox 23 Data Length Code Register */ +#define CAN_MB23_TIMESTAMP 0xFFC02EF4 /* Mailbox 23 Time Stamp Value Register */ +#define CAN_MB23_ID0 0xFFC02EF8 /* Mailbox 23 Identifier Low Register */ +#define CAN_MB23_ID1 0xFFC02EFC /* Mailbox 23 Identifier High Register */ + +#define CAN_MB24_DATA0 0xFFC02F00 /* Mailbox 24 Data Word 0 [15:0] Register */ +#define CAN_MB24_DATA1 0xFFC02F04 /* Mailbox 24 Data Word 1 [31:16] Register */ +#define CAN_MB24_DATA2 0xFFC02F08 /* Mailbox 24 Data Word 2 [47:32] Register */ +#define CAN_MB24_DATA3 0xFFC02F0C /* Mailbox 24 Data Word 3 [63:48] Register */ +#define CAN_MB24_LENGTH 0xFFC02F10 /* Mailbox 24 Data Length Code Register */ +#define CAN_MB24_TIMESTAMP 0xFFC02F14 /* Mailbox 24 Time Stamp Value Register */ +#define CAN_MB24_ID0 0xFFC02F18 /* Mailbox 24 Identifier Low Register */ +#define CAN_MB24_ID1 0xFFC02F1C /* Mailbox 24 Identifier High Register */ + +#define CAN_MB25_DATA0 0xFFC02F20 /* Mailbox 25 Data Word 0 [15:0] Register */ +#define CAN_MB25_DATA1 0xFFC02F24 /* Mailbox 25 Data Word 1 [31:16] Register */ +#define CAN_MB25_DATA2 0xFFC02F28 /* Mailbox 25 Data Word 2 [47:32] Register */ +#define CAN_MB25_DATA3 0xFFC02F2C /* Mailbox 25 Data Word 3 [63:48] Register */ +#define CAN_MB25_LENGTH 0xFFC02F30 /* Mailbox 25 Data Length Code Register */ +#define CAN_MB25_TIMESTAMP 0xFFC02F34 /* Mailbox 25 Time Stamp Value Register */ +#define CAN_MB25_ID0 0xFFC02F38 /* Mailbox 25 Identifier Low Register */ +#define CAN_MB25_ID1 0xFFC02F3C /* Mailbox 25 Identifier High Register */ + +#define CAN_MB26_DATA0 0xFFC02F40 /* Mailbox 26 Data Word 0 [15:0] Register */ +#define CAN_MB26_DATA1 0xFFC02F44 /* Mailbox 26 Data Word 1 [31:16] Register */ +#define CAN_MB26_DATA2 0xFFC02F48 /* Mailbox 26 Data Word 2 [47:32] Register */ +#define CAN_MB26_DATA3 0xFFC02F4C /* Mailbox 26 Data Word 3 [63:48] Register */ +#define CAN_MB26_LENGTH 0xFFC02F50 /* Mailbox 26 Data Length Code Register */ +#define CAN_MB26_TIMESTAMP 0xFFC02F54 /* Mailbox 26 Time Stamp Value Register */ +#define CAN_MB26_ID0 0xFFC02F58 /* Mailbox 26 Identifier Low Register */ +#define CAN_MB26_ID1 0xFFC02F5C /* Mailbox 26 Identifier High Register */ + +#define CAN_MB27_DATA0 0xFFC02F60 /* Mailbox 27 Data Word 0 [15:0] Register */ +#define CAN_MB27_DATA1 0xFFC02F64 /* Mailbox 27 Data Word 1 [31:16] Register */ +#define CAN_MB27_DATA2 0xFFC02F68 /* Mailbox 27 Data Word 2 [47:32] Register */ +#define CAN_MB27_DATA3 0xFFC02F6C /* Mailbox 27 Data Word 3 [63:48] Register */ +#define CAN_MB27_LENGTH 0xFFC02F70 /* Mailbox 27 Data Length Code Register */ +#define CAN_MB27_TIMESTAMP 0xFFC02F74 /* Mailbox 27 Time Stamp Value Register */ +#define CAN_MB27_ID0 0xFFC02F78 /* Mailbox 27 Identifier Low Register */ +#define CAN_MB27_ID1 0xFFC02F7C /* Mailbox 27 Identifier High Register */ + +#define CAN_MB28_DATA0 0xFFC02F80 /* Mailbox 28 Data Word 0 [15:0] Register */ +#define CAN_MB28_DATA1 0xFFC02F84 /* Mailbox 28 Data Word 1 [31:16] Register */ +#define CAN_MB28_DATA2 0xFFC02F88 /* Mailbox 28 Data Word 2 [47:32] Register */ +#define CAN_MB28_DATA3 0xFFC02F8C /* Mailbox 28 Data Word 3 [63:48] Register */ +#define CAN_MB28_LENGTH 0xFFC02F90 /* Mailbox 28 Data Length Code Register */ +#define CAN_MB28_TIMESTAMP 0xFFC02F94 /* Mailbox 28 Time Stamp Value Register */ +#define CAN_MB28_ID0 0xFFC02F98 /* Mailbox 28 Identifier Low Register */ +#define CAN_MB28_ID1 0xFFC02F9C /* Mailbox 28 Identifier High Register */ + +#define CAN_MB29_DATA0 0xFFC02FA0 /* Mailbox 29 Data Word 0 [15:0] Register */ +#define CAN_MB29_DATA1 0xFFC02FA4 /* Mailbox 29 Data Word 1 [31:16] Register */ +#define CAN_MB29_DATA2 0xFFC02FA8 /* Mailbox 29 Data Word 2 [47:32] Register */ +#define CAN_MB29_DATA3 0xFFC02FAC /* Mailbox 29 Data Word 3 [63:48] Register */ +#define CAN_MB29_LENGTH 0xFFC02FB0 /* Mailbox 29 Data Length Code Register */ +#define CAN_MB29_TIMESTAMP 0xFFC02FB4 /* Mailbox 29 Time Stamp Value Register */ +#define CAN_MB29_ID0 0xFFC02FB8 /* Mailbox 29 Identifier Low Register */ +#define CAN_MB29_ID1 0xFFC02FBC /* Mailbox 29 Identifier High Register */ + +#define CAN_MB30_DATA0 0xFFC02FC0 /* Mailbox 30 Data Word 0 [15:0] Register */ +#define CAN_MB30_DATA1 0xFFC02FC4 /* Mailbox 30 Data Word 1 [31:16] Register */ +#define CAN_MB30_DATA2 0xFFC02FC8 /* Mailbox 30 Data Word 2 [47:32] Register */ +#define CAN_MB30_DATA3 0xFFC02FCC /* Mailbox 30 Data Word 3 [63:48] Register */ +#define CAN_MB30_LENGTH 0xFFC02FD0 /* Mailbox 30 Data Length Code Register */ +#define CAN_MB30_TIMESTAMP 0xFFC02FD4 /* Mailbox 30 Time Stamp Value Register */ +#define CAN_MB30_ID0 0xFFC02FD8 /* Mailbox 30 Identifier Low Register */ +#define CAN_MB30_ID1 0xFFC02FDC /* Mailbox 30 Identifier High Register */ + +#define CAN_MB31_DATA0 0xFFC02FE0 /* Mailbox 31 Data Word 0 [15:0] Register */ +#define CAN_MB31_DATA1 0xFFC02FE4 /* Mailbox 31 Data Word 1 [31:16] Register */ +#define CAN_MB31_DATA2 0xFFC02FE8 /* Mailbox 31 Data Word 2 [47:32] Register */ +#define CAN_MB31_DATA3 0xFFC02FEC /* Mailbox 31 Data Word 3 [63:48] Register */ +#define CAN_MB31_LENGTH 0xFFC02FF0 /* Mailbox 31 Data Length Code Register */ +#define CAN_MB31_TIMESTAMP 0xFFC02FF4 /* Mailbox 31 Time Stamp Value Register */ +#define CAN_MB31_ID0 0xFFC02FF8 /* Mailbox 31 Identifier Low Register */ +#define CAN_MB31_ID1 0xFFC02FFC /* Mailbox 31 Identifier High Register */ + +/* CAN Mailbox Area Macros */ +#define CAN_MB_ID1(x) (CAN_MB00_ID1+((x)*0x20)) +#define CAN_MB_ID0(x) (CAN_MB00_ID0+((x)*0x20)) +#define CAN_MB_TIMESTAMP(x) (CAN_MB00_TIMESTAMP+((x)*0x20)) +#define CAN_MB_LENGTH(x) (CAN_MB00_LENGTH+((x)*0x20)) +#define CAN_MB_DATA3(x) (CAN_MB00_DATA3+((x)*0x20)) +#define CAN_MB_DATA2(x) (CAN_MB00_DATA2+((x)*0x20)) +#define CAN_MB_DATA1(x) (CAN_MB00_DATA1+((x)*0x20)) +#define CAN_MB_DATA0(x) (CAN_MB00_DATA0+((x)*0x20)) + + +/*********************************************************************************** */ +/* System MMR Register Bits and Macros */ +/******************************************************************************* */ + +/* ********************* PLL AND RESET MASKS ************************ */ +/* PLL_CTL Masks */ +#define PLL_CLKIN 0x0000 /* Pass CLKIN to PLL */ +#define PLL_CLKIN_DIV2 0x0001 /* Pass CLKIN/2 to PLL */ +#define DF 0x0001 /* 0: PLL = CLKIN, 1: PLL = CLKIN/2 */ +#define PLL_OFF 0x0002 /* Shut off PLL clocks */ + +#define STOPCK 0x0008 /* Core Clock Off */ +#define PDWN 0x0020 /* Put the PLL in a Deep Sleep state */ +#define IN_DELAY 0x0014 /* EBIU Input Delay Select */ +#define OUT_DELAY 0x00C0 /* EBIU Output Delay Select */ +#define BYPASS 0x0100 /* Bypass the PLL */ +#define MSEL 0x7E00 /* Multiplier Select For CCLK/VCO Factors */ + +/* PLL_CTL Macros */ +#ifdef _MISRA_RULES +#define SET_MSEL(x) (((x)&0x3Fu) << 0x9) /* Set MSEL = 0-63 --> VCO = CLKIN*MSEL */ +#define SET_OUT_DELAY(x) (((x)&0x03u) << 0x6) +#define SET_IN_DELAY(x) ((((x)&0x02u) << 0x3) | (((x)&0x01u) << 0x2)) +#else +#define SET_MSEL(x) (((x)&0x3F) << 0x9) /* Set MSEL = 0-63 --> VCO = CLKIN*MSEL */ +#define SET_OUT_DELAY(x) (((x)&0x03) << 0x6) +#define SET_IN_DELAY(x) ((((x)&0x02) << 0x3) | (((x)&0x01) << 0x2)) +#endif /* _MISRA_RULES */ + +/* PLL_DIV Masks */ +#define SSEL 0x000F /* System Select */ +#define CSEL 0x0030 /* Core Select */ +#define CSEL_DIV1 0x0000 /* CCLK = VCO / 1 */ +#define CSEL_DIV2 0x0010 /* CCLK = VCO / 2 */ +#define CSEL_DIV4 0x0020 /* CCLK = VCO / 4 */ +#define CSEL_DIV8 0x0030 /* CCLK = VCO / 8 */ + +#define SCLK_DIV(x) (x) /* SCLK = VCO / x */ + +/* PLL_DIV Macros */ +#ifdef _MISRA_RULES +#define SET_SSEL(x) ((x)&0xFu) /* Set SSEL = 0-15 --> SCLK = VCO/SSEL */ +#else +#define SET_SSEL(x) ((x)&0xF) /* Set SSEL = 0-15 --> SCLK = VCO/SSEL */ +#endif /* _MISRA_RULES */ + +/* PLL_STAT Masks */ +#define ACTIVE_PLLENABLED 0x0001 /* Processor In Active Mode With PLL Enabled */ +#define FULL_ON 0x0002 /* Processor In Full On Mode */ +#define ACTIVE_PLLDISABLED 0x0004 /* Processor In Active Mode With PLL Disabled */ +#define PLL_LOCKED 0x0020 /* PLL_LOCKCNT Has Been Reached */ + +/* VR_CTL Masks */ +#define FREQ 0x0003 /* Switching Oscillator Frequency For Regulator */ +#define HIBERNATE 0x0000 /* Powerdown/Bypass On-Board Regulation */ +#define FREQ_333 0x0001 /* Switching Frequency Is 333 kHz */ +#define FREQ_667 0x0002 /* Switching Frequency Is 667 kHz */ +#define FREQ_1000 0x0003 /* Switching Frequency Is 1 MHz */ + +#define GAIN 0x000C /* Voltage Level Gain */ +#define GAIN_5 0x0000 /* GAIN = 5 */ +#define GAIN_10 0x0004 /* GAIN = 10 */ +#define GAIN_20 0x0008 /* GAIN = 20 */ +#define GAIN_50 0x000C /* GAIN = 50 */ + +#define VLEV 0x00F0 /* Internal Voltage Level - Only Program Values Within Specifications */ +#define VLEV_100 0x0090 /* VLEV = 1.00 V (See Datasheet for Regulator Tolerance) */ +#define VLEV_105 0x00A0 /* VLEV = 1.05 V (See Datasheet for Regulator Tolerance) */ +#define VLEV_110 0x00B0 /* VLEV = 1.10 V (See Datasheet for Regulator Tolerance) */ +#define VLEV_115 0x00C0 /* VLEV = 1.15 V (See Datasheet for Regulator Tolerance) */ +#define VLEV_120 0x00D0 /* VLEV = 1.20 V (See Datasheet for Regulator Tolerance) */ +#define VLEV_125 0x00E0 /* VLEV = 1.25 V (See Datasheet for Regulator Tolerance) */ +#define VLEV_130 0x00F0 /* VLEV = 1.30 V (See Datasheet for Regulator Tolerance) */ + +#define WAKE 0x0100 /* Enable RTC/Reset Wakeup From Hibernate */ +#define CANWE 0x0200 /* Enable CAN Wakeup From Hibernate */ +#define MXVRWE 0x0400 /* Enable MXVR Wakeup From Hibernate */ +#define SCKELOW 0x8000 /* Do Not Drive SCKE High During Reset After Hibernate */ + +/* SWRST Mask */ +#define SYSTEM_RESET 0x0007 /* Initiates A System Software Reset */ +#define DOUBLE_FAULT 0x0008 /* Core Double Fault Causes Reset */ +#define RESET_DOUBLE 0x2000 /* SW Reset Generated By Core Double-Fault */ +#define RESET_WDOG 0x4000 /* SW Reset Generated By Watchdog Timer */ +#define RESET_SOFTWARE 0x8000 /* SW Reset Occurred Since Last Read Of SWRST */ + +/* SYSCR Masks */ +#define BMODE 0x0006 /* Boot Mode - Latched During HW Reset From Mode Pins */ +#define NOBOOT 0x0010 /* Execute From L1 or ASYNC Bank 0 When BMODE = 0 */ + + +/* ************* SYSTEM INTERRUPT CONTROLLER MASKS ***************** */ + +/* Peripheral Masks For SIC0_ISR, SIC0_IWR, SIC0_IMASK */ +#define PLL_WAKEUP_IRQ 0x00000001 /* PLL Wakeup Interrupt Request */ +#define DMAC0_ERR_IRQ 0x00000002 /* DMA Controller 0 Error Interrupt Request */ +#define PPI_ERR_IRQ 0x00000004 /* PPI Error Interrupt Request */ +#define SPORT0_ERR_IRQ 0x00000008 /* SPORT0 Error Interrupt Request */ +#define SPORT1_ERR_IRQ 0x00000010 /* SPORT1 Error Interrupt Request */ +#define SPI0_ERR_IRQ 0x00000020 /* SPI0 Error Interrupt Request */ +#define UART0_ERR_IRQ 0x00000040 /* UART0 Error Interrupt Request */ +#define RTC_IRQ 0x00000080 /* Real-Time Clock Interrupt Request */ +#define DMA0_IRQ 0x00000100 /* DMA Channel 0 (PPI) Interrupt Request */ +#define DMA1_IRQ 0x00000200 /* DMA Channel 1 (SPORT0 RX) Interrupt Request */ +#define DMA2_IRQ 0x00000400 /* DMA Channel 2 (SPORT0 TX) Interrupt Request */ +#define DMA3_IRQ 0x00000800 /* DMA Channel 3 (SPORT1 RX) Interrupt Request */ +#define DMA4_IRQ 0x00001000 /* DMA Channel 4 (SPORT1 TX) Interrupt Request */ +#define DMA5_IRQ 0x00002000 /* DMA Channel 5 (SPI) Interrupt Request */ +#define DMA6_IRQ 0x00004000 /* DMA Channel 6 (UART RX) Interrupt Request */ +#define DMA7_IRQ 0x00008000 /* DMA Channel 7 (UART TX) Interrupt Request */ +#define TIMER0_IRQ 0x00010000 /* Timer 0 Interrupt Request */ +#define TIMER1_IRQ 0x00020000 /* Timer 1 Interrupt Request */ +#define TIMER2_IRQ 0x00040000 /* Timer 2 Interrupt Request */ +#define PFA_IRQ 0x00080000 /* Programmable Flag Interrupt Request A */ +#define PFB_IRQ 0x00100000 /* Programmable Flag Interrupt Request B */ +#define MDMA0_0_IRQ 0x00200000 /* MemDMA0 Stream 0 Interrupt Request */ +#define MDMA0_1_IRQ 0x00400000 /* MemDMA0 Stream 1 Interrupt Request */ +#define WDOG_IRQ 0x00800000 /* Software Watchdog Timer Interrupt Request */ +#define DMAC1_ERR_IRQ 0x01000000 /* DMA Controller 1 Error Interrupt Request */ +#define SPORT2_ERR_IRQ 0x02000000 /* SPORT2 Error Interrupt Request */ +#define SPORT3_ERR_IRQ 0x04000000 /* SPORT3 Error Interrupt Request */ +#define MXVR_SD_IRQ 0x08000000 /* MXVR Synchronous Data Interrupt Request */ +#define SPI1_ERR_IRQ 0x10000000 /* SPI1 Error Interrupt Request */ +#define SPI2_ERR_IRQ 0x20000000 /* SPI2 Error Interrupt Request */ +#define UART1_ERR_IRQ 0x40000000 /* UART1 Error Interrupt Request */ +#define UART2_ERR_IRQ 0x80000000 /* UART2 Error Interrupt Request */ + +/* the following are for backwards compatibility */ +#define DMA0_ERR_IRQ DMAC0_ERR_IRQ +#define DMA1_ERR_IRQ DMAC1_ERR_IRQ + + +/* Peripheral Masks For SIC_ISR1, SIC_IWR1, SIC_IMASK1 */ +#define CAN_ERR_IRQ 0x00000001 /* CAN Error Interrupt Request */ +#define DMA8_IRQ 0x00000002 /* DMA Channel 8 (SPORT2 RX) Interrupt Request */ +#define DMA9_IRQ 0x00000004 /* DMA Channel 9 (SPORT2 TX) Interrupt Request */ +#define DMA10_IRQ 0x00000008 /* DMA Channel 10 (SPORT3 RX) Interrupt Request */ +#define DMA11_IRQ 0x00000010 /* DMA Channel 11 (SPORT3 TX) Interrupt Request */ +#define DMA12_IRQ 0x00000020 /* DMA Channel 12 Interrupt Request */ +#define DMA13_IRQ 0x00000040 /* DMA Channel 13 Interrupt Request */ +#define DMA14_IRQ 0x00000080 /* DMA Channel 14 (SPI1) Interrupt Request */ +#define DMA15_IRQ 0x00000100 /* DMA Channel 15 (SPI2) Interrupt Request */ +#define DMA16_IRQ 0x00000200 /* DMA Channel 16 (UART1 RX) Interrupt Request */ +#define DMA17_IRQ 0x00000400 /* DMA Channel 17 (UART1 TX) Interrupt Request */ +#define DMA18_IRQ 0x00000800 /* DMA Channel 18 (UART2 RX) Interrupt Request */ +#define DMA19_IRQ 0x00001000 /* DMA Channel 19 (UART2 TX) Interrupt Request */ +#define TWI0_IRQ 0x00002000 /* TWI0 Interrupt Request */ +#define TWI1_IRQ 0x00004000 /* TWI1 Interrupt Request */ +#define CAN_RX_IRQ 0x00008000 /* CAN Receive Interrupt Request */ +#define CAN_TX_IRQ 0x00010000 /* CAN Transmit Interrupt Request */ +#define MDMA1_0_IRQ 0x00020000 /* MemDMA1 Stream 0 Interrupt Request */ +#define MDMA1_1_IRQ 0x00040000 /* MemDMA1 Stream 1 Interrupt Request */ +#define MXVR_STAT_IRQ 0x00080000 /* MXVR Status Interrupt Request */ +#define MXVR_CM_IRQ 0x00100000 /* MXVR Control Message Interrupt Request */ +#define MXVR_AP_IRQ 0x00200000 /* MXVR Asynchronous Packet Interrupt */ + +/* the following are for backwards compatibility */ +#define MDMA0_IRQ MDMA1_0_IRQ +#define MDMA1_IRQ MDMA1_1_IRQ + +#ifdef _MISRA_RULES +#define _MF15 0xFu +#define _MF7 7u +#else +#define _MF15 0xF +#define _MF7 7 +#endif /* _MISRA_RULES */ + +/* SIC_IMASKx Masks */ +#define SIC_UNMASK_ALL 0x00000000 /* Unmask all peripheral interrupts */ +#define SIC_MASK_ALL 0xFFFFFFFF /* Mask all peripheral interrupts */ +#ifdef _MISRA_RULES +#define SIC_MASK(x) (1 << ((x)&0x1Fu)) /* Mask Peripheral #x interrupt */ +#define SIC_UNMASK(x) (0xFFFFFFFFu ^ (1 << ((x)&0x1Fu))) /* Unmask Peripheral #x interrupt */ +#else +#define SIC_MASK(x) (1 << ((x)&0x1F)) /* Mask Peripheral #x interrupt */ +#define SIC_UNMASK(x) (0xFFFFFFFF ^ (1 << ((x)&0x1F))) /* Unmask Peripheral #x interrupt */ +#endif /* _MISRA_RULES */ + +/* SIC_IWRx Masks */ +#define IWR_DISABLE_ALL 0x00000000 /* Wakeup Disable all peripherals */ +#define IWR_ENABLE_ALL 0xFFFFFFFF /* Wakeup Enable all peripherals */ +#ifdef _MISRA_RULES +#define IWR_ENABLE(x) (1 << ((x)&0x1Fu)) /* Wakeup Enable Peripheral #x */ +#define IWR_DISABLE(x) (0xFFFFFFFFu ^ (1 << ((x)&0x1Fu))) /* Wakeup Disable Peripheral #x */ +#else +#define IWR_ENABLE(x) (1 << ((x)&0x1F)) /* Wakeup Enable Peripheral #x */ +#define IWR_DISABLE(x) (0xFFFFFFFF ^ (1 << ((x)&0x1F))) /* Wakeup Disable Peripheral #x */ +#endif /* _MISRA_RULES */ + + +/* ********* WATCHDOG TIMER MASKS ******************** */ +/* Watchdog Timer WDOG_CTL Register Masks */ +#ifdef _MISRA_RULES +#define WDEV(x) (((x)<<1) & 0x0006u) /* event generated on roll over */ +#else +#define WDEV(x) (((x)<<1) & 0x0006) /* event generated on roll over */ +#endif /* _MISRA_RULES */ +#define WDEV_RESET 0x0000 /* generate reset event on roll over */ +#define WDEV_NMI 0x0002 /* generate NMI event on roll over */ +#define WDEV_GPI 0x0004 /* generate GP IRQ on roll over */ +#define WDEV_NONE 0x0006 /* no event on roll over */ +#define WDEN 0x0FF0 /* enable watchdog */ +#define WDDIS 0x0AD0 /* disable watchdog */ +#define WDRO 0x8000 /* watchdog rolled over latch */ + +/* deprecated WDOG_CTL Register Masks for legacy code */ +#define ICTL WDEV +#define ENABLE_RESET WDEV_RESET +#define WDOG_RESET WDEV_RESET +#define ENABLE_NMI WDEV_NMI +#define WDOG_NMI WDEV_NMI +#define ENABLE_GPI WDEV_GPI +#define WDOG_GPI WDEV_GPI +#define DISABLE_EVT WDEV_NONE +#define WDOG_NONE WDEV_NONE + +#define TMR_EN WDEN +#define WDOG_DISABLE WDDIS +#define TRO WDRO + +#define ICTL_P0 0x01 +#define ICTL_P1 0x02 +#define TRO_P 0x0F + + +/* *************** REAL TIME CLOCK MASKS **************************/ +/* RTC_STAT and RTC_ALARM register */ +#define RTSEC 0x0000003F /* Real-Time Clock Seconds */ +#define RTMIN 0x00000FC0 /* Real-Time Clock Minutes */ +#define RTHR 0x0001F000 /* Real-Time Clock Hours */ +#define RTDAY 0xFFFE0000 /* Real-Time Clock Days */ + +/* RTC_ICTL register */ +#define SWIE 0x0001 /* Stopwatch Interrupt Enable */ +#define AIE 0x0002 /* Alarm Interrupt Enable */ +#define SIE 0x0004 /* Seconds (1 Hz) Interrupt Enable */ +#define MIE 0x0008 /* Minutes Interrupt Enable */ +#define HIE 0x0010 /* Hours Interrupt Enable */ +#define DIE 0x0020 /* 24 Hours (Days) Interrupt Enable */ +#define DAIE 0x0040 /* Day Alarm (Day, Hour, Minute, Second) Interrupt Enable */ +#define WCIE 0x8000 /* Write Complete Interrupt Enable */ + +/* RTC_ISTAT register */ +#define SWEF 0x0001 /* Stopwatch Event Flag */ +#define AEF 0x0002 /* Alarm Event Flag */ +#define SEF 0x0004 /* Seconds (1 Hz) Event Flag */ +#define MEF 0x0008 /* Minutes Event Flag */ +#define HEF 0x0010 /* Hours Event Flag */ +#define DEF 0x0020 /* 24 Hours (Days) Event Flag */ +#define DAEF 0x0040 /* Day Alarm (Day, Hour, Minute, Second) Event Flag */ +#define WPS 0x4000 /* Write Pending Status (RO) */ +#define WCOM 0x8000 /* Write Complete */ + +/* RTC_FAST Mask (RTC_PREN Mask) */ +#define ENABLE_PRESCALE 0x00000001 /* Enable prescaler so RTC runs at 1 Hz */ +#define PREN 0x00000001 + /* ** Must be set after power-up for proper operation of RTC */ + +/* Deprecated RTC_STAT and RTC_ALARM Masks */ +#define RTC_SEC RTSEC /* Real-Time Clock Seconds */ +#define RTC_MIN RTMIN /* Real-Time Clock Minutes */ +#define RTC_HR RTHR /* Real-Time Clock Hours */ +#define RTC_DAY RTDAY /* Real-Time Clock Days */ + +/* Deprecated RTC_ICTL/RTC_ISTAT Masks */ +#define STOPWATCH SWIE /* Stopwatch Interrupt Enable */ +#define ALARM AIE /* Alarm Interrupt Enable */ +#define SECOND SIE /* Seconds (1 Hz) Interrupt Enable */ +#define MINUTE MIE /* Minutes Interrupt Enable */ +#define HOUR HIE /* Hours Interrupt Enable */ +#define DAY DIE /* 24 Hours (Days) Interrupt Enable */ +#define DAY_ALARM DAIE /* Day Alarm (Day, Hour, Minute, Second) Interrupt Enable */ +#define WRITE_COMPLETE WCIE /* Write Complete Interrupt Enable */ + + +/* ***************************** UART CONTROLLER MASKS ********************** */ +/* UARTx_LCR Register */ +#ifdef _MISRA_RULES +#define WLS(x) (((x)-5u) & 0x03u) /* Word Length Select */ +#else +#define WLS(x) (((x)-5) & 0x03) /* Word Length Select */ +#endif /* _MISRA_RULES */ +#define STB 0x04 /* Stop Bits */ +#define PEN 0x08 /* Parity Enable */ +#define EPS 0x10 /* Even Parity Select */ +#define STP 0x20 /* Stick Parity */ +#define SB 0x40 /* Set Break */ +#define DLAB 0x80 /* Divisor Latch Access */ + +#define DLAB_P 0x07 +#define SB_P 0x06 +#define STP_P 0x05 +#define EPS_P 0x04 +#define PEN_P 0x03 +#define STB_P 0x02 +#define WLS_P1 0x01 +#define WLS_P0 0x00 + +/* UARTx_MCR Register */ +#define LOOP_ENA 0x10 /* Loopback Mode Enable */ +#define LOOP_ENA_P 0x04 +/* Deprecated UARTx_MCR Mask */ + +/* UARTx_LSR Register */ +#define DR 0x01 /* Data Ready */ +#define OE 0x02 /* Overrun Error */ +#define PE 0x04 /* Parity Error */ +#define FE 0x08 /* Framing Error */ +#define BI 0x10 /* Break Interrupt */ +#define THRE 0x20 /* THR Empty */ +#define TEMT 0x40 /* TSR and UART_THR Empty */ + +#define TEMP_P 0x06 +#define THRE_P 0x05 +#define BI_P 0x04 +#define FE_P 0x03 +#define PE_P 0x02 +#define OE_P 0x01 +#define DR_P 0x00 + +/* UARTx_IER Register */ +#define ERBFI 0x01 /* Enable Receive Buffer Full Interrupt */ +#define ETBEI 0x02 /* Enable Transmit Buffer Empty Interrupt */ +#define ELSI 0x04 /* Enable RX Status Interrupt */ + +#define ELSI_P 0x02 +#define ETBEI_P 0x01 +#define ERBFI_P 0x00 + +/* UARTx_IIR Register */ +#define NINT 0x01 +#define STATUS_P1 0x02 +#define STATUS_P0 0x01 +#define NINT_P 0x00 + +/* UARTx_GCTL Register */ +#define UCEN 0x01 /* Enable UARTx Clocks */ +#define IREN 0x02 /* Enable IrDA Mode */ +#define TPOLC 0x04 /* IrDA TX Polarity Change */ +#define RPOLC 0x08 /* IrDA RX Polarity Change */ +#define FPE 0x10 /* Force Parity Error On Transmit */ +#define FFE 0x20 /* Force Framing Error On Transmit */ + +#define FFE_P 0x05 +#define FPE_P 0x04 +#define RPOLC_P 0x03 +#define TPOLC_P 0x02 +#define IREN_P 0x01 +#define UCEN_P 0x00 + + +/* ********** SERIAL PORT MASKS ********************** */ +/* SPORTx_TCR1 Masks */ +#define TSPEN 0x0001 /* TX enable */ +#define ITCLK 0x0002 /* Internal TX Clock Select */ +#define TDTYPE 0x000C /* TX Data Formatting Select */ +#define DTYPE_NORM 0x0000 /* Data Format Normal */ +#define DTYPE_ULAW 0x0008 /* Compand Using u-Law */ +#define DTYPE_ALAW 0x000C /* Compand Using A-Law */ +#define TLSBIT 0x0010 /* TX Bit Order */ +#define ITFS 0x0200 /* Internal TX Frame Sync Select */ +#define TFSR 0x0400 /* TX Frame Sync Required Select */ +#define DITFS 0x0800 /* Data Independent TX Frame Sync Select */ +#define LTFS 0x1000 /* Low TX Frame Sync Select */ +#define LATFS 0x2000 /* Late TX Frame Sync Select */ +#define TCKFE 0x4000 /* TX Clock Falling Edge Select */ +/* SPORTx_RCR1 Deprecated Masks */ +#define TULAW DTYPE_ULAW /* Compand Using u-Law */ +#define TALAW DTYPE_ALAW /* Compand Using A-Law */ + +/* SPORTx_TCR2 Masks */ +#ifdef _MISRA_RULES +#define SLEN(x) ((x)&0x1Fu) /* SPORT TX Word Length (2 - 31) */ +#else +#define SLEN(x) ((x)&0x1F) /* SPORT TX Word Length (2 - 31) */ +#endif /* _MISRA_RULES */ +#define TXSE 0x0100 /*TX Secondary Enable */ +#define TSFSE 0x0200 /*TX Stereo Frame Sync Enable */ +#define TRFST 0x0400 /*TX Right-First Data Order */ + +/* SPORTx_RCR1 Masks */ +#define RSPEN 0x0001 /* RX enable */ +#define IRCLK 0x0002 /* Internal RX Clock Select */ +#define RDTYPE 0x000C /* RX Data Formatting Select */ +#define DTYPE_NORM 0x0000 /* no companding */ +#define DTYPE_ULAW 0x0008 /* Compand Using u-Law */ +#define DTYPE_ALAW 0x000C /* Compand Using A-Law */ +#define RLSBIT 0x0010 /* RX Bit Order */ +#define IRFS 0x0200 /* Internal RX Frame Sync Select */ +#define RFSR 0x0400 /* RX Frame Sync Required Select */ +#define LRFS 0x1000 /* Low RX Frame Sync Select */ +#define LARFS 0x2000 /* Late RX Frame Sync Select */ +#define RCKFE 0x4000 /* RX Clock Falling Edge Select */ +/* SPORTx_RCR1 Deprecated Masks */ +#define RULAW DTYPE_ULAW /* Compand Using u-Law */ +#define RALAW DTYPE_ALAW /* Compand Using A-Law */ + +/* SPORTx_RCR2 Masks */ +#ifdef _MISRA_RULES +#define SLEN(x) ((x)&0x1Fu) /* SPORT RX Word Length (2 - 31) */ +#else +#define SLEN(x) ((x)&0x1F) /* SPORT RX Word Length (2 - 31) */ +#endif /* _MISRA_RULES */ +#define RXSE 0x0100 /*RX Secondary Enable */ +#define RSFSE 0x0200 /*RX Stereo Frame Sync Enable */ +#define RRFST 0x0400 /*Right-First Data Order */ + +/*SPORTx_STAT Masks */ +#define RXNE 0x0001 /*RX FIFO Not Empty Status */ +#define RUVF 0x0002 /*RX Underflow Status */ +#define ROVF 0x0004 /*RX Overflow Status */ +#define TXF 0x0008 /*TX FIFO Full Status */ +#define TUVF 0x0010 /*TX Underflow Status */ +#define TOVF 0x0020 /*TX Overflow Status */ +#define TXHRE 0x0040 /*TX Hold Register Empty */ + +/*SPORTx_MCMC1 Masks */ +#define WOFF 0x000003FF /*Multichannel Window Offset Field */ +/* SPORTx_MCMC1 Macros */ +#ifdef _MISRA_RULES +#define SET_WOFF(x) ((x) & 0x3FFu) /* Multichannel Window Offset Field */ +/* Only use SET_WSIZE Macro With Logic OR While Setting Lower Order Bits */ +#define SET_WSIZE(x) (((((x)>>0x3)-1u)&0xFu) << 0xC) /* Multichannel Window Size = (x/8)-1 */ +#else +#define SET_WOFF(x) ((x) & 0x3FF) /* Multichannel Window Offset Field */ +/* Only use SET_WSIZE Macro With Logic OR While Setting Lower Order Bits */ +#define SET_WSIZE(x) (((((x)>>0x3)-1)&0xF) << 0xC) /* Multichannel Window Size = (x/8)-1 */ +#endif /* _MISRA_RULES */ + + +/*SPORTx_MCMC2 Masks */ +#define MCCRM 0x0003 /*Multichannel Clock Recovery Mode */ +#define REC_BYPASS 0x0000 /* Bypass Mode (No Clock Recovery) */ +#define REC_2FROM4 0x0002 /* Recover 2 MHz Clock from 4 MHz Clock */ +#define REC_8FROM16 0x0003 /* Recover 8 MHz Clock from 16 MHz Clock */ +#define MCDTXPE 0x0004 /*Multichannel DMA Transmit Packing */ +#define MCDRXPE 0x0008 /*Multichannel DMA Receive Packing */ +#define MCMEN 0x0010 /*Multichannel Frame Mode Enable */ +#define FSDR 0x0080 /*Multichannel Frame Sync to Data Relationship */ +#define MFD 0xF000 /*Multichannel Frame Delay */ +#define MFD_0 0x0000 /* Multichannel Frame Delay = 0 */ +#define MFD_1 0x1000 /* Multichannel Frame Delay = 1 */ +#define MFD_2 0x2000 /* Multichannel Frame Delay = 2 */ +#define MFD_3 0x3000 /* Multichannel Frame Delay = 3 */ +#define MFD_4 0x4000 /* Multichannel Frame Delay = 4 */ +#define MFD_5 0x5000 /* Multichannel Frame Delay = 5 */ +#define MFD_6 0x6000 /* Multichannel Frame Delay = 6 */ +#define MFD_7 0x7000 /* Multichannel Frame Delay = 7 */ +#define MFD_8 0x8000 /* Multichannel Frame Delay = 8 */ +#define MFD_9 0x9000 /* Multichannel Frame Delay = 9 */ +#define MFD_10 0xA000 /* Multichannel Frame Delay = 10 */ +#define MFD_11 0xB000 /* Multichannel Frame Delay = 11 */ +#define MFD_12 0xC000 /* Multichannel Frame Delay = 12 */ +#define MFD_13 0xD000 /* Multichannel Frame Delay = 13 */ +#define MFD_14 0xE000 /* Multichannel Frame Delay = 14 */ +#define MFD_15 0xF000 /* Multichannel Frame Delay = 15 */ + + +/* ********* PARALLEL PERIPHERAL INTERFACE (PPI) MASKS **************** */ +/* PPI_CONTROL Masks */ +#define PORT_EN 0x0001 /* PPI Port Enable */ +#define PORT_DIR 0x0002 /* PPI Port Direction */ +#define XFR_TYPE 0x000C /* PPI Transfer Type */ +#define PORT_CFG 0x0030 /* PPI Port Configuration */ +#define FLD_SEL 0x0040 /* PPI Active Field Select */ +#define PACK_EN 0x0080 /* PPI Packing Mode */ +/* previous versions of defBF539.h erroneously included DMA32 (PPI 32-bit DMA Enable) */ +#define SKIP_EN 0x0200 /* PPI Skip Element Enable */ +#define SKIP_EO 0x0400 /* PPI Skip Even/Odd Elements */ +#define DLENGTH 0x3800 /* PPI Data Length */ +#define DLEN_8 0x0 /* PPI Data Length mask for DLEN=8 */ +#define DLEN_10 0x0800 /* Data Length = 10 Bits */ +#define DLEN_11 0x1000 /* Data Length = 11 Bits */ +#define DLEN_12 0x1800 /* Data Length = 12 Bits */ +#define DLEN_13 0x2000 /* Data Length = 13 Bits */ +#define DLEN_14 0x2800 /* Data Length = 14 Bits */ +#define DLEN_15 0x3000 /* Data Length = 15 Bits */ +#define DLEN_16 0x3800 /* Data Length = 16 Bits */ +#ifdef _MISRA_RULES +#define DLEN(x) ((((x)-9u) & 0x07u) << 11) /* PPI Data Length (only works for x=10-->x=16) */ +#else +#define DLEN(x) ((((x)-9) & 0x07) << 11) /* PPI Data Length (only works for x=10-->x=16) */ +#endif /* _MISRA_RULES */ +#define POL 0xC000 /* PPI Signal Polarities */ +#define POLC 0x4000 /* PPI Clock Polarity */ +#define POLS 0x8000 /* PPI Frame Sync Polarity */ + + +/* PPI_STATUS Masks */ +#define FLD 0x0400 /* Field Indicator */ +#define FT_ERR 0x0800 /* Frame Track Error */ +#define OVR 0x1000 /* FIFO Overflow Error */ +#define UNDR 0x2000 /* FIFO Underrun Error */ +#define ERR_DET 0x4000 /* Error Detected Indicator */ +#define ERR_NCOR 0x8000 /* Error Not Corrected Indicator */ + + +/* ********** DMA CONTROLLER MASKS ***********************/ +/* DMAx_CONFIG, MDMA_yy_CONFIG Masks */ +#define DMAEN 0x0001 /* Channel Enable */ +#define WNR 0x0002 /* Channel Direction (W/R*) */ +#define WDSIZE_8 0x0000 /* Word Size 8 bits */ +#define WDSIZE_16 0x0004 /* Word Size 16 bits */ +#define WDSIZE_32 0x0008 /* Word Size 32 bits */ +#define DMA2D 0x0010 /* 2D/1D* Mode */ +#define RESTART 0x0020 /* Restart */ +#define DI_SEL 0x0040 /* Data Interrupt Select */ +#define DI_EN 0x0080 /* Data Interrupt Enable */ +#define NDSIZE 0x0900 /* Next Descriptor Size */ +#define NDSIZE_0 0x0000 /* Next Descriptor Size = 0 (Stop/Autobuffer) */ +#define NDSIZE_1 0x0100 /* Next Descriptor Size = 1 */ +#define NDSIZE_2 0x0200 /* Next Descriptor Size = 2 */ +#define NDSIZE_3 0x0300 /* Next Descriptor Size = 3 */ +#define NDSIZE_4 0x0400 /* Next Descriptor Size = 4 */ +#define NDSIZE_5 0x0500 /* Next Descriptor Size = 5 */ +#define NDSIZE_6 0x0600 /* Next Descriptor Size = 6 */ +#define NDSIZE_7 0x0700 /* Next Descriptor Size = 7 */ +#define NDSIZE_8 0x0800 /* Next Descriptor Size = 8 */ +#define NDSIZE_9 0x0900 /* Next Descriptor Size = 9 */ + +#define DMAFLOW 0x7000 /* Flow Control */ +#define DMAFLOW_STOP 0x0000 /* Stop Mode */ +#define DMAFLOW_AUTO 0x1000 /* Autobuffer Mode */ +#define DMAFLOW_ARRAY 0x4000 /* Descriptor Array Mode */ +#define DMAFLOW_SMALL 0x6000 /* Small Model Descriptor List Mode */ +#define DMAFLOW_LARGE 0x7000 /* Large Model Descriptor List Mode */ + +#define DMAEN_P 0x0 /* Channel Enable */ +#define WNR_P 0x1 /* Channel Direction (W/R*) */ +#define DMA2D_P 0x4 /* 2D/1D* Mode */ +#define RESTART_P 0x5 /* Restart */ +#define DI_SEL_P 0x6 /* Data Interrupt Select */ +#define DI_EN_P 0x7 /* Data Interrupt Enable */ + +/* DMAx_IRQ_STATUS, MDMA_yy_IRQ_STATUS Masks */ +#define DMA_DONE 0x0001 /* DMA Done Indicator */ +#define DMA_ERR 0x0002 /* DMA Error Indicator */ +#define DFETCH 0x0004 /* Descriptor Fetch Indicator */ +#define DMA_RUN 0x0008 /* DMA Running Indicator */ + +#define DMA_DONE_P 0x0 /* DMA Done Indicator */ +#define DMA_ERR_P 0x1 /* DMA Error Indicator */ +#define DFETCH_P 0x2 /* Descriptor Fetch Indicator */ +#define DMA_RUN_P 0x3 /* DMA Running Indicator */ + +/* DMAx_PERIPHERAL_MAP, MDMA_yy_PERIPHERAL_MAP Masks */ + +#define CTYPE 0x0040 /* DMA Channel Type Indicator */ +#define CTYPE_P 0x6 /* DMA Channel Type Indicator BIT POSITION */ +#define PCAP8 0x0080 /* DMA 8-bit Operation Indicator */ +#define PCAP16 0x0100 /* DMA 16-bit Operation Indicator */ +#define PCAP32 0x0200 /* DMA 32-bit Operation Indicator */ +#define PCAPWR 0x0400 /* DMA Write Operation Indicator */ +#define PCAPRD 0x0800 /* DMA Read Operation Indicator */ +#define PMAP 0xF000 /* DMA Peripheral Map Field */ + +/* PMAP Encodings For DMA Controller 0 */ +#define PMAP_PPI 0x0000 /* PMAP PPI Port DMA */ +#define PMAP_SPORT0RX 0x1000 /* PMAP SPORT0 Receive DMA */ +#define PMAP_SPORT0TX 0x2000 /* PMAP SPORT0 Transmit DMA */ +#define PMAP_SPORT1RX 0x3000 /* PMAP SPORT1 Receive DMA */ +#define PMAP_SPORT1TX 0x4000 /* PMAP SPORT1 Transmit DMA */ +#define PMAP_SPI0 0x5000 /* PMAP SPI DMA */ +#define PMAP_UART0RX 0x6000 /* PMAP UART Receive DMA */ +#define PMAP_UART0TX 0x7000 /* PMAP UART Transmit DMA */ + +/* PMAP Encodings For DMA Controller 1 */ +#define PMAP_SPORT2RX 0x0000 /* PMAP SPORT2 Receive DMA */ +#define PMAP_SPORT2TX 0x1000 /* PMAP SPORT2 Transmit DMA */ +#define PMAP_SPORT3RX 0x2000 /* PMAP SPORT3 Receive DMA */ +#define PMAP_SPORT3TX 0x3000 /* PMAP SPORT3 Transmit DMA */ +#define PMAP_SPI1 0x6000 /* PMAP SPI1 DMA */ +#define PMAP_SPI2 0x7000 /* PMAP SPI2 DMA */ +#define PMAP_UART1RX 0x8000 /* PMAP UART1 Receive DMA */ +#define PMAP_UART1TX 0x9000 /* PMAP UART1 Transmit DMA */ +#define PMAP_UART2RX 0xA000 /* PMAP UART2 Receive DMA */ +#define PMAP_UART2TX 0xB000 /* PMAP UART2 Transmit DMA */ + + +/* ************* GENERAL PURPOSE TIMER MASKS ******************** */ +/* PWM Timer bit definitions */ +/* TIMER_ENABLE Register */ +#define TIMEN0 0x0001 /* Enable Timer 0 */ +#define TIMEN1 0x0002 /* Enable Timer 1 */ +#define TIMEN2 0x0004 /* Enable Timer 2 */ + +#define TIMEN0_P 0x00 +#define TIMEN1_P 0x01 +#define TIMEN2_P 0x02 + +/* TIMER_DISABLE Register */ +#define TIMDIS0 0x0001 /* Disable Timer 0 */ +#define TIMDIS1 0x0002 /* Disable Timer 1 */ +#define TIMDIS2 0x0004 /* Disable Timer 2 */ + +#define TIMDIS0_P 0x00 +#define TIMDIS1_P 0x01 +#define TIMDIS2_P 0x02 + +/* TIMER_STATUS Register */ +#define TIMIL0 0x0001 /* Timer 0 Interrupt */ +#define TIMIL1 0x0002 /* Timer 1 Interrupt */ +#define TIMIL2 0x0004 /* Timer 2 Interrupt */ +#define TOVF_ERR0 0x0010 /* Timer 0 Counter Overflow */ +#define TOVF_ERR1 0x0020 /* Timer 1 Counter Overflow */ +#define TOVF_ERR2 0x0040 /* Timer 2 Counter Overflow */ +#define TRUN0 0x1000 /* Timer 0 Slave Enable Status */ +#define TRUN1 0x2000 /* Timer 1 Slave Enable Status */ +#define TRUN2 0x4000 /* Timer 2 Slave Enable Status */ + +#define TIMIL0_P 0x00 +#define TIMIL1_P 0x01 +#define TIMIL2_P 0x02 +#define TOVF_ERR0_P 0x04 +#define TOVF_ERR1_P 0x05 +#define TOVF_ERR2_P 0x06 +#define TRUN0_P 0x0C +#define TRUN1_P 0x0D +#define TRUN2_P 0x0E + +/* Alternate Deprecated Macros Provided For Backwards Code Compatibility */ +#define TOVL_ERR0 TOVF_ERR0 +#define TOVL_ERR1 TOVF_ERR1 +#define TOVL_ERR2 TOVF_ERR2 +#define TOVL_ERR0_P TOVF_ERR0_P +#define TOVL_ERR1_P TOVF_ERR1_P +#define TOVL_ERR2_P TOVF_ERR2_P + +/* TIMERx_CONFIG Registers */ +#define PWM_OUT 0x0001 +#define WDTH_CAP 0x0002 +#define EXT_CLK 0x0003 +#define PULSE_HI 0x0004 +#define PERIOD_CNT 0x0008 +#define IRQ_ENA 0x0010 +#define TIN_SEL 0x0020 +#define OUT_DIS 0x0040 +#define CLK_SEL 0x0080 +#define TOGGLE_HI 0x0100 +#define EMU_RUN 0x0200 +#ifdef _MISRA_RULES +#define ERR_TYP(x) (((x) & 0x03u) << 14) +#else +#define ERR_TYP(x) (((x) & 0x03) << 14) +#endif /* _MISRA_RULES */ + +#define TMODE_P0 0x00 +#define TMODE_P1 0x01 +#define PULSE_HI_P 0x02 +#define PERIOD_CNT_P 0x03 +#define IRQ_ENA_P 0x04 +#define TIN_SEL_P 0x05 +#define OUT_DIS_P 0x06 +#define CLK_SEL_P 0x07 +#define TOGGLE_HI_P 0x08 +#define EMU_RUN_P 0x09 +#define ERR_TYP_P0 0x0E +#define ERR_TYP_P1 0x0F + + +/*/ ****************** GENERAL-PURPOSE I/O ********************* */ +/* Flag I/O (FIO_) Masks */ +#define PF0 0x0001 +#define PF1 0x0002 +#define PF2 0x0004 +#define PF3 0x0008 +#define PF4 0x0010 +#define PF5 0x0020 +#define PF6 0x0040 +#define PF7 0x0080 +#define PF8 0x0100 +#define PF9 0x0200 +#define PF10 0x0400 +#define PF11 0x0800 +#define PF12 0x1000 +#define PF13 0x2000 +#define PF14 0x4000 +#define PF15 0x8000 + +/* PORT F BIT POSITIONS */ +#define PF0_P 0x0 +#define PF1_P 0x1 +#define PF2_P 0x2 +#define PF3_P 0x3 +#define PF4_P 0x4 +#define PF5_P 0x5 +#define PF6_P 0x6 +#define PF7_P 0x7 +#define PF8_P 0x8 +#define PF9_P 0x9 +#define PF10_P 0xA +#define PF11_P 0xB +#define PF12_P 0xC +#define PF13_P 0xD +#define PF14_P 0xE +#define PF15_P 0xF + + +/******************* GPIO MASKS *********************/ +/* Port C Masks */ +#define PC0 0x0001 +#define PC1 0x0002 +#define PC4 0x0010 +#define PC5 0x0020 +#define PC6 0x0040 +#define PC7 0x0080 +#define PC8 0x0100 +#define PC9 0x0200 +/* Port C Bit Positions */ +#define PC0_P 0x0 +#define PC1_P 0x1 +#define PC4_P 0x4 +#define PC5_P 0x5 +#define PC6_P 0x6 +#define PC7_P 0x7 +#define PC8_P 0x8 +#define PC9_P 0x9 + +/* Port D */ +#define PD0 0x0001 +#define PD1 0x0002 +#define PD2 0x0004 +#define PD3 0x0008 +#define PD4 0x0010 +#define PD5 0x0020 +#define PD6 0x0040 +#define PD7 0x0080 +#define PD8 0x0100 +#define PD9 0x0200 +#define PD10 0x0400 +#define PD11 0x0800 +#define PD12 0x1000 +#define PD13 0x2000 +#define PD14 0x4000 +#define PD15 0x8000 +/* Port D Bit Positions */ +#define PD0_P 0x0 +#define PD1_P 0x1 +#define PD2_P 0x2 +#define PD3_P 0x3 +#define PD4_P 0x4 +#define PD5_P 0x5 +#define PD6_P 0x6 +#define PD7_P 0x7 +#define PD8_P 0x8 +#define PD9_P 0x9 +#define PD10_P 0xA +#define PD11_P 0xB +#define PD12_P 0xC +#define PD13_P 0xD +#define PD14_P 0xE +#define PD15_P 0xF + +/* Port E */ +#define PE0 0x0001 +#define PE1 0x0002 +#define PE2 0x0004 +#define PE3 0x0008 +#define PE4 0x0010 +#define PE5 0x0020 +#define PE6 0x0040 +#define PE7 0x0080 +#define PE8 0x0100 +#define PE9 0x0200 +#define PE10 0x0400 +#define PE11 0x0800 +#define PE12 0x1000 +#define PE13 0x2000 +#define PE14 0x4000 +#define PE15 0x8000 +/* Port E Bit Positions */ +#define PE0_P 0x0 +#define PE1_P 0x1 +#define PE2_P 0x2 +#define PE3_P 0x3 +#define PE4_P 0x4 +#define PE5_P 0x5 +#define PE6_P 0x6 +#define PE7_P 0x7 +#define PE8_P 0x8 +#define PE9_P 0x9 +#define PE10_P 0xA +#define PE11_P 0xB +#define PE12_P 0xC +#define PE13_P 0xD +#define PE14_P 0xE +#define PE15_P 0xF + + +/* *********** SERIAL PERIPHERAL INTERFACE (SPI) MASKS **************** */ +/* SPIx_CTL Masks */ +#define TIMOD 0x0003 /* Transfer Initiate Mode */ +#define RDBR_CORE 0x0000 /* RDBR Read Initiates, IRQ When RDBR Full */ +#define TDBR_CORE 0x0001 /* TDBR Write Initiates, IRQ When TDBR Empty */ +#define RDBR_DMA 0x0002 /* DMA Read, DMA Until FIFO Empty */ +#define TDBR_DMA 0x0003 /* DMA Write, DMA Until FIFO Full */ +#define SZ 0x0004 /* Send Zero (When TDBR Empty, Send Zero/Last*) */ +#define GM 0x0008 /* Get More (When RDBR Full, Overwrite/Discard*) */ +#define PSSE 0x0010 /* Slave-Select Input Enable */ +#define EMISO 0x0020 /* Enable MISO As Output */ +#define SIZE 0x0100 /* Size of Words (16/8* Bits) */ +#define LSBF 0x0200 /* LSB First */ +#define CPHA 0x0400 /* Clock Phase */ +#define CPOL 0x0800 /* Clock Polarity */ +#define MSTR 0x1000 /* Master/Slave* */ +#define WOM 0x2000 /* Write Open Drain Master */ +#define SPE 0x4000 /* SPI Enable */ + +/* SPIx_FLG Masks */ +#define FLS1 0x0002 /* Enables (=1) SPI_FLOUT1 as flag output for SPI Slave-select */ +#define FLS2 0x0004 /* Enables (=1) SPI_FLOUT2 as flag output for SPI Slave-select */ +#define FLS3 0x0008 /* Enables (=1) SPI_FLOUT3 as flag output for SPI Slave-select */ +#define FLS4 0x0010 /* Enables (=1) SPI_FLOUT4 as flag output for SPI Slave-select */ +#define FLS5 0x0020 /* Enables (=1) SPI_FLOUT5 as flag output for SPI Slave-select */ +#define FLS6 0x0040 /* Enables (=1) SPI_FLOUT6 as flag output for SPI Slave-select */ +#define FLS7 0x0080 /* Enables (=1) SPI_FLOUT7 as flag output for SPI Slave-select */ + +#define FLG1 0x0200 /* Activates (=0) SPI_FLOUT1 as flag output for SPI Slave-select */ +#define FLG2 0x0400 /* Activates (=0) SPI_FLOUT2 as flag output for SPI Slave-select */ +#define FLG3 0x0800 /* Activates (=0) SPI_FLOUT3 as flag output for SPI Slave-select */ +#define FLG4 0x1000 /* Activates (=0) SPI_FLOUT4 as flag output for SPI Slave-select */ +#define FLG5 0x2000 /* Activates (=0) SPI_FLOUT5 as flag output for SPI Slave-select */ +#define FLG6 0x4000 /* Activates (=0) SPI_FLOUT6 as flag output for SPI Slave-select */ +#define FLG7 0x8000 /* Activates (=0) SPI_FLOUT7 as flag output for SPI Slave-select */ + +/* SPIx_FLG Bit Positions */ +#define FLS1_P 0x0001 /* Enables (=1) SPI_FLOUT1 as flag output for SPI Slave-select */ +#define FLS2_P 0x0002 /* Enables (=1) SPI_FLOUT2 as flag output for SPI Slave-select */ +#define FLS3_P 0x0003 /* Enables (=1) SPI_FLOUT3 as flag output for SPI Slave-select */ +#define FLS4_P 0x0004 /* Enables (=1) SPI_FLOUT4 as flag output for SPI Slave-select */ +#define FLS5_P 0x0005 /* Enables (=1) SPI_FLOUT5 as flag output for SPI Slave-select */ +#define FLS6_P 0x0006 /* Enables (=1) SPI_FLOUT6 as flag output for SPI Slave-select */ +#define FLS7_P 0x0007 /* Enables (=1) SPI_FLOUT7 as flag output for SPI Slave-select */ +#define FLG1_P 0x0009 /* Activates (=0) SPI_FLOUT1 as flag output for SPI Slave-select */ +#define FLG2_P 0x000A /* Activates (=0) SPI_FLOUT2 as flag output for SPI Slave-select */ +#define FLG3_P 0x000B /* Activates (=0) SPI_FLOUT3 as flag output for SPI Slave-select */ +#define FLG4_P 0x000C /* Activates (=0) SPI_FLOUT4 as flag output for SPI Slave-select */ +#define FLG5_P 0x000D /* Activates (=0) SPI_FLOUT5 as flag output for SPI Slave-select */ +#define FLG6_P 0x000E /* Activates (=0) SPI_FLOUT6 as flag output for SPI Slave-select */ +#define FLG7_P 0x000F /* Activates (=0) SPI_FLOUT7 as flag output for SPI Slave-select */ + +/* SPIx_STAT Masks */ +#define SPIF 0x0001 /* Set (=1) when SPI single-word transfer complete */ +#define MODF 0x0002 /* Set (=1) in a master device when some other device tries to become master */ +#define TXE 0x0004 /* Set (=1) when transmission occurs with no new data in SPI_TDBR */ +#define TXS 0x0008 /* SPI_TDBR Data Buffer Status (0=Empty, 1=Full) */ +#define RBSY 0x0010 /* Set (=1) when data is received with RDBR full */ +#define RXS 0x0020 /* SPI_RDBR Data Buffer Status (0=Empty, 1=Full) */ +#define TXCOL 0x0040 /* When set (=1), corrupt data may have been transmitted */ + +/* SPIx_FLG Masks */ +#define FLG1E 0xFDFF /* Activates SPI_FLOUT1 */ +#define FLG2E 0xFBFF /* Activates SPI_FLOUT2 */ +#define FLG3E 0xF7FF /* Activates SPI_FLOUT3 */ +#define FLG4E 0xEFFF /* Activates SPI_FLOUT4 */ +#define FLG5E 0xDFFF /* Activates SPI_FLOUT5 */ +#define FLG6E 0xBFFF /* Activates SPI_FLOUT6 */ +#define FLG7E 0x7FFF /* Activates SPI_FLOUT7 */ + + +/* ********************* ASYNCHRONOUS MEMORY CONTROLLER MASKS ************* */ +/* EBIU_AMGCTL Masks */ +#define AMCKEN 0x0001 /* Enable CLKOUT */ +#define AMBEN_NONE 0x0000 /* All Banks Disabled */ +#define AMBEN_B0 0x0002 /* Enable Asynchronous Memory Bank 0 only */ +#define AMBEN_B0_B1 0x0004 /* Enable Asynchronous Memory Banks 0 & 1 only */ +#define AMBEN_B0_B1_B2 0x0006 /* Enable Asynchronous Memory Banks 0, 1, and 2 */ +#define AMBEN_ALL 0x0008 /* Enable Asynchronous Memory Banks (all) 0, 1, 2, and 3 */ +#define CDPRIO 0x0100 /* DMA has priority over core for for external accesses */ + +/* EBIU_AMGCTL Bit Positions */ +#define AMCKEN_P 0x0000 /* Enable CLKOUT */ +#define AMBEN_P0 0x0001 /* Asynchronous Memory Enable, 000 - banks 0-3 disabled, 001 - Bank 0 enabled */ +#define AMBEN_P1 0x0002 /* Asynchronous Memory Enable, 010 - banks 0&1 enabled, 011 - banks 0-3 enabled */ +#define AMBEN_P2 0x0003 /* Asynchronous Memory Enable, 1xx - All banks (bank 0, 1, 2, and 3) enabled */ + +/* EBIU_AMBCTL0 Masks */ +#define B0RDYEN 0x00000001 /* Bank 0 RDY Enable, 0=disable, 1=enable */ +#define B0RDYPOL 0x00000002 /* Bank 0 RDY Active high, 0=active low, 1=active high */ +#define B0TT_1 0x00000004 /* Bank 0 Transition Time from Read to Write = 1 cycle */ +#define B0TT_2 0x00000008 /* Bank 0 Transition Time from Read to Write = 2 cycles */ +#define B0TT_3 0x0000000C /* Bank 0 Transition Time from Read to Write = 3 cycles */ +#define B0TT_4 0x00000000 /* Bank 0 Transition Time from Read to Write = 4 cycles */ +#define B0ST_1 0x00000010 /* Bank 0 Setup Time from AOE asserted to Read/Write asserted=1 cycle */ +#define B0ST_2 0x00000020 /* Bank 0 Setup Time from AOE asserted to Read/Write asserted=2 cycles */ +#define B0ST_3 0x00000030 /* Bank 0 Setup Time from AOE asserted to Read/Write asserted=3 cycles */ +#define B0ST_4 0x00000000 /* Bank 0 Setup Time from AOE asserted to Read/Write asserted=4 cycles */ +#define B0HT_1 0x00000040 /* Bank 0 Hold Time from Read/Write deasserted to AOE deasserted = 1 cycle */ +#define B0HT_2 0x00000080 /* Bank 0 Hold Time from Read/Write deasserted to AOE deasserted = 2 cycles */ +#define B0HT_3 0x000000C0 /* Bank 0 Hold Time from Read/Write deasserted to AOE deasserted = 3 cycles */ +#define B0HT_0 0x00000000 /* Bank 0 Hold Time from Read/Write deasserted to AOE deasserted = 0 cycles */ +#define B0RAT_1 0x00000100 /* Bank 0 Read Access Time = 1 cycle */ +#define B0RAT_2 0x00000200 /* Bank 0 Read Access Time = 2 cycles */ +#define B0RAT_3 0x00000300 /* Bank 0 Read Access Time = 3 cycles */ +#define B0RAT_4 0x00000400 /* Bank 0 Read Access Time = 4 cycles */ +#define B0RAT_5 0x00000500 /* Bank 0 Read Access Time = 5 cycles */ +#define B0RAT_6 0x00000600 /* Bank 0 Read Access Time = 6 cycles */ +#define B0RAT_7 0x00000700 /* Bank 0 Read Access Time = 7 cycles */ +#define B0RAT_8 0x00000800 /* Bank 0 Read Access Time = 8 cycles */ +#define B0RAT_9 0x00000900 /* Bank 0 Read Access Time = 9 cycles */ +#define B0RAT_10 0x00000A00 /* Bank 0 Read Access Time = 10 cycles */ +#define B0RAT_11 0x00000B00 /* Bank 0 Read Access Time = 11 cycles */ +#define B0RAT_12 0x00000C00 /* Bank 0 Read Access Time = 12 cycles */ +#define B0RAT_13 0x00000D00 /* Bank 0 Read Access Time = 13 cycles */ +#define B0RAT_14 0x00000E00 /* Bank 0 Read Access Time = 14 cycles */ +#define B0RAT_15 0x00000F00 /* Bank 0 Read Access Time = 15 cycles */ +#define B0WAT_1 0x00001000 /* Bank 0 Write Access Time = 1 cycle */ +#define B0WAT_2 0x00002000 /* Bank 0 Write Access Time = 2 cycles */ +#define B0WAT_3 0x00003000 /* Bank 0 Write Access Time = 3 cycles */ +#define B0WAT_4 0x00004000 /* Bank 0 Write Access Time = 4 cycles */ +#define B0WAT_5 0x00005000 /* Bank 0 Write Access Time = 5 cycles */ +#define B0WAT_6 0x00006000 /* Bank 0 Write Access Time = 6 cycles */ +#define B0WAT_7 0x00007000 /* Bank 0 Write Access Time = 7 cycles */ +#define B0WAT_8 0x00008000 /* Bank 0 Write Access Time = 8 cycles */ +#define B0WAT_9 0x00009000 /* Bank 0 Write Access Time = 9 cycles */ +#define B0WAT_10 0x0000A000 /* Bank 0 Write Access Time = 10 cycles */ +#define B0WAT_11 0x0000B000 /* Bank 0 Write Access Time = 11 cycles */ +#define B0WAT_12 0x0000C000 /* Bank 0 Write Access Time = 12 cycles */ +#define B0WAT_13 0x0000D000 /* Bank 0 Write Access Time = 13 cycles */ +#define B0WAT_14 0x0000E000 /* Bank 0 Write Access Time = 14 cycles */ +#define B0WAT_15 0x0000F000 /* Bank 0 Write Access Time = 15 cycles */ +#define B1RDYEN 0x00010000 /* Bank 1 RDY enable, 0=disable, 1=enable */ +#define B1RDYPOL 0x00020000 /* Bank 1 RDY Active high, 0=active low, 1=active high */ +#define B1TT_1 0x00040000 /* Bank 1 Transition Time from Read to Write = 1 cycle */ +#define B1TT_2 0x00080000 /* Bank 1 Transition Time from Read to Write = 2 cycles */ +#define B1TT_3 0x000C0000 /* Bank 1 Transition Time from Read to Write = 3 cycles */ +#define B1TT_4 0x00000000 /* Bank 1 Transition Time from Read to Write = 4 cycles */ +#define B1ST_1 0x00100000 /* Bank 1 Setup Time from AOE asserted to Read or Write asserted = 1 cycle */ +#define B1ST_2 0x00200000 /* Bank 1 Setup Time from AOE asserted to Read or Write asserted = 2 cycles */ +#define B1ST_3 0x00300000 /* Bank 1 Setup Time from AOE asserted to Read or Write asserted = 3 cycles */ +#define B1ST_4 0x00000000 /* Bank 1 Setup Time from AOE asserted to Read or Write asserted = 4 cycles */ +#define B1HT_1 0x00400000 /* Bank 1 Hold Time from Read or Write deasserted to AOE deasserted = 1 cycle */ +#define B1HT_2 0x00800000 /* Bank 1 Hold Time from Read or Write deasserted to AOE deasserted = 2 cycles */ +#define B1HT_3 0x00C00000 /* Bank 1 Hold Time from Read or Write deasserted to AOE deasserted = 3 cycles */ +#define B1HT_0 0x00000000 /* Bank 1 Hold Time from Read or Write deasserted to AOE deasserted = 0 cycles */ +#define B1RAT_1 0x01000000 /* Bank 1 Read Access Time = 1 cycle */ +#define B1RAT_2 0x02000000 /* Bank 1 Read Access Time = 2 cycles */ +#define B1RAT_3 0x03000000 /* Bank 1 Read Access Time = 3 cycles */ +#define B1RAT_4 0x04000000 /* Bank 1 Read Access Time = 4 cycles */ +#define B1RAT_5 0x05000000 /* Bank 1 Read Access Time = 5 cycles */ +#define B1RAT_6 0x06000000 /* Bank 1 Read Access Time = 6 cycles */ +#define B1RAT_7 0x07000000 /* Bank 1 Read Access Time = 7 cycles */ +#define B1RAT_8 0x08000000 /* Bank 1 Read Access Time = 8 cycles */ +#define B1RAT_9 0x09000000 /* Bank 1 Read Access Time = 9 cycles */ +#define B1RAT_10 0x0A000000 /* Bank 1 Read Access Time = 10 cycles */ +#define B1RAT_11 0x0B000000 /* Bank 1 Read Access Time = 11 cycles */ +#define B1RAT_12 0x0C000000 /* Bank 1 Read Access Time = 12 cycles */ +#define B1RAT_13 0x0D000000 /* Bank 1 Read Access Time = 13 cycles */ +#define B1RAT_14 0x0E000000 /* Bank 1 Read Access Time = 14 cycles */ +#define B1RAT_15 0x0F000000 /* Bank 1 Read Access Time = 15 cycles */ +#define B1WAT_1 0x10000000 /* Bank 1 Write Access Time = 1 cycle */ +#define B1WAT_2 0x20000000 /* Bank 1 Write Access Time = 2 cycles */ +#define B1WAT_3 0x30000000 /* Bank 1 Write Access Time = 3 cycles */ +#define B1WAT_4 0x40000000 /* Bank 1 Write Access Time = 4 cycles */ +#define B1WAT_5 0x50000000 /* Bank 1 Write Access Time = 5 cycles */ +#define B1WAT_6 0x60000000 /* Bank 1 Write Access Time = 6 cycles */ +#define B1WAT_7 0x70000000 /* Bank 1 Write Access Time = 7 cycles */ +#define B1WAT_8 0x80000000 /* Bank 1 Write Access Time = 8 cycles */ +#define B1WAT_9 0x90000000 /* Bank 1 Write Access Time = 9 cycles */ +#define B1WAT_10 0xA0000000 /* Bank 1 Write Access Time = 10 cycles */ +#define B1WAT_11 0xB0000000 /* Bank 1 Write Access Time = 11 cycles */ +#define B1WAT_12 0xC0000000 /* Bank 1 Write Access Time = 12 cycles */ +#define B1WAT_13 0xD0000000 /* Bank 1 Write Access Time = 13 cycles */ +#define B1WAT_14 0xE0000000 /* Bank 1 Write Access Time = 14 cycles */ +#define B1WAT_15 0xF0000000 /* Bank 1 Write Access Time = 15 cycles */ + +/* EBIU_AMBCTL1 Masks */ +#define B2RDYEN 0x00000001 /* Bank 2 RDY Enable, 0=disable, 1=enable */ +#define B2RDYPOL 0x00000002 /* Bank 2 RDY Active high, 0=active low, 1=active high */ +#define B2TT_1 0x00000004 /* Bank 2 Transition Time from Read to Write = 1 cycle */ +#define B2TT_2 0x00000008 /* Bank 2 Transition Time from Read to Write = 2 cycles */ +#define B2TT_3 0x0000000C /* Bank 2 Transition Time from Read to Write = 3 cycles */ +#define B2TT_4 0x00000000 /* Bank 2 Transition Time from Read to Write = 4 cycles */ +#define B2ST_1 0x00000010 /* Bank 2 Setup Time from AOE asserted to Read or Write asserted = 1 cycle */ +#define B2ST_2 0x00000020 /* Bank 2 Setup Time from AOE asserted to Read or Write asserted = 2 cycles */ +#define B2ST_3 0x00000030 /* Bank 2 Setup Time from AOE asserted to Read or Write asserted = 3 cycles */ +#define B2ST_4 0x00000000 /* Bank 2 Setup Time from AOE asserted to Read or Write asserted = 4 cycles */ +#define B2HT_1 0x00000040 /* Bank 2 Hold Time from Read or Write deasserted to AOE deasserted = 1 cycle */ +#define B2HT_2 0x00000080 /* Bank 2 Hold Time from Read or Write deasserted to AOE deasserted = 2 cycles */ +#define B2HT_3 0x000000C0 /* Bank 2 Hold Time from Read or Write deasserted to AOE deasserted = 3 cycles */ +#define B2HT_0 0x00000000 /* Bank 2 Hold Time from Read or Write deasserted to AOE deasserted = 0 cycles */ +#define B2RAT_1 0x00000100 /* Bank 2 Read Access Time = 1 cycle */ +#define B2RAT_2 0x00000200 /* Bank 2 Read Access Time = 2 cycles */ +#define B2RAT_3 0x00000300 /* Bank 2 Read Access Time = 3 cycles */ +#define B2RAT_4 0x00000400 /* Bank 2 Read Access Time = 4 cycles */ +#define B2RAT_5 0x00000500 /* Bank 2 Read Access Time = 5 cycles */ +#define B2RAT_6 0x00000600 /* Bank 2 Read Access Time = 6 cycles */ +#define B2RAT_7 0x00000700 /* Bank 2 Read Access Time = 7 cycles */ +#define B2RAT_8 0x00000800 /* Bank 2 Read Access Time = 8 cycles */ +#define B2RAT_9 0x00000900 /* Bank 2 Read Access Time = 9 cycles */ +#define B2RAT_10 0x00000A00 /* Bank 2 Read Access Time = 10 cycles */ +#define B2RAT_11 0x00000B00 /* Bank 2 Read Access Time = 11 cycles */ +#define B2RAT_12 0x00000C00 /* Bank 2 Read Access Time = 12 cycles */ +#define B2RAT_13 0x00000D00 /* Bank 2 Read Access Time = 13 cycles */ +#define B2RAT_14 0x00000E00 /* Bank 2 Read Access Time = 14 cycles */ +#define B2RAT_15 0x00000F00 /* Bank 2 Read Access Time = 15 cycles */ +#define B2WAT_1 0x00001000 /* Bank 2 Write Access Time = 1 cycle */ +#define B2WAT_2 0x00002000 /* Bank 2 Write Access Time = 2 cycles */ +#define B2WAT_3 0x00003000 /* Bank 2 Write Access Time = 3 cycles */ +#define B2WAT_4 0x00004000 /* Bank 2 Write Access Time = 4 cycles */ +#define B2WAT_5 0x00005000 /* Bank 2 Write Access Time = 5 cycles */ +#define B2WAT_6 0x00006000 /* Bank 2 Write Access Time = 6 cycles */ +#define B2WAT_7 0x00007000 /* Bank 2 Write Access Time = 7 cycles */ +#define B2WAT_8 0x00008000 /* Bank 2 Write Access Time = 8 cycles */ +#define B2WAT_9 0x00009000 /* Bank 2 Write Access Time = 9 cycles */ +#define B2WAT_10 0x0000A000 /* Bank 2 Write Access Time = 10 cycles */ +#define B2WAT_11 0x0000B000 /* Bank 2 Write Access Time = 11 cycles */ +#define B2WAT_12 0x0000C000 /* Bank 2 Write Access Time = 12 cycles */ +#define B2WAT_13 0x0000D000 /* Bank 2 Write Access Time = 13 cycles */ +#define B2WAT_14 0x0000E000 /* Bank 2 Write Access Time = 14 cycles */ +#define B2WAT_15 0x0000F000 /* Bank 2 Write Access Time = 15 cycles */ +#define B3RDYEN 0x00010000 /* Bank 3 RDY enable, 0=disable, 1=enable */ +#define B3RDYPOL 0x00020000 /* Bank 3 RDY Active high, 0=active low, 1=active high */ +#define B3TT_1 0x00040000 /* Bank 3 Transition Time from Read to Write = 1 cycle */ +#define B3TT_2 0x00080000 /* Bank 3 Transition Time from Read to Write = 2 cycles */ +#define B3TT_3 0x000C0000 /* Bank 3 Transition Time from Read to Write = 3 cycles */ +#define B3TT_4 0x00000000 /* Bank 3 Transition Time from Read to Write = 4 cycles */ +#define B3ST_1 0x00100000 /* Bank 3 Setup Time from AOE asserted to Read or Write asserted = 1 cycle */ +#define B3ST_2 0x00200000 /* Bank 3 Setup Time from AOE asserted to Read or Write asserted = 2 cycles */ +#define B3ST_3 0x00300000 /* Bank 3 Setup Time from AOE asserted to Read or Write asserted = 3 cycles */ +#define B3ST_4 0x00000000 /* Bank 3 Setup Time from AOE asserted to Read or Write asserted = 4 cycles */ +#define B3HT_1 0x00400000 /* Bank 3 Hold Time from Read or Write deasserted to AOE deasserted = 1 cycle */ +#define B3HT_2 0x00800000 /* Bank 3 Hold Time from Read or Write deasserted to AOE deasserted = 2 cycles */ +#define B3HT_3 0x00C00000 /* Bank 3 Hold Time from Read or Write deasserted to AOE deasserted = 3 cycles */ +#define B3HT_0 0x00000000 /* Bank 3 Hold Time from Read or Write deasserted to AOE deasserted = 0 cycles */ +#define B3RAT_1 0x01000000 /* Bank 3 Read Access Time = 1 cycle */ +#define B3RAT_2 0x02000000 /* Bank 3 Read Access Time = 2 cycles */ +#define B3RAT_3 0x03000000 /* Bank 3 Read Access Time = 3 cycles */ +#define B3RAT_4 0x04000000 /* Bank 3 Read Access Time = 4 cycles */ +#define B3RAT_5 0x05000000 /* Bank 3 Read Access Time = 5 cycles */ +#define B3RAT_6 0x06000000 /* Bank 3 Read Access Time = 6 cycles */ +#define B3RAT_7 0x07000000 /* Bank 3 Read Access Time = 7 cycles */ +#define B3RAT_8 0x08000000 /* Bank 3 Read Access Time = 8 cycles */ +#define B3RAT_9 0x09000000 /* Bank 3 Read Access Time = 9 cycles */ +#define B3RAT_10 0x0A000000 /* Bank 3 Read Access Time = 10 cycles */ +#define B3RAT_11 0x0B000000 /* Bank 3 Read Access Time = 11 cycles */ +#define B3RAT_12 0x0C000000 /* Bank 3 Read Access Time = 12 cycles */ +#define B3RAT_13 0x0D000000 /* Bank 3 Read Access Time = 13 cycles */ +#define B3RAT_14 0x0E000000 /* Bank 3 Read Access Time = 14 cycles */ +#define B3RAT_15 0x0F000000 /* Bank 3 Read Access Time = 15 cycles */ +#define B3WAT_1 0x10000000 /* Bank 3 Write Access Time = 1 cycle */ +#define B3WAT_2 0x20000000 /* Bank 3 Write Access Time = 2 cycles */ +#define B3WAT_3 0x30000000 /* Bank 3 Write Access Time = 3 cycles */ +#define B3WAT_4 0x40000000 /* Bank 3 Write Access Time = 4 cycles */ +#define B3WAT_5 0x50000000 /* Bank 3 Write Access Time = 5 cycles */ +#define B3WAT_6 0x60000000 /* Bank 3 Write Access Time = 6 cycles */ +#define B3WAT_7 0x70000000 /* Bank 3 Write Access Time = 7 cycles */ +#define B3WAT_8 0x80000000 /* Bank 3 Write Access Time = 8 cycles */ +#define B3WAT_9 0x90000000 /* Bank 3 Write Access Time = 9 cycles */ +#define B3WAT_10 0xA0000000 /* Bank 3 Write Access Time = 10 cycles */ +#define B3WAT_11 0xB0000000 /* Bank 3 Write Access Time = 11 cycles */ +#define B3WAT_12 0xC0000000 /* Bank 3 Write Access Time = 12 cycles */ +#define B3WAT_13 0xD0000000 /* Bank 3 Write Access Time = 13 cycles */ +#define B3WAT_14 0xE0000000 /* Bank 3 Write Access Time = 14 cycles */ +#define B3WAT_15 0xF0000000 /* Bank 3 Write Access Time = 15 cycles */ + +/* ********************** SDRAM CONTROLLER MASKS *************************** */ +/* EBIU_SDGCTL Masks */ +#define SCTLE 0x00000001 /* Enable SCLK[0], /SRAS, /SCAS, /SWE, SDQM[3:0] */ +#define CL_2 0x00000008 /* SDRAM CAS latency = 2 cycles */ +#define CL_3 0x0000000C /* SDRAM CAS latency = 3 cycles */ +#define PFE 0x00000010 /* Enable SDRAM prefetch */ +#define PFP 0x00000020 /* Prefetch has priority over AMC requests */ +#define PASR_ALL 0x00000000 /* All 4 SDRAM Banks Refreshed In Self-Refresh */ +#define PASR_B0_B1 0x00000010 /* SDRAM Banks 0 and 1 Are Refreshed In Self-Refresh */ +#define PASR_B0 0x00000020 /* Only SDRAM Bank 0 Is Refreshed In Self-Refresh */ +#define TRAS_1 0x00000040 /* SDRAM tRAS = 1 cycle */ +#define TRAS_2 0x00000080 /* SDRAM tRAS = 2 cycles */ +#define TRAS_3 0x000000C0 /* SDRAM tRAS = 3 cycles */ +#define TRAS_4 0x00000100 /* SDRAM tRAS = 4 cycles */ +#define TRAS_5 0x00000140 /* SDRAM tRAS = 5 cycles */ +#define TRAS_6 0x00000180 /* SDRAM tRAS = 6 cycles */ +#define TRAS_7 0x000001C0 /* SDRAM tRAS = 7 cycles */ +#define TRAS_8 0x00000200 /* SDRAM tRAS = 8 cycles */ +#define TRAS_9 0x00000240 /* SDRAM tRAS = 9 cycles */ +#define TRAS_10 0x00000280 /* SDRAM tRAS = 10 cycles */ +#define TRAS_11 0x000002C0 /* SDRAM tRAS = 11 cycles */ +#define TRAS_12 0x00000300 /* SDRAM tRAS = 12 cycles */ +#define TRAS_13 0x00000340 /* SDRAM tRAS = 13 cycles */ +#define TRAS_14 0x00000380 /* SDRAM tRAS = 14 cycles */ +#define TRAS_15 0x000003C0 /* SDRAM tRAS = 15 cycles */ +#define TRP_1 0x00000800 /* SDRAM tRP = 1 cycle */ +#define TRP_2 0x00001000 /* SDRAM tRP = 2 cycles */ +#define TRP_3 0x00001800 /* SDRAM tRP = 3 cycles */ +#define TRP_4 0x00002000 /* SDRAM tRP = 4 cycles */ +#define TRP_5 0x00002800 /* SDRAM tRP = 5 cycles */ +#define TRP_6 0x00003000 /* SDRAM tRP = 6 cycles */ +#define TRP_7 0x00003800 /* SDRAM tRP = 7 cycles */ +#define TRCD_1 0x00008000 /* SDRAM tRCD = 1 cycle */ +#define TRCD_2 0x00010000 /* SDRAM tRCD = 2 cycles */ +#define TRCD_3 0x00018000 /* SDRAM tRCD = 3 cycles */ +#define TRCD_4 0x00020000 /* SDRAM tRCD = 4 cycles */ +#define TRCD_5 0x00028000 /* SDRAM tRCD = 5 cycles */ +#define TRCD_6 0x00030000 /* SDRAM tRCD = 6 cycles */ +#define TRCD_7 0x00038000 /* SDRAM tRCD = 7 cycles */ +#define TWR_1 0x00080000 /* SDRAM tWR = 1 cycle */ +#define TWR_2 0x00100000 /* SDRAM tWR = 2 cycles */ +#define TWR_3 0x00180000 /* SDRAM tWR = 3 cycles */ +#define PUPSD 0x00200000 /*Power-up start delay */ +#define PSM 0x00400000 /* SDRAM power-up sequence = Precharge, mode register set, 8 CBR refresh cycles */ +#define PSS 0x00800000 /* enable SDRAM power-up sequence on next SDRAM access */ +#define SRFS 0x01000000 /* Start SDRAM self-refresh mode */ +#define EBUFE 0x02000000 /* Enable external buffering timing */ +#define FBBRW 0x04000000 /* Fast back-to-back read write enable */ +#define EMREN 0x10000000 /* Extended mode register enable */ +#define TCSR 0x20000000 /* Temp compensated self refresh value 85 deg C */ +#define CDDBG 0x40000000 /* Tristate SDRAM controls during bus grant */ + +/* EBIU_SDBCTL Masks */ +#define EBE 0x00000001 /* Enable SDRAM external bank */ +#define EBSZ_16 0x00000000 /* SDRAM external bank size = 16MB */ +#define EBSZ_32 0x00000002 /* SDRAM external bank size = 32MB */ +#define EBSZ_64 0x00000004 /* SDRAM external bank size = 64MB */ +#define EBSZ_128 0x00000006 /* SDRAM external bank size = 128MB */ +#define EBSZ_256 0x00000008 /* SDRAM External Bank Size = 256MB */ +#define EBSZ_512 0x0000000A /* SDRAM External Bank Size = 512MB */ +#define EBCAW_8 0x00000000 /* SDRAM external bank column address width = 8 bits */ +#define EBCAW_9 0x00000010 /* SDRAM external bank column address width = 9 bits */ +#define EBCAW_10 0x00000020 /* SDRAM external bank column address width = 9 bits */ +#define EBCAW_11 0x00000030 /* SDRAM external bank column address width = 9 bits */ + +/* EBIU_SDSTAT Masks */ +#define SDCI 0x00000001 /* SDRAM controller is idle */ +#define SDSRA 0x00000002 /* SDRAM SDRAM self refresh is active */ +#define SDPUA 0x00000004 /* SDRAM power up active */ +#define SDRS 0x00000008 /* SDRAM is in reset state */ +#define SDEASE 0x00000010 /* SDRAM EAB sticky error status - W1C */ +#define BGSTAT 0x00000020 /* Bus granted */ + + +/* ******************** TWO-WIRE INTERFACE (TWIx) MASKS ***********************/ +/* TWIx_CLKDIV Macros (Use: *pTWIx_CLKDIV = CLKLOW(x)|CLKHI(y); ) */ +#ifdef _MISRA_RULES +#define CLKLOW(x) ((x) & 0xFFu) /* Periods Clock Is Held Low */ +#define CLKHI(y) (((y)&0xFFu)<<0x8) /* Periods Before New Clock Low */ +#else +#define CLKLOW(x) ((x) & 0xFF) /* Periods Clock Is Held Low */ +#define CLKHI(y) (((y)&0xFF)<<0x8) /* Periods Before New Clock Low */ +#endif /* _MISRA_RULES */ + +/* TWIx_PRESCALE Masks */ +#define PRESCALE 0x007F /* SCLKs Per Internal Time Reference (10MHz) */ +#define TWI_ENA 0x0080 /* TWI Enable */ +#define SCCB 0x0200 /* SCCB Compatibility Enable */ + +/* TWIx_SLAVE_CTRL Masks */ +#define SEN 0x0001 /* Slave Enable */ +#define SADD_LEN 0x0002 /* Slave Address Length */ +#define STDVAL 0x0004 /* Slave Transmit Data Valid */ +#define NAK 0x0008 /* NAK/ACK* Generated At Conclusion Of Transfer */ +#define GEN 0x0010 /* General Call Adrress Matching Enabled */ + +/* TWIx_SLAVE_STAT Masks */ +#define SDIR 0x0001 /* Slave Transfer Direction (Transmit/Receive*) */ +#define GCALL 0x0002 /* General Call Indicator */ + +/* TWIx_MASTER_CTRL Masks */ +#define MEN 0x0001 /* Master Mode Enable */ +#define MADD_LEN 0x0002 /* Master Address Length */ +#define MDIR 0x0004 /* Master Transmit Direction (RX/TX*) */ +#define FAST 0x0008 /* Use Fast Mode Timing Specs */ +#define STOP 0x0010 /* Issue Stop Condition */ +#define RSTART 0x0020 /* Repeat Start or Stop* At End Of Transfer */ +#define DCNT 0x3FC0 /* Data Bytes To Transfer */ +#define SDAOVR 0x4000 /* Serial Data Override */ +#define SCLOVR 0x8000 /* Serial Clock Override */ + +/* TWIx_MASTER_STAT Masks */ +#define MPROG 0x0001 /* Master Transfer In Progress */ +#define LOSTARB 0x0002 /* Lost Arbitration Indicator (Xfer Aborted) */ +#define ANAK 0x0004 /* Address Not Acknowledged */ +#define DNAK 0x0008 /* Data Not Acknowledged */ +#define BUFRDERR 0x0010 /* Buffer Read Error */ +#define BUFWRERR 0x0020 /* Buffer Write Error */ +#define SDASEN 0x0040 /* Serial Data Sense */ +#define SCLSEN 0x0080 /* Serial Clock Sense */ +#define BUSBUSY 0x0100 /* Bus Busy Indicator */ + +/* TWIx_INT_SRC and TWIx_INT_ENABLE Masks */ +#define SINIT 0x0001 /* Slave Transfer Initiated */ +#define SCOMP 0x0002 /* Slave Transfer Complete */ +#define SERR 0x0004 /* Slave Transfer Error */ +#define SOVF 0x0008 /* Slave Overflow */ +#define MCOMP 0x0010 /* Master Transfer Complete */ +#define MERR 0x0020 /* Master Transfer Error */ +#define XMTSERV 0x0040 /* Transmit FIFO Service */ +#define RCVSERV 0x0080 /* Receive FIFO Service */ + +/* TWIx_FIFO_CTRL Masks */ +#define XMTFLUSH 0x0001 /* Transmit Buffer Flush */ +#define RCVFLUSH 0x0002 /* Receive Buffer Flush */ +#define XMTINTLEN 0x0004 /* Transmit Buffer Interrupt Length */ +#define RCVINTLEN 0x0008 /* Receive Buffer Interrupt Length */ + +/* TWIx_FIFO_STAT Masks */ +#define XMTSTAT 0x0003 /* Transmit FIFO Status */ +#define XMT_EMPTY 0x0000 /* Transmit FIFO Empty */ +#define XMT_HALF 0x0001 /* Transmit FIFO Has 1 Byte To Write */ +#define XMT_FULL 0x0003 /* Transmit FIFO Full (2 Bytes To Write) */ + +#define RCVSTAT 0x000C /* Receive FIFO Status */ +#define RCV_EMPTY 0x0000 /* Receive FIFO Empty */ +#define RCV_HALF 0x0004 /* Receive FIFO Has 1 Byte To Read */ +#define RCV_FULL 0x000C /* Receive FIFO Full (2 Bytes To Read) */ + + +/********************************* MXVR MASKS ****************************************/ + +/* MXVR_CONFIG Masks */ + +#define MXVREN 0x00000001lu +#define MMSM 0x00000002lu +#define ACTIVE 0x00000004lu +#define SDELAY 0x00000008lu +#define NCMRXEN 0x00000010lu +#define RWRRXEN 0x00000020lu +#define MTXEN 0x00000040lu +#define MTXON 0x00000080lu /*legacy*/ +#define MTXONB 0x00000080lu +#define EPARITY 0x00000100lu +#define MSB 0x00001E00lu +#define APRXEN 0x00002000lu +#define WAKEUP 0x00004000lu +#define LMECH 0x00008000lu + +#ifdef _MISRA_RULES +#define SET_MSB(x) (((x)&0xFu) << 0x9) +#else +#define SET_MSB(x) (((x)&0xF) << 0x9) +#endif /* _MISRA_RULES */ + + +/* MXVR_PLL_CTL_0 Masks */ + +#define MXTALCEN 0x00000001lu +#define MXTALFEN 0x00000002lu +#define MPLLMS 0x00000008lu +#define MXTALMUL 0x00000030lu +#define MPLLEN 0x00000040lu +#define MPLLEN0 0x00000040lu /* legacy */ +#define MPLLEN1 0x00000080lu /* legacy */ +#define MMCLKEN 0x00000100lu +#define MMCLKMUL 0x00001E00lu +#define MPLLRSTB 0x00002000lu +#define MPLLRSTB0 0x00002000lu /* legacy */ +#define MPLLRSTB1 0x00004000lu /* legacy */ +#define MBCLKEN 0x00010000lu +#define MBCLKDIV 0x001E0000lu +#define MPLLCDR 0x00200000lu +#define MPLLCDR0 0x00200000lu /* legacy */ +#define MPLLCDR1 0x00400000lu /* legacy */ +#define INVRX 0x00800000lu +#define MFSEN 0x01000000lu +#define MFSDIV 0x1E000000lu +#define MFSSEL 0x60000000lu +#define MFSSYNC 0x80000000lu + +#define MXTALMUL_256FS 0x00000000lu /* legacy */ +#define MXTALMUL_384FS 0x00000010lu /* legacy */ +#define MXTALMUL_512FS 0x00000020lu /* legacy */ +#define MXTALMUL_1024FS 0x00000030lu + +#define MMCLKMUL_1024FS 0x00000000lu +#define MMCLKMUL_512FS 0x00000200lu +#define MMCLKMUL_256FS 0x00000400lu +#define MMCLKMUL_128FS 0x00000600lu +#define MMCLKMUL_64FS 0x00000800lu +#define MMCLKMUL_32FS 0x00000A00lu +#define MMCLKMUL_16FS 0x00000C00lu +#define MMCLKMUL_8FS 0x00000E00lu +#define MMCLKMUL_4FS 0x00001000lu +#define MMCLKMUL_2FS 0x00001200lu +#define MMCLKMUL_1FS 0x00001400lu +#define MMCLKMUL_1536FS 0x00001A00lu +#define MMCLKMUL_768FS 0x00001C00lu +#define MMCLKMUL_384FS 0x00001E00lu + +#define MBCLKDIV_DIV2 0x00020000lu +#define MBCLKDIV_DIV4 0x00040000lu +#define MBCLKDIV_DIV8 0x00060000lu +#define MBCLKDIV_DIV16 0x00080000lu +#define MBCLKDIV_DIV32 0x000A0000lu +#define MBCLKDIV_DIV64 0x000C0000lu +#define MBCLKDIV_DIV128 0x000E0000lu +#define MBCLKDIV_DIV256 0x00100000lu +#define MBCLKDIV_DIV512 0x00120000lu +#define MBCLKDIV_DIV1024 0x00140000lu + +#define MFSDIV_DIV2 0x02000000lu +#define MFSDIV_DIV4 0x04000000lu +#define MFSDIV_DIV8 0x06000000lu +#define MFSDIV_DIV16 0x08000000lu +#define MFSDIV_DIV32 0x0A000000lu +#define MFSDIV_DIV64 0x0C000000lu +#define MFSDIV_DIV128 0x0E000000lu +#define MFSDIV_DIV256 0x10000000lu +#define MFSDIV_DIV512 0x12000000lu +#define MFSDIV_DIV1024 0x14000000lu + +#define MFSSEL_CLOCK 0x00000000lu +#define MFSSEL_PULSE_HI 0x20000000lu +#define MFSSEL_PULSE_LO 0x40000000lu + + +/* MXVR_PLL_CTL_1 Masks */ + +#define MSTO 0x00000001lu +#define MSTO0 0x00000001lu /* legacy */ +#define MHOGGD 0x00000004lu +#define MHOGGD0 0x00000004lu /* legacy */ +#define MHOGGD1 0x00000008lu /* legacy */ +#define MSHAPEREN 0x00000010lu +#define MSHAPEREN0 0x00000010lu /* legacy */ +#define MSHAPEREN1 0x00000020lu /* legacy */ +#define MPLLCNTEN 0x00008000lu +#define MPLLCNT 0xFFFF0000lu + +#ifdef _MISRA_RULES +#define SET_MPLLCNT(x) (((x)&0xFFFFu) << 0x10) +#else +#define SET_MPLLCNT(x) (((x)&0xFFFF) << 0x10) +#endif /* _MISRA_RULES */ + + +/* MXVR_PLL_CTL_2 Masks */ + +#define MSHAPERSEL 0x00000007lu +#define MCPSEL 0x000000E0lu + +/* MXVR_INT_STAT_0 Masks */ + +#define NI2A 0x00000001lu +#define NA2I 0x00000002lu +#define SBU2L 0x00000004lu +#define SBL2U 0x00000008lu +#define PRU 0x00000010lu +#define MPRU 0x00000020lu +#define DRU 0x00000040lu +#define MDRU 0x00000080lu +#define SBU 0x00000100lu +#define ATU 0x00000200lu +#define FCZ0 0x00000400lu +#define FCZ1 0x00000800lu +#define PERR 0x00001000lu +#define MH2L 0x00002000lu +#define ML2H 0x00004000lu +#define WUP 0x00008000lu +#define FU2L 0x00010000lu +#define FL2U 0x00020000lu +#define BU2L 0x00040000lu +#define BL2U 0x00080000lu +#define PCZ 0x00400000lu +#define FERR 0x00800000lu +#define CMR 0x01000000lu +#define CMROF 0x02000000lu +#define CMTS 0x04000000lu +#define CMTC 0x08000000lu +#define RWRC 0x10000000lu +#define BCZ 0x20000000lu +#define BMERR 0x40000000lu +#define DERR 0x80000000lu + + +/* MXVR_INT_EN_0 Masks */ + +#define NI2AEN NI2A +#define NA2IEN NA2I +#define SBU2LEN SBU2L +#define SBL2UEN SBL2U +#define PRUEN PRU +#define MPRUEN MPRU +#define DRUEN DRU +#define MDRUEN MDRU +#define SBUEN SBU +#define ATUEN ATU +#define FCZ0EN FCZ0 +#define FCZ1EN FCZ1 +#define PERREN PERR +#define MH2LEN MH2L +#define ML2HEN ML2H +#define WUPEN WUP +#define FU2LEN FU2L +#define FL2UEN FL2U +#define BU2LEN BU2L +#define BL2UEN BL2U +#define PCZEN PCZ +#define FERREN FERR +#define CMREN CMR +#define CMROFEN CMROF +#define CMTSEN CMTS +#define CMTCEN CMTC +#define RWRCEN RWRC +#define BCZEN BCZ +#define BMERREN BMERR +#define DERREN DERR + + +/* MXVR_INT_STAT_1 Masks */ + +#define APR 0x00000004lu +#define APROF 0x00000008lu +#define APTS 0x00000040lu +#define APTC 0x00000080lu +#define APRCE 0x00000400lu +#define APRPE 0x00000800lu + +#define HDONE0 0x00000001lu +#define DONE0 0x00000002lu +#define HDONE1 0x00000010lu +#define DONE1 0x00000020lu +#define HDONE2 0x00000100lu +#define DONE2 0x00000200lu +#define HDONE3 0x00001000lu +#define DONE3 0x00002000lu +#define HDONE4 0x00010000lu +#define DONE4 0x00020000lu +#define HDONE5 0x00100000lu +#define DONE5 0x00200000lu +#define HDONE6 0x01000000lu +#define DONE6 0x02000000lu +#define HDONE7 0x10000000lu +#define DONE7 0x20000000lu + +#define DONEX(x) (0x00000002 << (4 * (x))) +#define HDONEX(x) (0x00000001 << (4 * (x))) + + +/* MXVR_INT_EN_1 Masks */ + +#define APREN APR +#define APROFEN APROF +#define APTSEN APTS +#define APTCEN APTC +#define APRCEEN APRCE +#define APRPEEN APRPE + +#define HDONEEN0 HDONE0 +#define DONEEN0 DONE0 +#define HDONEEN1 HDONE1 +#define DONEEN1 DONE1 +#define HDONEEN2 HDONE2 +#define DONEEN2 DONE2 +#define HDONEEN3 HDONE3 +#define DONEEN3 DONE3 +#define HDONEEN4 HDONE4 +#define DONEEN4 DONE4 +#define HDONEEN5 HDONE5 +#define DONEEN5 DONE5 +#define HDONEEN6 HDONE6 +#define DONEEN6 DONE6 +#define HDONEEN7 HDONE7 +#define DONEEN7 DONE7 + +#define DONEENX(x) (0x00000002 << (4 * (x))) +#define HDONEENX(x) (0x00000001 << (4 * (x))) + + +/* MXVR_STATE_0 Masks */ + +#define NACT 0x00000001lu +#define SBLOCK 0x00000002lu +#define PFDLOCK 0x00000004lu +#define PFDLOCK0 0x00000004lu /* legacy */ +#define PDD 0x00000008lu +#define PDD0 0x00000008lu /* legacy */ +#define PVCO 0x00000010lu +#define PVCO0 0x00000010lu /* legacy */ +#define PFDLOCK1 0x00000020lu /* legacy */ +#define PDD1 0x00000040lu /* legacy */ +#define PVCO1 0x00000080lu /* legacy */ +#define APBSY 0x00000100lu +#define APARB 0x00000200lu +#define APTX 0x00000400lu +#define APRX 0x00000800lu +#define CMBSY 0x00001000lu +#define CMARB 0x00002000lu +#define CMTX 0x00004000lu +#define CMRX 0x00008000lu +#define MRXONB 0x00010000lu +#define RGSIP 0x00020000lu +#define DALIP 0x00040000lu +#define ALIP 0x00080000lu +#define RRDIP 0x00100000lu +#define RWRIP 0x00200000lu +#define FLOCK 0x00400000lu +#define BLOCK 0x00800000lu +#define RSB 0x0F000000lu +#define DERRNUM 0xF0000000lu + + +/* MXVR_STATE_1 Masks */ + +#define STXNUMB 0x0000000Flu +#define SRXNUMB 0x000000F0lu +#define APCONT 0x00000100lu +#define DMAACTIVEX 0x00FF0000lu +#define DMAACTIVE0 0x00010000lu +#define DMAACTIVE1 0x00020000lu +#define DMAACTIVE2 0x00040000lu +#define DMAACTIVE3 0x00080000lu +#define DMAACTIVE4 0x00100000lu +#define DMAACTIVE5 0x00200000lu +#define DMAACTIVE6 0x00400000lu +#define DMAACTIVE7 0x00800000lu +#define DMAPMENX 0xFF000000lu +#define DMAPMEN0 0x01000000lu +#define DMAPMEN1 0x02000000lu +#define DMAPMEN2 0x04000000lu +#define DMAPMEN3 0x08000000lu +#define DMAPMEN4 0x10000000lu +#define DMAPMEN5 0x20000000lu +#define DMAPMEN6 0x40000000lu +#define DMAPMEN7 0x80000000lu + + +/* MXVR_POSITION Masks */ + +#define PVALID 0x8000 +#define POSITION 0x003F + + +/* MXVR_MAX_POSITION Masks */ + +#define MPVALID 0x8000 +#define MPOSITION 0x003F + + +/* MXVR_DELAY Masks */ + +#define DVALID 0x8000 +#define DELAY 0x003F + + +/* MXVR_MAX_DELAY Masks */ + +#define MDVALID 0x8000 +#define MDELAY 0x003F + + +/* MXVR_LADDR Masks */ + +#define LVALID 0x80000000lu +#define LADDR 0x0000FFFFlu + + +/* MXVR_GADDR Masks */ + +#define GVALID 0x8000 +#define GADDRL 0x00FF + + +/* MXVR_AADDR Masks */ + +#define AVALID 0x80000000lu +#define AADDR 0x0000FFFFlu + + +/* MXVR_ALLOC_0 Masks */ + +#define CIU0 0x00000080lu +#define CIU1 0x00008000lu +#define CIU2 0x00800000lu +#define CIU3 0x80000000lu + +#define CL0 0x0000007Flu +#define CL1 0x00007F00lu +#define CL2 0x007F0000lu +#define CL3 0x7F000000lu + + +/* MXVR_ALLOC_1 Masks */ + +#define CIU4 0x00000080lu +#define CIU5 0x00008000lu +#define CIU6 0x00800000lu +#define CIU7 0x80000000lu + +#define CL4 0x0000007Flu +#define CL5 0x00007F00lu +#define CL6 0x007F0000lu +#define CL7 0x7F000000lu + + +/* MXVR_ALLOC_2 Masks */ + +#define CIU8 0x00000080lu +#define CIU9 0x00008000lu +#define CIU10 0x00800000lu +#define CIU11 0x80000000lu + +#define CL8 0x0000007Flu +#define CL9 0x00007F00lu +#define CL10 0x007F0000lu +#define CL11 0x7F000000lu + + +/* MXVR_ALLOC_3 Masks */ + +#define CIU12 0x00000080lu +#define CIU13 0x00008000lu +#define CIU14 0x00800000lu +#define CIU15 0x80000000lu + +#define CL12 0x0000007Flu +#define CL13 0x00007F00lu +#define CL14 0x007F0000lu +#define CL15 0x7F000000lu + + +/* MXVR_ALLOC_4 Masks */ + +#define CIU16 0x00000080lu +#define CIU17 0x00008000lu +#define CIU18 0x00800000lu +#define CIU19 0x80000000lu + +#define CL16 0x0000007Flu +#define CL17 0x00007F00lu +#define CL18 0x007F0000lu +#define CL19 0x7F000000lu + + +/* MXVR_ALLOC_5 Masks */ + +#define CIU20 0x00000080lu +#define CIU21 0x00008000lu +#define CIU22 0x00800000lu +#define CIU23 0x80000000lu + +#define CL20 0x0000007Flu +#define CL21 0x00007F00lu +#define CL22 0x007F0000lu +#define CL23 0x7F000000lu + + +/* MXVR_ALLOC_6 Masks */ + +#define CIU24 0x00000080lu +#define CIU25 0x00008000lu +#define CIU26 0x00800000lu +#define CIU27 0x80000000lu + +#define CL24 0x0000007Flu +#define CL25 0x00007F00lu +#define CL26 0x007F0000lu +#define CL27 0x7F000000lu + + +/* MXVR_ALLOC_7 Masks */ + +#define CIU28 0x00000080lu +#define CIU29 0x00008000lu +#define CIU30 0x00800000lu +#define CIU31 0x80000000lu + +#define CL28 0x0000007Flu +#define CL29 0x00007F00lu +#define CL30 0x007F0000lu +#define CL31 0x7F000000lu + + +/* MXVR_ALLOC_8 Masks */ + +#define CIU32 0x00000080lu +#define CIU33 0x00008000lu +#define CIU34 0x00800000lu +#define CIU35 0x80000000lu + +#define CL32 0x0000007Flu +#define CL33 0x00007F00lu +#define CL34 0x007F0000lu +#define CL35 0x7F000000lu + + +/* MXVR_ALLOC_9 Masks */ + +#define CIU36 0x00000080lu +#define CIU37 0x00008000lu +#define CIU38 0x00800000lu +#define CIU39 0x80000000lu + +#define CL36 0x0000007Flu +#define CL37 0x00007F00lu +#define CL38 0x007F0000lu +#define CL39 0x7F000000lu + + +/* MXVR_ALLOC_10 Masks */ + +#define CIU40 0x00000080lu +#define CIU41 0x00008000lu +#define CIU42 0x00800000lu +#define CIU43 0x80000000lu + +#define CL40 0x0000007Flu +#define CL41 0x00007F00lu +#define CL42 0x007F0000lu +#define CL43 0x7F000000lu + + +/* MXVR_ALLOC_11 Masks */ + +#define CIU44 0x00000080lu +#define CIU45 0x00008000lu +#define CIU46 0x00800000lu +#define CIU47 0x80000000lu + +#define CL44 0x0000007Flu +#define CL45 0x00007F00lu +#define CL46 0x007F0000lu +#define CL47 0x7F000000lu + + +/* MXVR_ALLOC_12 Masks */ + +#define CIU48 0x00000080lu +#define CIU49 0x00008000lu +#define CIU50 0x00800000lu +#define CIU51 0x80000000lu + +#define CL48 0x0000007Flu +#define CL49 0x00007F00lu +#define CL50 0x007F0000lu +#define CL51 0x7F000000lu + + +/* MXVR_ALLOC_13 Masks */ + +#define CIU52 0x00000080lu +#define CIU53 0x00008000lu +#define CIU54 0x00800000lu +#define CIU55 0x80000000lu + +#define CL52 0x0000007Flu +#define CL53 0x00007F00lu +#define CL54 0x007F0000lu +#define CL55 0x7F000000lu + + +/* MXVR_ALLOC_14 Masks */ + +#define CIU56 0x00000080lu +#define CIU57 0x00008000lu +#define CIU58 0x00800000lu +#define CIU59 0x80000000lu + +#define CL56 0x0000007Flu +#define CL57 0x00007F00lu +#define CL58 0x007F0000lu +#define CL59 0x7F000000lu + + +/* MXVR_SYNC_LCHAN_0 Masks */ + +#define LCHANPC0 0x0000000Flu +#define LCHANPC1 0x000000F0lu +#define LCHANPC2 0x00000F00lu +#define LCHANPC3 0x0000F000lu +#define LCHANPC4 0x000F0000lu +#define LCHANPC5 0x00F00000lu +#define LCHANPC6 0x0F000000lu +#define LCHANPC7 0xF0000000lu + + +/* MXVR_SYNC_LCHAN_1 Masks */ + +#define LCHANPC8 0x0000000Flu +#define LCHANPC9 0x000000F0lu +#define LCHANPC10 0x00000F00lu +#define LCHANPC11 0x0000F000lu +#define LCHANPC12 0x000F0000lu +#define LCHANPC13 0x00F00000lu +#define LCHANPC14 0x0F000000lu +#define LCHANPC15 0xF0000000lu + + +/* MXVR_SYNC_LCHAN_2 Masks */ + +#define LCHANPC16 0x0000000Flu +#define LCHANPC17 0x000000F0lu +#define LCHANPC18 0x00000F00lu +#define LCHANPC19 0x0000F000lu +#define LCHANPC20 0x000F0000lu +#define LCHANPC21 0x00F00000lu +#define LCHANPC22 0x0F000000lu +#define LCHANPC23 0xF0000000lu + + +/* MXVR_SYNC_LCHAN_3 Masks */ + +#define LCHANPC24 0x0000000Flu +#define LCHANPC25 0x000000F0lu +#define LCHANPC26 0x00000F00lu +#define LCHANPC27 0x0000F000lu +#define LCHANPC28 0x000F0000lu +#define LCHANPC29 0x00F00000lu +#define LCHANPC30 0x0F000000lu +#define LCHANPC31 0xF0000000lu + + +/* MXVR_SYNC_LCHAN_4 Masks */ + +#define LCHANPC32 0x0000000Flu +#define LCHANPC33 0x000000F0lu +#define LCHANPC34 0x00000F00lu +#define LCHANPC35 0x0000F000lu +#define LCHANPC36 0x000F0000lu +#define LCHANPC37 0x00F00000lu +#define LCHANPC38 0x0F000000lu +#define LCHANPC39 0xF0000000lu + + +/* MXVR_SYNC_LCHAN_5 Masks */ + +#define LCHANPC40 0x0000000Flu +#define LCHANPC41 0x000000F0lu +#define LCHANPC42 0x00000F00lu +#define LCHANPC43 0x0000F000lu +#define LCHANPC44 0x000F0000lu +#define LCHANPC45 0x00F00000lu +#define LCHANPC46 0x0F000000lu +#define LCHANPC47 0xF0000000lu + + +/* MXVR_SYNC_LCHAN_6 Masks */ + +#define LCHANPC48 0x0000000Flu +#define LCHANPC49 0x000000F0lu +#define LCHANPC50 0x00000F00lu +#define LCHANPC51 0x0000F000lu +#define LCHANPC52 0x000F0000lu +#define LCHANPC53 0x00F00000lu +#define LCHANPC54 0x0F000000lu +#define LCHANPC55 0xF0000000lu + + +/* MXVR_SYNC_LCHAN_7 Masks */ + +#define LCHANPC56 0x0000000Flu +#define LCHANPC57 0x000000F0lu +#define LCHANPC58 0x00000F00lu +#define LCHANPC59 0x0000F000lu + + +/* MXVR_DMAx_CONFIG Masks */ + +#define MDMAEN 0x00000001lu +#define DD 0x00000002lu +#define LCHAN 0x000003C0lu +#define BITSWAPEN 0x00000400lu +#define BYSWAPEN 0x00000800lu +#define MFLOW 0x00007000lu +#define FIXEDPM 0x00080000lu +#define STARTPAT 0x00300000lu +#define STOPPAT 0x00C00000lu +#define COUNTPOS 0x1C000000lu + +#define DD_TX 0x00000000lu +#define DD_RX 0x00000002lu + +#define LCHAN_0 0x00000000lu +#define LCHAN_1 0x00000040lu +#define LCHAN_2 0x00000080lu +#define LCHAN_3 0x000000C0lu +#define LCHAN_4 0x00000100lu +#define LCHAN_5 0x00000140lu +#define LCHAN_6 0x00000180lu +#define LCHAN_7 0x000001C0lu + +#define MFLOW_STOP 0x00000000lu +#define MFLOW_AUTO 0x00001000lu +#define MFLOW_PVC 0x00002000lu +#define MFLOW_PSS 0x00003000lu +#define MFLOW_PFC 0x00004000lu + +#define STARTPAT_0 0x00000000lu +#define STARTPAT_1 0x00100000lu + +#define STOPPAT_0 0x00000000lu +#define STOPPAT_1 0x00400000lu + +#define COUNTPOS_0 0x00000000lu +#define COUNTPOS_1 0x04000000lu +#define COUNTPOS_2 0x08000000lu +#define COUNTPOS_3 0x0C000000lu +#define COUNTPOS_4 0x10000000lu +#define COUNTPOS_5 0x14000000lu +#define COUNTPOS_6 0x18000000lu +#define COUNTPOS_7 0x1C000000lu + + +/* MXVR_AP_CTL Masks */ + +#define STARTAP 0x00000001lu +#define CANCELAP 0x00000002lu +#define RESETAP 0x00000004lu +#define APRBE0 0x00004000lu +#define APRBE1 0x00008000lu +#define APRBEX 0x0000C000lu + + +/* MXVR_CM_CTL Masks */ + +#define STARTCM 0x00000001lu +#define CANCELCM 0x00000002lu +#define CMRBEX 0xFFFF0000lu +#define CMRBE0 0x00010000lu +#define CMRBE1 0x00020000lu +#define CMRBE2 0x00040000lu +#define CMRBE3 0x00080000lu +#define CMRBE4 0x00100000lu +#define CMRBE5 0x00200000lu +#define CMRBE6 0x00400000lu +#define CMRBE7 0x00800000lu +#define CMRBE8 0x01000000lu +#define CMRBE9 0x02000000lu +#define CMRBE10 0x04000000lu +#define CMRBE11 0x08000000lu +#define CMRBE12 0x10000000lu +#define CMRBE13 0x20000000lu +#define CMRBE14 0x40000000lu +#define CMRBE15 0x80000000lu + + +/* MXVR_PAT_DATA_x Masks */ + +#define MATCH_DATA_0 0x000000FFlu +#define MATCH_DATA_1 0x0000FF00lu +#define MATCH_DATA_2 0x00FF0000lu +#define MATCH_DATA_3 0xFF000000lu + + + +/* MXVR_PAT_EN_x Masks */ + +#define MATCH_EN_0_0 0x00000001lu +#define MATCH_EN_0_1 0x00000002lu +#define MATCH_EN_0_2 0x00000004lu +#define MATCH_EN_0_3 0x00000008lu +#define MATCH_EN_0_4 0x00000010lu +#define MATCH_EN_0_5 0x00000020lu +#define MATCH_EN_0_6 0x00000040lu +#define MATCH_EN_0_7 0x00000080lu + +#define MATCH_EN_1_0 0x00000100lu +#define MATCH_EN_1_1 0x00000200lu +#define MATCH_EN_1_2 0x00000400lu +#define MATCH_EN_1_3 0x00000800lu +#define MATCH_EN_1_4 0x00001000lu +#define MATCH_EN_1_5 0x00002000lu +#define MATCH_EN_1_6 0x00004000lu +#define MATCH_EN_1_7 0x00008000lu + +#define MATCH_EN_2_0 0x00010000lu +#define MATCH_EN_2_1 0x00020000lu +#define MATCH_EN_2_2 0x00040000lu +#define MATCH_EN_2_3 0x00080000lu +#define MATCH_EN_2_4 0x00100000lu +#define MATCH_EN_2_5 0x00200000lu +#define MATCH_EN_2_6 0x00400000lu +#define MATCH_EN_2_7 0x00800000lu + +#define MATCH_EN_3_0 0x01000000lu +#define MATCH_EN_3_1 0x02000000lu +#define MATCH_EN_3_2 0x04000000lu +#define MATCH_EN_3_3 0x08000000lu +#define MATCH_EN_3_4 0x10000000lu +#define MATCH_EN_3_5 0x20000000lu +#define MATCH_EN_3_6 0x40000000lu +#define MATCH_EN_3_7 0x80000000lu + + +/* MXVR_ROUTING_0 Masks */ + +#define MUTE_CH0 0x00000080lu +#define MUTE_CH1 0x00008000lu +#define MUTE_CH2 0x00800000lu +#define MUTE_CH3 0x80000000lu + +#define TX_CH0 0x0000007Flu +#define TX_CH1 0x00007F00lu +#define TX_CH2 0x007F0000lu +#define TX_CH3 0x7F000000lu + + +/* MXVR_ROUTING_1 Masks */ + +#define MUTE_CH4 0x00000080lu +#define MUTE_CH5 0x00008000lu +#define MUTE_CH6 0x00800000lu +#define MUTE_CH7 0x80000000lu + +#define TX_CH4 0x0000007Flu +#define TX_CH5 0x00007F00lu +#define TX_CH6 0x007F0000lu +#define TX_CH7 0x7F000000lu + + +/* MXVR_ROUTING_2 Masks */ + +#define MUTE_CH8 0x00000080lu +#define MUTE_CH9 0x00008000lu +#define MUTE_CH10 0x00800000lu +#define MUTE_CH11 0x80000000lu + +#define TX_CH8 0x0000007Flu +#define TX_CH9 0x00007F00lu +#define TX_CH10 0x007F0000lu +#define TX_CH11 0x7F000000lu + +/* MXVR_ROUTING_3 Masks */ + +#define MUTE_CH12 0x00000080lu +#define MUTE_CH13 0x00008000lu +#define MUTE_CH14 0x00800000lu +#define MUTE_CH15 0x80000000lu + +#define TX_CH12 0x0000007Flu +#define TX_CH13 0x00007F00lu +#define TX_CH14 0x007F0000lu +#define TX_CH15 0x7F000000lu + + +/* MXVR_ROUTING_4 Masks */ + +#define MUTE_CH16 0x00000080lu +#define MUTE_CH17 0x00008000lu +#define MUTE_CH18 0x00800000lu +#define MUTE_CH19 0x80000000lu + +#define TX_CH16 0x0000007Flu +#define TX_CH17 0x00007F00lu +#define TX_CH18 0x007F0000lu +#define TX_CH19 0x7F000000lu + + +/* MXVR_ROUTING_5 Masks */ + +#define MUTE_CH20 0x00000080lu +#define MUTE_CH21 0x00008000lu +#define MUTE_CH22 0x00800000lu +#define MUTE_CH23 0x80000000lu + +#define TX_CH20 0x0000007Flu +#define TX_CH21 0x00007F00lu +#define TX_CH22 0x007F0000lu +#define TX_CH23 0x7F000000lu + + +/* MXVR_ROUTING_6 Masks */ + +#define MUTE_CH24 0x00000080lu +#define MUTE_CH25 0x00008000lu +#define MUTE_CH26 0x00800000lu +#define MUTE_CH27 0x80000000lu + +#define TX_CH24 0x0000007Flu +#define TX_CH25 0x00007F00lu +#define TX_CH26 0x007F0000lu +#define TX_CH27 0x7F000000lu + + +/* MXVR_ROUTING_7 Masks */ + +#define MUTE_CH28 0x00000080lu +#define MUTE_CH29 0x00008000lu +#define MUTE_CH30 0x00800000lu +#define MUTE_CH31 0x80000000lu + +#define TX_CH28 0x0000007Flu +#define TX_CH29 0x00007F00lu +#define TX_CH30 0x007F0000lu +#define TX_CH31 0x7F000000lu + + +/* MXVR_ROUTING_8 Masks */ + +#define MUTE_CH32 0x00000080lu +#define MUTE_CH33 0x00008000lu +#define MUTE_CH34 0x00800000lu +#define MUTE_CH35 0x80000000lu + +#define TX_CH32 0x0000007Flu +#define TX_CH33 0x00007F00lu +#define TX_CH34 0x007F0000lu +#define TX_CH35 0x7F000000lu + + +/* MXVR_ROUTING_9 Masks */ + +#define MUTE_CH36 0x00000080lu +#define MUTE_CH37 0x00008000lu +#define MUTE_CH38 0x00800000lu +#define MUTE_CH39 0x80000000lu + +#define TX_CH36 0x0000007Flu +#define TX_CH37 0x00007F00lu +#define TX_CH38 0x007F0000lu +#define TX_CH39 0x7F000000lu + + +/* MXVR_ROUTING_10 Masks */ + +#define MUTE_CH40 0x00000080lu +#define MUTE_CH41 0x00008000lu +#define MUTE_CH42 0x00800000lu +#define MUTE_CH43 0x80000000lu + +#define TX_CH40 0x0000007Flu +#define TX_CH41 0x00007F00lu +#define TX_CH42 0x007F0000lu +#define TX_CH43 0x7F000000lu + + +/* MXVR_ROUTING_11 Masks */ + +#define MUTE_CH44 0x00000080lu +#define MUTE_CH45 0x00008000lu +#define MUTE_CH46 0x00800000lu +#define MUTE_CH47 0x80000000lu + +#define TX_CH44 0x0000007Flu +#define TX_CH45 0x00007F00lu +#define TX_CH46 0x007F0000lu +#define TX_CH47 0x7F000000lu + + +/* MXVR_ROUTING_12 Masks */ + +#define MUTE_CH48 0x00000080lu +#define MUTE_CH49 0x00008000lu +#define MUTE_CH50 0x00800000lu +#define MUTE_CH51 0x80000000lu + +#define TX_CH48 0x0000007Flu +#define TX_CH49 0x00007F00lu +#define TX_CH50 0x007F0000lu +#define TX_CH51 0x7F000000lu + + +/* MXVR_ROUTING_13 Masks */ + +#define MUTE_CH52 0x00000080lu +#define MUTE_CH53 0x00008000lu +#define MUTE_CH54 0x00800000lu +#define MUTE_CH55 0x80000000lu + +#define TX_CH52 0x0000007Flu +#define TX_CH53 0x00007F00lu +#define TX_CH54 0x007F0000lu +#define TX_CH55 0x7F000000lu + + +/* MXVR_ROUTING_14 Masks */ + +#define MUTE_CH56 0x00000080lu +#define MUTE_CH57 0x00008000lu +#define MUTE_CH58 0x00800000lu +#define MUTE_CH59 0x80000000lu + +#define TX_CH56 0x0000007Flu +#define TX_CH57 0x00007F00lu +#define TX_CH58 0x007F0000lu +#define TX_CH59 0x7F000000lu + + +/* Control Message Receive Buffer (CMRB) Address Offsets */ + +#define CMRB_STRIDE 0x00000016lu + +#define CMRB_DST_OFFSET 0x00000000lu +#define CMRB_SRC_OFFSET 0x00000002lu +#define CMRB_DATA_OFFSET 0x00000005lu + + +/* Control Message Transmit Buffer (CMTB) Address Offsets */ + +#define CMTB_PRIO_OFFSET 0x00000000lu +#define CMTB_DST_OFFSET 0x00000002lu +#define CMTB_SRC_OFFSET 0x00000004lu +#define CMTB_TYPE_OFFSET 0x00000006lu +#define CMTB_DATA_OFFSET 0x00000007lu + +#define CMTB_ANSWER_OFFSET 0x0000000Alu + +#define CMTB_STAT_N_OFFSET 0x00000018lu +#define CMTB_STAT_A_OFFSET 0x00000016lu +#define CMTB_STAT_D_OFFSET 0x0000000Elu +#define CMTB_STAT_R_OFFSET 0x00000014lu +#define CMTB_STAT_W_OFFSET 0x00000014lu +#define CMTB_STAT_G_OFFSET 0x00000014lu + + +/* Asynchronous Packet Receive Buffer (APRB) Address Offsets */ + +#define APRB_STRIDE 0x00000400lu + +#define APRB_DST_OFFSET 0x00000000lu +#define APRB_LEN_OFFSET 0x00000002lu +#define APRB_SRC_OFFSET 0x00000004lu +#define APRB_DATA_OFFSET 0x00000006lu + + +/* Asynchronous Packet Transmit Buffer (APTB) Address Offsets */ + +#define APTB_PRIO_OFFSET 0x00000000lu +#define APTB_DST_OFFSET 0x00000002lu +#define APTB_LEN_OFFSET 0x00000004lu +#define APTB_SRC_OFFSET 0x00000006lu +#define APTB_DATA_OFFSET 0x00000008lu + + +/* Remote Read Buffer (RRDB) Address Offsets */ + +#define RRDB_WADDR_OFFSET 0x00000100lu +#define RRDB_WLEN_OFFSET 0x00000101lu + + + +/* ************ CONTROLLER AREA NETWORK (CAN) MASKS ***************/ +/* CAN_CONTROL Masks */ +#define SRS 0x0001 /* Software Reset */ +#define DNM 0x0002 /* Device Net Mode */ +#define ABO 0x0004 /* Auto-Bus On Enable */ +#define WBA 0x0010 /* Wake-Up On CAN Bus Activity Enable */ +#define SMR 0x0020 /* Sleep Mode Request */ +#define CSR 0x0040 /* CAN Suspend Mode Request */ +#define CCR 0x0080 /* CAN Configuration Mode Request */ + +/* CAN_STATUS Masks */ +#define WT 0x0001 /* TX Warning Flag */ +#define WR 0x0002 /* RX Warning Flag */ +#define EP 0x0004 /* Error Passive Mode */ +#define EBO 0x0008 /* Error Bus Off Mode */ +#define CSA 0x0040 /* Suspend Mode Acknowledge */ +#define CCA 0x0080 /* Configuration Mode Acknowledge */ +#define MBPTR 0x1F00 /* Mailbox Pointer */ +#define TRM 0x4000 /* Transmit Mode */ +#define REC 0x8000 /* Receive Mode */ + +/* CAN_CLOCK Masks */ +#define BRP 0x03FF /* Bit-Rate Pre-Scaler */ + +/* CAN_TIMING Masks */ +#define TSEG1 0x000F /* Time Segment 1 */ +#define TSEG2 0x0070 /* Time Segment 2 */ +#define SAM 0x0080 /* Sampling */ +#define SJW 0x0300 /* Synchronization Jump Width */ + +/* CAN_DEBUG Masks */ +#define DEC 0x0001 /* Disable CAN Error Counters */ +#define DRI 0x0002 /* Disable CAN RX Input */ +#define DTO 0x0004 /* Disable CAN TX Output */ +#define DIL 0x0008 /* Disable CAN Internal Loop */ +#define MAA 0x0010 /* Mode Auto-Acknowledge Enable */ +#define MRB 0x0020 /* Mode Read Back Enable */ +#define CDE 0x8000 /* CAN Debug Enable */ + +/* CAN_CEC Masks */ +#define RXECNT 0x00FF /* Receive Error Counter */ +#define TXECNT 0xFF00 /* Transmit Error Counter */ + +/* CAN_INTR Masks */ +#define MBRIRQ 0x0001 /* Mailbox Receive Interrupt */ +#define MBRIF MBRIRQ /* legacy */ +#define MBTIRQ 0x0002 /* Mailbox Transmit Interrupt */ +#define MBTIF MBTIRQ /* legacy */ +#define GIRQ 0x0004 /* Global Interrupt */ +#define SMACK 0x0008 /* Sleep Mode Acknowledge */ +#define CANTX 0x0040 /* CAN TX Bus Value */ +#define CANRX 0x0080 /* CAN RX Bus Value */ + +/* CAN_MBxx_ID1 and CAN_MBxx_ID0 Masks */ +#define DFC 0xFFFF /* Data Filtering Code (If Enabled) (ID0) */ +#define EXTID_LO 0xFFFF /* Lower 16 Bits of Extended Identifier (ID0) */ +#define EXTID_HI 0x0003 /* Upper 2 Bits of Extended Identifier (ID1) */ +#define BASEID 0x1FFC /* Base Identifier */ +#define IDE 0x2000 /* Identifier Extension */ +#define RTR 0x4000 /* Remote Frame Transmission Request */ +#define AME 0x8000 /* Acceptance Mask Enable */ + +/* CAN_MBxx_TIMESTAMP Masks */ +#define TSV 0xFFFF /* Timestamp */ + +/* CAN_MBxx_LENGTH Masks */ +#define DLC 0x000F /* Data Length Code */ + +/* CAN_AMxxH and CAN_AMxxL Masks */ +#define DFM 0xFFFF /* Data Field Mask (If Enabled) (CAN_AMxxL) */ +#define EXTID_LO 0xFFFF /* Lower 16 Bits of Extended Identifier (CAN_AMxxL) */ +#define EXTID_HI 0x0003 /* Upper 2 Bits of Extended Identifier (CAN_AMxxH) */ +#define BASEID 0x1FFC /* Base Identifier */ +#define AMIDE 0x2000 /* Acceptance Mask ID Extension Enable */ +#define FMD 0x4000 /* Full Mask Data Field Enable */ +#define FDF 0x8000 /* Filter On Data Field Enable */ + +/* CAN_MC1 Masks */ +#define MC0 0x0001 /* Enable Mailbox 0 */ +#define MC1 0x0002 /* Enable Mailbox 1 */ +#define MC2 0x0004 /* Enable Mailbox 2 */ +#define MC3 0x0008 /* Enable Mailbox 3 */ +#define MC4 0x0010 /* Enable Mailbox 4 */ +#define MC5 0x0020 /* Enable Mailbox 5 */ +#define MC6 0x0040 /* Enable Mailbox 6 */ +#define MC7 0x0080 /* Enable Mailbox 7 */ +#define MC8 0x0100 /* Enable Mailbox 8 */ +#define MC9 0x0200 /* Enable Mailbox 9 */ +#define MC10 0x0400 /* Enable Mailbox 10 */ +#define MC11 0x0800 /* Enable Mailbox 11 */ +#define MC12 0x1000 /* Enable Mailbox 12 */ +#define MC13 0x2000 /* Enable Mailbox 13 */ +#define MC14 0x4000 /* Enable Mailbox 14 */ +#define MC15 0x8000 /* Enable Mailbox 15 */ + +/* CAN_MC2 Masks */ +#define MC16 0x0001 /* Enable Mailbox 16 */ +#define MC17 0x0002 /* Enable Mailbox 17 */ +#define MC18 0x0004 /* Enable Mailbox 18 */ +#define MC19 0x0008 /* Enable Mailbox 19 */ +#define MC20 0x0010 /* Enable Mailbox 20 */ +#define MC21 0x0020 /* Enable Mailbox 21 */ +#define MC22 0x0040 /* Enable Mailbox 22 */ +#define MC23 0x0080 /* Enable Mailbox 23 */ +#define MC24 0x0100 /* Enable Mailbox 24 */ +#define MC25 0x0200 /* Enable Mailbox 25 */ +#define MC26 0x0400 /* Enable Mailbox 26 */ +#define MC27 0x0800 /* Enable Mailbox 27 */ +#define MC28 0x1000 /* Enable Mailbox 28 */ +#define MC29 0x2000 /* Enable Mailbox 29 */ +#define MC30 0x4000 /* Enable Mailbox 30 */ +#define MC31 0x8000 /* Enable Mailbox 31 */ + +/* CAN_MD1 Masks */ +#define MD0 0x0001 /* Enable Mailbox 0 For Receive */ +#define MD1 0x0002 /* Enable Mailbox 1 For Receive */ +#define MD2 0x0004 /* Enable Mailbox 2 For Receive */ +#define MD3 0x0008 /* Enable Mailbox 3 For Receive */ +#define MD4 0x0010 /* Enable Mailbox 4 For Receive */ +#define MD5 0x0020 /* Enable Mailbox 5 For Receive */ +#define MD6 0x0040 /* Enable Mailbox 6 For Receive */ +#define MD7 0x0080 /* Enable Mailbox 7 For Receive */ +#define MD8 0x0100 /* Enable Mailbox 8 For Receive */ +#define MD9 0x0200 /* Enable Mailbox 9 For Receive */ +#define MD10 0x0400 /* Enable Mailbox 10 For Receive */ +#define MD11 0x0800 /* Enable Mailbox 11 For Receive */ +#define MD12 0x1000 /* Enable Mailbox 12 For Receive */ +#define MD13 0x2000 /* Enable Mailbox 13 For Receive */ +#define MD14 0x4000 /* Enable Mailbox 14 For Receive */ +#define MD15 0x8000 /* Enable Mailbox 15 For Receive */ + +/* CAN_MD2 Masks */ +#define MD16 0x0001 /* Enable Mailbox 16 For Receive */ +#define MD17 0x0002 /* Enable Mailbox 17 For Receive */ +#define MD18 0x0004 /* Enable Mailbox 18 For Receive */ +#define MD19 0x0008 /* Enable Mailbox 19 For Receive */ +#define MD20 0x0010 /* Enable Mailbox 20 For Receive */ +#define MD21 0x0020 /* Enable Mailbox 21 For Receive */ +#define MD22 0x0040 /* Enable Mailbox 22 For Receive */ +#define MD23 0x0080 /* Enable Mailbox 23 For Receive */ +#define MD24 0x0100 /* Enable Mailbox 24 For Receive */ +#define MD25 0x0200 /* Enable Mailbox 25 For Receive */ +#define MD26 0x0400 /* Enable Mailbox 26 For Receive */ +#define MD27 0x0800 /* Enable Mailbox 27 For Receive */ +#define MD28 0x1000 /* Enable Mailbox 28 For Receive */ +#define MD29 0x2000 /* Enable Mailbox 29 For Receive */ +#define MD30 0x4000 /* Enable Mailbox 30 For Receive */ +#define MD31 0x8000 /* Enable Mailbox 31 For Receive */ + +/* CAN_RMP1 Masks */ +#define RMP0 0x0001 /* RX Message Pending In Mailbox 0 */ +#define RMP1 0x0002 /* RX Message Pending In Mailbox 1 */ +#define RMP2 0x0004 /* RX Message Pending In Mailbox 2 */ +#define RMP3 0x0008 /* RX Message Pending In Mailbox 3 */ +#define RMP4 0x0010 /* RX Message Pending In Mailbox 4 */ +#define RMP5 0x0020 /* RX Message Pending In Mailbox 5 */ +#define RMP6 0x0040 /* RX Message Pending In Mailbox 6 */ +#define RMP7 0x0080 /* RX Message Pending In Mailbox 7 */ +#define RMP8 0x0100 /* RX Message Pending In Mailbox 8 */ +#define RMP9 0x0200 /* RX Message Pending In Mailbox 9 */ +#define RMP10 0x0400 /* RX Message Pending In Mailbox 10 */ +#define RMP11 0x0800 /* RX Message Pending In Mailbox 11 */ +#define RMP12 0x1000 /* RX Message Pending In Mailbox 12 */ +#define RMP13 0x2000 /* RX Message Pending In Mailbox 13 */ +#define RMP14 0x4000 /* RX Message Pending In Mailbox 14 */ +#define RMP15 0x8000 /* RX Message Pending In Mailbox 15 */ + +/* CAN_RMP2 Masks */ +#define RMP16 0x0001 /* RX Message Pending In Mailbox 16 */ +#define RMP17 0x0002 /* RX Message Pending In Mailbox 17 */ +#define RMP18 0x0004 /* RX Message Pending In Mailbox 18 */ +#define RMP19 0x0008 /* RX Message Pending In Mailbox 19 */ +#define RMP20 0x0010 /* RX Message Pending In Mailbox 20 */ +#define RMP21 0x0020 /* RX Message Pending In Mailbox 21 */ +#define RMP22 0x0040 /* RX Message Pending In Mailbox 22 */ +#define RMP23 0x0080 /* RX Message Pending In Mailbox 23 */ +#define RMP24 0x0100 /* RX Message Pending In Mailbox 24 */ +#define RMP25 0x0200 /* RX Message Pending In Mailbox 25 */ +#define RMP26 0x0400 /* RX Message Pending In Mailbox 26 */ +#define RMP27 0x0800 /* RX Message Pending In Mailbox 27 */ +#define RMP28 0x1000 /* RX Message Pending In Mailbox 28 */ +#define RMP29 0x2000 /* RX Message Pending In Mailbox 29 */ +#define RMP30 0x4000 /* RX Message Pending In Mailbox 30 */ +#define RMP31 0x8000 /* RX Message Pending In Mailbox 31 */ + +/* CAN_RML1 Masks */ +#define RML0 0x0001 /* RX Message Lost In Mailbox 0 */ +#define RML1 0x0002 /* RX Message Lost In Mailbox 1 */ +#define RML2 0x0004 /* RX Message Lost In Mailbox 2 */ +#define RML3 0x0008 /* RX Message Lost In Mailbox 3 */ +#define RML4 0x0010 /* RX Message Lost In Mailbox 4 */ +#define RML5 0x0020 /* RX Message Lost In Mailbox 5 */ +#define RML6 0x0040 /* RX Message Lost In Mailbox 6 */ +#define RML7 0x0080 /* RX Message Lost In Mailbox 7 */ +#define RML8 0x0100 /* RX Message Lost In Mailbox 8 */ +#define RML9 0x0200 /* RX Message Lost In Mailbox 9 */ +#define RML10 0x0400 /* RX Message Lost In Mailbox 10 */ +#define RML11 0x0800 /* RX Message Lost In Mailbox 11 */ +#define RML12 0x1000 /* RX Message Lost In Mailbox 12 */ +#define RML13 0x2000 /* RX Message Lost In Mailbox 13 */ +#define RML14 0x4000 /* RX Message Lost In Mailbox 14 */ +#define RML15 0x8000 /* RX Message Lost In Mailbox 15 */ + +/* CAN_RML2 Masks */ +#define RML16 0x0001 /* RX Message Lost In Mailbox 16 */ +#define RML17 0x0002 /* RX Message Lost In Mailbox 17 */ +#define RML18 0x0004 /* RX Message Lost In Mailbox 18 */ +#define RML19 0x0008 /* RX Message Lost In Mailbox 19 */ +#define RML20 0x0010 /* RX Message Lost In Mailbox 20 */ +#define RML21 0x0020 /* RX Message Lost In Mailbox 21 */ +#define RML22 0x0040 /* RX Message Lost In Mailbox 22 */ +#define RML23 0x0080 /* RX Message Lost In Mailbox 23 */ +#define RML24 0x0100 /* RX Message Lost In Mailbox 24 */ +#define RML25 0x0200 /* RX Message Lost In Mailbox 25 */ +#define RML26 0x0400 /* RX Message Lost In Mailbox 26 */ +#define RML27 0x0800 /* RX Message Lost In Mailbox 27 */ +#define RML28 0x1000 /* RX Message Lost In Mailbox 28 */ +#define RML29 0x2000 /* RX Message Lost In Mailbox 29 */ +#define RML30 0x4000 /* RX Message Lost In Mailbox 30 */ +#define RML31 0x8000 /* RX Message Lost In Mailbox 31 */ + +/* CAN_OPSS1 Masks */ +#define OPSS0 0x0001 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 0 */ +#define OPSS1 0x0002 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 1 */ +#define OPSS2 0x0004 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 2 */ +#define OPSS3 0x0008 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 3 */ +#define OPSS4 0x0010 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 4 */ +#define OPSS5 0x0020 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 5 */ +#define OPSS6 0x0040 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 6 */ +#define OPSS7 0x0080 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 7 */ +#define OPSS8 0x0100 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 8 */ +#define OPSS9 0x0200 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 9 */ +#define OPSS10 0x0400 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 10 */ +#define OPSS11 0x0800 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 11 */ +#define OPSS12 0x1000 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 12 */ +#define OPSS13 0x2000 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 13 */ +#define OPSS14 0x4000 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 14 */ +#define OPSS15 0x8000 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 15 */ + +/* CAN_OPSS2 Masks */ +#define OPSS16 0x0001 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 16 */ +#define OPSS17 0x0002 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 17 */ +#define OPSS18 0x0004 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 18 */ +#define OPSS19 0x0008 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 19 */ +#define OPSS20 0x0010 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 20 */ +#define OPSS21 0x0020 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 21 */ +#define OPSS22 0x0040 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 22 */ +#define OPSS23 0x0080 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 23 */ +#define OPSS24 0x0100 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 24 */ +#define OPSS25 0x0200 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 25 */ +#define OPSS26 0x0400 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 26 */ +#define OPSS27 0x0800 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 27 */ +#define OPSS28 0x1000 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 28 */ +#define OPSS29 0x2000 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 29 */ +#define OPSS30 0x4000 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 30 */ +#define OPSS31 0x8000 /* Enable RX Overwrite Protection or TX Single-Shot For Mailbox 31 */ + +/* CAN_TRR1 Masks */ +#define TRR0 0x0001 /* Deny But Don't Lock Access To Mailbox 0 */ +#define TRR1 0x0002 /* Deny But Don't Lock Access To Mailbox 1 */ +#define TRR2 0x0004 /* Deny But Don't Lock Access To Mailbox 2 */ +#define TRR3 0x0008 /* Deny But Don't Lock Access To Mailbox 3 */ +#define TRR4 0x0010 /* Deny But Don't Lock Access To Mailbox 4 */ +#define TRR5 0x0020 /* Deny But Don't Lock Access To Mailbox 5 */ +#define TRR6 0x0040 /* Deny But Don't Lock Access To Mailbox 6 */ +#define TRR7 0x0080 /* Deny But Don't Lock Access To Mailbox 7 */ +#define TRR8 0x0100 /* Deny But Don't Lock Access To Mailbox 8 */ +#define TRR9 0x0200 /* Deny But Don't Lock Access To Mailbox 9 */ +#define TRR10 0x0400 /* Deny But Don't Lock Access To Mailbox 10 */ +#define TRR11 0x0800 /* Deny But Don't Lock Access To Mailbox 11 */ +#define TRR12 0x1000 /* Deny But Don't Lock Access To Mailbox 12 */ +#define TRR13 0x2000 /* Deny But Don't Lock Access To Mailbox 13 */ +#define TRR14 0x4000 /* Deny But Don't Lock Access To Mailbox 14 */ +#define TRR15 0x8000 /* Deny But Don't Lock Access To Mailbox 15 */ + +/* CAN_TRR2 Masks */ +#define TRR16 0x0001 /* Deny But Don't Lock Access To Mailbox 16 */ +#define TRR17 0x0002 /* Deny But Don't Lock Access To Mailbox 17 */ +#define TRR18 0x0004 /* Deny But Don't Lock Access To Mailbox 18 */ +#define TRR19 0x0008 /* Deny But Don't Lock Access To Mailbox 19 */ +#define TRR20 0x0010 /* Deny But Don't Lock Access To Mailbox 20 */ +#define TRR21 0x0020 /* Deny But Don't Lock Access To Mailbox 21 */ +#define TRR22 0x0040 /* Deny But Don't Lock Access To Mailbox 22 */ +#define TRR23 0x0080 /* Deny But Don't Lock Access To Mailbox 23 */ +#define TRR24 0x0100 /* Deny But Don't Lock Access To Mailbox 24 */ +#define TRR25 0x0200 /* Deny But Don't Lock Access To Mailbox 25 */ +#define TRR26 0x0400 /* Deny But Don't Lock Access To Mailbox 26 */ +#define TRR27 0x0800 /* Deny But Don't Lock Access To Mailbox 27 */ +#define TRR28 0x1000 /* Deny But Don't Lock Access To Mailbox 28 */ +#define TRR29 0x2000 /* Deny But Don't Lock Access To Mailbox 29 */ +#define TRR30 0x4000 /* Deny But Don't Lock Access To Mailbox 30 */ +#define TRR31 0x8000 /* Deny But Don't Lock Access To Mailbox 31 */ + +/* CAN_TRS1 Masks */ +#define TRS0 0x0001 /* Remote Frame Request For Mailbox 0 */ +#define TRS1 0x0002 /* Remote Frame Request For Mailbox 1 */ +#define TRS2 0x0004 /* Remote Frame Request For Mailbox 2 */ +#define TRS3 0x0008 /* Remote Frame Request For Mailbox 3 */ +#define TRS4 0x0010 /* Remote Frame Request For Mailbox 4 */ +#define TRS5 0x0020 /* Remote Frame Request For Mailbox 5 */ +#define TRS6 0x0040 /* Remote Frame Request For Mailbox 6 */ +#define TRS7 0x0080 /* Remote Frame Request For Mailbox 7 */ +#define TRS8 0x0100 /* Remote Frame Request For Mailbox 8 */ +#define TRS9 0x0200 /* Remote Frame Request For Mailbox 9 */ +#define TRS10 0x0400 /* Remote Frame Request For Mailbox 10 */ +#define TRS11 0x0800 /* Remote Frame Request For Mailbox 11 */ +#define TRS12 0x1000 /* Remote Frame Request For Mailbox 12 */ +#define TRS13 0x2000 /* Remote Frame Request For Mailbox 13 */ +#define TRS14 0x4000 /* Remote Frame Request For Mailbox 14 */ +#define TRS15 0x8000 /* Remote Frame Request For Mailbox 15 */ + +/* CAN_TRS2 Masks */ +#define TRS16 0x0001 /* Remote Frame Request For Mailbox 16 */ +#define TRS17 0x0002 /* Remote Frame Request For Mailbox 17 */ +#define TRS18 0x0004 /* Remote Frame Request For Mailbox 18 */ +#define TRS19 0x0008 /* Remote Frame Request For Mailbox 19 */ +#define TRS20 0x0010 /* Remote Frame Request For Mailbox 20 */ +#define TRS21 0x0020 /* Remote Frame Request For Mailbox 21 */ +#define TRS22 0x0040 /* Remote Frame Request For Mailbox 22 */ +#define TRS23 0x0080 /* Remote Frame Request For Mailbox 23 */ +#define TRS24 0x0100 /* Remote Frame Request For Mailbox 24 */ +#define TRS25 0x0200 /* Remote Frame Request For Mailbox 25 */ +#define TRS26 0x0400 /* Remote Frame Request For Mailbox 26 */ +#define TRS27 0x0800 /* Remote Frame Request For Mailbox 27 */ +#define TRS28 0x1000 /* Remote Frame Request For Mailbox 28 */ +#define TRS29 0x2000 /* Remote Frame Request For Mailbox 29 */ +#define TRS30 0x4000 /* Remote Frame Request For Mailbox 30 */ +#define TRS31 0x8000 /* Remote Frame Request For Mailbox 31 */ + +/* CAN_AA1 Masks */ +#define AA0 0x0001 /* Aborted Message In Mailbox 0 */ +#define AA1 0x0002 /* Aborted Message In Mailbox 1 */ +#define AA2 0x0004 /* Aborted Message In Mailbox 2 */ +#define AA3 0x0008 /* Aborted Message In Mailbox 3 */ +#define AA4 0x0010 /* Aborted Message In Mailbox 4 */ +#define AA5 0x0020 /* Aborted Message In Mailbox 5 */ +#define AA6 0x0040 /* Aborted Message In Mailbox 6 */ +#define AA7 0x0080 /* Aborted Message In Mailbox 7 */ +#define AA8 0x0100 /* Aborted Message In Mailbox 8 */ +#define AA9 0x0200 /* Aborted Message In Mailbox 9 */ +#define AA10 0x0400 /* Aborted Message In Mailbox 10 */ +#define AA11 0x0800 /* Aborted Message In Mailbox 11 */ +#define AA12 0x1000 /* Aborted Message In Mailbox 12 */ +#define AA13 0x2000 /* Aborted Message In Mailbox 13 */ +#define AA14 0x4000 /* Aborted Message In Mailbox 14 */ +#define AA15 0x8000 /* Aborted Message In Mailbox 15 */ + +/* CAN_AA2 Masks */ +#define AA16 0x0001 /* Aborted Message In Mailbox 16 */ +#define AA17 0x0002 /* Aborted Message In Mailbox 17 */ +#define AA18 0x0004 /* Aborted Message In Mailbox 18 */ +#define AA19 0x0008 /* Aborted Message In Mailbox 19 */ +#define AA20 0x0010 /* Aborted Message In Mailbox 20 */ +#define AA21 0x0020 /* Aborted Message In Mailbox 21 */ +#define AA22 0x0040 /* Aborted Message In Mailbox 22 */ +#define AA23 0x0080 /* Aborted Message In Mailbox 23 */ +#define AA24 0x0100 /* Aborted Message In Mailbox 24 */ +#define AA25 0x0200 /* Aborted Message In Mailbox 25 */ +#define AA26 0x0400 /* Aborted Message In Mailbox 26 */ +#define AA27 0x0800 /* Aborted Message In Mailbox 27 */ +#define AA28 0x1000 /* Aborted Message In Mailbox 28 */ +#define AA29 0x2000 /* Aborted Message In Mailbox 29 */ +#define AA30 0x4000 /* Aborted Message In Mailbox 30 */ +#define AA31 0x8000 /* Aborted Message In Mailbox 31 */ + +/* CAN_TA1 Masks */ +#define TA0 0x0001 /* Transmit Successful From Mailbox 0 */ +#define TA1 0x0002 /* Transmit Successful From Mailbox 1 */ +#define TA2 0x0004 /* Transmit Successful From Mailbox 2 */ +#define TA3 0x0008 /* Transmit Successful From Mailbox 3 */ +#define TA4 0x0010 /* Transmit Successful From Mailbox 4 */ +#define TA5 0x0020 /* Transmit Successful From Mailbox 5 */ +#define TA6 0x0040 /* Transmit Successful From Mailbox 6 */ +#define TA7 0x0080 /* Transmit Successful From Mailbox 7 */ +#define TA8 0x0100 /* Transmit Successful From Mailbox 8 */ +#define TA9 0x0200 /* Transmit Successful From Mailbox 9 */ +#define TA10 0x0400 /* Transmit Successful From Mailbox 10 */ +#define TA11 0x0800 /* Transmit Successful From Mailbox 11 */ +#define TA12 0x1000 /* Transmit Successful From Mailbox 12 */ +#define TA13 0x2000 /* Transmit Successful From Mailbox 13 */ +#define TA14 0x4000 /* Transmit Successful From Mailbox 14 */ +#define TA15 0x8000 /* Transmit Successful From Mailbox 15 */ + +/* CAN_TA2 Masks */ +#define TA16 0x0001 /* Transmit Successful From Mailbox 16 */ +#define TA17 0x0002 /* Transmit Successful From Mailbox 17 */ +#define TA18 0x0004 /* Transmit Successful From Mailbox 18 */ +#define TA19 0x0008 /* Transmit Successful From Mailbox 19 */ +#define TA20 0x0010 /* Transmit Successful From Mailbox 20 */ +#define TA21 0x0020 /* Transmit Successful From Mailbox 21 */ +#define TA22 0x0040 /* Transmit Successful From Mailbox 22 */ +#define TA23 0x0080 /* Transmit Successful From Mailbox 23 */ +#define TA24 0x0100 /* Transmit Successful From Mailbox 24 */ +#define TA25 0x0200 /* Transmit Successful From Mailbox 25 */ +#define TA26 0x0400 /* Transmit Successful From Mailbox 26 */ +#define TA27 0x0800 /* Transmit Successful From Mailbox 27 */ +#define TA28 0x1000 /* Transmit Successful From Mailbox 28 */ +#define TA29 0x2000 /* Transmit Successful From Mailbox 29 */ +#define TA30 0x4000 /* Transmit Successful From Mailbox 30 */ +#define TA31 0x8000 /* Transmit Successful From Mailbox 31 */ + +/* CAN_MBTD Masks */ +#define TDPTR 0x001F /* Mailbox To Temporarily Disable */ +#define TDA 0x0040 /* Temporary Disable Acknowledge */ +#define TDR 0x0080 /* Temporary Disable Request */ + +/* CAN_RFH1 Masks */ +#define RFH0 0x0001 /* Enable Automatic Remote Frame Handling For Mailbox 0 */ +#define RFH1 0x0002 /* Enable Automatic Remote Frame Handling For Mailbox 1 */ +#define RFH2 0x0004 /* Enable Automatic Remote Frame Handling For Mailbox 2 */ +#define RFH3 0x0008 /* Enable Automatic Remote Frame Handling For Mailbox 3 */ +#define RFH4 0x0010 /* Enable Automatic Remote Frame Handling For Mailbox 4 */ +#define RFH5 0x0020 /* Enable Automatic Remote Frame Handling For Mailbox 5 */ +#define RFH6 0x0040 /* Enable Automatic Remote Frame Handling For Mailbox 6 */ +#define RFH7 0x0080 /* Enable Automatic Remote Frame Handling For Mailbox 7 */ +#define RFH8 0x0100 /* Enable Automatic Remote Frame Handling For Mailbox 8 */ +#define RFH9 0x0200 /* Enable Automatic Remote Frame Handling For Mailbox 9 */ +#define RFH10 0x0400 /* Enable Automatic Remote Frame Handling For Mailbox 10 */ +#define RFH11 0x0800 /* Enable Automatic Remote Frame Handling For Mailbox 11 */ +#define RFH12 0x1000 /* Enable Automatic Remote Frame Handling For Mailbox 12 */ +#define RFH13 0x2000 /* Enable Automatic Remote Frame Handling For Mailbox 13 */ +#define RFH14 0x4000 /* Enable Automatic Remote Frame Handling For Mailbox 14 */ +#define RFH15 0x8000 /* Enable Automatic Remote Frame Handling For Mailbox 15 */ + +/* CAN_RFH2 Masks */ +#define RFH16 0x0001 /* Enable Automatic Remote Frame Handling For Mailbox 16 */ +#define RFH17 0x0002 /* Enable Automatic Remote Frame Handling For Mailbox 17 */ +#define RFH18 0x0004 /* Enable Automatic Remote Frame Handling For Mailbox 18 */ +#define RFH19 0x0008 /* Enable Automatic Remote Frame Handling For Mailbox 19 */ +#define RFH20 0x0010 /* Enable Automatic Remote Frame Handling For Mailbox 20 */ +#define RFH21 0x0020 /* Enable Automatic Remote Frame Handling For Mailbox 21 */ +#define RFH22 0x0040 /* Enable Automatic Remote Frame Handling For Mailbox 22 */ +#define RFH23 0x0080 /* Enable Automatic Remote Frame Handling For Mailbox 23 */ +#define RFH24 0x0100 /* Enable Automatic Remote Frame Handling For Mailbox 24 */ +#define RFH25 0x0200 /* Enable Automatic Remote Frame Handling For Mailbox 25 */ +#define RFH26 0x0400 /* Enable Automatic Remote Frame Handling For Mailbox 26 */ +#define RFH27 0x0800 /* Enable Automatic Remote Frame Handling For Mailbox 27 */ +#define RFH28 0x1000 /* Enable Automatic Remote Frame Handling For Mailbox 28 */ +#define RFH29 0x2000 /* Enable Automatic Remote Frame Handling For Mailbox 29 */ +#define RFH30 0x4000 /* Enable Automatic Remote Frame Handling For Mailbox 30 */ +#define RFH31 0x8000 /* Enable Automatic Remote Frame Handling For Mailbox 31 */ + +/* CAN_MBTIF1 Masks */ +#define MBTIF0 0x0001 /* TX Interrupt Active In Mailbox 0 */ +#define MBTIF1 0x0002 /* TX Interrupt Active In Mailbox 1 */ +#define MBTIF2 0x0004 /* TX Interrupt Active In Mailbox 2 */ +#define MBTIF3 0x0008 /* TX Interrupt Active In Mailbox 3 */ +#define MBTIF4 0x0010 /* TX Interrupt Active In Mailbox 4 */ +#define MBTIF5 0x0020 /* TX Interrupt Active In Mailbox 5 */ +#define MBTIF6 0x0040 /* TX Interrupt Active In Mailbox 6 */ +#define MBTIF7 0x0080 /* TX Interrupt Active In Mailbox 7 */ +#define MBTIF8 0x0100 /* TX Interrupt Active In Mailbox 8 */ +#define MBTIF9 0x0200 /* TX Interrupt Active In Mailbox 9 */ +#define MBTIF10 0x0400 /* TX Interrupt Active In Mailbox 10 */ +#define MBTIF11 0x0800 /* TX Interrupt Active In Mailbox 11 */ +#define MBTIF12 0x1000 /* TX Interrupt Active In Mailbox 12 */ +#define MBTIF13 0x2000 /* TX Interrupt Active In Mailbox 13 */ +#define MBTIF14 0x4000 /* TX Interrupt Active In Mailbox 14 */ +#define MBTIF15 0x8000 /* TX Interrupt Active In Mailbox 15 */ + +/* CAN_MBTIF2 Masks */ +#define MBTIF16 0x0001 /* TX Interrupt Active In Mailbox 16 */ +#define MBTIF17 0x0002 /* TX Interrupt Active In Mailbox 17 */ +#define MBTIF18 0x0004 /* TX Interrupt Active In Mailbox 18 */ +#define MBTIF19 0x0008 /* TX Interrupt Active In Mailbox 19 */ +#define MBTIF20 0x0010 /* TX Interrupt Active In Mailbox 20 */ +#define MBTIF21 0x0020 /* TX Interrupt Active In Mailbox 21 */ +#define MBTIF22 0x0040 /* TX Interrupt Active In Mailbox 22 */ +#define MBTIF23 0x0080 /* TX Interrupt Active In Mailbox 23 */ +#define MBTIF24 0x0100 /* TX Interrupt Active In Mailbox 24 */ +#define MBTIF25 0x0200 /* TX Interrupt Active In Mailbox 25 */ +#define MBTIF26 0x0400 /* TX Interrupt Active In Mailbox 26 */ +#define MBTIF27 0x0800 /* TX Interrupt Active In Mailbox 27 */ +#define MBTIF28 0x1000 /* TX Interrupt Active In Mailbox 28 */ +#define MBTIF29 0x2000 /* TX Interrupt Active In Mailbox 29 */ +#define MBTIF30 0x4000 /* TX Interrupt Active In Mailbox 30 */ +#define MBTIF31 0x8000 /* TX Interrupt Active In Mailbox 31 */ + +/* CAN_MBRIF1 Masks */ +#define MBRIF0 0x0001 /* RX Interrupt Active In Mailbox 0 */ +#define MBRIF1 0x0002 /* RX Interrupt Active In Mailbox 1 */ +#define MBRIF2 0x0004 /* RX Interrupt Active In Mailbox 2 */ +#define MBRIF3 0x0008 /* RX Interrupt Active In Mailbox 3 */ +#define MBRIF4 0x0010 /* RX Interrupt Active In Mailbox 4 */ +#define MBRIF5 0x0020 /* RX Interrupt Active In Mailbox 5 */ +#define MBRIF6 0x0040 /* RX Interrupt Active In Mailbox 6 */ +#define MBRIF7 0x0080 /* RX Interrupt Active In Mailbox 7 */ +#define MBRIF8 0x0100 /* RX Interrupt Active In Mailbox 8 */ +#define MBRIF9 0x0200 /* RX Interrupt Active In Mailbox 9 */ +#define MBRIF10 0x0400 /* RX Interrupt Active In Mailbox 10 */ +#define MBRIF11 0x0800 /* RX Interrupt Active In Mailbox 11 */ +#define MBRIF12 0x1000 /* RX Interrupt Active In Mailbox 12 */ +#define MBRIF13 0x2000 /* RX Interrupt Active In Mailbox 13 */ +#define MBRIF14 0x4000 /* RX Interrupt Active In Mailbox 14 */ +#define MBRIF15 0x8000 /* RX Interrupt Active In Mailbox 15 */ + +/* CAN_MBRIF2 Masks */ +#define MBRIF16 0x0001 /* RX Interrupt Active In Mailbox 16 */ +#define MBRIF17 0x0002 /* RX Interrupt Active In Mailbox 17 */ +#define MBRIF18 0x0004 /* RX Interrupt Active In Mailbox 18 */ +#define MBRIF19 0x0008 /* RX Interrupt Active In Mailbox 19 */ +#define MBRIF20 0x0010 /* RX Interrupt Active In Mailbox 20 */ +#define MBRIF21 0x0020 /* RX Interrupt Active In Mailbox 21 */ +#define MBRIF22 0x0040 /* RX Interrupt Active In Mailbox 22 */ +#define MBRIF23 0x0080 /* RX Interrupt Active In Mailbox 23 */ +#define MBRIF24 0x0100 /* RX Interrupt Active In Mailbox 24 */ +#define MBRIF25 0x0200 /* RX Interrupt Active In Mailbox 25 */ +#define MBRIF26 0x0400 /* RX Interrupt Active In Mailbox 26 */ +#define MBRIF27 0x0800 /* RX Interrupt Active In Mailbox 27 */ +#define MBRIF28 0x1000 /* RX Interrupt Active In Mailbox 28 */ +#define MBRIF29 0x2000 /* RX Interrupt Active In Mailbox 29 */ +#define MBRIF30 0x4000 /* RX Interrupt Active In Mailbox 30 */ +#define MBRIF31 0x8000 /* RX Interrupt Active In Mailbox 31 */ + +/* CAN_MBIM1 Masks */ +#define MBIM0 0x0001 /* Enable Interrupt For Mailbox 0 */ +#define MBIM1 0x0002 /* Enable Interrupt For Mailbox 1 */ +#define MBIM2 0x0004 /* Enable Interrupt For Mailbox 2 */ +#define MBIM3 0x0008 /* Enable Interrupt For Mailbox 3 */ +#define MBIM4 0x0010 /* Enable Interrupt For Mailbox 4 */ +#define MBIM5 0x0020 /* Enable Interrupt For Mailbox 5 */ +#define MBIM6 0x0040 /* Enable Interrupt For Mailbox 6 */ +#define MBIM7 0x0080 /* Enable Interrupt For Mailbox 7 */ +#define MBIM8 0x0100 /* Enable Interrupt For Mailbox 8 */ +#define MBIM9 0x0200 /* Enable Interrupt For Mailbox 9 */ +#define MBIM10 0x0400 /* Enable Interrupt For Mailbox 10 */ +#define MBIM11 0x0800 /* Enable Interrupt For Mailbox 11 */ +#define MBIM12 0x1000 /* Enable Interrupt For Mailbox 12 */ +#define MBIM13 0x2000 /* Enable Interrupt For Mailbox 13 */ +#define MBIM14 0x4000 /* Enable Interrupt For Mailbox 14 */ +#define MBIM15 0x8000 /* Enable Interrupt For Mailbox 15 */ + +/* CAN_MBIM2 Masks */ +#define MBIM16 0x0001 /* Enable Interrupt For Mailbox 16 */ +#define MBIM17 0x0002 /* Enable Interrupt For Mailbox 17 */ +#define MBIM18 0x0004 /* Enable Interrupt For Mailbox 18 */ +#define MBIM19 0x0008 /* Enable Interrupt For Mailbox 19 */ +#define MBIM20 0x0010 /* Enable Interrupt For Mailbox 20 */ +#define MBIM21 0x0020 /* Enable Interrupt For Mailbox 21 */ +#define MBIM22 0x0040 /* Enable Interrupt For Mailbox 22 */ +#define MBIM23 0x0080 /* Enable Interrupt For Mailbox 23 */ +#define MBIM24 0x0100 /* Enable Interrupt For Mailbox 24 */ +#define MBIM25 0x0200 /* Enable Interrupt For Mailbox 25 */ +#define MBIM26 0x0400 /* Enable Interrupt For Mailbox 26 */ +#define MBIM27 0x0800 /* Enable Interrupt For Mailbox 27 */ +#define MBIM28 0x1000 /* Enable Interrupt For Mailbox 28 */ +#define MBIM29 0x2000 /* Enable Interrupt For Mailbox 29 */ +#define MBIM30 0x4000 /* Enable Interrupt For Mailbox 30 */ +#define MBIM31 0x8000 /* Enable Interrupt For Mailbox 31 */ + +/* CAN_GIM Masks */ +#define EWTIM 0x0001 /* Enable TX Error Count Interrupt */ +#define EWRIM 0x0002 /* Enable RX Error Count Interrupt */ +#define EPIM 0x0004 /* Enable Error-Passive Mode Interrupt */ +#define BOIM 0x0008 /* Enable Bus Off Interrupt */ +#define WUIM 0x0010 /* Enable Wake-Up Interrupt */ +#define UIAIM 0x0020 /* Enable Access To Unimplemented Address Interrupt */ +#define AAIM 0x0040 /* Enable Abort Acknowledge Interrupt */ +#define RMLIM 0x0080 /* Enable RX Message Lost Interrupt */ +#define UCEIM 0x0100 /* Enable Universal Counter Overflow Interrupt */ +#define EXTIM 0x0200 /* Enable External Trigger Output Interrupt */ +#define ADIM 0x0400 /* Enable Access Denied Interrupt */ + +/* CAN_GIS Masks */ +#define EWTIS 0x0001 /* TX Error Count IRQ Status */ +#define EWRIS 0x0002 /* RX Error Count IRQ Status */ +#define EPIS 0x0004 /* Error-Passive Mode IRQ Status */ +#define BOIS 0x0008 /* Bus Off IRQ Status */ +#define WUIS 0x0010 /* Wake-Up IRQ Status */ +#define UIAIS 0x0020 /* Access To Unimplemented Address IRQ Status */ +#define AAIS 0x0040 /* Abort Acknowledge IRQ Status */ +#define RMLIS 0x0080 /* RX Message Lost IRQ Status */ +#define UCEIS 0x0100 /* Universal Counter Overflow IRQ Status */ +#define EXTIS 0x0200 /* External Trigger Output IRQ Status */ +#define ADIS 0x0400 /* Access Denied IRQ Status */ + +/* CAN_GIF Masks */ +#define EWTIF 0x0001 /* TX Error Count IRQ Flag */ +#define EWRIF 0x0002 /* RX Error Count IRQ Flag */ +#define EPIF 0x0004 /* Error-Passive Mode IRQ Flag */ +#define BOIF 0x0008 /* Bus Off IRQ Flag */ +#define WUIF 0x0010 /* Wake-Up IRQ Flag */ +#define UIAIF 0x0020 /* Access To Unimplemented Address IRQ Flag */ +#define AAIF 0x0040 /* Abort Acknowledge IRQ Flag */ +#define RMLIF 0x0080 /* RX Message Lost IRQ Flag */ +#define UCEIF 0x0100 /* Universal Counter Overflow IRQ Flag */ +#define EXTIF 0x0200 /* External Trigger Output IRQ Flag */ +#define ADIF 0x0400 /* Access Denied IRQ Flag */ + +/* CAN_UCCNF Masks */ +#define UCCNF 0x000F /* Universal Counter Mode */ +#define UC_STAMP 0x0001 /* Timestamp Mode */ +#define UC_WDOG 0x0002 /* Watchdog Mode */ +#define UC_AUTOTX 0x0003 /* Auto-Transmit Mode */ +#define UC_ERROR 0x0006 /* CAN Error Frame Count */ +#define UC_OVER 0x0007 /* CAN Overload Frame Count */ +#define UC_LOST 0x0008 /* Arbitration Lost During TX Count */ +#define UC_AA 0x0009 /* TX Abort Count */ +#define UC_TA 0x000A /* TX Successful Count */ +#define UC_REJECT 0x000B /* RX Message Rejected Count */ +#define UC_RML 0x000C /* RX Message Lost Count */ +#define UC_RX 0x000D /* Total Successful RX Messages Count */ +#define UC_RMP 0x000E /* Successful RX W/Matching ID Count */ +#define UC_ALL 0x000F /* Correct Message On CAN Bus Line Count */ +#define UCRC 0x0020 /* Universal Counter Reload/Clear */ +#define UCCT 0x0040 /* Universal Counter CAN Trigger */ +#define UCE 0x0080 /* Universal Counter Enable */ + +/* CAN_ESR Masks */ +#define ACKE 0x0004 /* Acknowledge Error */ +#define SER 0x0008 /* Stuff Error */ +#define CRCE 0x0010 /* CRC Error */ +#define SA0 0x0020 /* Stuck At Dominant Error */ +#define BEF 0x0040 /* Bit Error Flag */ +#define FER 0x0080 /* Form Error Flag */ + +/* CAN_EWR Masks */ +#define EWLREC 0x00FF /* RX Error Count Limit (For EWRIS) */ +#define EWLTEC 0xFF00 /* TX Error Count Limit (For EWTIS) */ + +#endif /* _DEF_BF539_H */ diff --git a/arch/blackfin/mach-bf538/include/mach/dma.h b/arch/blackfin/mach-bf538/include/mach/dma.h new file mode 100644 index 000000000000..c2210a996e68 --- /dev/null +++ b/arch/blackfin/mach-bf538/include/mach/dma.h @@ -0,0 +1,65 @@ +/* + * file: include/asm-blackfin/mach-bf538/dma.h + * based on: + * author: + * + * created: + * description: + * system mmr register map + * rev: + * + * modified: + * + * + * bugs: enter bugs at http://blackfin.uclinux.org/ + * + * this program is free software; you can redistribute it and/or modify + * it under the terms of the gnu general public license as published by + * the free software foundation; either version 2, or (at your option) + * any later version. + * + * this program is distributed in the hope that it will be useful, + * but without any warranty; without even the implied warranty of + * merchantability or fitness for a particular purpose. see the + * gnu general public license for more details. + * + * you should have received a copy of the gnu general public license + * along with this program; see the file copying. + * if not, write to the free software foundation, + * 59 temple place - suite 330, boston, ma 02111-1307, usa. + */ + +#ifndef _MACH_DMA_H_ +#define _MACH_DMA_H_ + +#define CH_PPI 0 +#define CH_SPORT0_RX 1 +#define CH_SPORT0_TX 2 +#define CH_SPORT1_RX 3 +#define CH_SPORT1_TX 4 +#define CH_SPI0 5 +#define CH_UART0_RX 6 +#define CH_UART0_TX 7 +#define CH_SPORT2_RX 8 +#define CH_SPORT2_TX 9 +#define CH_SPORT3_RX 10 +#define CH_SPORT3_TX 11 +#define CH_SPI1 14 +#define CH_SPI2 15 +#define CH_UART1_RX 16 +#define CH_UART1_TX 17 +#define CH_UART2_RX 18 +#define CH_UART2_TX 19 + +#define CH_MEM_STREAM0_DEST 20 +#define CH_MEM_STREAM0_SRC 21 +#define CH_MEM_STREAM1_DEST 22 +#define CH_MEM_STREAM1_SRC 23 +#define CH_MEM_STREAM2_DEST 24 +#define CH_MEM_STREAM2_SRC 25 +#define CH_MEM_STREAM3_DEST 26 +#define CH_MEM_STREAM3_SRC 27 + +#define MAX_BLACKFIN_DMA_CHANNEL 28 + +#endif diff --git a/arch/blackfin/mach-bf538/include/mach/irq.h b/arch/blackfin/mach-bf538/include/mach/irq.h new file mode 100644 index 000000000000..60bdac4cb6a4 --- /dev/null +++ b/arch/blackfin/mach-bf538/include/mach/irq.h @@ -0,0 +1,211 @@ +/* + * file: include/asm-blackfin/mach-bf538/irq.h + * based on: include/asm-blackfin/mach-bf537/irq.h + * author: Michael Hennerich (michael.hennerich@analog.com) + * + * created: + * description: + * system mmr register map + * rev: + * + * modified: + * + * + * bugs: enter bugs at http://blackfin.uclinux.org/ + * + * this program is free software; you can redistribute it and/or modify + * it under the terms of the gnu general public license as published by + * the free software foundation; either version 2, or (at your option) + * any later version. + * + * this program is distributed in the hope that it will be useful, + * but without any warranty; without even the implied warranty of + * merchantability or fitness for a particular purpose. see the + * gnu general public license for more details. + * + * you should have received a copy of the gnu general public license + * along with this program; see the file copying. + * if not, write to the free software foundation, + * 59 temple place - suite 330, boston, ma 02111-1307, usa. + */ + +#ifndef _BF538_IRQ_H_ +#define _BF538_IRQ_H_ + +/* + * Interrupt source definitions + Event Source Core Event Name + Core Emulation ** + Events (highest priority) EMU 0 + Reset RST 1 + NMI NMI 2 + Exception EVX 3 + Reserved -- 4 + Hardware Error IVHW 5 + Core Timer IVTMR 6 * + + ..... + + Software Interrupt 1 IVG14 31 + Software Interrupt 2 -- + (lowest priority) IVG15 32 * +*/ + +#define NR_PERI_INTS (2 * 32) + +/* The ABSTRACT IRQ definitions */ +/** the first seven of the following are fixed, the rest you change if you need to **/ +#define IRQ_EMU 0 /* Emulation */ +#define IRQ_RST 1 /* reset */ +#define IRQ_NMI 2 /* Non Maskable */ +#define IRQ_EVX 3 /* Exception */ +#define IRQ_UNUSED 4 /* - unused interrupt */ +#define IRQ_HWERR 5 /* Hardware Error */ +#define IRQ_CORETMR 6 /* Core timer */ + +#define BFIN_IRQ(x) ((x) + 7) + +#define IRQ_PLL_WAKEUP BFIN_IRQ(0) /* PLL Wakeup Interrupt */ +#define IRQ_DMA0_ERROR BFIN_IRQ(1) /* DMA Error 0 (generic) */ +#define IRQ_PPI_ERROR BFIN_IRQ(2) /* PPI Error */ +#define IRQ_SPORT0_ERROR BFIN_IRQ(3) /* SPORT0 Status */ +#define IRQ_SPORT1_ERROR BFIN_IRQ(4) /* SPORT1 Status */ +#define IRQ_SPI0_ERROR BFIN_IRQ(5) /* SPI0 Status */ +#define IRQ_UART0_ERROR BFIN_IRQ(6) /* UART0 Status */ +#define IRQ_RTC BFIN_IRQ(7) /* RTC */ +#define IRQ_PPI BFIN_IRQ(8) /* DMA Channel 0 (PPI) */ +#define IRQ_SPORT0_RX BFIN_IRQ(9) /* DMA 1 Channel (SPORT0 RX) */ +#define IRQ_SPORT0_TX BFIN_IRQ(10) /* DMA 2 Channel (SPORT0 TX) */ +#define IRQ_SPORT1_RX BFIN_IRQ(11) /* DMA 3 Channel (SPORT1 RX) */ +#define IRQ_SPORT1_TX BFIN_IRQ(12) /* DMA 4 Channel (SPORT1 TX) */ +#define IRQ_SPI0 BFIN_IRQ(13) /* DMA 5 Channel (SPI0) */ +#define IRQ_UART0_RX BFIN_IRQ(14) /* DMA 6 Channel (UART0 RX) */ +#define IRQ_UART0_TX BFIN_IRQ(15) /* DMA 7 Channel (UART0 TX) */ +#define IRQ_TMR0 BFIN_IRQ(16) /* Timer 0 */ +#define IRQ_TMR1 BFIN_IRQ(17) /* Timer 1 */ +#define IRQ_TMR2 BFIN_IRQ(18) /* Timer 2 */ +#define IRQ_PORTF_INTA BFIN_IRQ(19) /* Port F Interrupt A */ +#define IRQ_PORTF_INTB BFIN_IRQ(20) /* Port F Interrupt B */ +#define IRQ_MEM0_DMA0 BFIN_IRQ(21) /* MDMA0 Stream 0 */ +#define IRQ_MEM0_DMA1 BFIN_IRQ(22) /* MDMA0 Stream 1 */ +#define IRQ_WATCH BFIN_IRQ(23) /* Software Watchdog Timer */ +#define IRQ_DMA1_ERROR BFIN_IRQ(24) /* DMA Error 1 (generic) */ +#define IRQ_SPORT2_ERROR BFIN_IRQ(25) /* SPORT2 Status */ +#define IRQ_SPORT3_ERROR BFIN_IRQ(26) /* SPORT3 Status */ +#define IRQ_SPI1_ERROR BFIN_IRQ(28) /* SPI1 Status */ +#define IRQ_SPI2_ERROR BFIN_IRQ(29) /* SPI2 Status */ +#define IRQ_UART1_ERROR BFIN_IRQ(30) /* UART1 Status */ +#define IRQ_UART2_ERROR BFIN_IRQ(31) /* UART2 Status */ +#define IRQ_CAN_ERROR BFIN_IRQ(32) /* CAN Status (Error) Interrupt */ +#define IRQ_SPORT2_RX BFIN_IRQ(33) /* DMA 8 Channel (SPORT2 RX) */ +#define IRQ_SPORT2_TX BFIN_IRQ(34) /* DMA 9 Channel (SPORT2 TX) */ +#define IRQ_SPORT3_RX BFIN_IRQ(35) /* DMA 10 Channel (SPORT3 RX) */ +#define IRQ_SPORT3_TX BFIN_IRQ(36) /* DMA 11 Channel (SPORT3 TX) */ +#define IRQ_SPI1 BFIN_IRQ(39) /* DMA 14 Channel (SPI1) */ +#define IRQ_SPI2 BFIN_IRQ(40) /* DMA 15 Channel (SPI2) */ +#define IRQ_UART1_RX BFIN_IRQ(41) /* DMA 16 Channel (UART1 RX) */ +#define IRQ_UART1_TX BFIN_IRQ(42) /* DMA 17 Channel (UART1 TX) */ +#define IRQ_UART2_RX BFIN_IRQ(43) /* DMA 18 Channel (UART2 RX) */ +#define IRQ_UART2_TX BFIN_IRQ(44) /* DMA 19 Channel (UART2 TX) */ +#define IRQ_TWI0 BFIN_IRQ(45) /* TWI0 */ +#define IRQ_TWI1 BFIN_IRQ(46) /* TWI1 */ +#define IRQ_CAN_RX BFIN_IRQ(47) /* CAN Receive Interrupt */ +#define IRQ_CAN_TX BFIN_IRQ(48) /* CAN Transmit Interrupt */ +#define IRQ_MEM1_DMA0 BFIN_IRQ(49) /* MDMA1 Stream 0 */ +#define IRQ_MEM1_DMA1 BFIN_IRQ(50) /* MDMA1 Stream 1 */ + +#define SYS_IRQS BFIN_IRQ(63) /* 70 */ + +#define IRQ_PF0 71 +#define IRQ_PF1 72 +#define IRQ_PF2 73 +#define IRQ_PF3 74 +#define IRQ_PF4 75 +#define IRQ_PF5 76 +#define IRQ_PF6 77 +#define IRQ_PF7 78 +#define IRQ_PF8 79 +#define IRQ_PF9 80 +#define IRQ_PF10 81 +#define IRQ_PF11 82 +#define IRQ_PF12 83 +#define IRQ_PF13 84 +#define IRQ_PF14 85 +#define IRQ_PF15 86 + +#define GPIO_IRQ_BASE IRQ_PF0 + +#define NR_IRQS (IRQ_PF15+1) + +#define IVG7 7 +#define IVG8 8 +#define IVG9 9 +#define IVG10 10 +#define IVG11 11 +#define IVG12 12 +#define IVG13 13 +#define IVG14 14 +#define IVG15 15 + +/* IAR0 BIT FIELDS */ +#define IRQ_PLL_WAKEUP_POS 0 +#define IRQ_DMA0_ERROR_POS 4 +#define IRQ_PPI_ERROR_POS 8 +#define IRQ_SPORT0_ERROR_POS 12 +#define IRQ_SPORT1_ERROR_POS 16 +#define IRQ_SPI0_ERROR_POS 20 +#define IRQ_UART0_ERROR_POS 24 +#define IRQ_RTC_POS 28 + +/* IAR1 BIT FIELDS */ +#define IRQ_PPI_POS 0 +#define IRQ_SPORT0_RX_POS 4 +#define IRQ_SPORT0_TX_POS 8 +#define IRQ_SPORT1_RX_POS 12 +#define IRQ_SPORT1_TX_POS 16 +#define IRQ_SPI0_POS 20 +#define IRQ_UART0_RX_POS 24 +#define IRQ_UART0_TX_POS 28 + +/* IAR2 BIT FIELDS */ +#define IRQ_TMR0_POS 0 +#define IRQ_TMR1_POS 4 +#define IRQ_TMR2_POS 8 +#define IRQ_PORTF_INTA_POS 12 +#define IRQ_PORTF_INTB_POS 16 +#define IRQ_MEM0_DMA0_POS 20 +#define IRQ_MEM0_DMA1_POS 24 +#define IRQ_WATCH_POS 28 + +/* IAR3 BIT FIELDS */ +#define IRQ_DMA1_ERROR_POS 0 +#define IRQ_SPORT2_ERROR_POS 4 +#define IRQ_SPORT3_ERROR_POS 8 +#define IRQ_SPI1_ERROR_POS 16 +#define IRQ_SPI2_ERROR_POS 20 +#define IRQ_UART1_ERROR_POS 24 +#define IRQ_UART2_ERROR_POS 28 + +/* IAR4 BIT FIELDS */ +#define IRQ_CAN_ERROR_POS 0 +#define IRQ_SPORT2_RX_POS 4 +#define IRQ_SPORT2_TX_POS 8 +#define IRQ_SPORT3_RX_POS 12 +#define IRQ_SPORT3_TX_POS 16 +#define IRQ_SPI1_POS 28 + +/* IAR5 BIT FIELDS */ +#define IRQ_SPI2_POS 0 +#define IRQ_UART1_RX_POS 4 +#define IRQ_UART1_TX_POS 8 +#define IRQ_UART2_RX_POS 12 +#define IRQ_UART2_TX_POS 16 +#define IRQ_TWI0_POS 20 +#define IRQ_TWI1_POS 24 +#define IRQ_CAN_RX_POS 28 + +/* IAR6 BIT FIELDS */ +#define IRQ_CAN_TX_POS 0 +#define IRQ_MEM1_DMA0_POS 4 +#define IRQ_MEM1_DMA1_POS 8 +#endif /* _BF538_IRQ_H_ */ diff --git a/arch/blackfin/mach-bf538/include/mach/mem_init.h b/arch/blackfin/mach-bf538/include/mach/mem_init.h new file mode 100644 index 000000000000..d3961ba997c6 --- /dev/null +++ b/arch/blackfin/mach-bf538/include/mach/mem_init.h @@ -0,0 +1,303 @@ +/* + * File: include/asm-blackfin/mach-bf538/mem_init.h + * Based on: + * Author: + * + * Created: + * Description: + * + * Rev: + * + * Modified: + * Copyright 2004-2006 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#if (CONFIG_MEM_MT48LC16M16A2TG_75 || CONFIG_MEM_MT48LC64M4A2FB_7E || CONFIG_MEM_MT48LC16M8A2TG_75 || CONFIG_MEM_GENERIC_BOARD || CONFIG_MEM_MT48LC32M8A2_75) +#if (CONFIG_SCLK_HZ > 119402985) +#define SDRAM_tRP TRP_2 +#define SDRAM_tRP_num 2 +#define SDRAM_tRAS TRAS_7 +#define SDRAM_tRAS_num 7 +#define SDRAM_tRCD TRCD_2 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 104477612) && (CONFIG_SCLK_HZ <= 119402985) +#define SDRAM_tRP TRP_2 +#define SDRAM_tRP_num 2 +#define SDRAM_tRAS TRAS_6 +#define SDRAM_tRAS_num 6 +#define SDRAM_tRCD TRCD_2 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 89552239) && (CONFIG_SCLK_HZ <= 104477612) +#define SDRAM_tRP TRP_2 +#define SDRAM_tRP_num 2 +#define SDRAM_tRAS TRAS_5 +#define SDRAM_tRAS_num 5 +#define SDRAM_tRCD TRCD_2 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 74626866) && (CONFIG_SCLK_HZ <= 89552239) +#define SDRAM_tRP TRP_2 +#define SDRAM_tRP_num 2 +#define SDRAM_tRAS TRAS_4 +#define SDRAM_tRAS_num 4 +#define SDRAM_tRCD TRCD_2 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 66666667) && (CONFIG_SCLK_HZ <= 74626866) +#define SDRAM_tRP TRP_2 +#define SDRAM_tRP_num 2 +#define SDRAM_tRAS TRAS_3 +#define SDRAM_tRAS_num 3 +#define SDRAM_tRCD TRCD_2 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 59701493) && (CONFIG_SCLK_HZ <= 66666667) +#define SDRAM_tRP TRP_1 +#define SDRAM_tRP_num 1 +#define SDRAM_tRAS TRAS_4 +#define SDRAM_tRAS_num 3 +#define SDRAM_tRCD TRCD_1 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 44776119) && (CONFIG_SCLK_HZ <= 59701493) +#define SDRAM_tRP TRP_1 +#define SDRAM_tRP_num 1 +#define SDRAM_tRAS TRAS_3 +#define SDRAM_tRAS_num 3 +#define SDRAM_tRCD TRCD_1 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 29850746) && (CONFIG_SCLK_HZ <= 44776119) +#define SDRAM_tRP TRP_1 +#define SDRAM_tRP_num 1 +#define SDRAM_tRAS TRAS_2 +#define SDRAM_tRAS_num 2 +#define SDRAM_tRCD TRCD_1 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ <= 29850746) +#define SDRAM_tRP TRP_1 +#define SDRAM_tRP_num 1 +#define SDRAM_tRAS TRAS_1 +#define SDRAM_tRAS_num 1 +#define SDRAM_tRCD TRCD_1 +#define SDRAM_tWR TWR_2 +#endif +#endif + +#if (CONFIG_MEM_MT48LC16M16A2TG_75) + /*SDRAM INFORMATION: */ +#define SDRAM_Tref 64 /* Refresh period in milliseconds */ +#define SDRAM_NRA 8192 /* Number of row addresses in SDRAM */ +#define SDRAM_CL CL_3 +#endif + +#if (CONFIG_MEM_MT48LC16M8A2TG_75) + /*SDRAM INFORMATION: */ +#define SDRAM_Tref 64 /* Refresh period in milliseconds */ +#define SDRAM_NRA 4096 /* Number of row addresses in SDRAM */ +#define SDRAM_CL CL_3 +#endif + +#if (CONFIG_MEM_MT48LC32M8A2_75) + /*SDRAM INFORMATION: */ +#define SDRAM_Tref 64 /* Refresh period in milliseconds */ +#define SDRAM_NRA 8192 /* Number of row addresses in SDRAM */ +#define SDRAM_CL CL_3 +#endif + +#if (CONFIG_MEM_MT48LC64M4A2FB_7E) + /*SDRAM INFORMATION: */ +#define SDRAM_Tref 64 /* Refresh period in milliseconds */ +#define SDRAM_NRA 8192 /* Number of row addresses in SDRAM */ +#define SDRAM_CL CL_3 +#endif + +#if (CONFIG_MEM_GENERIC_BOARD) + /*SDRAM INFORMATION: Modify this for your board */ +#define SDRAM_Tref 64 /* Refresh period in milliseconds */ +#define SDRAM_NRA 8192 /* Number of row addresses in SDRAM */ +#define SDRAM_CL CL_3 +#endif + +/* Equation from section 17 (p17-46) of BF533 HRM */ +#define mem_SDRRC ((((CONFIG_SCLK_HZ / 1000) * SDRAM_Tref) / SDRAM_NRA) - (SDRAM_tRAS_num + SDRAM_tRP_num)) + +/* Enable SCLK Out */ +#define mem_SDGCTL (SCTLE | SDRAM_CL | SDRAM_tRAS | SDRAM_tRP | SDRAM_tRCD | SDRAM_tWR | PSS) + +#if defined CONFIG_CLKIN_HALF +#define CLKIN_HALF 1 +#else +#define CLKIN_HALF 0 +#endif + +#if defined CONFIG_PLL_BYPASS +#define PLL_BYPASS 1 +#else +#define PLL_BYPASS 0 +#endif + +/***************************************Currently Not Being Used *********************************/ +#define flash_EBIU_AMBCTL_WAT (((CONFIG_FLASH_SPEED_BWAT * 4) / (4000000000 / CONFIG_SCLK_HZ)) + 1) +#define flash_EBIU_AMBCTL_RAT (((CONFIG_FLASH_SPEED_BRAT * 4) / (4000000000 / CONFIG_SCLK_HZ)) + 1) +#define flash_EBIU_AMBCTL_HT (((CONFIG_FLASH_SPEED_BHT * 4) / (4000000000 / CONFIG_SCLK_HZ))) +#define flash_EBIU_AMBCTL_ST (((CONFIG_FLASH_SPEED_BST * 4) / (4000000000 / CONFIG_SCLK_HZ)) + 1) +#define flash_EBIU_AMBCTL_TT (((CONFIG_FLASH_SPEED_BTT * 4) / (4000000000 / CONFIG_SCLK_HZ)) + 1) + +#if (flash_EBIU_AMBCTL_TT > 3) +#define flash_EBIU_AMBCTL0_TT B0TT_4 +#endif +#if (flash_EBIU_AMBCTL_TT == 3) +#define flash_EBIU_AMBCTL0_TT B0TT_3 +#endif +#if (flash_EBIU_AMBCTL_TT == 2) +#define flash_EBIU_AMBCTL0_TT B0TT_2 +#endif +#if (flash_EBIU_AMBCTL_TT < 2) +#define flash_EBIU_AMBCTL0_TT B0TT_1 +#endif + +#if (flash_EBIU_AMBCTL_ST > 3) +#define flash_EBIU_AMBCTL0_ST B0ST_4 +#endif +#if (flash_EBIU_AMBCTL_ST == 3) +#define flash_EBIU_AMBCTL0_ST B0ST_3 +#endif +#if (flash_EBIU_AMBCTL_ST == 2) +#define flash_EBIU_AMBCTL0_ST B0ST_2 +#endif +#if (flash_EBIU_AMBCTL_ST < 2) +#define flash_EBIU_AMBCTL0_ST B0ST_1 +#endif + +#if (flash_EBIU_AMBCTL_HT > 2) +#define flash_EBIU_AMBCTL0_HT B0HT_3 +#endif +#if (flash_EBIU_AMBCTL_HT == 2) +#define flash_EBIU_AMBCTL0_HT B0HT_2 +#endif +#if (flash_EBIU_AMBCTL_HT == 1) +#define flash_EBIU_AMBCTL0_HT B0HT_1 +#endif +#if (flash_EBIU_AMBCTL_HT == 0 && CONFIG_FLASH_SPEED_BHT == 0) +#define flash_EBIU_AMBCTL0_HT B0HT_0 +#endif +#if (flash_EBIU_AMBCTL_HT == 0 && CONFIG_FLASH_SPEED_BHT != 0) +#define flash_EBIU_AMBCTL0_HT B0HT_1 +#endif + +#if (flash_EBIU_AMBCTL_WAT > 14) +#define flash_EBIU_AMBCTL0_WAT B0WAT_15 +#endif +#if (flash_EBIU_AMBCTL_WAT == 14) +#define flash_EBIU_AMBCTL0_WAT B0WAT_14 +#endif +#if (flash_EBIU_AMBCTL_WAT == 13) +#define flash_EBIU_AMBCTL0_WAT B0WAT_13 +#endif +#if (flash_EBIU_AMBCTL_WAT == 12) +#define flash_EBIU_AMBCTL0_WAT B0WAT_12 +#endif +#if (flash_EBIU_AMBCTL_WAT == 11) +#define flash_EBIU_AMBCTL0_WAT B0WAT_11 +#endif +#if (flash_EBIU_AMBCTL_WAT == 10) +#define flash_EBIU_AMBCTL0_WAT B0WAT_10 +#endif +#if (flash_EBIU_AMBCTL_WAT == 9) +#define flash_EBIU_AMBCTL0_WAT B0WAT_9 +#endif +#if (flash_EBIU_AMBCTL_WAT == 8) +#define flash_EBIU_AMBCTL0_WAT B0WAT_8 +#endif +#if (flash_EBIU_AMBCTL_WAT == 7) +#define flash_EBIU_AMBCTL0_WAT B0WAT_7 +#endif +#if (flash_EBIU_AMBCTL_WAT == 6) +#define flash_EBIU_AMBCTL0_WAT B0WAT_6 +#endif +#if (flash_EBIU_AMBCTL_WAT == 5) +#define flash_EBIU_AMBCTL0_WAT B0WAT_5 +#endif +#if (flash_EBIU_AMBCTL_WAT == 4) +#define flash_EBIU_AMBCTL0_WAT B0WAT_4 +#endif +#if (flash_EBIU_AMBCTL_WAT == 3) +#define flash_EBIU_AMBCTL0_WAT B0WAT_3 +#endif +#if (flash_EBIU_AMBCTL_WAT == 2) +#define flash_EBIU_AMBCTL0_WAT B0WAT_2 +#endif +#if (flash_EBIU_AMBCTL_WAT == 1) +#define flash_EBIU_AMBCTL0_WAT B0WAT_1 +#endif + +#if (flash_EBIU_AMBCTL_RAT > 14) +#define flash_EBIU_AMBCTL0_RAT B0RAT_15 +#endif +#if (flash_EBIU_AMBCTL_RAT == 14) +#define flash_EBIU_AMBCTL0_RAT B0RAT_14 +#endif +#if (flash_EBIU_AMBCTL_RAT == 13) +#define flash_EBIU_AMBCTL0_RAT B0RAT_13 +#endif +#if (flash_EBIU_AMBCTL_RAT == 12) +#define flash_EBIU_AMBCTL0_RAT B0RAT_12 +#endif +#if (flash_EBIU_AMBCTL_RAT == 11) +#define flash_EBIU_AMBCTL0_RAT B0RAT_11 +#endif +#if (flash_EBIU_AMBCTL_RAT == 10) +#define flash_EBIU_AMBCTL0_RAT B0RAT_10 +#endif +#if (flash_EBIU_AMBCTL_RAT == 9) +#define flash_EBIU_AMBCTL0_RAT B0RAT_9 +#endif +#if (flash_EBIU_AMBCTL_RAT == 8) +#define flash_EBIU_AMBCTL0_RAT B0RAT_8 +#endif +#if (flash_EBIU_AMBCTL_RAT == 7) +#define flash_EBIU_AMBCTL0_RAT B0RAT_7 +#endif +#if (flash_EBIU_AMBCTL_RAT == 6) +#define flash_EBIU_AMBCTL0_RAT B0RAT_6 +#endif +#if (flash_EBIU_AMBCTL_RAT == 5) +#define flash_EBIU_AMBCTL0_RAT B0RAT_5 +#endif +#if (flash_EBIU_AMBCTL_RAT == 4) +#define flash_EBIU_AMBCTL0_RAT B0RAT_4 +#endif +#if (flash_EBIU_AMBCTL_RAT == 3) +#define flash_EBIU_AMBCTL0_RAT B0RAT_3 +#endif +#if (flash_EBIU_AMBCTL_RAT == 2) +#define flash_EBIU_AMBCTL0_RAT B0RAT_2 +#endif +#if (flash_EBIU_AMBCTL_RAT == 1) +#define flash_EBIU_AMBCTL0_RAT B0RAT_1 +#endif + +#define flash_EBIU_AMBCTL0 \ + (flash_EBIU_AMBCTL0_WAT | flash_EBIU_AMBCTL0_RAT | flash_EBIU_AMBCTL0_HT | \ + flash_EBIU_AMBCTL0_ST | flash_EBIU_AMBCTL0_TT | CONFIG_FLASH_SPEED_RDYEN) diff --git a/arch/blackfin/mach-bf538/include/mach/mem_map.h b/arch/blackfin/mach-bf538/include/mach/mem_map.h new file mode 100644 index 000000000000..d65d4301615a --- /dev/null +++ b/arch/blackfin/mach-bf538/include/mach/mem_map.h @@ -0,0 +1,107 @@ +/* + * File: include/asm-blackfin/mach-bf538/mem_map.h + * Based on: + * Author: + * + * Created: + * Description: + * + * Rev: + * + * Modified: + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _MEM_MAP_538_H_ +#define _MEM_MAP_538_H_ + +#define COREMMR_BASE 0xFFE00000 /* Core MMRs */ +#define SYSMMR_BASE 0xFFC00000 /* System MMRs */ + +/* Async Memory Banks */ +#define ASYNC_BANK3_BASE 0x20300000 /* Async Bank 3 */ +#define ASYNC_BANK3_SIZE 0x00100000 /* 1M */ +#define ASYNC_BANK2_BASE 0x20200000 /* Async Bank 2 */ +#define ASYNC_BANK2_SIZE 0x00100000 /* 1M */ +#define ASYNC_BANK1_BASE 0x20100000 /* Async Bank 1 */ +#define ASYNC_BANK1_SIZE 0x00100000 /* 1M */ +#define ASYNC_BANK0_BASE 0x20000000 /* Async Bank 0 */ +#define ASYNC_BANK0_SIZE 0x00100000 /* 1M */ + +/* Boot ROM Memory */ + +#define BOOT_ROM_START 0xEF000000 +#define BOOT_ROM_LENGTH 0x400 + +/* Level 1 Memory */ + +#ifdef CONFIG_BFIN_ICACHE +#define BFIN_ICACHESIZE (16*1024) +#else +#define BFIN_ICACHESIZE (0*1024) +#endif + +/* Memory Map for ADSP-BF538/9 processors */ + +#define L1_CODE_START 0xFFA00000 +#define L1_DATA_A_START 0xFF800000 +#define L1_DATA_B_START 0xFF900000 + +#ifdef CONFIG_BFIN_ICACHE +#define L1_CODE_LENGTH (0x14000 - 0x4000) +#else +#define L1_CODE_LENGTH 0x14000 +#endif + +#ifdef CONFIG_BFIN_DCACHE + +#ifdef CONFIG_BFIN_DCACHE_BANKA +#define DMEM_CNTR (ACACHE_BSRAM | ENDCPLB | PORT_PREF0) +#define L1_DATA_A_LENGTH (0x8000 - 0x4000) +#define L1_DATA_B_LENGTH 0x8000 +#define BFIN_DCACHESIZE (16*1024) +#define BFIN_DSUPBANKS 1 +#else +#define DMEM_CNTR (ACACHE_BCACHE | ENDCPLB | PORT_PREF0) +#define L1_DATA_A_LENGTH (0x8000 - 0x4000) +#define L1_DATA_B_LENGTH (0x8000 - 0x4000) +#define BFIN_DCACHESIZE (32*1024) +#define BFIN_DSUPBANKS 2 +#endif + +#else +#define DMEM_CNTR (ASRAM_BSRAM | ENDCPLB | PORT_PREF0) +#define L1_DATA_A_LENGTH 0x8000 +#define L1_DATA_B_LENGTH 0x8000 +#define BFIN_DCACHESIZE (0*1024) +#define BFIN_DSUPBANKS 0 +#endif /*CONFIG_BFIN_DCACHE*/ + + +/* Level 2 Memory - none */ + +#define L2_START 0 +#define L2_LENGTH 0 + +/* Scratch Pad Memory */ + +#define L1_SCRATCH_START 0xFFB00000 +#define L1_SCRATCH_LENGTH 0x1000 + +#endif /* _MEM_MAP_538_H_ */ diff --git a/arch/blackfin/mach-bf538/include/mach/portmux.h b/arch/blackfin/mach-bf538/include/mach/portmux.h new file mode 100644 index 000000000000..1e031b588b47 --- /dev/null +++ b/arch/blackfin/mach-bf538/include/mach/portmux.h @@ -0,0 +1,106 @@ +#ifndef _MACH_PORTMUX_H_ +#define _MACH_PORTMUX_H_ + +#define MAX_RESOURCES MAX_BLACKFIN_GPIOS + +#define P_TMR2 (P_DONTCARE) +#define P_TMR1 (P_DONTCARE) +#define P_TMR0 (P_DONTCARE) +#define P_TMRCLK (P_DONTCARE) +#define P_PPI0_CLK (P_DONTCARE) +#define P_PPI0_FS1 (P_DONTCARE) +#define P_PPI0_FS2 (P_DONTCARE) + +#define P_TWI0_SCL (P_DONTCARE) +#define P_TWI0_SDA (P_DONTCARE) +#define P_TWI1_SCL (P_DONTCARE) +#define P_TWI1_SDA (P_DONTCARE) + +#define P_SPORT1_TSCLK (P_DONTCARE) +#define P_SPORT1_RSCLK (P_DONTCARE) +#define P_SPORT0_TSCLK (P_DONTCARE) +#define P_SPORT0_RSCLK (P_DONTCARE) +#define P_SPORT1_DRSEC (P_DONTCARE) +#define P_SPORT1_RFS (P_DONTCARE) +#define P_SPORT1_DTPRI (P_DONTCARE) +#define P_SPORT1_DTSEC (P_DONTCARE) +#define P_SPORT1_TFS (P_DONTCARE) +#define P_SPORT1_DRPRI (P_DONTCARE) +#define P_SPORT0_DRSEC (P_DONTCARE) +#define P_SPORT0_RFS (P_DONTCARE) +#define P_SPORT0_DTPRI (P_DONTCARE) +#define P_SPORT0_DTSEC (P_DONTCARE) +#define P_SPORT0_TFS (P_DONTCARE) +#define P_SPORT0_DRPRI (P_DONTCARE) + +#define P_UART0_RX (P_DONTCARE) +#define P_UART0_TX (P_DONTCARE) + +#define P_SPI0_MOSI (P_DONTCARE) +#define P_SPI0_MISO (P_DONTCARE) +#define P_SPI0_SCK (P_DONTCARE) + +#define P_PPI0_D0 (P_DONTCARE) +#define P_PPI0_D1 (P_DONTCARE) +#define P_PPI0_D2 (P_DONTCARE) +#define P_PPI0_D3 (P_DONTCARE) + +#define P_CAN0_TX (P_DEFINED | P_IDENT(GPIO_PC0)) +#define P_CAN0_RX (P_DEFINED | P_IDENT(GPIO_PC1)) + +#define P_SPI1_MOSI (P_DEFINED | P_IDENT(GPIO_PD0)) +#define P_SPI1_MISO (P_DEFINED | P_IDENT(GPIO_PD1)) +#define P_SPI1_SCK (P_DEFINED | P_IDENT(GPIO_PD2)) +#define P_SPI1_SS (P_DEFINED | P_IDENT(GPIO_PD3)) +#define P_SPI1_SSEL1 (P_DEFINED | P_IDENT(GPIO_PD4)) +#define P_SPI2_MOSI (P_DEFINED | P_IDENT(GPIO_PD5)) +#define P_SPI2_MISO (P_DEFINED | P_IDENT(GPIO_PD6)) +#define P_SPI2_SCK (P_DEFINED | P_IDENT(GPIO_PD7)) +#define P_SPI2_SS (P_DEFINED | P_IDENT(GPIO_PD8)) +#define P_SPI2_SSEL1 (P_DEFINED | P_IDENT(GPIO_PD9)) +#define P_UART1_RX (P_DEFINED | P_IDENT(GPIO_PD10)) +#define P_UART1_TX (P_DEFINED | P_IDENT(GPIO_PD11)) +#define P_UART2_RX (P_DEFINED | P_IDENT(GPIO_PD12)) +#define P_UART2_TX (P_DEFINED | P_IDENT(GPIO_PD13)) + +#define P_SPORT2_RSCLK (P_DEFINED | P_IDENT(GPIO_PE0)) +#define P_SPORT2_RFS (P_DEFINED | P_IDENT(GPIO_PE1)) +#define P_SPORT2_DRPRI (P_DEFINED | P_IDENT(GPIO_PE2)) +#define P_SPORT2_DRSEC (P_DEFINED | P_IDENT(GPIO_PE3)) +#define P_SPORT2_TSCLK (P_DEFINED | P_IDENT(GPIO_PE4)) +#define P_SPORT2_TFS (P_DEFINED | P_IDENT(GPIO_PE5)) +#define P_SPORT2_DTPRI (P_DEFINED | P_IDENT(GPIO_PE6)) +#define P_SPORT2_DTSEC (P_DEFINED | P_IDENT(GPIO_PE7)) +#define P_SPORT3_RSCLK (P_DEFINED | P_IDENT(GPIO_PE8)) +#define P_SPORT3_RFS (P_DEFINED | P_IDENT(GPIO_PE9)) +#define P_SPORT3_DRPRI (P_DEFINED | P_IDENT(GPIO_PE10)) +#define P_SPORT3_DRSEC (P_DEFINED | P_IDENT(GPIO_PE11)) +#define P_SPORT3_TSCLK (P_DEFINED | P_IDENT(GPIO_PE12)) +#define P_SPORT3_TFS (P_DEFINED | P_IDENT(GPIO_PE13)) +#define P_SPORT3_DTPRI (P_DEFINED | P_IDENT(GPIO_PE14)) +#define P_SPORT3_DTSEC (P_DEFINED | P_IDENT(GPIO_PE15)) + +#define P_PPI0_FS3 (P_DEFINED | P_IDENT(GPIO_PF3)) +#define P_PPI0_D15 (P_DEFINED | P_IDENT(GPIO_PF4)) +#define P_PPI0_D14 (P_DEFINED | P_IDENT(GPIO_PF5)) +#define P_PPI0_D13 (P_DEFINED | P_IDENT(GPIO_PF6)) +#define P_PPI0_D12 (P_DEFINED | P_IDENT(GPIO_PF7)) +#define P_PPI0_D11 (P_DEFINED | P_IDENT(GPIO_PF8)) +#define P_PPI0_D10 (P_DEFINED | P_IDENT(GPIO_PF9)) +#define P_PPI0_D9 (P_DEFINED | P_IDENT(GPIO_PF10)) +#define P_PPI0_D8 (P_DEFINED | P_IDENT(GPIO_PF11)) + +#define P_PPI0_D4 (P_DEFINED | P_IDENT(GPIO_PF15)) +#define P_PPI0_D5 (P_DEFINED | P_IDENT(GPIO_PF14)) +#define P_PPI0_D6 (P_DEFINED | P_IDENT(GPIO_PF13)) +#define P_PPI0_D7 (P_DEFINED | P_IDENT(GPIO_PF12)) +#define P_SPI0_SSEL7 (P_DEFINED | P_IDENT(GPIO_PF7)) +#define P_SPI0_SSEL6 (P_DEFINED | P_IDENT(GPIO_PF6)) +#define P_SPI0_SSEL5 (P_DEFINED | P_IDENT(GPIO_PF5)) +#define P_SPI0_SSEL4 (P_DEFINED | P_IDENT(GPIO_PF4)) +#define P_SPI0_SSEL3 (P_DEFINED | P_IDENT(GPIO_PF3)) +#define P_SPI0_SSEL2 (P_DEFINED | P_IDENT(GPIO_PF2)) +#define P_SPI0_SSEL1 (P_DEFINED | P_IDENT(GPIO_PF1)) +#define P_SPI0_SS (P_DEFINED | P_IDENT(GPIO_PF0)) + +#endif /* _MACH_PORTMUX_H_ */ diff --git a/arch/blackfin/mach-bf538/ints-priority.c b/arch/blackfin/mach-bf538/ints-priority.c new file mode 100644 index 000000000000..f81f2ac91840 --- /dev/null +++ b/arch/blackfin/mach-bf538/ints-priority.c @@ -0,0 +1,94 @@ +/* + * File: arch/blackfin/mach-bf538/ints-priority.c + * Based on: arch/blackfin/mach-bf533/ints-priority.c + * Author: Michael Hennerich + * + * Created: + * Description: Set up the interrupt priorities + * + * Modified: + * Copyright 2008 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include + +void __init program_IAR(void) +{ + + /* Program the IAR0 Register with the configured priority */ + bfin_write_SIC_IAR0(((CONFIG_IRQ_PLL_WAKEUP - 7) << IRQ_PLL_WAKEUP_POS) | + ((CONFIG_IRQ_DMA0_ERROR - 7) << IRQ_DMA0_ERROR_POS) | + ((CONFIG_IRQ_PPI_ERROR - 7) << IRQ_PPI_ERROR_POS) | + ((CONFIG_IRQ_SPORT0_ERROR - 7) << IRQ_SPORT0_ERROR_POS) | + ((CONFIG_IRQ_SPORT1_ERROR - 7) << IRQ_SPORT1_ERROR_POS) | + ((CONFIG_IRQ_SPI0_ERROR - 7) << IRQ_SPI0_ERROR_POS) | + ((CONFIG_IRQ_UART0_ERROR - 7) << IRQ_UART0_ERROR_POS) | + ((CONFIG_IRQ_RTC - 7) << IRQ_RTC_POS)); + + bfin_write_SIC_IAR1(((CONFIG_IRQ_PPI - 7) << IRQ_PPI_POS) | + ((CONFIG_IRQ_SPORT0_RX - 7) << IRQ_SPORT0_RX_POS) | + ((CONFIG_IRQ_SPORT0_TX - 7) << IRQ_SPORT0_TX_POS) | + ((CONFIG_IRQ_SPORT1_RX - 7) << IRQ_SPORT1_RX_POS) | + ((CONFIG_IRQ_SPORT1_TX - 7) << IRQ_SPORT1_TX_POS) | + ((CONFIG_IRQ_SPI0 - 7) << IRQ_SPI0_POS) | + ((CONFIG_IRQ_UART0_RX - 7) << IRQ_UART0_RX_POS) | + ((CONFIG_IRQ_UART0_TX - 7) << IRQ_UART0_TX_POS)); + + bfin_write_SIC_IAR2(((CONFIG_IRQ_TMR0 - 7) << IRQ_TMR0_POS) | + ((CONFIG_IRQ_TMR1 - 7) << IRQ_TMR1_POS) | + ((CONFIG_IRQ_TMR2 - 7) << IRQ_TMR2_POS) | + ((CONFIG_IRQ_PORTF_INTA - 7) << IRQ_PORTF_INTA_POS) | + ((CONFIG_IRQ_PORTF_INTB - 7) << IRQ_PORTF_INTB_POS) | + ((CONFIG_IRQ_MEM0_DMA0 - 7) << IRQ_MEM0_DMA0_POS) | + ((CONFIG_IRQ_MEM0_DMA1 - 7) << IRQ_MEM0_DMA1_POS) | + ((CONFIG_IRQ_WATCH - 7) << IRQ_WATCH_POS)); + + bfin_write_SIC_IAR3(((CONFIG_IRQ_DMA1_ERROR - 7) << IRQ_DMA1_ERROR_POS) | + ((CONFIG_IRQ_SPORT2_ERROR - 7) << IRQ_SPORT2_ERROR_POS) | + ((CONFIG_IRQ_SPORT3_ERROR - 7) << IRQ_SPORT3_ERROR_POS) | + ((CONFIG_IRQ_SPI1_ERROR - 7) << IRQ_SPI1_ERROR_POS) | + ((CONFIG_IRQ_SPI2_ERROR - 7) << IRQ_SPI2_ERROR_POS) | + ((CONFIG_IRQ_UART1_ERROR - 7) << IRQ_UART1_ERROR_POS) | + ((CONFIG_IRQ_UART2_ERROR - 7) << IRQ_UART2_ERROR_POS)); + + bfin_write_SIC_IAR4(((CONFIG_IRQ_CAN_ERROR - 7) << IRQ_CAN_ERROR_POS) | + ((CONFIG_IRQ_SPORT2_RX - 7) << IRQ_SPORT2_RX_POS) | + ((CONFIG_IRQ_SPORT2_TX - 7) << IRQ_SPORT2_TX_POS) | + ((CONFIG_IRQ_SPORT3_RX - 7) << IRQ_SPORT3_RX_POS) | + ((CONFIG_IRQ_SPORT3_TX - 7) << IRQ_SPORT3_TX_POS) | + ((CONFIG_IRQ_SPI1 - 7) << IRQ_SPI1_POS)); + + bfin_write_SIC_IAR5(((CONFIG_IRQ_SPI2 - 7) << IRQ_SPI2_POS) | + ((CONFIG_IRQ_UART1_RX - 7) << IRQ_UART1_RX_POS) | + ((CONFIG_IRQ_UART1_TX - 7) << IRQ_UART1_TX_POS) | + ((CONFIG_IRQ_UART2_RX - 7) << IRQ_UART2_RX_POS) | + ((CONFIG_IRQ_UART2_TX - 7) << IRQ_UART2_TX_POS) | + ((CONFIG_IRQ_TWI0 - 7) << IRQ_TWI0_POS) | + ((CONFIG_IRQ_TWI1 - 7) << IRQ_TWI1_POS) | + ((CONFIG_IRQ_CAN_RX - 7) << IRQ_CAN_RX_POS)); + + bfin_write_SIC_IAR6(((CONFIG_IRQ_CAN_TX - 7) << IRQ_CAN_TX_POS) | + ((CONFIG_IRQ_MEM1_DMA0 - 7) << IRQ_MEM1_DMA0_POS) | + ((CONFIG_IRQ_MEM1_DMA1 - 7) << IRQ_MEM1_DMA1_POS)); + + SSYNC(); +} diff --git a/arch/blackfin/mach-common/dpmc_modes.S b/arch/blackfin/mach-common/dpmc_modes.S index ad5431e2cd05..21f61605f1f3 100644 --- a/arch/blackfin/mach-common/dpmc_modes.S +++ b/arch/blackfin/mach-common/dpmc_modes.S @@ -247,7 +247,8 @@ ENTRY(_unset_dram_srfs) ENDPROC(_unset_dram_srfs) ENTRY(_set_sic_iwr) -#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) +#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) || \ + defined(CONFIG_BF538) || defined(CONFIG_BF539) P0.H = hi(SIC_IWR0); P0.L = lo(SIC_IWR0); P1.H = hi(SIC_IWR1); diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index 34e8a726ffda..67700e6c90c7 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -103,12 +103,13 @@ static void __init search_IAR(void) for (irqn = 0; irqn < NR_PERI_INTS; irqn++) { int iar_shift = (irqn & 7) * 4; if (ivg == (0xf & -#ifndef CONFIG_BF52x +#if defined(CONFIG_BF52x) || defined(CONFIG_BF538) || defined(CONFIG_BF539) bfin_read32((unsigned long *)SIC_IAR0 + - (irqn >> 3)) >> iar_shift)) { + ((irqn % 32) >> 3) + ((irqn / 32) * + ((SIC_IAR4 - SIC_IAR0) / 4))) >> iar_shift)) { #else bfin_read32((unsigned long *)SIC_IAR0 + - ((irqn%32) >> 3) + ((irqn / 32) * 16)) >> iar_shift)) { + (irqn >> 3)) >> iar_shift)) { #endif ivg_table[irq_pos].irqno = IVG7 + irqn; ivg_table[irq_pos].isrflag = 1 << (irqn % 32); @@ -537,6 +538,10 @@ static void bfin_demux_gpio_irq(unsigned int inta_irq, irq = IRQ_PH0; break; # endif +#elif defined(CONFIG_BF538) || defined(CONFIG_BF539) + case IRQ_PORTF_INTA: + irq = IRQ_PF0; + break; #elif defined(CONFIG_BF52x) case IRQ_PORTF_INTA: irq = IRQ_PF0; @@ -984,7 +989,7 @@ int __init init_arch_irq(void) int irq; unsigned long ilat = 0; /* Disable all the peripheral intrs - page 4-29 HW Ref manual */ -#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) +#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) || defined(BF538_FAMILY) bfin_write_SIC_IMASK0(SIC_UNMASK_ALL); bfin_write_SIC_IMASK1(SIC_UNMASK_ALL); # ifdef CONFIG_BF54x @@ -1037,7 +1042,10 @@ int __init init_arch_irq(void) case IRQ_PROG0_INTA: case IRQ_PROG1_INTA: case IRQ_PROG2_INTA: +#elif defined(CONFIG_BF538) || defined(CONFIG_BF539) + case IRQ_PORTF_INTA: #endif + set_irq_chained_handler(irq, bfin_demux_gpio_irq); break; @@ -1085,7 +1093,7 @@ int __init init_arch_irq(void) IMASK_IVG14 | IMASK_IVG13 | IMASK_IVG12 | IMASK_IVG11 | IMASK_IVG10 | IMASK_IVG9 | IMASK_IVG8 | IMASK_IVG7 | IMASK_IVGHW; -#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) +#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) || defined(BF538_FAMILY) bfin_write_SIC_IWR0(IWR_DISABLE_ALL); #if defined(CONFIG_BF52x) /* BF52x system reset does not properly reset SIC_IWR1 which @@ -1117,7 +1125,7 @@ void do_irq(int vec, struct pt_regs *fp) } else { struct ivgx *ivg = ivg7_13[vec - IVG7].ifirst; struct ivgx *ivg_stop = ivg7_13[vec - IVG7].istop; -#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) +#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) || defined(BF538_FAMILY) unsigned long sic_status[3]; sic_status[0] = bfin_read_SIC_ISR0() & bfin_read_SIC_IMASK0(); diff --git a/arch/blackfin/mach-common/pm.c b/arch/blackfin/mach-common/pm.c index e28c6af1f415..f774d8aa5b03 100644 --- a/arch/blackfin/mach-common/pm.c +++ b/arch/blackfin/mach-common/pm.c @@ -82,7 +82,8 @@ void bfin_pm_suspend_standby_enter(void) bfin_pm_standby_restore(); -#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) +#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) || \ + defined(CONFIG_BF538) || defined(CONFIG_BF539) bfin_write_SIC_IWR0(IWR_DISABLE_ALL); #if defined(CONFIG_BF52x) /* BF52x system reset does not properly reset SIC_IWR1 which From fe8015ce2588e3ffe65284a2883703355804276e Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 28 Oct 2008 11:07:15 +0800 Subject: [PATCH 0750/2985] Blackfin arch: move EXPORT_SYMBOL to the place where it is actually defined - kernel_thread - irq_flags - checksum Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/kernel/bfin_ksyms.c | 9 --------- arch/blackfin/kernel/process.c | 1 + arch/blackfin/lib/checksum.c | 4 ++++ arch/blackfin/mach-common/ints-priority.c | 1 + 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c index 4367330909b2..f1a4573b0442 100644 --- a/arch/blackfin/kernel/bfin_ksyms.c +++ b/arch/blackfin/kernel/bfin_ksyms.c @@ -38,16 +38,9 @@ EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(ip_fast_csum); - -EXPORT_SYMBOL(kernel_thread); - EXPORT_SYMBOL(is_in_rom); EXPORT_SYMBOL(bfin_return_from_exception); -/* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial_copy); - /* The following are special because they're not called * explicitly (the C compiler generates them). Fortunately, * their interface isn't gonna change any time soon now, so @@ -96,7 +89,6 @@ EXPORT_SYMBOL(insw_8); EXPORT_SYMBOL(outsl); EXPORT_SYMBOL(insl); EXPORT_SYMBOL(insl_16); -EXPORT_SYMBOL(irq_flags); EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(blackfin_dcache_invalidate_range); EXPORT_SYMBOL(blackfin_icache_dcache_flush_range); @@ -104,7 +96,6 @@ EXPORT_SYMBOL(blackfin_icache_flush_range); EXPORT_SYMBOL(blackfin_dcache_flush_range); EXPORT_SYMBOL(blackfin_dflush_page); -EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(__init_begin); EXPORT_SYMBOL(__init_end); EXPORT_SYMBOL(_ebss_l1); diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 0c3ea118b657..326e3019cd23 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -154,6 +154,7 @@ pid_t kernel_thread(int (*fn) (void *), void *arg, unsigned long flags) return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); } +EXPORT_SYMBOL(kernel_thread); void flush_thread(void) { diff --git a/arch/blackfin/lib/checksum.c b/arch/blackfin/lib/checksum.c index 5c87505165d3..762a7f02970a 100644 --- a/arch/blackfin/lib/checksum.c +++ b/arch/blackfin/lib/checksum.c @@ -29,6 +29,7 @@ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include #include #include @@ -76,6 +77,7 @@ __sum16 ip_fast_csum(unsigned char *iph, unsigned int ihl) { return (__force __sum16)~do_csum(iph, ihl * 4); } +EXPORT_SYMBOL(ip_fast_csum); /* * computes the checksum of a memory block at buff, length len, @@ -104,6 +106,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) return sum; } +EXPORT_SYMBOL(csum_partial); /* * this routine is used for miscellaneous IP-like checksums, mainly @@ -137,3 +140,4 @@ __wsum csum_partial_copy(const void *src, void *dst, int len, __wsum sum) memcpy(dst, src, len); return csum_partial(dst, len, sum); } +EXPORT_SYMBOL(csum_partial_copy); diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index 67700e6c90c7..5ae507f59885 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -65,6 +65,7 @@ * cannot actually mask out in hardware. */ unsigned long irq_flags = 0x1f; +EXPORT_SYMBOL(irq_flags); /* The number of spurious interrupts */ atomic_t num_spurious; From 6ad2b84cf014d91a9dffa3f8edb008430fbdb445 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 28 Oct 2008 11:03:09 +0800 Subject: [PATCH 0751/2985] Blackfin arch: add an option to allow people to stick exception stack into L1 scratch allow people to stick exception stack into L1 scratch and make sure it gets placed into .bss sections rather than .data Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/Kconfig | 11 +++++++++++ arch/blackfin/mach-common/entry.S | 21 +++++++++++++++------ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index ac76baac1df3..e37e993431b9 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -667,6 +667,17 @@ config APP_STACK_L1 Currently only works with FLAT binaries. +config EXCEPTION_L1_SCRATCH + bool "Locate exception stack in L1 Scratch Memory" + default n + depends on !APP_STACK_L1 && !SYSCALL_TAB_L1 + help + Whenever an exception occurs, use the L1 Scratch memory for + stack storage. You cannot place the stacks of FLAT binaries + in L1 when using this option. + + If you don't use L1 Scratch, then you should say Y here. + comment "Speed Optimizations" config BFIN_INS_LOWOVERHEAD bool "ins[bwl] low overhead, higher interrupt latency" diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index bde6dc4e2614..ad4049882291 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1539,14 +1539,23 @@ ENTRY(_sys_call_table) .endr END(_sys_call_table) -_exception_stack: - .rept 1024 - .long 0; - .endr -_exception_stack_top: - #if ANOMALY_05000261 /* Used by the assembly entry point to work around an anomaly. */ _last_cplb_fault_retx: .long 0; #endif + +#ifdef CONFIG_EXCEPTION_L1_SCRATCH +/* .section .l1.bss.scratch */ +.set _exception_stack_top, L1_SCRATCH_START + L1_SCRATCH_LENGTH +#else +#ifdef CONFIG_SYSCALL_TAB_L1 +.section .l1.bss +#else +.bss +#endif +_exception_stack: +.size _exception_stack, 1024 * 4 +.set _exception_stack_top, _exception_stack + 1024 * 4 +.size _exception_stack_top, 0 +#endif From 0f8befa1d1fd86cac8221785a85943dfbdef4773 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 28 Oct 2008 11:12:17 +0800 Subject: [PATCH 0752/2985] Blackfin arch: drop unused exports and comment remaining exports Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/kernel/bfin_ksyms.c | 60 +++++++------------------------ 1 file changed, 12 insertions(+), 48 deletions(-) diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c index f1a4573b0442..b66f1d4c8344 100644 --- a/arch/blackfin/kernel/bfin_ksyms.c +++ b/arch/blackfin/kernel/bfin_ksyms.c @@ -1,46 +1,26 @@ /* - * File: arch/blackfin/kernel/bfin_ksyms.c - * Based on: none - original work - * Author: + * arch/blackfin/kernel/bfin_ksyms.c - exports for random symbols * - * Created: - * Description: + * Copyright 2004-2008 Analog Devices Inc. * - * Modified: - * Copyright 2004-2006 Analog Devices Inc. - * - * Bugs: Enter bugs at http://blackfin.uclinux.org/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see the file COPYING, or write - * to the Free Software Foundation, Inc., - * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * Licensed under the GPL-2 or later. */ #include -#include #include -#include #include -/* platform dependent support */ - -EXPORT_SYMBOL(__ioremap); - -EXPORT_SYMBOL(is_in_rom); +/* Allow people to have their own Blackfin exception handler in a module */ EXPORT_SYMBOL(bfin_return_from_exception); +/* All the Blackfin cache functions: mach-common/cache.S */ +EXPORT_SYMBOL(blackfin_dcache_invalidate_range); +EXPORT_SYMBOL(blackfin_icache_dcache_flush_range); +EXPORT_SYMBOL(blackfin_icache_flush_range); +EXPORT_SYMBOL(blackfin_dcache_flush_range); +EXPORT_SYMBOL(blackfin_dflush_page); + /* The following are special because they're not called * explicitly (the C compiler generates them). Fortunately, * their interface isn't gonna change any time soon now, so @@ -67,8 +47,6 @@ extern void __modsi3(void); extern void __muldi3(void); extern void __udivsi3(void); extern void __umodsi3(void); - -/* gcc lib functions */ EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__ashrdi3); EXPORT_SYMBOL(__umulsi3_highpart); @@ -80,6 +58,7 @@ EXPORT_SYMBOL(__muldi3); EXPORT_SYMBOL(__udivsi3); EXPORT_SYMBOL(__umodsi3); +/* Input/output symbols: lib/{in,out}s.S */ EXPORT_SYMBOL(outsb); EXPORT_SYMBOL(insb); EXPORT_SYMBOL(outsw); @@ -89,18 +68,3 @@ EXPORT_SYMBOL(insw_8); EXPORT_SYMBOL(outsl); EXPORT_SYMBOL(insl); EXPORT_SYMBOL(insl_16); -EXPORT_SYMBOL(iounmap); -EXPORT_SYMBOL(blackfin_dcache_invalidate_range); -EXPORT_SYMBOL(blackfin_icache_dcache_flush_range); -EXPORT_SYMBOL(blackfin_icache_flush_range); -EXPORT_SYMBOL(blackfin_dcache_flush_range); -EXPORT_SYMBOL(blackfin_dflush_page); - -EXPORT_SYMBOL(__init_begin); -EXPORT_SYMBOL(__init_end); -EXPORT_SYMBOL(_ebss_l1); -EXPORT_SYMBOL(_stext_l1); -EXPORT_SYMBOL(_etext_l1); -EXPORT_SYMBOL(_sdata_l1); -EXPORT_SYMBOL(_ebss_b_l1); -EXPORT_SYMBOL(_sdata_b_l1); From d292b00031369feb7bc6b4a14e434379fb2dbb01 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 28 Oct 2008 11:15:36 +0800 Subject: [PATCH 0753/2985] Blackfin arch: drop redundant BFIN_DMA_5XX depends Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/Kconfig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index e37e993431b9..be54e36e8b69 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -739,13 +739,12 @@ config BFIN_GPTIMERS config BFIN_DMA_5XX bool "Enable DMA Support" - depends on (BF52x || BF53x || BF561 || BF54x || BF538 || BF539) default y help - DMA driver for BF5xx. + DMA driver for Blackfin parts. choice - prompt "Uncached SDRAM region" + prompt "Uncached DMA region" default DMA_UNCACHED_1M depends on BFIN_DMA_5XX config DMA_UNCACHED_4M From efe065a1b39974ff2bbb9aa693400f3de4ba5a6a Mon Sep 17 00:00:00 2001 From: Graf Yang Date: Tue, 28 Oct 2008 14:00:52 +0800 Subject: [PATCH 0754/2985] Blackfin arch: fix bug - kernel with SMP patch can not bootup The original code defined _exception_stack but not alloc space for the exception stack. In exception, this area is over written by exception stack. Common kernel luckly boot up, but SMP kernel stuck. Signed-off-by: Graf Yang Signed-off-by: Bryan Wu --- arch/blackfin/mach-common/entry.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index ad4049882291..c6ae8442fc4e 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1555,7 +1555,8 @@ _last_cplb_fault_retx: .bss #endif _exception_stack: -.size _exception_stack, 1024 * 4 -.set _exception_stack_top, _exception_stack + 1024 * 4 -.size _exception_stack_top, 0 + .rept 1024 + .long 0 + .endr +_exception_stack_top: #endif From 2563265bdb5fb21396cdc2125c8260e57f7c3991 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 28 Oct 2008 14:11:45 +0800 Subject: [PATCH 0755/2985] Blackfin arch: put quotes around error to silence a ton of warnings from gcc-4.3 Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/mach-bf527/include/mach/bf527.h | 2 +- arch/blackfin/mach-bf533/include/mach/bf533.h | 2 +- arch/blackfin/mach-bf537/include/mach/bf537.h | 2 +- arch/blackfin/mach-bf538/include/mach/bf538.h | 2 +- arch/blackfin/mach-bf548/include/mach/bf548.h | 2 +- arch/blackfin/mach-bf561/include/mach/bf561.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/blackfin/mach-bf527/include/mach/bf527.h b/arch/blackfin/mach-bf527/include/mach/bf527.h index 144f08d3f8ea..9ff3b61b01e1 100644 --- a/arch/blackfin/mach-bf527/include/mach/bf527.h +++ b/arch/blackfin/mach-bf527/include/mach/bf527.h @@ -134,7 +134,7 @@ #endif #ifndef CPU -#error Unknown CPU type - This kernel doesn't seem to be configured properly +#error "Unknown CPU type - This kernel doesn't seem to be configured properly" #endif #endif /* __MACH_BF527_H__ */ diff --git a/arch/blackfin/mach-bf533/include/mach/bf533.h b/arch/blackfin/mach-bf533/include/mach/bf533.h index dfc8c1ad2d7a..6f976b2ecd71 100644 --- a/arch/blackfin/mach-bf533/include/mach/bf533.h +++ b/arch/blackfin/mach-bf533/include/mach/bf533.h @@ -153,7 +153,7 @@ #endif #ifndef CPU -#error Unknown CPU type - This kernel doesn't seem to be configured properly +#error "Unknown CPU type - This kernel doesn't seem to be configured properly" #endif #endif /* __MACH_BF533_H__ */ diff --git a/arch/blackfin/mach-bf537/include/mach/bf537.h b/arch/blackfin/mach-bf537/include/mach/bf537.h index 24d5c9d42323..f194a848ae8e 100644 --- a/arch/blackfin/mach-bf537/include/mach/bf537.h +++ b/arch/blackfin/mach-bf537/include/mach/bf537.h @@ -133,7 +133,7 @@ #endif #ifndef CPU -#error Unknown CPU type - This kernel doesn't seem to be configured properly +#error "Unknown CPU type - This kernel doesn't seem to be configured properly" #endif #endif /* __MACH_BF537_H__ */ diff --git a/arch/blackfin/mach-bf538/include/mach/bf538.h b/arch/blackfin/mach-bf538/include/mach/bf538.h index c9e8197a29fe..9c8abb307908 100644 --- a/arch/blackfin/mach-bf538/include/mach/bf538.h +++ b/arch/blackfin/mach-bf538/include/mach/bf538.h @@ -118,7 +118,7 @@ #endif #ifndef CPU -#error Unknown CPU type - This kernel doesn't seem to be configured properly +#error "Unknown CPU type - This kernel doesn't seem to be configured properly" #endif #endif /* __MACH_BF538_H__ */ diff --git a/arch/blackfin/mach-bf548/include/mach/bf548.h b/arch/blackfin/mach-bf548/include/mach/bf548.h index 49f9b403d458..f0e569984810 100644 --- a/arch/blackfin/mach-bf548/include/mach/bf548.h +++ b/arch/blackfin/mach-bf548/include/mach/bf548.h @@ -122,7 +122,7 @@ #endif #ifndef CPU -#error Unknown CPU type - This kernel doesn't seem to be configured properly +#error "Unknown CPU type - This kernel doesn't seem to be configured properly" #endif #endif /* __MACH_BF48_H__ */ diff --git a/arch/blackfin/mach-bf561/include/mach/bf561.h b/arch/blackfin/mach-bf561/include/mach/bf561.h index 18b1b3a223ab..9968362a2ee4 100644 --- a/arch/blackfin/mach-bf561/include/mach/bf561.h +++ b/arch/blackfin/mach-bf561/include/mach/bf561.h @@ -215,7 +215,7 @@ #endif #ifndef CPU -#error Unknown CPU type - This kernel doesn't seem to be configured properly +#error "Unknown CPU type - This kernel doesn't seem to be configured properly" #endif #endif /* __MACH_BF561_H__ */ From 2f6f4bcdd611cb968b800f7569c4383727856668 Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Tue, 18 Nov 2008 17:48:21 +0800 Subject: [PATCH 0756/2985] Blackfin arch: add support for Blackfin latest processor family BF51x Signed-off-by: Bryan Wu --- arch/blackfin/Kconfig | 48 +- arch/blackfin/Makefile | 8 + arch/blackfin/configs/BF518F-EZBRD_defconfig | 1130 ++++++++++ arch/blackfin/include/asm/gpio.h | 51 + arch/blackfin/kernel/bfin_gpio.c | 22 +- arch/blackfin/mach-bf518/Kconfig | 233 ++ arch/blackfin/mach-bf518/Makefile | 7 + arch/blackfin/mach-bf518/boards/Kconfig | 12 + arch/blackfin/mach-bf518/boards/Makefile | 5 + arch/blackfin/mach-bf518/boards/ezbrd.c | 613 ++++++ arch/blackfin/mach-bf518/dma.c | 118 + arch/blackfin/mach-bf518/head.S | 146 ++ .../mach-bf518/include/mach/anomaly.h | 77 + arch/blackfin/mach-bf518/include/mach/bf518.h | 132 ++ .../mach-bf518/include/mach/bfin_serial_5xx.h | 169 ++ .../mach-bf518/include/mach/bfin_sir.h | 142 ++ .../mach-bf518/include/mach/blackfin.h | 105 + .../mach-bf518/include/mach/cdefBF512.h | 46 + .../mach-bf518/include/mach/cdefBF514.h | 48 + .../mach-bf518/include/mach/cdefBF516.h | 48 + .../mach-bf518/include/mach/cdefBF518.h | 48 + .../mach-bf518/include/mach/cdefBF51x_base.h | 1204 ++++++++++ .../mach-bf518/include/mach/defBF512.h | 42 + .../mach-bf518/include/mach/defBF514.h | 113 + .../mach-bf518/include/mach/defBF516.h | 490 +++++ .../mach-bf518/include/mach/defBF518.h | 516 +++++ .../mach-bf518/include/mach/defBF51x_base.h | 1940 +++++++++++++++++ arch/blackfin/mach-bf518/include/mach/dma.h | 57 + arch/blackfin/mach-bf518/include/mach/irq.h | 260 +++ .../mach-bf518/include/mach/mem_init.h | 310 +++ .../mach-bf518/include/mach/mem_map.h | 102 + .../mach-bf518/include/mach/portmux.h | 188 ++ arch/blackfin/mach-bf518/ints-priority.c | 99 + arch/blackfin/mach-common/dpmc_modes.S | 2 +- arch/blackfin/mach-common/ints-priority.c | 20 +- arch/blackfin/mach-common/pm.c | 4 +- 36 files changed, 8524 insertions(+), 31 deletions(-) create mode 100644 arch/blackfin/configs/BF518F-EZBRD_defconfig create mode 100644 arch/blackfin/mach-bf518/Kconfig create mode 100644 arch/blackfin/mach-bf518/Makefile create mode 100644 arch/blackfin/mach-bf518/boards/Kconfig create mode 100644 arch/blackfin/mach-bf518/boards/Makefile create mode 100644 arch/blackfin/mach-bf518/boards/ezbrd.c create mode 100644 arch/blackfin/mach-bf518/dma.c create mode 100644 arch/blackfin/mach-bf518/head.S create mode 100644 arch/blackfin/mach-bf518/include/mach/anomaly.h create mode 100644 arch/blackfin/mach-bf518/include/mach/bf518.h create mode 100644 arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h create mode 100644 arch/blackfin/mach-bf518/include/mach/bfin_sir.h create mode 100644 arch/blackfin/mach-bf518/include/mach/blackfin.h create mode 100644 arch/blackfin/mach-bf518/include/mach/cdefBF512.h create mode 100644 arch/blackfin/mach-bf518/include/mach/cdefBF514.h create mode 100644 arch/blackfin/mach-bf518/include/mach/cdefBF516.h create mode 100644 arch/blackfin/mach-bf518/include/mach/cdefBF518.h create mode 100644 arch/blackfin/mach-bf518/include/mach/cdefBF51x_base.h create mode 100644 arch/blackfin/mach-bf518/include/mach/defBF512.h create mode 100644 arch/blackfin/mach-bf518/include/mach/defBF514.h create mode 100644 arch/blackfin/mach-bf518/include/mach/defBF516.h create mode 100644 arch/blackfin/mach-bf518/include/mach/defBF518.h create mode 100644 arch/blackfin/mach-bf518/include/mach/defBF51x_base.h create mode 100644 arch/blackfin/mach-bf518/include/mach/dma.h create mode 100644 arch/blackfin/mach-bf518/include/mach/irq.h create mode 100644 arch/blackfin/mach-bf518/include/mach/mem_init.h create mode 100644 arch/blackfin/mach-bf518/include/mach/mem_map.h create mode 100644 arch/blackfin/mach-bf518/include/mach/portmux.h create mode 100644 arch/blackfin/mach-bf518/ints-priority.c diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index be54e36e8b69..19b43f37e1a3 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -77,6 +77,26 @@ choice prompt "CPU" default BF533 +config BF512 + bool "BF512" + help + BF512 Processor Support. + +config BF514 + bool "BF514" + help + BF514 Processor Support. + +config BF516 + bool "BF516" + help + BF516 Processor Support. + +config BF518 + bool "BF518" + help + BF518 Processor Support. + config BF522 bool "BF522" help @@ -181,27 +201,27 @@ endchoice config BF_REV_MIN int - default 0 if (BF52x || BF54x) + default 0 if (BF51x || BF52x || BF54x) default 2 if (BF537 || BF536 || BF534) default 3 if (BF561 ||BF533 || BF532 || BF531) - default 4 if (BF538 || BF539) + default 4 if (BF538 || BF539) config BF_REV_MAX int - default 2 if (BF52x || BF54x) + default 2 if (BF51x || BF52x || BF54x) default 3 if (BF537 || BF536 || BF534) - default 5 if (BF561|| BF538 || BF539) + default 5 if (BF561 || BF538 || BF539) default 6 if (BF533 || BF532 || BF531) choice prompt "Silicon Rev" - default BF_REV_0_1 if (BF52x || BF54x) + default BF_REV_0_1 if (BF51x || BF52x || BF54x) default BF_REV_0_2 if (BF534 || BF536 || BF537) default BF_REV_0_3 if (BF531 || BF532 || BF533 || BF561) config BF_REV_0_0 bool "0.0" - depends on (BF52x || BF54x) + depends on (BF51x || BF52x || BF54x) config BF_REV_0_1 bool "0.1" @@ -235,6 +255,11 @@ config BF_REV_NONE endchoice +config BF51x + bool + depends on (BF512 || BF514 || BF516 || BF518) + default y + config BF52x bool depends on (BF522 || BF523 || BF524 || BF525 || BF526 || BF527) @@ -282,6 +307,7 @@ config MEM_MT48LC32M16A2TG_75 depends on (BFIN527_EZKIT || BFIN532_IP0X || BLACKSTAMP || BFIN526_EZBRD) default y +source "arch/blackfin/mach-bf518/Kconfig" source "arch/blackfin/mach-bf527/Kconfig" source "arch/blackfin/mach-bf533/Kconfig" source "arch/blackfin/mach-bf561/Kconfig" @@ -330,7 +356,7 @@ config CLKIN_HZ int "Frequency of the crystal on the board in Hz" default "11059200" if BFIN533_STAMP default "27000000" if BFIN533_EZKIT - default "25000000" if (BFIN537_STAMP || BFIN527_EZKIT || H8606_HVSISTEMAS || BLACKSTAMP || BFIN526_EZBRD || BFIN538_EZKIT) + default "25000000" if (BFIN537_STAMP || BFIN527_EZKIT || H8606_HVSISTEMAS || BLACKSTAMP || BFIN526_EZBRD || BFIN538_EZKIT || BFIN518F-EZBRD) default "30000000" if BFIN561_EZKIT default "24576000" if PNAV10 default "10000000" if BFIN532_IP0X @@ -370,7 +396,7 @@ config VCO_MULT default "22" if BFIN533_BLUETECHNIX_CM default "20" if (BFIN537_BLUETECHNIX_CM || BFIN527_BLUETECHNIX_CM || BFIN561_BLUETECHNIX_CM) default "20" if BFIN561_EZKIT - default "16" if (H8606_HVSISTEMAS || BLACKSTAMP || BFIN526_EZBRD) + default "16" if (H8606_HVSISTEMAS || BLACKSTAMP || BFIN526_EZBRD || BFIN518F_EZBRD) help This controls the frequency of the on-chip PLL. This can be between 1 and 64. PLL Frequency = (Crystal Frequency) * (this setting) @@ -432,6 +458,10 @@ config MAX_MEM_SIZE # config MAX_VCO_HZ int + default 400000000 if BF512 + default 400000000 if BF514 + default 400000000 if BF516 + default 400000000 if BF518 default 600000000 if BF522 default 400000000 if BF523 default 400000000 if BF524 @@ -1025,7 +1055,7 @@ comment "Possible Suspend Mem / Hibernate Wake-Up Sources" config PM_BFIN_WAKE_PH6 bool "Allow Wake-Up from on-chip PHY or PH6 GP" - depends on PM && (BF52x || BF534 || BF536 || BF537) + depends on PM && (BF51x || BF52x || BF534 || BF536 || BF537) default n help Enable PHY and PH6 GP Wake-Up (Voltage Regulator Power-Up) diff --git a/arch/blackfin/Makefile b/arch/blackfin/Makefile index ce45df3708e8..2faad124db59 100644 --- a/arch/blackfin/Makefile +++ b/arch/blackfin/Makefile @@ -21,6 +21,10 @@ KALLSYMS += --symbol-prefix=_ KBUILD_DEFCONFIG := BF537-STAMP_defconfig # setup the machine name and the machine dependent settings +machine-$(CONFIG_BF512) := bf518 +machine-$(CONFIG_BF514) := bf518 +machine-$(CONFIG_BF516) := bf518 +machine-$(CONFIG_BF518) := bf518 machine-$(CONFIG_BF522) := bf527 machine-$(CONFIG_BF523) := bf527 machine-$(CONFIG_BF524) := bf527 @@ -44,6 +48,10 @@ machine-$(CONFIG_BF561) := bf561 MACHINE := $(machine-y) export MACHINE +cpu-$(CONFIG_BF512) := bf512 +cpu-$(CONFIG_BF514) := bf514 +cpu-$(CONFIG_BF516) := bf516 +cpu-$(CONFIG_BF518) := bf518 cpu-$(CONFIG_BF522) := bf522 cpu-$(CONFIG_BF523) := bf523 cpu-$(CONFIG_BF524) := bf524 diff --git a/arch/blackfin/configs/BF518F-EZBRD_defconfig b/arch/blackfin/configs/BF518F-EZBRD_defconfig new file mode 100644 index 000000000000..f4c4fd7d0e7b --- /dev/null +++ b/arch/blackfin/configs/BF518F-EZBRD_defconfig @@ -0,0 +1,1130 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.26.5 +# Thu Oct 23 21:38:19 2008 +# +# CONFIG_MMU is not set +# CONFIG_FPU is not set +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set +CONFIG_BLACKFIN=y +CONFIG_ZONE_DMA=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_GPIO=y +CONFIG_FORCE_MAX_ZONEORDER=14 +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_CGROUPS is not set +# CONFIG_GROUP_SCHED is not set +# CONFIG_SYSFS_DEPRECATED is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set +# CONFIG_RELAY is not set +# CONFIG_NAMESPACES is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y +CONFIG_EMBEDDED=y +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_COMPAT_BRK=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_HAVE_KPROBES is not set +# CONFIG_HAVE_KRETPROBES is not set +# CONFIG_HAVE_DMA_ATTRS is not set +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_TINY_SHMEM=y +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_BLOCK=y +# CONFIG_LBD is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set +# CONFIG_BLK_DEV_BSG is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +# CONFIG_IOSCHED_AS is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +CONFIG_DEFAULT_NOOP=y +CONFIG_DEFAULT_IOSCHED="noop" +CONFIG_CLASSIC_RCU=y +# CONFIG_PREEMPT_NONE is not set +CONFIG_PREEMPT_VOLUNTARY=y +# CONFIG_PREEMPT is not set + +# +# Blackfin Processor Options +# + +# +# Processor and Board Settings +# +# CONFIG_BF512 is not set +# CONFIG_BF514 is not set +# CONFIG_BF516 is not set +CONFIG_BF518=y +# CONFIG_BF522 is not set +# CONFIG_BF523 is not set +# CONFIG_BF524 is not set +# CONFIG_BF525 is not set +# CONFIG_BF526 is not set +# CONFIG_BF527 is not set +# CONFIG_BF531 is not set +# CONFIG_BF532 is not set +# CONFIG_BF533 is not set +# CONFIG_BF534 is not set +# CONFIG_BF536 is not set +# CONFIG_BF537 is not set +# CONFIG_BF538 is not set +# CONFIG_BF539 is not set +# CONFIG_BF542 is not set +# CONFIG_BF544 is not set +# CONFIG_BF547 is not set +# CONFIG_BF548 is not set +# CONFIG_BF549 is not set +# CONFIG_BF561 is not set +CONFIG_BF_REV_MIN=0 +CONFIG_BF_REV_MAX=2 +CONFIG_BF_REV_0_0=y +# CONFIG_BF_REV_0_1 is not set +# CONFIG_BF_REV_0_2 is not set +# CONFIG_BF_REV_0_3 is not set +# CONFIG_BF_REV_0_4 is not set +# CONFIG_BF_REV_0_5 is not set +# CONFIG_BF_REV_0_6 is not set +# CONFIG_BF_REV_ANY is not set +# CONFIG_BF_REV_NONE is not set +CONFIG_BF51x=y +CONFIG_BFIN518F_EZBRD=y + +# +# BF518 Specific Configuration +# + +# +# Alternative Multiplexing Scheme +# +# CONFIG_BF518_SPORT0_PORTF is not set +CONFIG_BF518_SPORT0_PORTG=y +CONFIG_BF518_SPORT0_TSCLK_PG10=y +# CONFIG_BF518_SPORT0_TSCLK_PG14 is not set +CONFIG_BF518_UART1_PORTF=y +# CONFIG_BF518_UART1_PORTG is not set + +# +# Interrupt Priority Assignment +# + +# +# Priority +# +CONFIG_IRQ_PLL_WAKEUP=7 +CONFIG_IRQ_DMA0_ERROR=7 +CONFIG_IRQ_DMAR0_BLK=7 +CONFIG_IRQ_DMAR1_BLK=7 +CONFIG_IRQ_DMAR0_OVR=7 +CONFIG_IRQ_DMAR1_OVR=7 +CONFIG_IRQ_PPI_ERROR=7 +CONFIG_IRQ_MAC_ERROR=7 +CONFIG_IRQ_SPORT0_ERROR=7 +CONFIG_IRQ_SPORT1_ERROR=7 +CONFIG_IRQ_PTP_ERROR=7 +CONFIG_IRQ_UART0_ERROR=7 +CONFIG_IRQ_UART1_ERROR=7 +CONFIG_IRQ_RTC=8 +CONFIG_IRQ_PPI=8 +CONFIG_IRQ_SPORT0_RX=9 +CONFIG_IRQ_SPORT0_TX=9 +CONFIG_IRQ_SPORT1_RX=9 +CONFIG_IRQ_SPORT1_TX=9 +CONFIG_IRQ_TWI=10 +CONFIG_IRQ_SPI0=10 +CONFIG_IRQ_UART0_RX=10 +CONFIG_IRQ_UART0_TX=10 +CONFIG_IRQ_UART1_RX=10 +CONFIG_IRQ_UART1_TX=10 +CONFIG_IRQ_OPTSEC=11 +CONFIG_IRQ_CNT=11 +CONFIG_IRQ_MAC_RX=11 +CONFIG_IRQ_PORTH_INTA=11 +CONFIG_IRQ_MAC_TX=11 +CONFIG_IRQ_PORTH_INTB=11 +CONFIG_IRQ_TMR0=12 +CONFIG_IRQ_TMR1=12 +CONFIG_IRQ_TMR2=12 +CONFIG_IRQ_TMR3=12 +CONFIG_IRQ_TMR4=12 +CONFIG_IRQ_TMR5=12 +CONFIG_IRQ_TMR6=12 +CONFIG_IRQ_TMR7=12 +CONFIG_IRQ_PORTG_INTA=12 +CONFIG_IRQ_PORTG_INTB=12 +CONFIG_IRQ_MEM_DMA0=13 +CONFIG_IRQ_MEM_DMA1=13 +CONFIG_IRQ_WATCH=13 +CONFIG_IRQ_PORTF_INTA=13 +CONFIG_IRQ_PORTF_INTB=13 +CONFIG_IRQ_SPI0_ERROR=7 +CONFIG_IRQ_SPI1_ERROR=7 +CONFIG_IRQ_RSI_INT0=7 +CONFIG_IRQ_RSI_INT1=7 +CONFIG_IRQ_PWM_TRIP=10 +CONFIG_IRQ_PWM_SYNC=10 +CONFIG_IRQ_PTP_STAT=10 + +# +# Board customizations +# +# CONFIG_CMDLINE_BOOL is not set +CONFIG_BOOT_LOAD=0x1000 +CONFIG_ROM_BASE=0x20040000 + +# +# Clock/PLL Setup +# +CONFIG_CLKIN_HZ=25000000 +# CONFIG_BFIN_KERNEL_CLOCK is not set +CONFIG_MAX_MEM_SIZE=512 +CONFIG_MAX_VCO_HZ=400000000 +CONFIG_MIN_VCO_HZ=50000000 +CONFIG_MAX_SCLK_HZ=133333333 +CONFIG_MIN_SCLK_HZ=27000000 + +# +# Kernel Timer/Scheduler +# +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +# CONFIG_SCHED_HRTICK is not set +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CLOCKEVENTS=y +# CONFIG_CYCLES_CLOCKSOURCE is not set +# CONFIG_TICK_ONESHOT is not set +# CONFIG_NO_HZ is not set +# CONFIG_HIGH_RES_TIMERS is not set +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y + +# +# Misc +# +CONFIG_BFIN_SCRATCH_REG_RETN=y +# CONFIG_BFIN_SCRATCH_REG_RETE is not set +# CONFIG_BFIN_SCRATCH_REG_CYCLES is not set + +# +# Blackfin Kernel Optimizations +# + +# +# Memory Optimizations +# +CONFIG_I_ENTRY_L1=y +CONFIG_EXCPT_IRQ_SYSC_L1=y +CONFIG_DO_IRQ_L1=y +CONFIG_CORE_TIMER_IRQ_L1=y +CONFIG_IDLE_L1=y +# CONFIG_SCHEDULE_L1 is not set +CONFIG_ARITHMETIC_OPS_L1=y +CONFIG_ACCESS_OK_L1=y +# CONFIG_MEMSET_L1 is not set +# CONFIG_MEMCPY_L1 is not set +# CONFIG_SYS_BFIN_SPINLOCK_L1 is not set +# CONFIG_IP_CHECKSUM_L1 is not set +CONFIG_CACHELINE_ALIGNED_L1=y +# CONFIG_SYSCALL_TAB_L1 is not set +# CONFIG_CPLB_SWITCH_TAB_L1 is not set +CONFIG_APP_STACK_L1=y + +# +# Speed Optimizations +# +CONFIG_BFIN_INS_LOWOVERHEAD=y +CONFIG_RAMKERNEL=y +# CONFIG_ROMKERNEL is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +CONFIG_ZONE_DMA_FLAG=1 +CONFIG_VIRT_TO_BUS=y +CONFIG_BFIN_GPTIMERS=y +CONFIG_BFIN_DMA_5XX=y +# CONFIG_DMA_UNCACHED_4M is not set +# CONFIG_DMA_UNCACHED_2M is not set +CONFIG_DMA_UNCACHED_1M=y +# CONFIG_DMA_UNCACHED_NONE is not set + +# +# Cache Support +# +CONFIG_BFIN_ICACHE=y +CONFIG_BFIN_DCACHE=y +# CONFIG_BFIN_DCACHE_BANKA is not set +# CONFIG_BFIN_ICACHE_LOCK is not set +CONFIG_BFIN_WB=y +# CONFIG_BFIN_WT is not set +# CONFIG_MPU is not set + +# +# Asynchonous Memory Configuration +# + +# +# EBIU_AMGCTL Global Control +# +CONFIG_C_AMCKEN=y +CONFIG_C_CDPRIO=y +# CONFIG_C_AMBEN is not set +# CONFIG_C_AMBEN_B0 is not set +# CONFIG_C_AMBEN_B0_B1 is not set +# CONFIG_C_AMBEN_B0_B1_B2 is not set +CONFIG_C_AMBEN_ALL=y + +# +# EBIU_AMBCTL Control +# +CONFIG_BANK_0=0x7BB0 +CONFIG_BANK_1=0x5554 +CONFIG_BANK_2=0x7BB0 +CONFIG_BANK_3=0xFFC0 + +# +# Bus options (PCI, PCMCIA, EISA, MCA, ISA) +# +# CONFIG_ARCH_SUPPORTS_MSI is not set +# CONFIG_PCCARD is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF_FDPIC=y +CONFIG_BINFMT_FLAT=y +CONFIG_BINFMT_ZFLAT=y +# CONFIG_BINFMT_SHARED_FLAT is not set +# CONFIG_BINFMT_MISC is not set + +# +# Power management options +# +# CONFIG_PM is not set +CONFIG_ARCH_SUSPEND_POSSIBLE=y +# CONFIG_PM_WAKEUP_BY_GPIO is not set + +# +# CPU Frequency scaling +# +# CONFIG_CPU_FREQ is not set + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_PNP=y +# CONFIG_IP_PNP_DHCP is not set +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IPV6 is not set +# CONFIG_NETLABEL is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set +# CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_CONCAT is not set +CONFIG_MTD_PARTITIONS=y +# CONFIG_MTD_REDBOOT_PARTS is not set +# CONFIG_MTD_CMDLINE_PARTS is not set +# CONFIG_MTD_AR7_PARTS is not set + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=y +CONFIG_MTD_BLKDEVS=y +CONFIG_MTD_BLOCK=y +# CONFIG_FTL is not set +# CONFIG_NFTL is not set +# CONFIG_INFTL is not set +# CONFIG_RFD_FTL is not set +# CONFIG_SSFDC is not set +# CONFIG_MTD_OOPS is not set + +# +# RAM/ROM/Flash chip drivers +# +# CONFIG_MTD_CFI is not set +CONFIG_MTD_JEDECPROBE=m +CONFIG_MTD_GEN_PROBE=m +# CONFIG_MTD_CFI_ADV_OPTIONS is not set +CONFIG_MTD_MAP_BANK_WIDTH_1=y +CONFIG_MTD_MAP_BANK_WIDTH_2=y +CONFIG_MTD_MAP_BANK_WIDTH_4=y +# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set +CONFIG_MTD_CFI_I1=y +CONFIG_MTD_CFI_I2=y +# CONFIG_MTD_CFI_I4 is not set +# CONFIG_MTD_CFI_I8 is not set +# CONFIG_MTD_CFI_INTELEXT is not set +# CONFIG_MTD_CFI_AMDSTD is not set +# CONFIG_MTD_CFI_STAA is not set +CONFIG_MTD_RAM=y +CONFIG_MTD_ROM=m +# CONFIG_MTD_ABSENT is not set + +# +# Mapping drivers for chip access +# +CONFIG_MTD_COMPLEX_MAPPINGS=y +# CONFIG_MTD_PHYSMAP is not set +# CONFIG_MTD_GPIO_ADDR is not set +# CONFIG_MTD_UCLINUX is not set +# CONFIG_MTD_PLATRAM is not set + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_PHRAM is not set +# CONFIG_MTD_MTDRAM is not set +# CONFIG_MTD_BLOCK2MTD is not set + +# +# Disk-On-Chip Device Drivers +# +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOC2001PLUS is not set +# CONFIG_MTD_NAND is not set +# CONFIG_MTD_ONENAND is not set + +# +# UBI - Unsorted block images +# +# CONFIG_MTD_UBI is not set +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_DEV_COW_COMMON is not set +# CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_NBD is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +# CONFIG_BLK_DEV_XIP is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +CONFIG_MISC_DEVICES=y +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_ENCLOSURE_SERVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_ATA is not set +# CONFIG_MD is not set +CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_VETH is not set +CONFIG_PHYLIB=y + +# +# MII PHY device drivers +# +# CONFIG_MARVELL_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_LXT_PHY is not set +# CONFIG_CICADA_PHY is not set +# CONFIG_VITESSE_PHY is not set +# CONFIG_SMSC_PHY is not set +# CONFIG_BROADCOM_PHY is not set +# CONFIG_ICPLUS_PHY is not set +# CONFIG_REALTEK_PHY is not set +# CONFIG_FIXED_PHY is not set +# CONFIG_MDIO_BITBANG is not set +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_SMC91X is not set +# CONFIG_SMSC911X is not set +# CONFIG_DM9000 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_B44 is not set +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_WAN is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set + +# +# Userland interfaces +# +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_UINPUT is not set +# CONFIG_TWI_KEYPAD is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_AD9960 is not set +# CONFIG_SPI_ADC_BF533 is not set +# CONFIG_BF5xx_PPIFCD is not set +# CONFIG_BFIN_SIMPLE_TIMER is not set +# CONFIG_BF5xx_PPI is not set +# CONFIG_BFIN_SPORT is not set +# CONFIG_BFIN_TIMER_LATENCY is not set +# CONFIG_TWI_LCD is not set +CONFIG_SIMPLE_GPIO=m +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set +# CONFIG_DEVKMEM is not set +# CONFIG_BFIN_JTAG_COMM is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_BFIN=y +CONFIG_SERIAL_BFIN_CONSOLE=y +CONFIG_SERIAL_BFIN_DMA=y +# CONFIG_SERIAL_BFIN_PIO is not set +CONFIG_SERIAL_BFIN_UART0=y +# CONFIG_BFIN_UART0_CTSRTS is not set +# CONFIG_SERIAL_BFIN_UART1 is not set +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_BFIN_SPORT is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_LEGACY_PTYS is not set + +# +# CAN, the car bus and industrial fieldbus +# +# CONFIG_CAN4LINUX is not set +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_R3964 is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_HELPER_AUTO=y + +# +# I2C Hardware Bus support +# +CONFIG_I2C_BLACKFIN_TWI=y +CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=50 +# CONFIG_I2C_GPIO is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_SIMTEC is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_PCA_PLATFORM is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_DS1682 is not set +# CONFIG_SENSORS_AD5252 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set +# CONFIG_SPI is not set +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set +# CONFIG_THERMAL is not set +# CONFIG_THERMAL_HWMON is not set +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +CONFIG_BFIN_WDT=y + +# +# Sonics Silicon Backplane +# +CONFIG_SSB_POSSIBLE=y +# CONFIG_SSB is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set + +# +# Multimedia devices +# + +# +# Multimedia core support +# +# CONFIG_VIDEO_DEV is not set +# CONFIG_DVB_CORE is not set +# CONFIG_VIDEO_MEDIA is not set + +# +# Multimedia drivers +# +# CONFIG_DAB is not set + +# +# Graphics support +# +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +# CONFIG_FB is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set + +# +# Console display driver support +# +CONFIG_DUMMY_CONSOLE=y + +# +# Sound +# +# CONFIG_SOUND is not set +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_NEW_LEDS is not set +# CONFIG_ACCESSIBILITY is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_HCTOSYS_DEVICE="rtc0" +# CONFIG_RTC_DEBUG is not set + +# +# RTC interfaces +# +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +# CONFIG_RTC_DRV_TEST is not set + +# +# I2C RTC drivers +# +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set + +# +# SPI RTC drivers +# + +# +# Platform RTC drivers +# +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_V3020 is not set + +# +# on-CPU RTC drivers +# +CONFIG_RTC_DRV_BFIN=y +# CONFIG_UIO is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +# CONFIG_EXT3_FS is not set +# CONFIG_EXT4DEV_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +# CONFIG_XFS_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_DNOTIFY is not set +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +# CONFIG_TMPFS is not set +# CONFIG_HUGETLB_PAGE is not set +# CONFIG_CONFIGFS_FS is not set + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_YAFFS_FS is not set +# CONFIG_JFFS2_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFSD is not set +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=m +# CONFIG_SUNRPC_BIND34 is not set +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +CONFIG_SMB_FS=m +# CONFIG_SMB_NLS_DEFAULT is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ISO8859_1=y +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set +# CONFIG_DLM is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +# CONFIG_DEBUG_KERNEL is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +CONFIG_DEBUG_VERBOSE=y +CONFIG_DEBUG_MMRS=y +# CONFIG_DEBUG_DOUBLEFAULT is not set +CONFIG_DEBUG_HUNT_FOR_ZERO=y +CONFIG_DEBUG_BFIN_HWTRACE_ON=y +CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_OFF=y +# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE is not set +# CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_TWO is not set +CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION=0 +# CONFIG_DEBUG_BFIN_HWTRACE_EXPAND is not set +# CONFIG_DEBUG_BFIN_NO_KERN_HWTRACE is not set +CONFIG_EARLY_PRINTK=y +CONFIG_CPLB_INFO=y +CONFIG_ACCESS_CHECK=y + +# +# Security options +# +# CONFIG_KEYS is not set +CONFIG_SECURITY=y +# CONFIG_SECURITY_NETWORK is not set +# CONFIG_SECURITY_CAPABILITIES is not set +CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=0 +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_MANAGER is not set +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +# CONFIG_CRYPTO_CBC is not set +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +# CONFIG_CRYPTO_MD5 is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_DES is not set +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_HW=y + +# +# Library routines +# +CONFIG_BITREVERSE=y +# CONFIG_GENERIC_FIND_FIRST_BIT is not set +CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y diff --git a/arch/blackfin/include/asm/gpio.h b/arch/blackfin/include/asm/gpio.h index c7d287ca5d0a..d8485624006a 100644 --- a/arch/blackfin/include/asm/gpio.h +++ b/arch/blackfin/include/asm/gpio.h @@ -143,6 +143,57 @@ #define PERIPHERAL_USAGE 1 #define GPIO_USAGE 0 +#if defined(BF518_FAMILY) +#define MAX_BLACKFIN_GPIOS 40 + +#define GPIO_PF0 0 +#define GPIO_PF1 1 +#define GPIO_PF2 2 +#define GPIO_PF3 3 +#define GPIO_PF4 4 +#define GPIO_PF5 5 +#define GPIO_PF6 6 +#define GPIO_PF7 7 +#define GPIO_PF8 8 +#define GPIO_PF9 9 +#define GPIO_PF10 10 +#define GPIO_PF11 11 +#define GPIO_PF12 12 +#define GPIO_PF13 13 +#define GPIO_PF14 14 +#define GPIO_PF15 15 +#define GPIO_PG0 16 +#define GPIO_PG1 17 +#define GPIO_PG2 18 +#define GPIO_PG3 19 +#define GPIO_PG4 20 +#define GPIO_PG5 21 +#define GPIO_PG6 22 +#define GPIO_PG7 23 +#define GPIO_PG8 24 +#define GPIO_PG9 25 +#define GPIO_PG10 26 +#define GPIO_PG11 27 +#define GPIO_PG12 28 +#define GPIO_PG13 29 +#define GPIO_PG14 30 +#define GPIO_PG15 31 +#define GPIO_PH0 32 +#define GPIO_PH1 33 +#define GPIO_PH2 34 +#define GPIO_PH3 35 +#define GPIO_PH4 36 +#define GPIO_PH5 37 +#define GPIO_PH6 38 +#define GPIO_PH7 39 + +#define PORT_F GPIO_PF0 +#define PORT_G GPIO_PG0 +#define PORT_H GPIO_PH0 + +#endif + + #ifdef BF533_FAMILY #define MAX_BLACKFIN_GPIOS 16 diff --git a/arch/blackfin/kernel/bfin_gpio.c b/arch/blackfin/kernel/bfin_gpio.c index e129102ad09b..5556e13993bf 100644 --- a/arch/blackfin/kernel/bfin_gpio.c +++ b/arch/blackfin/kernel/bfin_gpio.c @@ -125,7 +125,7 @@ static struct gpio_port_t *gpio_bankb[gpio_bank(MAX_BLACKFIN_GPIOS)] = { }; #endif -#if defined(BF527_FAMILY) || defined(BF537_FAMILY) +#if defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) static struct gpio_port_t *gpio_bankb[gpio_bank(MAX_BLACKFIN_GPIOS)] = { (struct gpio_port_t *) PORTFIO, (struct gpio_port_t *) PORTGIO, @@ -139,7 +139,7 @@ static unsigned short *port_fer[gpio_bank(MAX_BLACKFIN_GPIOS)] = { }; #endif -#ifdef BF527_FAMILY +#if defined(BF527_FAMILY) || defined(BF518_FAMILY) static unsigned short *port_mux[gpio_bank(MAX_BLACKFIN_GPIOS)] = { (unsigned short *) PORTF_MUX, (unsigned short *) PORTG_MUX, @@ -206,7 +206,7 @@ static unsigned int sic_iwr_irqs[gpio_bank(MAX_BLACKFIN_GPIOS)] = {IRQ_PROG_INTB static unsigned int sic_iwr_irqs[gpio_bank(MAX_BLACKFIN_GPIOS)] = {IRQ_PORTF_INTB}; #endif -#ifdef BF527_FAMILY +#if defined(BF527_FAMILY) || defined(BF518_FAMILY) static unsigned int sic_iwr_irqs[gpio_bank(MAX_BLACKFIN_GPIOS)] = {IRQ_PORTF_INTB, IRQ_PORTG_INTB, IRQ_PORTH_INTB}; #endif @@ -268,7 +268,7 @@ static int cmp_label(unsigned short ident, const char *label) return -EINVAL; } -#if defined(BF527_FAMILY) || defined(BF537_FAMILY) +#if defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) static void port_setup(unsigned gpio, unsigned short usage) { if (!check_gpio(gpio)) { @@ -383,7 +383,7 @@ inline u16 get_portmux(unsigned short portno) return (pmux >> (2 * gpio_sub_n(portno)) & 0x3); } -#elif defined(BF527_FAMILY) +#elif defined(BF527_FAMILY) || defined(BF518_FAMILY) inline void portmux_setup(unsigned short portno, unsigned short function) { u16 pmux, ident = P_IDENT(portno); @@ -683,7 +683,7 @@ u32 bfin_pm_standby_setup(void) gpio_bankb[bank]->maskb = 0; if (mask) { -#if defined(BF527_FAMILY) || defined(BF537_FAMILY) +#if defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) gpio_bank_saved[bank].fer = *port_fer[bank]; #endif gpio_bank_saved[bank].inen = gpio_bankb[bank]->inen; @@ -728,7 +728,7 @@ void bfin_pm_standby_restore(void) bank = gpio_bank(i); if (mask) { -#if defined(BF527_FAMILY) || defined(BF537_FAMILY) +#if defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) *port_fer[bank] = gpio_bank_saved[bank].fer; #endif gpio_bankb[bank]->inen = gpio_bank_saved[bank].inen; @@ -754,9 +754,9 @@ void bfin_gpio_pm_hibernate_suspend(void) for (i = 0; i < MAX_BLACKFIN_GPIOS; i += GPIO_BANKSIZE) { bank = gpio_bank(i); -#if defined(BF527_FAMILY) || defined(BF537_FAMILY) +#if defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) gpio_bank_saved[bank].fer = *port_fer[bank]; -#ifdef BF527_FAMILY +#if defined(BF527_FAMILY) || defined(BF518_FAMILY) gpio_bank_saved[bank].mux = *port_mux[bank]; #else if (bank == 0) @@ -782,8 +782,8 @@ void bfin_gpio_pm_hibernate_restore(void) for (i = 0; i < MAX_BLACKFIN_GPIOS; i += GPIO_BANKSIZE) { bank = gpio_bank(i); -#if defined(BF527_FAMILY) || defined(BF537_FAMILY) -#ifdef BF527_FAMILY +#if defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) +#if defined(BF527_FAMILY) || defined(BF518_FAMILY) *port_mux[bank] = gpio_bank_saved[bank].mux; #else if (bank == 0) diff --git a/arch/blackfin/mach-bf518/Kconfig b/arch/blackfin/mach-bf518/Kconfig new file mode 100644 index 000000000000..00f2d3700637 --- /dev/null +++ b/arch/blackfin/mach-bf518/Kconfig @@ -0,0 +1,233 @@ +if (BF51x) + +source "arch/blackfin/mach-bf518/boards/Kconfig" + +menu "BF518 Specific Configuration" + +comment "Alternative Multiplexing Scheme" + +choice + prompt "SPORT0" + default BF518_SPORT0_PORTG + help + Select PORT used for SPORT0. See Hardware Reference Manual + +config BF518_SPORT0_PORTF + bool "PORT F" + help + PORT F + +config BF518_SPORT0_PORTG + bool "PORT G" + help + PORT G +endchoice + +choice + prompt "SPORT0 TSCLK Location" + depends on BF518_SPORT0_PORTG + default BF518_SPORT0_TSCLK_PG10 + help + Select PIN used for SPORT0_TSCLK. See Hardware Reference Manual + +config BF518_SPORT0_TSCLK_PG10 + bool "PORT PG10" + help + PORT PG10 + +config BF518_SPORT0_TSCLK_PG14 + bool "PORT PG14" + help + PORT PG14 +endchoice + +choice + prompt "UART1" + default BF518_UART1_PORTF + help + Select PORT used for UART1. See Hardware Reference Manual + +config BF518_UART1_PORTF + bool "PORT F" + help + PORT F + +config BF518_UART1_PORTG + bool "PORT G" + help + PORT G +endchoice + +comment "Interrupt Priority Assignment" +menu "Priority" + +config IRQ_PLL_WAKEUP + int "IRQ_PLL_WAKEUP" + default 7 +config IRQ_DMA0_ERROR + int "IRQ_DMA0_ERROR" + default 7 +config IRQ_DMAR0_BLK + int "IRQ_DMAR0_BLK" + default 7 +config IRQ_DMAR1_BLK + int "IRQ_DMAR1_BLK" + default 7 +config IRQ_DMAR0_OVR + int "IRQ_DMAR0_OVR" + default 7 +config IRQ_DMAR1_OVR + int "IRQ_DMAR1_OVR" + default 7 +config IRQ_PPI_ERROR + int "IRQ_PPI_ERROR" + default 7 +config IRQ_MAC_ERROR + int "IRQ_MAC_ERROR" + default 7 +config IRQ_SPORT0_ERROR + int "IRQ_SPORT0_ERROR" + default 7 +config IRQ_SPORT1_ERROR + int "IRQ_SPORT1_ERROR" + default 7 +config IRQ_PTP_ERROR + int "IRQ_PTP_ERROR" + default 7 +config IRQ_UART0_ERROR + int "IRQ_UART0_ERROR" + default 7 +config IRQ_UART1_ERROR + int "IRQ_UART1_ERROR" + default 7 +config IRQ_RTC + int "IRQ_RTC" + default 8 +config IRQ_PPI + int "IRQ_PPI" + default 8 +config IRQ_SPORT0_RX + int "IRQ_SPORT0_RX" + default 9 +config IRQ_SPORT0_TX + int "IRQ_SPORT0_TX" + default 9 +config IRQ_SPORT1_RX + int "IRQ_SPORT1_RX" + default 9 +config IRQ_SPORT1_TX + int "IRQ_SPORT1_TX" + default 9 +config IRQ_TWI + int "IRQ_TWI" + default 10 +config IRQ_SPI0 + int "IRQ_SPI" + default 10 +config IRQ_UART0_RX + int "IRQ_UART0_RX" + default 10 +config IRQ_UART0_TX + int "IRQ_UART0_TX" + default 10 +config IRQ_UART1_RX + int "IRQ_UART1_RX" + default 10 +config IRQ_UART1_TX + int "IRQ_UART1_TX" + default 10 +config IRQ_OPTSEC + int "IRQ_OPTSEC" + default 11 +config IRQ_CNT + int "IRQ_CNT" + default 11 +config IRQ_MAC_RX + int "IRQ_MAC_RX" + default 11 +config IRQ_PORTH_INTA + int "IRQ_PORTH_INTA" + default 11 +config IRQ_MAC_TX + int "IRQ_MAC_TX/NFC" + default 11 +config IRQ_PORTH_INTB + int "IRQ_PORTH_INTB" + default 11 +config IRQ_TMR0 + int "IRQ_TMR0" + default 12 +config IRQ_TMR1 + int "IRQ_TMR1" + default 12 +config IRQ_TMR2 + int "IRQ_TMR2" + default 12 +config IRQ_TMR3 + int "IRQ_TMR3" + default 12 +config IRQ_TMR4 + int "IRQ_TMR4" + default 12 +config IRQ_TMR5 + int "IRQ_TMR5" + default 12 +config IRQ_TMR6 + int "IRQ_TMR6" + default 12 +config IRQ_TMR7 + int "IRQ_TMR7" + default 12 +config IRQ_PORTG_INTA + int "IRQ_PORTG_INTA" + default 12 +config IRQ_PORTG_INTB + int "IRQ_PORTG_INTB" + default 12 +config IRQ_MEM_DMA0 + int "IRQ_MEM_DMA0" + default 13 +config IRQ_MEM_DMA1 + int "IRQ_MEM_DMA1" + default 13 +config IRQ_WATCH + int "IRQ_WATCH" + default 13 +config IRQ_PORTF_INTA + int "IRQ_PORTF_INTA" + default 13 +config IRQ_PORTF_INTB + int "IRQ_PORTF_INTB" + default 13 +config IRQ_SPI0_ERROR + int "IRQ_SPI0_ERROR" + default 7 +config IRQ_SPI1_ERROR + int "IRQ_SPI1_ERROR" + default 7 +config IRQ_RSI_INT0 + int "IRQ_RSI_INT0" + default 7 +config IRQ_RSI_INT1 + int "IRQ_RSI_INT1" + default 7 +config IRQ_PWM_TRIP + int "IRQ_PWM_TRIP" + default 10 +config IRQ_PWM_SYNC + int "IRQ_PWM_SYNC" + default 10 +config IRQ_PTP_STAT + int "IRQ_PTP_STAT" + default 10 + + help + Enter the priority numbers between 7-13 ONLY. Others are Reserved. + This applies to all the above. It is not recommended to assign the + highest priority number 7 to UART or any other device. + +endmenu + +endmenu + +endif diff --git a/arch/blackfin/mach-bf518/Makefile b/arch/blackfin/mach-bf518/Makefile new file mode 100644 index 000000000000..9d5e16d07b5e --- /dev/null +++ b/arch/blackfin/mach-bf518/Makefile @@ -0,0 +1,7 @@ +# +# arch/blackfin/mach-bf518/Makefile +# + +extra-y := head.o + +obj-y := ints-priority.o dma.o diff --git a/arch/blackfin/mach-bf518/boards/Kconfig b/arch/blackfin/mach-bf518/boards/Kconfig new file mode 100644 index 000000000000..96163514ed22 --- /dev/null +++ b/arch/blackfin/mach-bf518/boards/Kconfig @@ -0,0 +1,12 @@ +choice + prompt "System type" + default BFIN518F_EZBRD + help + Select your board! + +config BFIN518F_EZBRD + bool "BF518F-EZBRD" + help + BF518-EZBRD board support. + +endchoice diff --git a/arch/blackfin/mach-bf518/boards/Makefile b/arch/blackfin/mach-bf518/boards/Makefile new file mode 100644 index 000000000000..172e859c3a7f --- /dev/null +++ b/arch/blackfin/mach-bf518/boards/Makefile @@ -0,0 +1,5 @@ +# +# arch/blackfin/mach-bf518/boards/Makefile +# + +obj-$(CONFIG_BFIN518F_EZBRD) += ezbrd.o diff --git a/arch/blackfin/mach-bf518/boards/ezbrd.c b/arch/blackfin/mach-bf518/boards/ezbrd.c new file mode 100644 index 000000000000..65bc602b5c99 --- /dev/null +++ b/arch/blackfin/mach-bf518/boards/ezbrd.c @@ -0,0 +1,613 @@ +/* + * File: arch/blackfin/mach-bf518/boards/ezbrd.c + * Based on: arch/blackfin/mach-bf527/boards/ezbrd.c + * Author: Bryan Wu + * + * Created: + * Description: + * + * Modified: + * Copyright 2005 National ICT Australia (NICTA) + * Copyright 2004-2008 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Name the Board for the /proc/cpuinfo + */ +const char bfin_board_name[] = "BF518F-EZBRD"; + +/* + * Driver needs to know address, irq and flag pin. + */ + +#if defined(CONFIG_MTD_PHYSMAP) || defined(CONFIG_MTD_PHYSMAP_MODULE) +static struct mtd_partition ezbrd_partitions[] = { + { + .name = "bootloader(nor)", + .size = 0x40000, + .offset = 0, + }, { + .name = "linux kernel(nor)", + .size = 0x1C0000, + .offset = MTDPART_OFS_APPEND, + }, { + .name = "file system(nor)", + .size = MTDPART_SIZ_FULL, + .offset = MTDPART_OFS_APPEND, + } +}; + +static struct physmap_flash_data ezbrd_flash_data = { + .width = 2, + .parts = ezbrd_partitions, + .nr_parts = ARRAY_SIZE(ezbrd_partitions), +}; + +static struct resource ezbrd_flash_resource = { + .start = 0x20000000, + .end = 0x203fffff, + .flags = IORESOURCE_MEM, +}; + +static struct platform_device ezbrd_flash_device = { + .name = "physmap-flash", + .id = 0, + .dev = { + .platform_data = &ezbrd_flash_data, + }, + .num_resources = 1, + .resource = &ezbrd_flash_resource, +}; +#endif + +#if defined(CONFIG_RTC_DRV_BFIN) || defined(CONFIG_RTC_DRV_BFIN_MODULE) +static struct platform_device rtc_device = { + .name = "rtc-bfin", + .id = -1, +}; +#endif + +#if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) +static struct platform_device bfin_mac_device = { + .name = "bfin_mac", +}; +#endif + +#if defined(CONFIG_MTD_M25P80) \ + || defined(CONFIG_MTD_M25P80_MODULE) +static struct mtd_partition bfin_spi_flash_partitions[] = { + { + .name = "bootloader(spi)", + .size = 0x00040000, + .offset = 0, + .mask_flags = MTD_CAP_ROM + }, { + .name = "linux kernel(spi)", + .size = MTDPART_SIZ_FULL, + .offset = MTDPART_OFS_APPEND, + } +}; + +static struct flash_platform_data bfin_spi_flash_data = { + .name = "m25p80", + .parts = bfin_spi_flash_partitions, + .nr_parts = ARRAY_SIZE(bfin_spi_flash_partitions), + .type = "m25p16", +}; + +/* SPI flash chip (m25p64) */ +static struct bfin5xx_spi_chip spi_flash_chip_info = { + .enable_dma = 0, /* use dma transfer with this chip*/ + .bits_per_word = 8, +}; +#endif + +#if defined(CONFIG_SPI_ADC_BF533) \ + || defined(CONFIG_SPI_ADC_BF533_MODULE) +/* SPI ADC chip */ +static struct bfin5xx_spi_chip spi_adc_chip_info = { + .enable_dma = 1, /* use dma transfer with this chip*/ + .bits_per_word = 16, +}; +#endif + +#if defined(CONFIG_SPI_MMC) || defined(CONFIG_SPI_MMC_MODULE) +static struct bfin5xx_spi_chip spi_mmc_chip_info = { + .enable_dma = 1, + .bits_per_word = 8, +}; +#endif + +#if defined(CONFIG_PBX) +static struct bfin5xx_spi_chip spi_si3xxx_chip_info = { + .ctl_reg = 0x4, /* send zero */ + .enable_dma = 0, + .bits_per_word = 8, + .cs_change_per_word = 1, +}; +#endif + +#if defined(CONFIG_TOUCHSCREEN_AD7877) || defined(CONFIG_TOUCHSCREEN_AD7877_MODULE) +static struct bfin5xx_spi_chip spi_ad7877_chip_info = { + .enable_dma = 0, + .bits_per_word = 16, +}; + +static const struct ad7877_platform_data bfin_ad7877_ts_info = { + .model = 7877, + .vref_delay_usecs = 50, /* internal, no capacitor */ + .x_plate_ohms = 419, + .y_plate_ohms = 486, + .pressure_max = 1000, + .pressure_min = 0, + .stopacq_polarity = 1, + .first_conversion_delay = 3, + .acquisition_time = 1, + .averaging = 1, + .pen_down_acc_interval = 1, +}; +#endif + +#if defined(CONFIG_SND_SOC_WM8731) || defined(CONFIG_SND_SOC_WM8731_MODULE) \ + && defined(CONFIG_SND_SOC_WM8731_SPI) +static struct bfin5xx_spi_chip spi_wm8731_chip_info = { + .enable_dma = 0, + .bits_per_word = 16, +}; +#endif + +#if defined(CONFIG_SPI_SPIDEV) || defined(CONFIG_SPI_SPIDEV_MODULE) +static struct bfin5xx_spi_chip spidev_chip_info = { + .enable_dma = 0, + .bits_per_word = 8, +}; +#endif + +static struct spi_board_info bfin_spi_board_info[] __initdata = { +#if defined(CONFIG_MTD_M25P80) \ + || defined(CONFIG_MTD_M25P80_MODULE) + { + /* the modalias must be the same as spi device driver name */ + .modalias = "m25p80", /* Name of spi_driver for this device */ + .max_speed_hz = 25000000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, /* Framework bus number */ + .chip_select = 1, /* Framework chip select. On STAMP537 it is SPISSEL1*/ + .platform_data = &bfin_spi_flash_data, + .controller_data = &spi_flash_chip_info, + .mode = SPI_MODE_3, + }, +#endif + +#if defined(CONFIG_SPI_ADC_BF533) \ + || defined(CONFIG_SPI_ADC_BF533_MODULE) + { + .modalias = "bfin_spi_adc", /* Name of spi_driver for this device */ + .max_speed_hz = 6250000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, /* Framework bus number */ + .chip_select = 1, /* Framework chip select. */ + .platform_data = NULL, /* No spi_driver specific config */ + .controller_data = &spi_adc_chip_info, + }, +#endif + +#if defined(CONFIG_SPI_MMC) || defined(CONFIG_SPI_MMC_MODULE) + { + .modalias = "spi_mmc_dummy", + .max_speed_hz = 25000000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, + .chip_select = 0, + .platform_data = NULL, + .controller_data = &spi_mmc_chip_info, + .mode = SPI_MODE_3, + }, + { + .modalias = "spi_mmc", + .max_speed_hz = 25000000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, + .chip_select = CONFIG_SPI_MMC_CS_CHAN, + .platform_data = NULL, + .controller_data = &spi_mmc_chip_info, + .mode = SPI_MODE_3, + }, +#endif +#if defined(CONFIG_PBX) + { + .modalias = "fxs-spi", + .max_speed_hz = 12500000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, + .chip_select = 8 - CONFIG_J11_JUMPER, + .controller_data = &spi_si3xxx_chip_info, + .mode = SPI_MODE_3, + }, + { + .modalias = "fxo-spi", + .max_speed_hz = 12500000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, + .chip_select = 8 - CONFIG_J19_JUMPER, + .controller_data = &spi_si3xxx_chip_info, + .mode = SPI_MODE_3, + }, +#endif +#if defined(CONFIG_TOUCHSCREEN_AD7877) || defined(CONFIG_TOUCHSCREEN_AD7877_MODULE) + { + .modalias = "ad7877", + .platform_data = &bfin_ad7877_ts_info, + .irq = IRQ_PF8, + .max_speed_hz = 12500000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, + .chip_select = 2, + .controller_data = &spi_ad7877_chip_info, + }, +#endif +#if defined(CONFIG_SND_SOC_WM8731) || defined(CONFIG_SND_SOC_WM8731_MODULE) \ + && defined(CONFIG_SND_SOC_WM8731_SPI) + { + .modalias = "wm8731", + .max_speed_hz = 3125000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, + .chip_select = 5, + .controller_data = &spi_wm8731_chip_info, + .mode = SPI_MODE_0, + }, +#endif +#if defined(CONFIG_SPI_SPIDEV) || defined(CONFIG_SPI_SPIDEV_MODULE) + { + .modalias = "spidev", + .max_speed_hz = 3125000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, + .chip_select = 1, + .controller_data = &spidev_chip_info, + }, +#endif +#if defined(CONFIG_FB_BFIN_LQ035Q1) || defined(CONFIG_FB_BFIN_LQ035Q1_MODULE) + { + .modalias = "bfin-lq035q1-spi", + .max_speed_hz = 20000000, /* max spi clock (SCK) speed in HZ */ + .bus_num = 0, + .chip_select = 1, + .controller_data = &lq035q1_spi_chip_info, + .mode = SPI_CPHA | SPI_CPOL, + }, +#endif +}; + +/* SPI controller data */ +#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE) +/* SPI (0) */ +static struct bfin5xx_spi_master bfin_spi0_info = { + .num_chipselect = 5, + .enable_dma = 1, /* master has the ability to do dma transfer */ + .pin_req = {P_SPI0_SCK, P_SPI0_MISO, P_SPI0_MOSI, 0}, +}; + +static struct resource bfin_spi0_resource[] = { + [0] = { + .start = SPI0_REGBASE, + .end = SPI0_REGBASE + 0xFF, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = CH_SPI0, + .end = CH_SPI0, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device bfin_spi0_device = { + .name = "bfin-spi", + .id = 0, /* Bus number */ + .num_resources = ARRAY_SIZE(bfin_spi0_resource), + .resource = bfin_spi0_resource, + .dev = { + .platform_data = &bfin_spi0_info, /* Passed to driver */ + }, +}; + +/* SPI (1) */ +static struct bfin5xx_spi_master bfin_spi1_info = { + .num_chipselect = 5, + .enable_dma = 1, /* master has the ability to do dma transfer */ + .pin_req = {P_SPI1_SCK, P_SPI1_MISO, P_SPI1_MOSI, 0}, +}; + +static struct resource bfin_spi1_resource[] = { + [0] = { + .start = SPI1_REGBASE, + .end = SPI1_REGBASE + 0xFF, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = CH_SPI1, + .end = CH_SPI1, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device bfin_spi1_device = { + .name = "bfin-spi", + .id = 1, /* Bus number */ + .num_resources = ARRAY_SIZE(bfin_spi1_resource), + .resource = bfin_spi1_resource, + .dev = { + .platform_data = &bfin_spi1_info, /* Passed to driver */ + }, +}; +#endif /* spi master and devices */ + +#if defined(CONFIG_SERIAL_BFIN) || defined(CONFIG_SERIAL_BFIN_MODULE) +static struct resource bfin_uart_resources[] = { +#ifdef CONFIG_SERIAL_BFIN_UART0 + { + .start = 0xFFC00400, + .end = 0xFFC004FF, + .flags = IORESOURCE_MEM, + }, +#endif +#ifdef CONFIG_SERIAL_BFIN_UART1 + { + .start = 0xFFC02000, + .end = 0xFFC020FF, + .flags = IORESOURCE_MEM, + }, +#endif +}; + +static struct platform_device bfin_uart_device = { + .name = "bfin-uart", + .id = 1, + .num_resources = ARRAY_SIZE(bfin_uart_resources), + .resource = bfin_uart_resources, +}; +#endif + +#if defined(CONFIG_BFIN_SIR) || defined(CONFIG_BFIN_SIR_MODULE) +static struct resource bfin_sir_resources[] = { +#ifdef CONFIG_BFIN_SIR0 + { + .start = 0xFFC00400, + .end = 0xFFC004FF, + .flags = IORESOURCE_MEM, + }, +#endif +#ifdef CONFIG_BFIN_SIR1 + { + .start = 0xFFC02000, + .end = 0xFFC020FF, + .flags = IORESOURCE_MEM, + }, +#endif +}; + +static struct platform_device bfin_sir_device = { + .name = "bfin_sir", + .id = 0, + .num_resources = ARRAY_SIZE(bfin_sir_resources), + .resource = bfin_sir_resources, +}; +#endif + +#if defined(CONFIG_I2C_BLACKFIN_TWI) || defined(CONFIG_I2C_BLACKFIN_TWI_MODULE) +static struct resource bfin_twi0_resource[] = { + [0] = { + .start = TWI0_REGBASE, + .end = TWI0_REGBASE, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = IRQ_TWI, + .end = IRQ_TWI, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device i2c_bfin_twi_device = { + .name = "i2c-bfin-twi", + .id = 0, + .num_resources = ARRAY_SIZE(bfin_twi0_resource), + .resource = bfin_twi0_resource, +}; +#endif + +#ifdef CONFIG_I2C_BOARDINFO +static struct i2c_board_info __initdata bfin_i2c_board_info[] = { +#if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE) + { + I2C_BOARD_INFO("pcf8574_lcd", 0x22), + }, +#endif +#if defined(CONFIG_TWI_KEYPAD) || defined(CONFIG_TWI_KEYPAD_MODULE) + { + I2C_BOARD_INFO("pcf8574_keypad", 0x27), + .irq = IRQ_PF8, + }, +#endif +}; +#endif + +#if defined(CONFIG_SERIAL_BFIN_SPORT) || defined(CONFIG_SERIAL_BFIN_SPORT_MODULE) +static struct platform_device bfin_sport0_uart_device = { + .name = "bfin-sport-uart", + .id = 0, +}; + +static struct platform_device bfin_sport1_uart_device = { + .name = "bfin-sport-uart", + .id = 1, +}; +#endif + +#if defined(CONFIG_KEYBOARD_GPIO) || defined(CONFIG_KEYBOARD_GPIO_MODULE) +#include +#include + +static struct gpio_keys_button bfin_gpio_keys_table[] = { + {BTN_0, GPIO_PG0, 1, "gpio-keys: BTN0"}, + {BTN_1, GPIO_PG13, 1, "gpio-keys: BTN1"}, +}; + +static struct gpio_keys_platform_data bfin_gpio_keys_data = { + .buttons = bfin_gpio_keys_table, + .nbuttons = ARRAY_SIZE(bfin_gpio_keys_table), +}; + +static struct platform_device bfin_device_gpiokeys = { + .name = "gpio-keys", + .dev = { + .platform_data = &bfin_gpio_keys_data, + }, +}; +#endif + +static struct resource bfin_gpios_resources = { + .start = 0, + .end = MAX_BLACKFIN_GPIOS - 1, + .flags = IORESOURCE_IRQ, +}; + +static struct platform_device bfin_gpios_device = { + .name = "simple-gpio", + .id = -1, + .num_resources = 1, + .resource = &bfin_gpios_resources, +}; + +static const unsigned int cclk_vlev_datasheet[] = +{ + VRPAIR(VLEV_100, 400000000), + VRPAIR(VLEV_105, 426000000), + VRPAIR(VLEV_110, 500000000), + VRPAIR(VLEV_115, 533000000), + VRPAIR(VLEV_120, 600000000), +}; + +static struct bfin_dpmc_platform_data bfin_dmpc_vreg_data = { + .tuple_tab = cclk_vlev_datasheet, + .tabsize = ARRAY_SIZE(cclk_vlev_datasheet), + .vr_settling_time = 25 /* us */, +}; + +static struct platform_device bfin_dpmc = { + .name = "bfin dpmc", + .dev = { + .platform_data = &bfin_dmpc_vreg_data, + }, +}; + +static struct platform_device *stamp_devices[] __initdata = { + + &bfin_dpmc, + +#if defined(CONFIG_RTC_DRV_BFIN) || defined(CONFIG_RTC_DRV_BFIN_MODULE) + &rtc_device, +#endif + +#if defined(CONFIG_BFIN_MAC) || defined(CONFIG_BFIN_MAC_MODULE) + &bfin_mac_device, +#endif + +#if defined(CONFIG_SPI_BFIN) || defined(CONFIG_SPI_BFIN_MODULE) + &bfin_spi0_device, + &bfin_spi1_device, +#endif + +#if defined(CONFIG_SERIAL_BFIN) || defined(CONFIG_SERIAL_BFIN_MODULE) + &bfin_uart_device, +#endif + +#if defined(CONFIG_BFIN_SIR) || defined(CONFIG_BFIN_SIR_MODULE) + &bfin_sir_device, +#endif + +#if defined(CONFIG_I2C_BLACKFIN_TWI) || defined(CONFIG_I2C_BLACKFIN_TWI_MODULE) + &i2c_bfin_twi_device, +#endif + +#if defined(CONFIG_SERIAL_BFIN_SPORT) || defined(CONFIG_SERIAL_BFIN_SPORT_MODULE) + &bfin_sport0_uart_device, + &bfin_sport1_uart_device, +#endif + +#if defined(CONFIG_KEYBOARD_GPIO) || defined(CONFIG_KEYBOARD_GPIO_MODULE) + &bfin_device_gpiokeys, +#endif + +#if defined(CONFIG_MTD_PHYSMAP) || defined(CONFIG_MTD_PHYSMAP_MODULE) + &ezbrd_flash_device, +#endif + + &bfin_gpios_device, +}; + +static int __init ezbrd_init(void) +{ + printk(KERN_INFO "%s(): registering device resources\n", __func__); + +#ifdef CONFIG_I2C_BOARDINFO + i2c_register_board_info(0, bfin_i2c_board_info, + ARRAY_SIZE(bfin_i2c_board_info)); +#endif + + platform_add_devices(stamp_devices, ARRAY_SIZE(stamp_devices)); + spi_register_board_info(bfin_spi_board_info, ARRAY_SIZE(bfin_spi_board_info)); + return 0; +} + +arch_initcall(ezbrd_init); + +void native_machine_restart(char *cmd) +{ + /* workaround reboot hang when booting from SPI */ + if ((bfin_read_SYSCR() & 0x7) == 0x3) + bfin_gpio_reset_spi0_ssel1(); +} + +void bfin_get_ether_addr(char *addr) +{ + /* the MAC is stored in OTP memory page 0xDF */ + u32 ret; + u64 otp_mac; + u32 (*otp_read)(u32 page, u32 flags, u64 *page_content) = (void *)0xEF00001A; + + ret = otp_read(0xDF, 0x00, &otp_mac); + if (!(ret & 0x1)) { + char *otp_mac_p = (char *)&otp_mac; + for (ret = 0; ret < 6; ++ret) + addr[ret] = otp_mac_p[5 - ret]; + } +} +EXPORT_SYMBOL(bfin_get_ether_addr); diff --git a/arch/blackfin/mach-bf518/dma.c b/arch/blackfin/mach-bf518/dma.c new file mode 100644 index 000000000000..0d06ced01ced --- /dev/null +++ b/arch/blackfin/mach-bf518/dma.c @@ -0,0 +1,118 @@ +/* + * File: arch/blackfin/mach-bf518/dma.c + * Based on: + * Author: Bryan Wu + * + * Created: + * Description: This file contains the simple DMA Implementation for Blackfin + * + * Modified: + * Copyright 2004-2008 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include + +#include +#include + +struct dma_register *dma_io_base_addr[MAX_BLACKFIN_DMA_CHANNEL] = { + (struct dma_register *) DMA0_NEXT_DESC_PTR, + (struct dma_register *) DMA1_NEXT_DESC_PTR, + (struct dma_register *) DMA2_NEXT_DESC_PTR, + (struct dma_register *) DMA3_NEXT_DESC_PTR, + (struct dma_register *) DMA4_NEXT_DESC_PTR, + (struct dma_register *) DMA5_NEXT_DESC_PTR, + (struct dma_register *) DMA6_NEXT_DESC_PTR, + (struct dma_register *) DMA7_NEXT_DESC_PTR, + (struct dma_register *) DMA8_NEXT_DESC_PTR, + (struct dma_register *) DMA9_NEXT_DESC_PTR, + (struct dma_register *) DMA10_NEXT_DESC_PTR, + (struct dma_register *) DMA11_NEXT_DESC_PTR, + (struct dma_register *) MDMA_S0_NEXT_DESC_PTR, + (struct dma_register *) MDMA_D0_NEXT_DESC_PTR, + (struct dma_register *) MDMA_S1_NEXT_DESC_PTR, + (struct dma_register *) MDMA_D1_NEXT_DESC_PTR, +}; +EXPORT_SYMBOL(dma_io_base_addr); + +int channel2irq(unsigned int channel) +{ + int ret_irq = -1; + + switch (channel) { + case CH_PPI: + ret_irq = IRQ_PPI; + break; + + case CH_EMAC_RX: + ret_irq = IRQ_MAC_RX; + break; + + case CH_EMAC_TX: + ret_irq = IRQ_MAC_TX; + break; + + case CH_UART1_RX: + ret_irq = IRQ_UART1_RX; + break; + + case CH_UART1_TX: + ret_irq = IRQ_UART1_TX; + break; + + case CH_SPORT0_RX: + ret_irq = IRQ_SPORT0_RX; + break; + + case CH_SPORT0_TX: + ret_irq = IRQ_SPORT0_TX; + break; + + case CH_SPORT1_RX: + ret_irq = IRQ_SPORT1_RX; + break; + + case CH_SPORT1_TX: + ret_irq = IRQ_SPORT1_TX; + break; + + case CH_SPI0: + ret_irq = IRQ_SPI0; + break; + + case CH_UART0_RX: + ret_irq = IRQ_UART0_RX; + break; + + case CH_UART0_TX: + ret_irq = IRQ_UART0_TX; + break; + + case CH_MEM_STREAM0_SRC: + case CH_MEM_STREAM0_DEST: + ret_irq = IRQ_MEM_DMA0; + break; + + case CH_MEM_STREAM1_SRC: + case CH_MEM_STREAM1_DEST: + ret_irq = IRQ_MEM_DMA1; + break; + } + return ret_irq; +} diff --git a/arch/blackfin/mach-bf518/head.S b/arch/blackfin/mach-bf518/head.S new file mode 100644 index 000000000000..771bf7e477ff --- /dev/null +++ b/arch/blackfin/mach-bf518/head.S @@ -0,0 +1,146 @@ +/* + * File: arch/blackfin/mach-bf518/head.S + * Based on: arch/blackfin/mach-bf527/head.S + * Author: Bryan Wu + * + * Created: 2008 + * Description: Startup code for Blackfin BF51x + * + * Modified: + * Copyright 2004-2008 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#ifdef CONFIG_BFIN_KERNEL_CLOCK +#include +#include +#endif + +.section .l1.text +#ifdef CONFIG_BFIN_KERNEL_CLOCK +ENTRY(_start_dma_code) + + /* Enable PHY CLK buffer output */ + p0.h = hi(VR_CTL); + p0.l = lo(VR_CTL); + r0.l = w[p0]; + bitset(r0, 14); + w[p0] = r0.l; + ssync; + + p0.h = hi(SIC_IWR0); + p0.l = lo(SIC_IWR0); + r0.l = 0x1; + r0.h = 0x0; + [p0] = r0; + ssync; + + /* + * Set PLL_CTL + * - [14:09] = MSEL[5:0] : CLKIN / VCO multiplication factors + * - [8] = BYPASS : BYPASS the PLL, run CLKIN into CCLK/SCLK + * - [7] = output delay (add 200ps of delay to mem signals) + * - [6] = input delay (add 200ps of input delay to mem signals) + * - [5] = PDWN : 1=All Clocks off + * - [3] = STOPCK : 1=Core Clock off + * - [1] = PLL_OFF : 1=Disable Power to PLL + * - [0] = DF : 1=Pass CLKIN/2 to PLL / 0=Pass CLKIN to PLL + * all other bits set to zero + */ + + p0.h = hi(PLL_LOCKCNT); + p0.l = lo(PLL_LOCKCNT); + r0 = 0x300(Z); + w[p0] = r0.l; + ssync; + + P2.H = hi(EBIU_SDGCTL); + P2.L = lo(EBIU_SDGCTL); + R0 = [P2]; + BITSET (R0, 24); + [P2] = R0; + ssync; + + r0 = CONFIG_VCO_MULT & 63; /* Load the VCO multiplier */ + r0 = r0 << 9; /* Shift it over, */ + r1 = CLKIN_HALF; /* Do we need to divide CLKIN by 2?*/ + r0 = r1 | r0; + r1 = PLL_BYPASS; /* Bypass the PLL? */ + r1 = r1 << 8; /* Shift it over */ + r0 = r1 | r0; /* add them all together */ +#ifdef ANOMALY_05000265 + BITSET(r0, 15); /* Add 250 mV of hysteresis to SPORT input pins */ +#endif + + p0.h = hi(PLL_CTL); + p0.l = lo(PLL_CTL); /* Load the address */ + cli r2; /* Disable interrupts */ + ssync; + w[p0] = r0.l; /* Set the value */ + idle; /* Wait for the PLL to stablize */ + sti r2; /* Enable interrupts */ + +.Lcheck_again: + p0.h = hi(PLL_STAT); + p0.l = lo(PLL_STAT); + R0 = W[P0](Z); + CC = BITTST(R0,5); + if ! CC jump .Lcheck_again; + + /* Configure SCLK & CCLK Dividers */ + r0 = (CONFIG_CCLK_ACT_DIV | CONFIG_SCLK_DIV); + p0.h = hi(PLL_DIV); + p0.l = lo(PLL_DIV); + w[p0] = r0.l; + ssync; + + p0.l = lo(EBIU_SDRRC); + p0.h = hi(EBIU_SDRRC); + r0 = mem_SDRRC; + w[p0] = r0.l; + ssync; + + P2.H = hi(EBIU_SDGCTL); + P2.L = lo(EBIU_SDGCTL); + R0 = [P2]; + BITCLR (R0, 24); + p0.h = hi(EBIU_SDSTAT); + p0.l = lo(EBIU_SDSTAT); + r2.l = w[p0]; + cc = bittst(r2,3); + if !cc jump .Lskip; + NOP; + BITSET (R0, 23); +.Lskip: + [P2] = R0; + SSYNC; + + R0.L = lo(mem_SDGCTL); + R0.H = hi(mem_SDGCTL); + R1 = [p2]; + R1 = R1 | R0; + [P2] = R1; + SSYNC; + + RTS; +ENDPROC(_start_dma_code) +#endif /* CONFIG_BFIN_KERNEL_CLOCK */ diff --git a/arch/blackfin/mach-bf518/include/mach/anomaly.h b/arch/blackfin/mach-bf518/include/mach/anomaly.h new file mode 100644 index 000000000000..5a94f97f04ad --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/anomaly.h @@ -0,0 +1,77 @@ +/* + * File: include/asm-blackfin/mach-bf518/anomaly.h + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * Copyright (C) 2004-2008 Analog Devices Inc. + * Licensed under the GPL-2 or later. + */ + +/* This file shoule be up to date with: + * - ???? + */ + +#ifndef _MACH_ANOMALY_H_ +#define _MACH_ANOMALY_H_ + +/* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot2 Not Supported */ +#define ANOMALY_05000074 (1) +/* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */ +#define ANOMALY_05000122 (1) +/* False Hardware Error from an Access in the Shadow of a Conditional Branch */ +#define ANOMALY_05000245 (1) +/* Sensitivity To Noise with Slow Input Edge Rates on External SPORT TX and RX Clocks */ +#define ANOMALY_05000265 (1) +/* False Hardware Errors Caused by Fetches at the Boundary of Reserved Memory */ +#define ANOMALY_05000310 (1) +/* PPI Underflow Error Goes Undetected in ITU-R 656 Mode */ +#define ANOMALY_05000366 (1) +/* Lockbox SESR Firmware Does Not Save/Restore Full Context */ +#define ANOMALY_05000405 (1) +/* Lockbox Firmware Memory Cleanup Routine Does not Clear Registers */ +#define ANOMALY_05000408 (1) +/* Speculative Fetches Can Cause Undesired External FIFO Operations */ +#define ANOMALY_05000416 (1) +/* TWI Fall Time (Tof) May Violate the Minimum I2C Specification */ +#define ANOMALY_05000421 (1) +/* TWI Input Capacitance (Ci) May Violate the Maximum I2C Specification */ +#define ANOMALY_05000422 (1) +/* Speculative Fetches of Indirect-Pointer Instructions Can Cause False Hardware Errors */ +#define ANOMALY_05000426 (1) +/* Software System Reset Corrupts PLL_LOCKCNT Register */ +#define ANOMALY_05000430 (1) +/* Incorrect Use of Stack in Lockbox Firmware During Authentication */ +#define ANOMALY_05000431 (1) +/* Certain SIC Registers are not Reset After Soft or Core Double Fault Reset */ +#define ANOMALY_05000435 (1) +/* PORTx_DRIVE and PORTx_HYSTERESIS Registers Read Back Incorrect Values */ +#define ANOMALY_05000438 (1) +/* Preboot Cannot be Used to Program the PLL_DIV Register */ +#define ANOMALY_05000439 (1) +/* bfrom_SysControl() Cannot be Used to Write the PLL_DIV Register */ +#define ANOMALY_05000440 (1) +/* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */ +#define ANOMALY_05000443 (1) +/* Incorrect L1 Instruction Bank B Memory Map Location */ +#define ANOMALY_05000444 (1) + +/* Anomalies that don't exist on this proc */ +#define ANOMALY_05000125 (0) +#define ANOMALY_05000158 (0) +#define ANOMALY_05000183 (0) +#define ANOMALY_05000198 (0) +#define ANOMALY_05000230 (0) +#define ANOMALY_05000244 (0) +#define ANOMALY_05000261 (0) +#define ANOMALY_05000263 (0) +#define ANOMALY_05000266 (0) +#define ANOMALY_05000273 (0) +#define ANOMALY_05000285 (0) +#define ANOMALY_05000307 (0) +#define ANOMALY_05000311 (0) +#define ANOMALY_05000312 (0) +#define ANOMALY_05000323 (0) +#define ANOMALY_05000353 (0) +#define ANOMALY_05000363 (0) +#define ANOMALY_05000386 (0) + +#endif diff --git a/arch/blackfin/mach-bf518/include/mach/bf518.h b/arch/blackfin/mach-bf518/include/mach/bf518.h new file mode 100644 index 000000000000..78da1a07ee73 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/bf518.h @@ -0,0 +1,132 @@ +/* + * File: include/asm-blackfin/mach-bf518/bf518.h + * Based on: include/asm-blackfin/mach-bf527/bf527.h + * Author: Michael Hennerich (michael.hennerich@analog.com) + * + * Created: + * Description: SYSTEM MMR REGISTER AND MEMORY MAP FOR ADSP-BF518 + * + * Modified: + * Copyright 2004-2007 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef __MACH_BF518_H__ +#define __MACH_BF518_H__ + +#define OFFSET_(x) ((x) & 0x0000FFFF) + +/*some misc defines*/ +#define IMASK_IVG15 0x8000 +#define IMASK_IVG14 0x4000 +#define IMASK_IVG13 0x2000 +#define IMASK_IVG12 0x1000 + +#define IMASK_IVG11 0x0800 +#define IMASK_IVG10 0x0400 +#define IMASK_IVG9 0x0200 +#define IMASK_IVG8 0x0100 + +#define IMASK_IVG7 0x0080 +#define IMASK_IVGTMR 0x0040 +#define IMASK_IVGHW 0x0020 + +/***************************/ + +#define BFIN_DSUBBANKS 4 +#define BFIN_DWAYS 2 +#define BFIN_DLINES 64 +#define BFIN_ISUBBANKS 4 +#define BFIN_IWAYS 4 +#define BFIN_ILINES 32 + +#define WAY0_L 0x1 +#define WAY1_L 0x2 +#define WAY01_L 0x3 +#define WAY2_L 0x4 +#define WAY02_L 0x5 +#define WAY12_L 0x6 +#define WAY012_L 0x7 + +#define WAY3_L 0x8 +#define WAY03_L 0x9 +#define WAY13_L 0xA +#define WAY013_L 0xB + +#define WAY32_L 0xC +#define WAY320_L 0xD +#define WAY321_L 0xE +#define WAYALL_L 0xF + +#define DMC_ENABLE (2<<2) /*yes, 2, not 1 */ + +/********************************* EBIU Settings ************************************/ +#define AMBCTL0VAL ((CONFIG_BANK_1 << 16) | CONFIG_BANK_0) +#define AMBCTL1VAL ((CONFIG_BANK_3 << 16) | CONFIG_BANK_2) + +#ifdef CONFIG_C_AMBEN_ALL +#define V_AMBEN AMBEN_ALL +#endif +#ifdef CONFIG_C_AMBEN +#define V_AMBEN 0x0 +#endif +#ifdef CONFIG_C_AMBEN_B0 +#define V_AMBEN AMBEN_B0 +#endif +#ifdef CONFIG_C_AMBEN_B0_B1 +#define V_AMBEN AMBEN_B0_B1 +#endif +#ifdef CONFIG_C_AMBEN_B0_B1_B2 +#define V_AMBEN AMBEN_B0_B1_B2 +#endif +#ifdef CONFIG_C_AMCKEN +#define V_AMCKEN AMCKEN +#else +#define V_AMCKEN 0x0 +#endif +#ifdef CONFIG_C_CDPRIO +#define V_CDPRIO 0x100 +#else +#define V_CDPRIO 0x0 +#endif + +#define AMGCTLVAL (V_AMBEN | V_AMCKEN | V_CDPRIO) + +#ifdef CONFIG_BF518 +#define CPU "BF518" +#define CPUID 0x27e8 +#endif +#ifdef CONFIG_BF516 +#define CPU "BF516" +#define CPUID 0x27e8 +#endif +#ifdef CONFIG_BF514 +#define CPU "BF514" +#define CPUID 0x27e8 +#endif +#ifdef CONFIG_BF512 +#define CPU "BF512" +#define CPUID 0x27e8 +#endif + +#ifndef CPU +#error "Unknown CPU type - This kernel doesn't seem to be configured properly" +#endif + +#endif /* __MACH_BF518_H__ */ diff --git a/arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h new file mode 100644 index 000000000000..b50a63b975a2 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h @@ -0,0 +1,169 @@ +/* + * file: include/asm-blackfin/mach-bf518/bfin_serial_5xx.h + * based on: + * author: + * + * created: + * description: + * blackfin serial driver head file + * rev: + * + * modified: + * + * + * bugs: enter bugs at http://blackfin.uclinux.org/ + * + * this program is free software; you can redistribute it and/or modify + * it under the terms of the gnu general public license as published by + * the free software foundation; either version 2, or (at your option) + * any later version. + * + * this program is distributed in the hope that it will be useful, + * but without any warranty; without even the implied warranty of + * merchantability or fitness for a particular purpose. see the + * gnu general public license for more details. + * + * you should have received a copy of the gnu general public license + * along with this program; see the file copying. + * if not, write to the free software foundation, + * 59 temple place - suite 330, boston, ma 02111-1307, usa. + */ + +#include +#include +#include + +#define UART_GET_CHAR(uart) bfin_read16(((uart)->port.membase + OFFSET_RBR)) +#define UART_GET_DLL(uart) bfin_read16(((uart)->port.membase + OFFSET_DLL)) +#define UART_GET_IER(uart) bfin_read16(((uart)->port.membase + OFFSET_IER)) +#define UART_GET_DLH(uart) bfin_read16(((uart)->port.membase + OFFSET_DLH)) +#define UART_GET_IIR(uart) bfin_read16(((uart)->port.membase + OFFSET_IIR)) +#define UART_GET_LCR(uart) bfin_read16(((uart)->port.membase + OFFSET_LCR)) +#define UART_GET_GCTL(uart) bfin_read16(((uart)->port.membase + OFFSET_GCTL)) + +#define UART_PUT_CHAR(uart, v) bfin_write16(((uart)->port.membase + OFFSET_THR), v) +#define UART_PUT_DLL(uart, v) bfin_write16(((uart)->port.membase + OFFSET_DLL), v) +#define UART_PUT_IER(uart, v) bfin_write16(((uart)->port.membase + OFFSET_IER), v) +#define UART_SET_IER(uart, v) UART_PUT_IER(uart, UART_GET_IER(uart) | (v)) +#define UART_CLEAR_IER(uart, v) UART_PUT_IER(uart, UART_GET_IER(uart) & ~(v)) +#define UART_PUT_DLH(uart, v) bfin_write16(((uart)->port.membase + OFFSET_DLH), v) +#define UART_PUT_LCR(uart, v) bfin_write16(((uart)->port.membase + OFFSET_LCR), v) +#define UART_PUT_GCTL(uart, v) bfin_write16(((uart)->port.membase + OFFSET_GCTL), v) + +#define UART_SET_DLAB(uart) do { UART_PUT_LCR(uart, UART_GET_LCR(uart) | DLAB); SSYNC(); } while (0) +#define UART_CLEAR_DLAB(uart) do { UART_PUT_LCR(uart, UART_GET_LCR(uart) & ~DLAB); SSYNC(); } while (0) + +#define UART_GET_CTS(x) gpio_get_value(x->cts_pin) +#define UART_SET_RTS(x) gpio_set_value(x->rts_pin, 1) +#define UART_CLEAR_RTS(x) gpio_set_value(x->rts_pin, 0) +#define UART_ENABLE_INTS(x, v) UART_PUT_IER(x, v) +#define UART_DISABLE_INTS(x) UART_PUT_IER(x, 0) + +#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) +# define CONFIG_SERIAL_BFIN_CTSRTS + +# ifndef CONFIG_UART0_CTS_PIN +# define CONFIG_UART0_CTS_PIN -1 +# endif + +# ifndef CONFIG_UART0_RTS_PIN +# define CONFIG_UART0_RTS_PIN -1 +# endif + +# ifndef CONFIG_UART1_CTS_PIN +# define CONFIG_UART1_CTS_PIN -1 +# endif + +# ifndef CONFIG_UART1_RTS_PIN +# define CONFIG_UART1_RTS_PIN -1 +# endif +#endif + +#define BFIN_UART_TX_FIFO_SIZE 2 + +/* + * The pin configuration is different from schematic + */ +struct bfin_serial_port { + struct uart_port port; + unsigned int old_status; + unsigned int lsr; +#ifdef CONFIG_SERIAL_BFIN_DMA + int tx_done; + int tx_count; + struct circ_buf rx_dma_buf; + struct timer_list rx_dma_timer; + int rx_dma_nrows; + unsigned int tx_dma_channel; + unsigned int rx_dma_channel; + struct work_struct tx_dma_workqueue; +#endif +#ifdef CONFIG_SERIAL_BFIN_CTSRTS + struct timer_list cts_timer; + int cts_pin; + int rts_pin; +#endif +}; + +/* The hardware clears the LSR bits upon read, so we need to cache + * some of the more fun bits in software so they don't get lost + * when checking the LSR in other code paths (TX). + */ +static inline unsigned int UART_GET_LSR(struct bfin_serial_port *uart) +{ + unsigned int lsr = bfin_read16(uart->port.membase + OFFSET_LSR); + uart->lsr |= (lsr & (BI|FE|PE|OE)); + return lsr | uart->lsr; +} + +static inline void UART_CLEAR_LSR(struct bfin_serial_port *uart) +{ + uart->lsr = 0; + bfin_write16(uart->port.membase + OFFSET_LSR, -1); +} + +struct bfin_serial_res { + unsigned long uart_base_addr; + int uart_irq; +#ifdef CONFIG_SERIAL_BFIN_DMA + unsigned int uart_tx_dma_channel; + unsigned int uart_rx_dma_channel; +#endif +#ifdef CONFIG_SERIAL_BFIN_CTSRTS + int uart_cts_pin; + int uart_rts_pin; +#endif +}; + +struct bfin_serial_res bfin_serial_resource[] = { +#ifdef CONFIG_SERIAL_BFIN_UART0 + { + 0xFFC00400, + IRQ_UART0_RX, +#ifdef CONFIG_SERIAL_BFIN_DMA + CH_UART0_TX, + CH_UART0_RX, +#endif +#ifdef CONFIG_BFIN_UART0_CTSRTS + CONFIG_UART0_CTS_PIN, + CONFIG_UART0_RTS_PIN, +#endif + }, +#endif +#ifdef CONFIG_SERIAL_BFIN_UART1 + { + 0xFFC02000, + IRQ_UART1_RX, +#ifdef CONFIG_SERIAL_BFIN_DMA + CH_UART1_TX, + CH_UART1_RX, +#endif +#ifdef CONFIG_BFIN_UART1_CTSRTS + CONFIG_UART1_CTS_PIN, + CONFIG_UART1_RTS_PIN, +#endif + }, +#endif +}; + +#define DRIVER_NAME "bfin-uart" diff --git a/arch/blackfin/mach-bf518/include/mach/bfin_sir.h b/arch/blackfin/mach-bf518/include/mach/bfin_sir.h new file mode 100644 index 000000000000..cfd8ad4f1f2c --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/bfin_sir.h @@ -0,0 +1,142 @@ +/* + * Blackfin Infra-red Driver + * + * Copyright 2006-2008 Analog Devices Inc. + * + * Enter bugs at http://blackfin.uclinux.org/ + * + * Licensed under the GPL-2 or later. + * + */ + +#include +#include +#include + +#define SIR_UART_GET_CHAR(port) bfin_read16((port)->membase + OFFSET_RBR) +#define SIR_UART_GET_DLL(port) bfin_read16((port)->membase + OFFSET_DLL) +#define SIR_UART_GET_IER(port) bfin_read16((port)->membase + OFFSET_IER) +#define SIR_UART_GET_DLH(port) bfin_read16((port)->membase + OFFSET_DLH) +#define SIR_UART_GET_IIR(port) bfin_read16((port)->membase + OFFSET_IIR) +#define SIR_UART_GET_LCR(port) bfin_read16((port)->membase + OFFSET_LCR) +#define SIR_UART_GET_GCTL(port) bfin_read16((port)->membase + OFFSET_GCTL) + +#define SIR_UART_PUT_CHAR(port, v) bfin_write16(((port)->membase + OFFSET_THR), v) +#define SIR_UART_PUT_DLL(port, v) bfin_write16(((port)->membase + OFFSET_DLL), v) +#define SIR_UART_PUT_IER(port, v) bfin_write16(((port)->membase + OFFSET_IER), v) +#define SIR_UART_PUT_DLH(port, v) bfin_write16(((port)->membase + OFFSET_DLH), v) +#define SIR_UART_PUT_LCR(port, v) bfin_write16(((port)->membase + OFFSET_LCR), v) +#define SIR_UART_PUT_GCTL(port, v) bfin_write16(((port)->membase + OFFSET_GCTL), v) + +#ifdef CONFIG_SIR_BFIN_DMA +struct dma_rx_buf { + char *buf; + int head; + int tail; + }; +#endif /* CONFIG_SIR_BFIN_DMA */ + +struct bfin_sir_port { + unsigned char __iomem *membase; + unsigned int irq; + unsigned int lsr; + unsigned long clk; + struct net_device *dev; +#ifdef CONFIG_SIR_BFIN_DMA + int tx_done; + struct dma_rx_buf rx_dma_buf; + struct timer_list rx_dma_timer; + int rx_dma_nrows; +#endif /* CONFIG_SIR_BFIN_DMA */ + unsigned int tx_dma_channel; + unsigned int rx_dma_channel; +}; + +struct bfin_sir_port sir_ports[BFIN_UART_NR_PORTS]; + +struct bfin_sir_port_res { + unsigned long base_addr; + int irq; + unsigned int rx_dma_channel; + unsigned int tx_dma_channel; +}; + +struct bfin_sir_port_res bfin_sir_port_resource[] = { +#ifdef CONFIG_BFIN_SIR0 + { + 0xFFC00400, + IRQ_UART0_RX, + CH_UART0_RX, + CH_UART0_TX, + }, +#endif +#ifdef CONFIG_BFIN_SIR1 + { + 0xFFC02000, + IRQ_UART1_RX, + CH_UART1_RX, + CH_UART1_TX, + }, +#endif +}; + +int nr_sirs = ARRAY_SIZE(bfin_sir_port_resource); + +struct bfin_sir_self { + struct bfin_sir_port *sir_port; + spinlock_t lock; + unsigned int open; + int speed; + int newspeed; + + struct sk_buff *txskb; + struct sk_buff *rxskb; + struct net_device_stats stats; + struct device *dev; + struct irlap_cb *irlap; + struct qos_info qos; + + iobuff_t tx_buff; + iobuff_t rx_buff; + + struct work_struct work; + int mtt; +}; + +static inline unsigned int SIR_UART_GET_LSR(struct bfin_sir_port *port) +{ + unsigned int lsr = bfin_read16(port->membase + OFFSET_LSR); + port->lsr |= (lsr & (BI|FE|PE|OE)); + return lsr | port->lsr; +} + +static inline void SIR_UART_CLEAR_LSR(struct bfin_sir_port *port) +{ + port->lsr = 0; + bfin_read16(port->membase + OFFSET_LSR); +} + +#define DRIVER_NAME "bfin_sir" + +static int bfin_sir_hw_init(void) +{ + int ret = -ENODEV; +#ifdef CONFIG_BFIN_SIR0 + ret = peripheral_request(P_UART0_TX, DRIVER_NAME); + if (ret) + return ret; + ret = peripheral_request(P_UART0_RX, DRIVER_NAME); + if (ret) + return ret; +#endif + +#ifdef CONFIG_BFIN_SIR1 + ret = peripheral_request(P_UART1_TX, DRIVER_NAME); + if (ret) + return ret; + ret = peripheral_request(P_UART1_RX, DRIVER_NAME); + if (ret) + return ret; +#endif + return ret; +} diff --git a/arch/blackfin/mach-bf518/include/mach/blackfin.h b/arch/blackfin/mach-bf518/include/mach/blackfin.h new file mode 100644 index 000000000000..d1a2b9ca6227 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/blackfin.h @@ -0,0 +1,105 @@ +/* + * File: include/asm-blackfin/mach-bf518/blackfin.h + * Based on: + * Author: + * + * Created: + * Description: + * + * Rev: + * + * Modified: + * + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _MACH_BLACKFIN_H_ +#define _MACH_BLACKFIN_H_ + +#define BF518_FAMILY + +#include "bf518.h" +#include "mem_map.h" +#include "defBF512.h" +#include "anomaly.h" + +#if defined(CONFIG_BF518) +#include "defBF518.h" +#endif + +#if defined(CONFIG_BF516) +#include "defBF516.h" +#endif + +#if defined(CONFIG_BF514) +#include "defBF514.h" +#endif + +#if defined(CONFIG_BF512) +#include "defBF512.h" +#endif + +#if !defined(__ASSEMBLY__) +#include "cdefBF512.h" + +#if defined(CONFIG_BF518) +#include "cdefBF518.h" +#endif + +#if defined(CONFIG_BF516) +#include "cdefBF516.h" +#endif + +#if defined(CONFIG_BF514) +#include "cdefBF514.h" +#endif +#endif + +/* UART_IIR Register */ +#define STATUS(x) ((x << 1) & 0x06) +#define STATUS_P1 0x02 +#define STATUS_P0 0x01 + +#define BFIN_UART_NR_PORTS 2 + +#define OFFSET_THR 0x00 /* Transmit Holding register */ +#define OFFSET_RBR 0x00 /* Receive Buffer register */ +#define OFFSET_DLL 0x00 /* Divisor Latch (Low-Byte) */ +#define OFFSET_IER 0x04 /* Interrupt Enable Register */ +#define OFFSET_DLH 0x04 /* Divisor Latch (High-Byte) */ +#define OFFSET_IIR 0x08 /* Interrupt Identification Register */ +#define OFFSET_LCR 0x0C /* Line Control Register */ +#define OFFSET_MCR 0x10 /* Modem Control Register */ +#define OFFSET_LSR 0x14 /* Line Status Register */ +#define OFFSET_MSR 0x18 /* Modem Status Register */ +#define OFFSET_SCR 0x1C /* SCR Scratch Register */ +#define OFFSET_GCTL 0x24 /* Global Control Register */ + +/* DPMC*/ +#define bfin_read_STOPCK_OFF() bfin_read_STOPCK() +#define bfin_write_STOPCK_OFF(val) bfin_write_STOPCK(val) +#define STOPCK_OFF STOPCK + +/* PLL_DIV Masks */ +#define CCLK_DIV1 CSEL_DIV1 /* CCLK = VCO / 1 */ +#define CCLK_DIV2 CSEL_DIV2 /* CCLK = VCO / 2 */ +#define CCLK_DIV4 CSEL_DIV4 /* CCLK = VCO / 4 */ +#define CCLK_DIV8 CSEL_DIV8 /* CCLK = VCO / 8 */ + +#endif diff --git a/arch/blackfin/mach-bf518/include/mach/cdefBF512.h b/arch/blackfin/mach-bf518/include/mach/cdefBF512.h new file mode 100644 index 000000000000..820c13c4daaa --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/cdefBF512.h @@ -0,0 +1,46 @@ +/* + * File: include/asm-blackfin/mach-bf518/cdefbf512.h + * Based on: + * Author: + * + * Created: + * Description: system mmr register map + * + * Rev: + * + * Modified: + * + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _CDEF_BF512_H +#define _CDEF_BF512_H + +/* include all Core registers and bit definitions */ +#include "defBF512.h" + +/* include core specific register pointer definitions */ +#include + +/* SYSTEM & MMR ADDRESS DEFINITIONS FOR ADSP-BF512 */ + +/* include cdefBF51x_base.h for the set of #defines that are common to all ADSP-BF51x processors */ +#include "cdefBF51x_base.h" + +#endif /* _CDEF_BF512_H */ diff --git a/arch/blackfin/mach-bf518/include/mach/cdefBF514.h b/arch/blackfin/mach-bf518/include/mach/cdefBF514.h new file mode 100644 index 000000000000..9521e178fb28 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/cdefBF514.h @@ -0,0 +1,48 @@ +/* + * File: include/asm-blackfin/mach-bf518/cdefbf514.h + * Based on: + * Author: + * + * Created: + * Description: system mmr register map + * + * Rev: + * + * Modified: + * + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _CDEF_BF514_H +#define _CDEF_BF514_H + +/* include all Core registers and bit definitions */ +#include "defBF514.h" + +/* include core specific register pointer definitions */ +#include + +/* SYSTEM & MMR ADDRESS DEFINITIONS FOR ADSP-BF514 */ + +/* include cdefBF51x_base.h for the set of #defines that are common to all ADSP-BF51x processors */ +#include "cdefBF51x_base.h" + +/* The following are the #defines needed by ADSP-BF514 that are not in the common header */ + +#endif /* _CDEF_BF514_H */ diff --git a/arch/blackfin/mach-bf518/include/mach/cdefBF516.h b/arch/blackfin/mach-bf518/include/mach/cdefBF516.h new file mode 100644 index 000000000000..17be34cbb7bf --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/cdefBF516.h @@ -0,0 +1,48 @@ +/* + * File: include/asm-blackfin/mach-bf518/cdefbf516.h + * Based on: + * Author: + * + * Created: + * Description: system mmr register map + * + * Rev: + * + * Modified: + * + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _CDEF_BF516_H +#define _CDEF_BF516_H + +/* include all Core registers and bit definitions */ +#include "defBF516.h" + +/* include core specific register pointer definitions */ +#include + +/* SYSTEM & MMR ADDRESS DEFINITIONS FOR ADSP-BF516 */ + +/* include cdefBF51x_base.h for the set of #defines that are common to all ADSP-BF51x processors */ +#include "cdefBF51x_base.h" + +/* The following are the #defines needed by ADSP-BF516 that are not in the common header */ + +#endif /* _CDEF_BF516_H */ diff --git a/arch/blackfin/mach-bf518/include/mach/cdefBF518.h b/arch/blackfin/mach-bf518/include/mach/cdefBF518.h new file mode 100644 index 000000000000..5f978d48cc6d --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/cdefBF518.h @@ -0,0 +1,48 @@ +/* + * File: include/asm-blackfin/mach-bf518/cdefbf518.h + * Based on: + * Author: + * + * Created: + * Description: system mmr register map + * + * Rev: + * + * Modified: + * + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _CDEF_BF518_H +#define _CDEF_BF518_H + +/* include all Core registers and bit definitions */ +#include "defBF518.h" + +/* include core specific register pointer definitions */ +#include + +/* SYSTEM & MMR ADDRESS DEFINITIONS FOR ADSP-BF518 */ + +/* include cdefBF51x_base.h for the set of #defines that are common to all ADSP-BF51x processors */ +#include "cdefBF51x_base.h" + +/* The following are the #defines needed by ADSP-BF518 that are not in the common header */ + +#endif /* _CDEF_BF518_H */ diff --git a/arch/blackfin/mach-bf518/include/mach/cdefBF51x_base.h b/arch/blackfin/mach-bf518/include/mach/cdefBF51x_base.h new file mode 100644 index 000000000000..95d54e625970 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/cdefBF51x_base.h @@ -0,0 +1,1204 @@ +/* + * File: include/asm-blackfin/mach-bf518/cdefBF51x_base.h + * Based on: + * Author: + * + * Created: + * Description: + * + * Rev: + * + * Modified: + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _CDEF_BF52X_H +#define _CDEF_BF52X_H + +#include +#include + +#include "defBF51x_base.h" + +/* Include core specific register pointer definitions */ +#include + +/* ==== begin from cdefBF534.h ==== */ + +/* Clock and System Control (0xFFC00000 - 0xFFC000FF) */ +#define bfin_read_PLL_CTL() bfin_read16(PLL_CTL) +/* Writing to PLL_CTL initiates a PLL relock sequence. */ +static __inline__ void bfin_write_PLL_CTL(unsigned int val) +{ + unsigned long flags, iwr0, iwr1; + + if (val == bfin_read_PLL_CTL()) + return; + + local_irq_save(flags); + /* Enable the PLL Wakeup bit in SIC IWR */ + iwr0 = bfin_read32(SIC_IWR0); + iwr1 = bfin_read32(SIC_IWR1); + /* Only allow PPL Wakeup) */ + bfin_write32(SIC_IWR0, IWR_ENABLE(0)); + bfin_write32(SIC_IWR1, 0); + + bfin_write16(PLL_CTL, val); + SSYNC(); + asm("IDLE;"); + + bfin_write32(SIC_IWR0, iwr0); + bfin_write32(SIC_IWR1, iwr1); + local_irq_restore(flags); +} +#define bfin_read_PLL_DIV() bfin_read16(PLL_DIV) +#define bfin_write_PLL_DIV(val) bfin_write16(PLL_DIV, val) +#define bfin_read_VR_CTL() bfin_read16(VR_CTL) +/* Writing to VR_CTL initiates a PLL relock sequence. */ +static __inline__ void bfin_write_VR_CTL(unsigned int val) +{ + unsigned long flags, iwr0, iwr1; + + if (val == bfin_read_VR_CTL()) + return; + + local_irq_save(flags); + /* Enable the PLL Wakeup bit in SIC IWR */ + iwr0 = bfin_read32(SIC_IWR0); + iwr1 = bfin_read32(SIC_IWR1); + /* Only allow PPL Wakeup) */ + bfin_write32(SIC_IWR0, IWR_ENABLE(0)); + bfin_write32(SIC_IWR1, 0); + + bfin_write16(VR_CTL, val); + SSYNC(); + asm("IDLE;"); + + bfin_write32(SIC_IWR0, iwr0); + bfin_write32(SIC_IWR1, iwr1); + local_irq_restore(flags); +} +#define bfin_read_PLL_STAT() bfin_read16(PLL_STAT) +#define bfin_write_PLL_STAT(val) bfin_write16(PLL_STAT, val) +#define bfin_read_PLL_LOCKCNT() bfin_read16(PLL_LOCKCNT) +#define bfin_write_PLL_LOCKCNT(val) bfin_write16(PLL_LOCKCNT, val) +#define bfin_read_CHIPID() bfin_read32(CHIPID) +#define bfin_write_CHIPID(val) bfin_write32(CHIPID, val) + + +/* System Interrupt Controller (0xFFC00100 - 0xFFC001FF) */ +#define bfin_read_SWRST() bfin_read16(SWRST) +#define bfin_write_SWRST(val) bfin_write16(SWRST, val) +#define bfin_read_SYSCR() bfin_read16(SYSCR) +#define bfin_write_SYSCR(val) bfin_write16(SYSCR, val) + +#define bfin_read_SIC_RVECT() bfin_read32(SIC_RVECT) +#define bfin_write_SIC_RVECT(val) bfin_write32(SIC_RVECT, val) +#define bfin_read_SIC_IMASK0() bfin_read32(SIC_IMASK0) +#define bfin_write_SIC_IMASK0(val) bfin_write32(SIC_IMASK0, val) +#define bfin_read_SIC_IMASK(x) bfin_read32(SIC_IMASK0 + (x << 6)) +#define bfin_write_SIC_IMASK(x, val) bfin_write32((SIC_IMASK0 + (x << 6)), val) + +#define bfin_read_SIC_IAR0() bfin_read32(SIC_IAR0) +#define bfin_write_SIC_IAR0(val) bfin_write32(SIC_IAR0, val) +#define bfin_read_SIC_IAR1() bfin_read32(SIC_IAR1) +#define bfin_write_SIC_IAR1(val) bfin_write32(SIC_IAR1, val) +#define bfin_read_SIC_IAR2() bfin_read32(SIC_IAR2) +#define bfin_write_SIC_IAR2(val) bfin_write32(SIC_IAR2, val) +#define bfin_read_SIC_IAR3() bfin_read32(SIC_IAR3) +#define bfin_write_SIC_IAR3(val) bfin_write32(SIC_IAR3, val) + +#define bfin_read_SIC_ISR0() bfin_read32(SIC_ISR0) +#define bfin_write_SIC_ISR0(val) bfin_write32(SIC_ISR0, val) +#define bfin_read_SIC_ISR(x) bfin_read32(SIC_ISR0 + (x << 6)) +#define bfin_write_SIC_ISR(x, val) bfin_write32((SIC_ISR0 + (x << 6)), val) + +#define bfin_read_SIC_IWR0() bfin_read32(SIC_IWR0) +#define bfin_write_SIC_IWR0(val) bfin_write32(SIC_IWR0, val) +#define bfin_read_SIC_IWR(x) bfin_read32(SIC_IWR0 + (x << 6)) +#define bfin_write_SIC_IWR(x, val) bfin_write32((SIC_IWR0 + (x << 6)), val) + +/* SIC Additions to ADSP-BF51x (0xFFC0014C - 0xFFC00162) */ + +#define bfin_read_SIC_IMASK1() bfin_read32(SIC_IMASK1) +#define bfin_write_SIC_IMASK1(val) bfin_write32(SIC_IMASK1, val) +#define bfin_read_SIC_IAR4() bfin_read32(SIC_IAR4) +#define bfin_write_SIC_IAR4(val) bfin_write32(SIC_IAR4, val) +#define bfin_read_SIC_IAR5() bfin_read32(SIC_IAR5) +#define bfin_write_SIC_IAR5(val) bfin_write32(SIC_IAR5, val) +#define bfin_read_SIC_IAR6() bfin_read32(SIC_IAR6) +#define bfin_write_SIC_IAR6(val) bfin_write32(SIC_IAR6, val) +#define bfin_read_SIC_IAR7() bfin_read32(SIC_IAR7) +#define bfin_write_SIC_IAR7(val) bfin_write32(SIC_IAR7, val) +#define bfin_read_SIC_ISR1() bfin_read32(SIC_ISR1) +#define bfin_write_SIC_ISR1(val) bfin_write32(SIC_ISR1, val) +#define bfin_read_SIC_IWR1() bfin_read32(SIC_IWR1) +#define bfin_write_SIC_IWR1(val) bfin_write32(SIC_IWR1, val) + +/* Watchdog Timer (0xFFC00200 - 0xFFC002FF) */ +#define bfin_read_WDOG_CTL() bfin_read16(WDOG_CTL) +#define bfin_write_WDOG_CTL(val) bfin_write16(WDOG_CTL, val) +#define bfin_read_WDOG_CNT() bfin_read32(WDOG_CNT) +#define bfin_write_WDOG_CNT(val) bfin_write32(WDOG_CNT, val) +#define bfin_read_WDOG_STAT() bfin_read32(WDOG_STAT) +#define bfin_write_WDOG_STAT(val) bfin_write32(WDOG_STAT, val) + + +/* Real Time Clock (0xFFC00300 - 0xFFC003FF) */ +#define bfin_read_RTC_STAT() bfin_read32(RTC_STAT) +#define bfin_write_RTC_STAT(val) bfin_write32(RTC_STAT, val) +#define bfin_read_RTC_ICTL() bfin_read16(RTC_ICTL) +#define bfin_write_RTC_ICTL(val) bfin_write16(RTC_ICTL, val) +#define bfin_read_RTC_ISTAT() bfin_read16(RTC_ISTAT) +#define bfin_write_RTC_ISTAT(val) bfin_write16(RTC_ISTAT, val) +#define bfin_read_RTC_SWCNT() bfin_read16(RTC_SWCNT) +#define bfin_write_RTC_SWCNT(val) bfin_write16(RTC_SWCNT, val) +#define bfin_read_RTC_ALARM() bfin_read32(RTC_ALARM) +#define bfin_write_RTC_ALARM(val) bfin_write32(RTC_ALARM, val) +#define bfin_read_RTC_FAST() bfin_read16(RTC_FAST) +#define bfin_write_RTC_FAST(val) bfin_write16(RTC_FAST, val) +#define bfin_read_RTC_PREN() bfin_read16(RTC_PREN) +#define bfin_write_RTC_PREN(val) bfin_write16(RTC_PREN, val) + + +/* UART0 Controller (0xFFC00400 - 0xFFC004FF) */ +#define bfin_read_UART0_THR() bfin_read16(UART0_THR) +#define bfin_write_UART0_THR(val) bfin_write16(UART0_THR, val) +#define bfin_read_UART0_RBR() bfin_read16(UART0_RBR) +#define bfin_write_UART0_RBR(val) bfin_write16(UART0_RBR, val) +#define bfin_read_UART0_DLL() bfin_read16(UART0_DLL) +#define bfin_write_UART0_DLL(val) bfin_write16(UART0_DLL, val) +#define bfin_read_UART0_IER() bfin_read16(UART0_IER) +#define bfin_write_UART0_IER(val) bfin_write16(UART0_IER, val) +#define bfin_read_UART0_DLH() bfin_read16(UART0_DLH) +#define bfin_write_UART0_DLH(val) bfin_write16(UART0_DLH, val) +#define bfin_read_UART0_IIR() bfin_read16(UART0_IIR) +#define bfin_write_UART0_IIR(val) bfin_write16(UART0_IIR, val) +#define bfin_read_UART0_LCR() bfin_read16(UART0_LCR) +#define bfin_write_UART0_LCR(val) bfin_write16(UART0_LCR, val) +#define bfin_read_UART0_MCR() bfin_read16(UART0_MCR) +#define bfin_write_UART0_MCR(val) bfin_write16(UART0_MCR, val) +#define bfin_read_UART0_LSR() bfin_read16(UART0_LSR) +#define bfin_write_UART0_LSR(val) bfin_write16(UART0_LSR, val) +#define bfin_read_UART0_MSR() bfin_read16(UART0_MSR) +#define bfin_write_UART0_MSR(val) bfin_write16(UART0_MSR, val) +#define bfin_read_UART0_SCR() bfin_read16(UART0_SCR) +#define bfin_write_UART0_SCR(val) bfin_write16(UART0_SCR, val) +#define bfin_read_UART0_GCTL() bfin_read16(UART0_GCTL) +#define bfin_write_UART0_GCTL(val) bfin_write16(UART0_GCTL, val) + + +/* SPI Controller (0xFFC00500 - 0xFFC005FF) */ +#define bfin_read_SPI_CTL() bfin_read16(SPI_CTL) +#define bfin_write_SPI_CTL(val) bfin_write16(SPI_CTL, val) +#define bfin_read_SPI_FLG() bfin_read16(SPI_FLG) +#define bfin_write_SPI_FLG(val) bfin_write16(SPI_FLG, val) +#define bfin_read_SPI_STAT() bfin_read16(SPI_STAT) +#define bfin_write_SPI_STAT(val) bfin_write16(SPI_STAT, val) +#define bfin_read_SPI_TDBR() bfin_read16(SPI_TDBR) +#define bfin_write_SPI_TDBR(val) bfin_write16(SPI_TDBR, val) +#define bfin_read_SPI_RDBR() bfin_read16(SPI_RDBR) +#define bfin_write_SPI_RDBR(val) bfin_write16(SPI_RDBR, val) +#define bfin_read_SPI_BAUD() bfin_read16(SPI_BAUD) +#define bfin_write_SPI_BAUD(val) bfin_write16(SPI_BAUD, val) +#define bfin_read_SPI_SHADOW() bfin_read16(SPI_SHADOW) +#define bfin_write_SPI_SHADOW(val) bfin_write16(SPI_SHADOW, val) + + +/* TIMER0-7 Registers (0xFFC00600 - 0xFFC006FF) */ +#define bfin_read_TIMER0_CONFIG() bfin_read16(TIMER0_CONFIG) +#define bfin_write_TIMER0_CONFIG(val) bfin_write16(TIMER0_CONFIG, val) +#define bfin_read_TIMER0_COUNTER() bfin_read32(TIMER0_COUNTER) +#define bfin_write_TIMER0_COUNTER(val) bfin_write32(TIMER0_COUNTER, val) +#define bfin_read_TIMER0_PERIOD() bfin_read32(TIMER0_PERIOD) +#define bfin_write_TIMER0_PERIOD(val) bfin_write32(TIMER0_PERIOD, val) +#define bfin_read_TIMER0_WIDTH() bfin_read32(TIMER0_WIDTH) +#define bfin_write_TIMER0_WIDTH(val) bfin_write32(TIMER0_WIDTH, val) + +#define bfin_read_TIMER1_CONFIG() bfin_read16(TIMER1_CONFIG) +#define bfin_write_TIMER1_CONFIG(val) bfin_write16(TIMER1_CONFIG, val) +#define bfin_read_TIMER1_COUNTER() bfin_read32(TIMER1_COUNTER) +#define bfin_write_TIMER1_COUNTER(val) bfin_write32(TIMER1_COUNTER, val) +#define bfin_read_TIMER1_PERIOD() bfin_read32(TIMER1_PERIOD) +#define bfin_write_TIMER1_PERIOD(val) bfin_write32(TIMER1_PERIOD, val) +#define bfin_read_TIMER1_WIDTH() bfin_read32(TIMER1_WIDTH) +#define bfin_write_TIMER1_WIDTH(val) bfin_write32(TIMER1_WIDTH, val) + +#define bfin_read_TIMER2_CONFIG() bfin_read16(TIMER2_CONFIG) +#define bfin_write_TIMER2_CONFIG(val) bfin_write16(TIMER2_CONFIG, val) +#define bfin_read_TIMER2_COUNTER() bfin_read32(TIMER2_COUNTER) +#define bfin_write_TIMER2_COUNTER(val) bfin_write32(TIMER2_COUNTER, val) +#define bfin_read_TIMER2_PERIOD() bfin_read32(TIMER2_PERIOD) +#define bfin_write_TIMER2_PERIOD(val) bfin_write32(TIMER2_PERIOD, val) +#define bfin_read_TIMER2_WIDTH() bfin_read32(TIMER2_WIDTH) +#define bfin_write_TIMER2_WIDTH(val) bfin_write32(TIMER2_WIDTH, val) + +#define bfin_read_TIMER3_CONFIG() bfin_read16(TIMER3_CONFIG) +#define bfin_write_TIMER3_CONFIG(val) bfin_write16(TIMER3_CONFIG, val) +#define bfin_read_TIMER3_COUNTER() bfin_read32(TIMER3_COUNTER) +#define bfin_write_TIMER3_COUNTER(val) bfin_write32(TIMER3_COUNTER, val) +#define bfin_read_TIMER3_PERIOD() bfin_read32(TIMER3_PERIOD) +#define bfin_write_TIMER3_PERIOD(val) bfin_write32(TIMER3_PERIOD, val) +#define bfin_read_TIMER3_WIDTH() bfin_read32(TIMER3_WIDTH) +#define bfin_write_TIMER3_WIDTH(val) bfin_write32(TIMER3_WIDTH, val) + +#define bfin_read_TIMER4_CONFIG() bfin_read16(TIMER4_CONFIG) +#define bfin_write_TIMER4_CONFIG(val) bfin_write16(TIMER4_CONFIG, val) +#define bfin_read_TIMER4_COUNTER() bfin_read32(TIMER4_COUNTER) +#define bfin_write_TIMER4_COUNTER(val) bfin_write32(TIMER4_COUNTER, val) +#define bfin_read_TIMER4_PERIOD() bfin_read32(TIMER4_PERIOD) +#define bfin_write_TIMER4_PERIOD(val) bfin_write32(TIMER4_PERIOD, val) +#define bfin_read_TIMER4_WIDTH() bfin_read32(TIMER4_WIDTH) +#define bfin_write_TIMER4_WIDTH(val) bfin_write32(TIMER4_WIDTH, val) + +#define bfin_read_TIMER5_CONFIG() bfin_read16(TIMER5_CONFIG) +#define bfin_write_TIMER5_CONFIG(val) bfin_write16(TIMER5_CONFIG, val) +#define bfin_read_TIMER5_COUNTER() bfin_read32(TIMER5_COUNTER) +#define bfin_write_TIMER5_COUNTER(val) bfin_write32(TIMER5_COUNTER, val) +#define bfin_read_TIMER5_PERIOD() bfin_read32(TIMER5_PERIOD) +#define bfin_write_TIMER5_PERIOD(val) bfin_write32(TIMER5_PERIOD, val) +#define bfin_read_TIMER5_WIDTH() bfin_read32(TIMER5_WIDTH) +#define bfin_write_TIMER5_WIDTH(val) bfin_write32(TIMER5_WIDTH, val) + +#define bfin_read_TIMER6_CONFIG() bfin_read16(TIMER6_CONFIG) +#define bfin_write_TIMER6_CONFIG(val) bfin_write16(TIMER6_CONFIG, val) +#define bfin_read_TIMER6_COUNTER() bfin_read32(TIMER6_COUNTER) +#define bfin_write_TIMER6_COUNTER(val) bfin_write32(TIMER6_COUNTER, val) +#define bfin_read_TIMER6_PERIOD() bfin_read32(TIMER6_PERIOD) +#define bfin_write_TIMER6_PERIOD(val) bfin_write32(TIMER6_PERIOD, val) +#define bfin_read_TIMER6_WIDTH() bfin_read32(TIMER6_WIDTH) +#define bfin_write_TIMER6_WIDTH(val) bfin_write32(TIMER6_WIDTH, val) + +#define bfin_read_TIMER7_CONFIG() bfin_read16(TIMER7_CONFIG) +#define bfin_write_TIMER7_CONFIG(val) bfin_write16(TIMER7_CONFIG, val) +#define bfin_read_TIMER7_COUNTER() bfin_read32(TIMER7_COUNTER) +#define bfin_write_TIMER7_COUNTER(val) bfin_write32(TIMER7_COUNTER, val) +#define bfin_read_TIMER7_PERIOD() bfin_read32(TIMER7_PERIOD) +#define bfin_write_TIMER7_PERIOD(val) bfin_write32(TIMER7_PERIOD, val) +#define bfin_read_TIMER7_WIDTH() bfin_read32(TIMER7_WIDTH) +#define bfin_write_TIMER7_WIDTH(val) bfin_write32(TIMER7_WIDTH, val) + +#define bfin_read_TIMER_ENABLE() bfin_read16(TIMER_ENABLE) +#define bfin_write_TIMER_ENABLE(val) bfin_write16(TIMER_ENABLE, val) +#define bfin_read_TIMER_DISABLE() bfin_read16(TIMER_DISABLE) +#define bfin_write_TIMER_DISABLE(val) bfin_write16(TIMER_DISABLE, val) +#define bfin_read_TIMER_STATUS() bfin_read32(TIMER_STATUS) +#define bfin_write_TIMER_STATUS(val) bfin_write32(TIMER_STATUS, val) + + +/* General Purpose I/O Port F (0xFFC00700 - 0xFFC007FF) */ +#define bfin_read_PORTFIO() bfin_read16(PORTFIO) +#define bfin_write_PORTFIO(val) bfin_write16(PORTFIO, val) +#define bfin_read_PORTFIO_CLEAR() bfin_read16(PORTFIO_CLEAR) +#define bfin_write_PORTFIO_CLEAR(val) bfin_write16(PORTFIO_CLEAR, val) +#define bfin_read_PORTFIO_SET() bfin_read16(PORTFIO_SET) +#define bfin_write_PORTFIO_SET(val) bfin_write16(PORTFIO_SET, val) +#define bfin_read_PORTFIO_TOGGLE() bfin_read16(PORTFIO_TOGGLE) +#define bfin_write_PORTFIO_TOGGLE(val) bfin_write16(PORTFIO_TOGGLE, val) +#define bfin_read_PORTFIO_MASKA() bfin_read16(PORTFIO_MASKA) +#define bfin_write_PORTFIO_MASKA(val) bfin_write16(PORTFIO_MASKA, val) +#define bfin_read_PORTFIO_MASKA_CLEAR() bfin_read16(PORTFIO_MASKA_CLEAR) +#define bfin_write_PORTFIO_MASKA_CLEAR(val) bfin_write16(PORTFIO_MASKA_CLEAR, val) +#define bfin_read_PORTFIO_MASKA_SET() bfin_read16(PORTFIO_MASKA_SET) +#define bfin_write_PORTFIO_MASKA_SET(val) bfin_write16(PORTFIO_MASKA_SET, val) +#define bfin_read_PORTFIO_MASKA_TOGGLE() bfin_read16(PORTFIO_MASKA_TOGGLE) +#define bfin_write_PORTFIO_MASKA_TOGGLE(val) bfin_write16(PORTFIO_MASKA_TOGGLE, val) +#define bfin_read_PORTFIO_MASKB() bfin_read16(PORTFIO_MASKB) +#define bfin_write_PORTFIO_MASKB(val) bfin_write16(PORTFIO_MASKB, val) +#define bfin_read_PORTFIO_MASKB_CLEAR() bfin_read16(PORTFIO_MASKB_CLEAR) +#define bfin_write_PORTFIO_MASKB_CLEAR(val) bfin_write16(PORTFIO_MASKB_CLEAR, val) +#define bfin_read_PORTFIO_MASKB_SET() bfin_read16(PORTFIO_MASKB_SET) +#define bfin_write_PORTFIO_MASKB_SET(val) bfin_write16(PORTFIO_MASKB_SET, val) +#define bfin_read_PORTFIO_MASKB_TOGGLE() bfin_read16(PORTFIO_MASKB_TOGGLE) +#define bfin_write_PORTFIO_MASKB_TOGGLE(val) bfin_write16(PORTFIO_MASKB_TOGGLE, val) +#define bfin_read_PORTFIO_DIR() bfin_read16(PORTFIO_DIR) +#define bfin_write_PORTFIO_DIR(val) bfin_write16(PORTFIO_DIR, val) +#define bfin_read_PORTFIO_POLAR() bfin_read16(PORTFIO_POLAR) +#define bfin_write_PORTFIO_POLAR(val) bfin_write16(PORTFIO_POLAR, val) +#define bfin_read_PORTFIO_EDGE() bfin_read16(PORTFIO_EDGE) +#define bfin_write_PORTFIO_EDGE(val) bfin_write16(PORTFIO_EDGE, val) +#define bfin_read_PORTFIO_BOTH() bfin_read16(PORTFIO_BOTH) +#define bfin_write_PORTFIO_BOTH(val) bfin_write16(PORTFIO_BOTH, val) +#define bfin_read_PORTFIO_INEN() bfin_read16(PORTFIO_INEN) +#define bfin_write_PORTFIO_INEN(val) bfin_write16(PORTFIO_INEN, val) + + +/* SPORT0 Controller (0xFFC00800 - 0xFFC008FF) */ +#define bfin_read_SPORT0_TCR1() bfin_read16(SPORT0_TCR1) +#define bfin_write_SPORT0_TCR1(val) bfin_write16(SPORT0_TCR1, val) +#define bfin_read_SPORT0_TCR2() bfin_read16(SPORT0_TCR2) +#define bfin_write_SPORT0_TCR2(val) bfin_write16(SPORT0_TCR2, val) +#define bfin_read_SPORT0_TCLKDIV() bfin_read16(SPORT0_TCLKDIV) +#define bfin_write_SPORT0_TCLKDIV(val) bfin_write16(SPORT0_TCLKDIV, val) +#define bfin_read_SPORT0_TFSDIV() bfin_read16(SPORT0_TFSDIV) +#define bfin_write_SPORT0_TFSDIV(val) bfin_write16(SPORT0_TFSDIV, val) +#define bfin_read_SPORT0_TX() bfin_read32(SPORT0_TX) +#define bfin_write_SPORT0_TX(val) bfin_write32(SPORT0_TX, val) +#define bfin_read_SPORT0_RX() bfin_read32(SPORT0_RX) +#define bfin_write_SPORT0_RX(val) bfin_write32(SPORT0_RX, val) +#define bfin_read_SPORT0_TX32() bfin_read32(SPORT0_TX32) +#define bfin_write_SPORT0_TX32(val) bfin_write32(SPORT0_TX32, val) +#define bfin_read_SPORT0_RX32() bfin_read32(SPORT0_RX32) +#define bfin_write_SPORT0_RX32(val) bfin_write32(SPORT0_RX32, val) +#define bfin_read_SPORT0_TX16() bfin_read16(SPORT0_TX16) +#define bfin_write_SPORT0_TX16(val) bfin_write16(SPORT0_TX16, val) +#define bfin_read_SPORT0_RX16() bfin_read16(SPORT0_RX16) +#define bfin_write_SPORT0_RX16(val) bfin_write16(SPORT0_RX16, val) +#define bfin_read_SPORT0_RCR1() bfin_read16(SPORT0_RCR1) +#define bfin_write_SPORT0_RCR1(val) bfin_write16(SPORT0_RCR1, val) +#define bfin_read_SPORT0_RCR2() bfin_read16(SPORT0_RCR2) +#define bfin_write_SPORT0_RCR2(val) bfin_write16(SPORT0_RCR2, val) +#define bfin_read_SPORT0_RCLKDIV() bfin_read16(SPORT0_RCLKDIV) +#define bfin_write_SPORT0_RCLKDIV(val) bfin_write16(SPORT0_RCLKDIV, val) +#define bfin_read_SPORT0_RFSDIV() bfin_read16(SPORT0_RFSDIV) +#define bfin_write_SPORT0_RFSDIV(val) bfin_write16(SPORT0_RFSDIV, val) +#define bfin_read_SPORT0_STAT() bfin_read16(SPORT0_STAT) +#define bfin_write_SPORT0_STAT(val) bfin_write16(SPORT0_STAT, val) +#define bfin_read_SPORT0_CHNL() bfin_read16(SPORT0_CHNL) +#define bfin_write_SPORT0_CHNL(val) bfin_write16(SPORT0_CHNL, val) +#define bfin_read_SPORT0_MCMC1() bfin_read16(SPORT0_MCMC1) +#define bfin_write_SPORT0_MCMC1(val) bfin_write16(SPORT0_MCMC1, val) +#define bfin_read_SPORT0_MCMC2() bfin_read16(SPORT0_MCMC2) +#define bfin_write_SPORT0_MCMC2(val) bfin_write16(SPORT0_MCMC2, val) +#define bfin_read_SPORT0_MTCS0() bfin_read32(SPORT0_MTCS0) +#define bfin_write_SPORT0_MTCS0(val) bfin_write32(SPORT0_MTCS0, val) +#define bfin_read_SPORT0_MTCS1() bfin_read32(SPORT0_MTCS1) +#define bfin_write_SPORT0_MTCS1(val) bfin_write32(SPORT0_MTCS1, val) +#define bfin_read_SPORT0_MTCS2() bfin_read32(SPORT0_MTCS2) +#define bfin_write_SPORT0_MTCS2(val) bfin_write32(SPORT0_MTCS2, val) +#define bfin_read_SPORT0_MTCS3() bfin_read32(SPORT0_MTCS3) +#define bfin_write_SPORT0_MTCS3(val) bfin_write32(SPORT0_MTCS3, val) +#define bfin_read_SPORT0_MRCS0() bfin_read32(SPORT0_MRCS0) +#define bfin_write_SPORT0_MRCS0(val) bfin_write32(SPORT0_MRCS0, val) +#define bfin_read_SPORT0_MRCS1() bfin_read32(SPORT0_MRCS1) +#define bfin_write_SPORT0_MRCS1(val) bfin_write32(SPORT0_MRCS1, val) +#define bfin_read_SPORT0_MRCS2() bfin_read32(SPORT0_MRCS2) +#define bfin_write_SPORT0_MRCS2(val) bfin_write32(SPORT0_MRCS2, val) +#define bfin_read_SPORT0_MRCS3() bfin_read32(SPORT0_MRCS3) +#define bfin_write_SPORT0_MRCS3(val) bfin_write32(SPORT0_MRCS3, val) + + +/* SPORT1 Controller (0xFFC00900 - 0xFFC009FF) */ +#define bfin_read_SPORT1_TCR1() bfin_read16(SPORT1_TCR1) +#define bfin_write_SPORT1_TCR1(val) bfin_write16(SPORT1_TCR1, val) +#define bfin_read_SPORT1_TCR2() bfin_read16(SPORT1_TCR2) +#define bfin_write_SPORT1_TCR2(val) bfin_write16(SPORT1_TCR2, val) +#define bfin_read_SPORT1_TCLKDIV() bfin_read16(SPORT1_TCLKDIV) +#define bfin_write_SPORT1_TCLKDIV(val) bfin_write16(SPORT1_TCLKDIV, val) +#define bfin_read_SPORT1_TFSDIV() bfin_read16(SPORT1_TFSDIV) +#define bfin_write_SPORT1_TFSDIV(val) bfin_write16(SPORT1_TFSDIV, val) +#define bfin_read_SPORT1_TX() bfin_read32(SPORT1_TX) +#define bfin_write_SPORT1_TX(val) bfin_write32(SPORT1_TX, val) +#define bfin_read_SPORT1_RX() bfin_read32(SPORT1_RX) +#define bfin_write_SPORT1_RX(val) bfin_write32(SPORT1_RX, val) +#define bfin_read_SPORT1_TX32() bfin_read32(SPORT1_TX32) +#define bfin_write_SPORT1_TX32(val) bfin_write32(SPORT1_TX32, val) +#define bfin_read_SPORT1_RX32() bfin_read32(SPORT1_RX32) +#define bfin_write_SPORT1_RX32(val) bfin_write32(SPORT1_RX32, val) +#define bfin_read_SPORT1_TX16() bfin_read16(SPORT1_TX16) +#define bfin_write_SPORT1_TX16(val) bfin_write16(SPORT1_TX16, val) +#define bfin_read_SPORT1_RX16() bfin_read16(SPORT1_RX16) +#define bfin_write_SPORT1_RX16(val) bfin_write16(SPORT1_RX16, val) +#define bfin_read_SPORT1_RCR1() bfin_read16(SPORT1_RCR1) +#define bfin_write_SPORT1_RCR1(val) bfin_write16(SPORT1_RCR1, val) +#define bfin_read_SPORT1_RCR2() bfin_read16(SPORT1_RCR2) +#define bfin_write_SPORT1_RCR2(val) bfin_write16(SPORT1_RCR2, val) +#define bfin_read_SPORT1_RCLKDIV() bfin_read16(SPORT1_RCLKDIV) +#define bfin_write_SPORT1_RCLKDIV(val) bfin_write16(SPORT1_RCLKDIV, val) +#define bfin_read_SPORT1_RFSDIV() bfin_read16(SPORT1_RFSDIV) +#define bfin_write_SPORT1_RFSDIV(val) bfin_write16(SPORT1_RFSDIV, val) +#define bfin_read_SPORT1_STAT() bfin_read16(SPORT1_STAT) +#define bfin_write_SPORT1_STAT(val) bfin_write16(SPORT1_STAT, val) +#define bfin_read_SPORT1_CHNL() bfin_read16(SPORT1_CHNL) +#define bfin_write_SPORT1_CHNL(val) bfin_write16(SPORT1_CHNL, val) +#define bfin_read_SPORT1_MCMC1() bfin_read16(SPORT1_MCMC1) +#define bfin_write_SPORT1_MCMC1(val) bfin_write16(SPORT1_MCMC1, val) +#define bfin_read_SPORT1_MCMC2() bfin_read16(SPORT1_MCMC2) +#define bfin_write_SPORT1_MCMC2(val) bfin_write16(SPORT1_MCMC2, val) +#define bfin_read_SPORT1_MTCS0() bfin_read32(SPORT1_MTCS0) +#define bfin_write_SPORT1_MTCS0(val) bfin_write32(SPORT1_MTCS0, val) +#define bfin_read_SPORT1_MTCS1() bfin_read32(SPORT1_MTCS1) +#define bfin_write_SPORT1_MTCS1(val) bfin_write32(SPORT1_MTCS1, val) +#define bfin_read_SPORT1_MTCS2() bfin_read32(SPORT1_MTCS2) +#define bfin_write_SPORT1_MTCS2(val) bfin_write32(SPORT1_MTCS2, val) +#define bfin_read_SPORT1_MTCS3() bfin_read32(SPORT1_MTCS3) +#define bfin_write_SPORT1_MTCS3(val) bfin_write32(SPORT1_MTCS3, val) +#define bfin_read_SPORT1_MRCS0() bfin_read32(SPORT1_MRCS0) +#define bfin_write_SPORT1_MRCS0(val) bfin_write32(SPORT1_MRCS0, val) +#define bfin_read_SPORT1_MRCS1() bfin_read32(SPORT1_MRCS1) +#define bfin_write_SPORT1_MRCS1(val) bfin_write32(SPORT1_MRCS1, val) +#define bfin_read_SPORT1_MRCS2() bfin_read32(SPORT1_MRCS2) +#define bfin_write_SPORT1_MRCS2(val) bfin_write32(SPORT1_MRCS2, val) +#define bfin_read_SPORT1_MRCS3() bfin_read32(SPORT1_MRCS3) +#define bfin_write_SPORT1_MRCS3(val) bfin_write32(SPORT1_MRCS3, val) + + +/* External Bus Interface Unit (0xFFC00A00 - 0xFFC00AFF) */ +#define bfin_read_EBIU_AMGCTL() bfin_read16(EBIU_AMGCTL) +#define bfin_write_EBIU_AMGCTL(val) bfin_write16(EBIU_AMGCTL, val) +#define bfin_read_EBIU_AMBCTL0() bfin_read32(EBIU_AMBCTL0) +#define bfin_write_EBIU_AMBCTL0(val) bfin_write32(EBIU_AMBCTL0, val) +#define bfin_read_EBIU_AMBCTL1() bfin_read32(EBIU_AMBCTL1) +#define bfin_write_EBIU_AMBCTL1(val) bfin_write32(EBIU_AMBCTL1, val) +#define bfin_read_EBIU_SDGCTL() bfin_read32(EBIU_SDGCTL) +#define bfin_write_EBIU_SDGCTL(val) bfin_write32(EBIU_SDGCTL, val) +#define bfin_read_EBIU_SDBCTL() bfin_read16(EBIU_SDBCTL) +#define bfin_write_EBIU_SDBCTL(val) bfin_write16(EBIU_SDBCTL, val) +#define bfin_read_EBIU_SDRRC() bfin_read16(EBIU_SDRRC) +#define bfin_write_EBIU_SDRRC(val) bfin_write16(EBIU_SDRRC, val) +#define bfin_read_EBIU_SDSTAT() bfin_read16(EBIU_SDSTAT) +#define bfin_write_EBIU_SDSTAT(val) bfin_write16(EBIU_SDSTAT, val) + + +/* DMA Traffic Control Registers */ +#define bfin_read_DMA_TC_PER() bfin_read16(DMA_TC_PER) +#define bfin_write_DMA_TC_PER(val) bfin_write16(DMA_TC_PER, val) +#define bfin_read_DMA_TC_CNT() bfin_read16(DMA_TC_CNT) +#define bfin_write_DMA_TC_CNT(val) bfin_write16(DMA_TC_CNT, val) + +/* Alternate deprecated register names (below) provided for backwards code compatibility */ +#define bfin_read_DMA_TCPER() bfin_read16(DMA_TCPER) +#define bfin_write_DMA_TCPER(val) bfin_write16(DMA_TCPER, val) +#define bfin_read_DMA_TCCNT() bfin_read16(DMA_TCCNT) +#define bfin_write_DMA_TCCNT(val) bfin_write16(DMA_TCCNT, val) + +/* DMA Controller */ +#define bfin_read_DMA0_CONFIG() bfin_read16(DMA0_CONFIG) +#define bfin_write_DMA0_CONFIG(val) bfin_write16(DMA0_CONFIG, val) +#define bfin_read_DMA0_NEXT_DESC_PTR() bfin_read32(DMA0_NEXT_DESC_PTR) +#define bfin_write_DMA0_NEXT_DESC_PTR(val) bfin_write32(DMA0_NEXT_DESC_PTR, val) +#define bfin_read_DMA0_START_ADDR() bfin_read32(DMA0_START_ADDR) +#define bfin_write_DMA0_START_ADDR(val) bfin_write32(DMA0_START_ADDR, val) +#define bfin_read_DMA0_X_COUNT() bfin_read16(DMA0_X_COUNT) +#define bfin_write_DMA0_X_COUNT(val) bfin_write16(DMA0_X_COUNT, val) +#define bfin_read_DMA0_Y_COUNT() bfin_read16(DMA0_Y_COUNT) +#define bfin_write_DMA0_Y_COUNT(val) bfin_write16(DMA0_Y_COUNT, val) +#define bfin_read_DMA0_X_MODIFY() bfin_read16(DMA0_X_MODIFY) +#define bfin_write_DMA0_X_MODIFY(val) bfin_write16(DMA0_X_MODIFY, val) +#define bfin_read_DMA0_Y_MODIFY() bfin_read16(DMA0_Y_MODIFY) +#define bfin_write_DMA0_Y_MODIFY(val) bfin_write16(DMA0_Y_MODIFY, val) +#define bfin_read_DMA0_CURR_DESC_PTR() bfin_read32(DMA0_CURR_DESC_PTR) +#define bfin_write_DMA0_CURR_DESC_PTR(val) bfin_write32(DMA0_CURR_DESC_PTR, val) +#define bfin_read_DMA0_CURR_ADDR() bfin_read32(DMA0_CURR_ADDR) +#define bfin_write_DMA0_CURR_ADDR(val) bfin_write32(DMA0_CURR_ADDR, val) +#define bfin_read_DMA0_CURR_X_COUNT() bfin_read16(DMA0_CURR_X_COUNT) +#define bfin_write_DMA0_CURR_X_COUNT(val) bfin_write16(DMA0_CURR_X_COUNT, val) +#define bfin_read_DMA0_CURR_Y_COUNT() bfin_read16(DMA0_CURR_Y_COUNT) +#define bfin_write_DMA0_CURR_Y_COUNT(val) bfin_write16(DMA0_CURR_Y_COUNT, val) +#define bfin_read_DMA0_IRQ_STATUS() bfin_read16(DMA0_IRQ_STATUS) +#define bfin_write_DMA0_IRQ_STATUS(val) bfin_write16(DMA0_IRQ_STATUS, val) +#define bfin_read_DMA0_PERIPHERAL_MAP() bfin_read16(DMA0_PERIPHERAL_MAP) +#define bfin_write_DMA0_PERIPHERAL_MAP(val) bfin_write16(DMA0_PERIPHERAL_MAP, val) + +#define bfin_read_DMA1_CONFIG() bfin_read16(DMA1_CONFIG) +#define bfin_write_DMA1_CONFIG(val) bfin_write16(DMA1_CONFIG, val) +#define bfin_read_DMA1_NEXT_DESC_PTR() bfin_read32(DMA1_NEXT_DESC_PTR) +#define bfin_write_DMA1_NEXT_DESC_PTR(val) bfin_write32(DMA1_NEXT_DESC_PTR, val) +#define bfin_read_DMA1_START_ADDR() bfin_read32(DMA1_START_ADDR) +#define bfin_write_DMA1_START_ADDR(val) bfin_write32(DMA1_START_ADDR, val) +#define bfin_read_DMA1_X_COUNT() bfin_read16(DMA1_X_COUNT) +#define bfin_write_DMA1_X_COUNT(val) bfin_write16(DMA1_X_COUNT, val) +#define bfin_read_DMA1_Y_COUNT() bfin_read16(DMA1_Y_COUNT) +#define bfin_write_DMA1_Y_COUNT(val) bfin_write16(DMA1_Y_COUNT, val) +#define bfin_read_DMA1_X_MODIFY() bfin_read16(DMA1_X_MODIFY) +#define bfin_write_DMA1_X_MODIFY(val) bfin_write16(DMA1_X_MODIFY, val) +#define bfin_read_DMA1_Y_MODIFY() bfin_read16(DMA1_Y_MODIFY) +#define bfin_write_DMA1_Y_MODIFY(val) bfin_write16(DMA1_Y_MODIFY, val) +#define bfin_read_DMA1_CURR_DESC_PTR() bfin_read32(DMA1_CURR_DESC_PTR) +#define bfin_write_DMA1_CURR_DESC_PTR(val) bfin_write32(DMA1_CURR_DESC_PTR, val) +#define bfin_read_DMA1_CURR_ADDR() bfin_read32(DMA1_CURR_ADDR) +#define bfin_write_DMA1_CURR_ADDR(val) bfin_write32(DMA1_CURR_ADDR, val) +#define bfin_read_DMA1_CURR_X_COUNT() bfin_read16(DMA1_CURR_X_COUNT) +#define bfin_write_DMA1_CURR_X_COUNT(val) bfin_write16(DMA1_CURR_X_COUNT, val) +#define bfin_read_DMA1_CURR_Y_COUNT() bfin_read16(DMA1_CURR_Y_COUNT) +#define bfin_write_DMA1_CURR_Y_COUNT(val) bfin_write16(DMA1_CURR_Y_COUNT, val) +#define bfin_read_DMA1_IRQ_STATUS() bfin_read16(DMA1_IRQ_STATUS) +#define bfin_write_DMA1_IRQ_STATUS(val) bfin_write16(DMA1_IRQ_STATUS, val) +#define bfin_read_DMA1_PERIPHERAL_MAP() bfin_read16(DMA1_PERIPHERAL_MAP) +#define bfin_write_DMA1_PERIPHERAL_MAP(val) bfin_write16(DMA1_PERIPHERAL_MAP, val) + +#define bfin_read_DMA2_CONFIG() bfin_read16(DMA2_CONFIG) +#define bfin_write_DMA2_CONFIG(val) bfin_write16(DMA2_CONFIG, val) +#define bfin_read_DMA2_NEXT_DESC_PTR() bfin_read32(DMA2_NEXT_DESC_PTR) +#define bfin_write_DMA2_NEXT_DESC_PTR(val) bfin_write32(DMA2_NEXT_DESC_PTR, val) +#define bfin_read_DMA2_START_ADDR() bfin_read32(DMA2_START_ADDR) +#define bfin_write_DMA2_START_ADDR(val) bfin_write32(DMA2_START_ADDR, val) +#define bfin_read_DMA2_X_COUNT() bfin_read16(DMA2_X_COUNT) +#define bfin_write_DMA2_X_COUNT(val) bfin_write16(DMA2_X_COUNT, val) +#define bfin_read_DMA2_Y_COUNT() bfin_read16(DMA2_Y_COUNT) +#define bfin_write_DMA2_Y_COUNT(val) bfin_write16(DMA2_Y_COUNT, val) +#define bfin_read_DMA2_X_MODIFY() bfin_read16(DMA2_X_MODIFY) +#define bfin_write_DMA2_X_MODIFY(val) bfin_write16(DMA2_X_MODIFY, val) +#define bfin_read_DMA2_Y_MODIFY() bfin_read16(DMA2_Y_MODIFY) +#define bfin_write_DMA2_Y_MODIFY(val) bfin_write16(DMA2_Y_MODIFY, val) +#define bfin_read_DMA2_CURR_DESC_PTR() bfin_read32(DMA2_CURR_DESC_PTR) +#define bfin_write_DMA2_CURR_DESC_PTR(val) bfin_write32(DMA2_CURR_DESC_PTR, val) +#define bfin_read_DMA2_CURR_ADDR() bfin_read32(DMA2_CURR_ADDR) +#define bfin_write_DMA2_CURR_ADDR(val) bfin_write32(DMA2_CURR_ADDR, val) +#define bfin_read_DMA2_CURR_X_COUNT() bfin_read16(DMA2_CURR_X_COUNT) +#define bfin_write_DMA2_CURR_X_COUNT(val) bfin_write16(DMA2_CURR_X_COUNT, val) +#define bfin_read_DMA2_CURR_Y_COUNT() bfin_read16(DMA2_CURR_Y_COUNT) +#define bfin_write_DMA2_CURR_Y_COUNT(val) bfin_write16(DMA2_CURR_Y_COUNT, val) +#define bfin_read_DMA2_IRQ_STATUS() bfin_read16(DMA2_IRQ_STATUS) +#define bfin_write_DMA2_IRQ_STATUS(val) bfin_write16(DMA2_IRQ_STATUS, val) +#define bfin_read_DMA2_PERIPHERAL_MAP() bfin_read16(DMA2_PERIPHERAL_MAP) +#define bfin_write_DMA2_PERIPHERAL_MAP(val) bfin_write16(DMA2_PERIPHERAL_MAP, val) + +#define bfin_read_DMA3_CONFIG() bfin_read16(DMA3_CONFIG) +#define bfin_write_DMA3_CONFIG(val) bfin_write16(DMA3_CONFIG, val) +#define bfin_read_DMA3_NEXT_DESC_PTR() bfin_read32(DMA3_NEXT_DESC_PTR) +#define bfin_write_DMA3_NEXT_DESC_PTR(val) bfin_write32(DMA3_NEXT_DESC_PTR, val) +#define bfin_read_DMA3_START_ADDR() bfin_read32(DMA3_START_ADDR) +#define bfin_write_DMA3_START_ADDR(val) bfin_write32(DMA3_START_ADDR, val) +#define bfin_read_DMA3_X_COUNT() bfin_read16(DMA3_X_COUNT) +#define bfin_write_DMA3_X_COUNT(val) bfin_write16(DMA3_X_COUNT, val) +#define bfin_read_DMA3_Y_COUNT() bfin_read16(DMA3_Y_COUNT) +#define bfin_write_DMA3_Y_COUNT(val) bfin_write16(DMA3_Y_COUNT, val) +#define bfin_read_DMA3_X_MODIFY() bfin_read16(DMA3_X_MODIFY) +#define bfin_write_DMA3_X_MODIFY(val) bfin_write16(DMA3_X_MODIFY, val) +#define bfin_read_DMA3_Y_MODIFY() bfin_read16(DMA3_Y_MODIFY) +#define bfin_write_DMA3_Y_MODIFY(val) bfin_write16(DMA3_Y_MODIFY, val) +#define bfin_read_DMA3_CURR_DESC_PTR() bfin_read32(DMA3_CURR_DESC_PTR) +#define bfin_write_DMA3_CURR_DESC_PTR(val) bfin_write32(DMA3_CURR_DESC_PTR, val) +#define bfin_read_DMA3_CURR_ADDR() bfin_read32(DMA3_CURR_ADDR) +#define bfin_write_DMA3_CURR_ADDR(val) bfin_write32(DMA3_CURR_ADDR, val) +#define bfin_read_DMA3_CURR_X_COUNT() bfin_read16(DMA3_CURR_X_COUNT) +#define bfin_write_DMA3_CURR_X_COUNT(val) bfin_write16(DMA3_CURR_X_COUNT, val) +#define bfin_read_DMA3_CURR_Y_COUNT() bfin_read16(DMA3_CURR_Y_COUNT) +#define bfin_write_DMA3_CURR_Y_COUNT(val) bfin_write16(DMA3_CURR_Y_COUNT, val) +#define bfin_read_DMA3_IRQ_STATUS() bfin_read16(DMA3_IRQ_STATUS) +#define bfin_write_DMA3_IRQ_STATUS(val) bfin_write16(DMA3_IRQ_STATUS, val) +#define bfin_read_DMA3_PERIPHERAL_MAP() bfin_read16(DMA3_PERIPHERAL_MAP) +#define bfin_write_DMA3_PERIPHERAL_MAP(val) bfin_write16(DMA3_PERIPHERAL_MAP, val) + +#define bfin_read_DMA4_CONFIG() bfin_read16(DMA4_CONFIG) +#define bfin_write_DMA4_CONFIG(val) bfin_write16(DMA4_CONFIG, val) +#define bfin_read_DMA4_NEXT_DESC_PTR() bfin_read32(DMA4_NEXT_DESC_PTR) +#define bfin_write_DMA4_NEXT_DESC_PTR(val) bfin_write32(DMA4_NEXT_DESC_PTR, val) +#define bfin_read_DMA4_START_ADDR() bfin_read32(DMA4_START_ADDR) +#define bfin_write_DMA4_START_ADDR(val) bfin_write32(DMA4_START_ADDR, val) +#define bfin_read_DMA4_X_COUNT() bfin_read16(DMA4_X_COUNT) +#define bfin_write_DMA4_X_COUNT(val) bfin_write16(DMA4_X_COUNT, val) +#define bfin_read_DMA4_Y_COUNT() bfin_read16(DMA4_Y_COUNT) +#define bfin_write_DMA4_Y_COUNT(val) bfin_write16(DMA4_Y_COUNT, val) +#define bfin_read_DMA4_X_MODIFY() bfin_read16(DMA4_X_MODIFY) +#define bfin_write_DMA4_X_MODIFY(val) bfin_write16(DMA4_X_MODIFY, val) +#define bfin_read_DMA4_Y_MODIFY() bfin_read16(DMA4_Y_MODIFY) +#define bfin_write_DMA4_Y_MODIFY(val) bfin_write16(DMA4_Y_MODIFY, val) +#define bfin_read_DMA4_CURR_DESC_PTR() bfin_read32(DMA4_CURR_DESC_PTR) +#define bfin_write_DMA4_CURR_DESC_PTR(val) bfin_write32(DMA4_CURR_DESC_PTR, val) +#define bfin_read_DMA4_CURR_ADDR() bfin_read32(DMA4_CURR_ADDR) +#define bfin_write_DMA4_CURR_ADDR(val) bfin_write32(DMA4_CURR_ADDR, val) +#define bfin_read_DMA4_CURR_X_COUNT() bfin_read16(DMA4_CURR_X_COUNT) +#define bfin_write_DMA4_CURR_X_COUNT(val) bfin_write16(DMA4_CURR_X_COUNT, val) +#define bfin_read_DMA4_CURR_Y_COUNT() bfin_read16(DMA4_CURR_Y_COUNT) +#define bfin_write_DMA4_CURR_Y_COUNT(val) bfin_write16(DMA4_CURR_Y_COUNT, val) +#define bfin_read_DMA4_IRQ_STATUS() bfin_read16(DMA4_IRQ_STATUS) +#define bfin_write_DMA4_IRQ_STATUS(val) bfin_write16(DMA4_IRQ_STATUS, val) +#define bfin_read_DMA4_PERIPHERAL_MAP() bfin_read16(DMA4_PERIPHERAL_MAP) +#define bfin_write_DMA4_PERIPHERAL_MAP(val) bfin_write16(DMA4_PERIPHERAL_MAP, val) + +#define bfin_read_DMA5_CONFIG() bfin_read16(DMA5_CONFIG) +#define bfin_write_DMA5_CONFIG(val) bfin_write16(DMA5_CONFIG, val) +#define bfin_read_DMA5_NEXT_DESC_PTR() bfin_read32(DMA5_NEXT_DESC_PTR) +#define bfin_write_DMA5_NEXT_DESC_PTR(val) bfin_write32(DMA5_NEXT_DESC_PTR, val) +#define bfin_read_DMA5_START_ADDR() bfin_read32(DMA5_START_ADDR) +#define bfin_write_DMA5_START_ADDR(val) bfin_write32(DMA5_START_ADDR, val) +#define bfin_read_DMA5_X_COUNT() bfin_read16(DMA5_X_COUNT) +#define bfin_write_DMA5_X_COUNT(val) bfin_write16(DMA5_X_COUNT, val) +#define bfin_read_DMA5_Y_COUNT() bfin_read16(DMA5_Y_COUNT) +#define bfin_write_DMA5_Y_COUNT(val) bfin_write16(DMA5_Y_COUNT, val) +#define bfin_read_DMA5_X_MODIFY() bfin_read16(DMA5_X_MODIFY) +#define bfin_write_DMA5_X_MODIFY(val) bfin_write16(DMA5_X_MODIFY, val) +#define bfin_read_DMA5_Y_MODIFY() bfin_read16(DMA5_Y_MODIFY) +#define bfin_write_DMA5_Y_MODIFY(val) bfin_write16(DMA5_Y_MODIFY, val) +#define bfin_read_DMA5_CURR_DESC_PTR() bfin_read32(DMA5_CURR_DESC_PTR) +#define bfin_write_DMA5_CURR_DESC_PTR(val) bfin_write32(DMA5_CURR_DESC_PTR, val) +#define bfin_read_DMA5_CURR_ADDR() bfin_read32(DMA5_CURR_ADDR) +#define bfin_write_DMA5_CURR_ADDR(val) bfin_write32(DMA5_CURR_ADDR, val) +#define bfin_read_DMA5_CURR_X_COUNT() bfin_read16(DMA5_CURR_X_COUNT) +#define bfin_write_DMA5_CURR_X_COUNT(val) bfin_write16(DMA5_CURR_X_COUNT, val) +#define bfin_read_DMA5_CURR_Y_COUNT() bfin_read16(DMA5_CURR_Y_COUNT) +#define bfin_write_DMA5_CURR_Y_COUNT(val) bfin_write16(DMA5_CURR_Y_COUNT, val) +#define bfin_read_DMA5_IRQ_STATUS() bfin_read16(DMA5_IRQ_STATUS) +#define bfin_write_DMA5_IRQ_STATUS(val) bfin_write16(DMA5_IRQ_STATUS, val) +#define bfin_read_DMA5_PERIPHERAL_MAP() bfin_read16(DMA5_PERIPHERAL_MAP) +#define bfin_write_DMA5_PERIPHERAL_MAP(val) bfin_write16(DMA5_PERIPHERAL_MAP, val) + +#define bfin_read_DMA6_CONFIG() bfin_read16(DMA6_CONFIG) +#define bfin_write_DMA6_CONFIG(val) bfin_write16(DMA6_CONFIG, val) +#define bfin_read_DMA6_NEXT_DESC_PTR() bfin_read32(DMA6_NEXT_DESC_PTR) +#define bfin_write_DMA6_NEXT_DESC_PTR(val) bfin_write32(DMA6_NEXT_DESC_PTR, val) +#define bfin_read_DMA6_START_ADDR() bfin_read32(DMA6_START_ADDR) +#define bfin_write_DMA6_START_ADDR(val) bfin_write32(DMA6_START_ADDR, val) +#define bfin_read_DMA6_X_COUNT() bfin_read16(DMA6_X_COUNT) +#define bfin_write_DMA6_X_COUNT(val) bfin_write16(DMA6_X_COUNT, val) +#define bfin_read_DMA6_Y_COUNT() bfin_read16(DMA6_Y_COUNT) +#define bfin_write_DMA6_Y_COUNT(val) bfin_write16(DMA6_Y_COUNT, val) +#define bfin_read_DMA6_X_MODIFY() bfin_read16(DMA6_X_MODIFY) +#define bfin_write_DMA6_X_MODIFY(val) bfin_write16(DMA6_X_MODIFY, val) +#define bfin_read_DMA6_Y_MODIFY() bfin_read16(DMA6_Y_MODIFY) +#define bfin_write_DMA6_Y_MODIFY(val) bfin_write16(DMA6_Y_MODIFY, val) +#define bfin_read_DMA6_CURR_DESC_PTR() bfin_read32(DMA6_CURR_DESC_PTR) +#define bfin_write_DMA6_CURR_DESC_PTR(val) bfin_write32(DMA6_CURR_DESC_PTR, val) +#define bfin_read_DMA6_CURR_ADDR() bfin_read32(DMA6_CURR_ADDR) +#define bfin_write_DMA6_CURR_ADDR(val) bfin_write32(DMA6_CURR_ADDR, val) +#define bfin_read_DMA6_CURR_X_COUNT() bfin_read16(DMA6_CURR_X_COUNT) +#define bfin_write_DMA6_CURR_X_COUNT(val) bfin_write16(DMA6_CURR_X_COUNT, val) +#define bfin_read_DMA6_CURR_Y_COUNT() bfin_read16(DMA6_CURR_Y_COUNT) +#define bfin_write_DMA6_CURR_Y_COUNT(val) bfin_write16(DMA6_CURR_Y_COUNT, val) +#define bfin_read_DMA6_IRQ_STATUS() bfin_read16(DMA6_IRQ_STATUS) +#define bfin_write_DMA6_IRQ_STATUS(val) bfin_write16(DMA6_IRQ_STATUS, val) +#define bfin_read_DMA6_PERIPHERAL_MAP() bfin_read16(DMA6_PERIPHERAL_MAP) +#define bfin_write_DMA6_PERIPHERAL_MAP(val) bfin_write16(DMA6_PERIPHERAL_MAP, val) + +#define bfin_read_DMA7_CONFIG() bfin_read16(DMA7_CONFIG) +#define bfin_write_DMA7_CONFIG(val) bfin_write16(DMA7_CONFIG, val) +#define bfin_read_DMA7_NEXT_DESC_PTR() bfin_read32(DMA7_NEXT_DESC_PTR) +#define bfin_write_DMA7_NEXT_DESC_PTR(val) bfin_write32(DMA7_NEXT_DESC_PTR, val) +#define bfin_read_DMA7_START_ADDR() bfin_read32(DMA7_START_ADDR) +#define bfin_write_DMA7_START_ADDR(val) bfin_write32(DMA7_START_ADDR, val) +#define bfin_read_DMA7_X_COUNT() bfin_read16(DMA7_X_COUNT) +#define bfin_write_DMA7_X_COUNT(val) bfin_write16(DMA7_X_COUNT, val) +#define bfin_read_DMA7_Y_COUNT() bfin_read16(DMA7_Y_COUNT) +#define bfin_write_DMA7_Y_COUNT(val) bfin_write16(DMA7_Y_COUNT, val) +#define bfin_read_DMA7_X_MODIFY() bfin_read16(DMA7_X_MODIFY) +#define bfin_write_DMA7_X_MODIFY(val) bfin_write16(DMA7_X_MODIFY, val) +#define bfin_read_DMA7_Y_MODIFY() bfin_read16(DMA7_Y_MODIFY) +#define bfin_write_DMA7_Y_MODIFY(val) bfin_write16(DMA7_Y_MODIFY, val) +#define bfin_read_DMA7_CURR_DESC_PTR() bfin_read32(DMA7_CURR_DESC_PTR) +#define bfin_write_DMA7_CURR_DESC_PTR(val) bfin_write32(DMA7_CURR_DESC_PTR, val) +#define bfin_read_DMA7_CURR_ADDR() bfin_read32(DMA7_CURR_ADDR) +#define bfin_write_DMA7_CURR_ADDR(val) bfin_write32(DMA7_CURR_ADDR, val) +#define bfin_read_DMA7_CURR_X_COUNT() bfin_read16(DMA7_CURR_X_COUNT) +#define bfin_write_DMA7_CURR_X_COUNT(val) bfin_write16(DMA7_CURR_X_COUNT, val) +#define bfin_read_DMA7_CURR_Y_COUNT() bfin_read16(DMA7_CURR_Y_COUNT) +#define bfin_write_DMA7_CURR_Y_COUNT(val) bfin_write16(DMA7_CURR_Y_COUNT, val) +#define bfin_read_DMA7_IRQ_STATUS() bfin_read16(DMA7_IRQ_STATUS) +#define bfin_write_DMA7_IRQ_STATUS(val) bfin_write16(DMA7_IRQ_STATUS, val) +#define bfin_read_DMA7_PERIPHERAL_MAP() bfin_read16(DMA7_PERIPHERAL_MAP) +#define bfin_write_DMA7_PERIPHERAL_MAP(val) bfin_write16(DMA7_PERIPHERAL_MAP, val) + +#define bfin_read_DMA8_CONFIG() bfin_read16(DMA8_CONFIG) +#define bfin_write_DMA8_CONFIG(val) bfin_write16(DMA8_CONFIG, val) +#define bfin_read_DMA8_NEXT_DESC_PTR() bfin_read32(DMA8_NEXT_DESC_PTR) +#define bfin_write_DMA8_NEXT_DESC_PTR(val) bfin_write32(DMA8_NEXT_DESC_PTR, val) +#define bfin_read_DMA8_START_ADDR() bfin_read32(DMA8_START_ADDR) +#define bfin_write_DMA8_START_ADDR(val) bfin_write32(DMA8_START_ADDR, val) +#define bfin_read_DMA8_X_COUNT() bfin_read16(DMA8_X_COUNT) +#define bfin_write_DMA8_X_COUNT(val) bfin_write16(DMA8_X_COUNT, val) +#define bfin_read_DMA8_Y_COUNT() bfin_read16(DMA8_Y_COUNT) +#define bfin_write_DMA8_Y_COUNT(val) bfin_write16(DMA8_Y_COUNT, val) +#define bfin_read_DMA8_X_MODIFY() bfin_read16(DMA8_X_MODIFY) +#define bfin_write_DMA8_X_MODIFY(val) bfin_write16(DMA8_X_MODIFY, val) +#define bfin_read_DMA8_Y_MODIFY() bfin_read16(DMA8_Y_MODIFY) +#define bfin_write_DMA8_Y_MODIFY(val) bfin_write16(DMA8_Y_MODIFY, val) +#define bfin_read_DMA8_CURR_DESC_PTR() bfin_read32(DMA8_CURR_DESC_PTR) +#define bfin_write_DMA8_CURR_DESC_PTR(val) bfin_write32(DMA8_CURR_DESC_PTR, val) +#define bfin_read_DMA8_CURR_ADDR() bfin_read32(DMA8_CURR_ADDR) +#define bfin_write_DMA8_CURR_ADDR(val) bfin_write32(DMA8_CURR_ADDR, val) +#define bfin_read_DMA8_CURR_X_COUNT() bfin_read16(DMA8_CURR_X_COUNT) +#define bfin_write_DMA8_CURR_X_COUNT(val) bfin_write16(DMA8_CURR_X_COUNT, val) +#define bfin_read_DMA8_CURR_Y_COUNT() bfin_read16(DMA8_CURR_Y_COUNT) +#define bfin_write_DMA8_CURR_Y_COUNT(val) bfin_write16(DMA8_CURR_Y_COUNT, val) +#define bfin_read_DMA8_IRQ_STATUS() bfin_read16(DMA8_IRQ_STATUS) +#define bfin_write_DMA8_IRQ_STATUS(val) bfin_write16(DMA8_IRQ_STATUS, val) +#define bfin_read_DMA8_PERIPHERAL_MAP() bfin_read16(DMA8_PERIPHERAL_MAP) +#define bfin_write_DMA8_PERIPHERAL_MAP(val) bfin_write16(DMA8_PERIPHERAL_MAP, val) + +#define bfin_read_DMA9_CONFIG() bfin_read16(DMA9_CONFIG) +#define bfin_write_DMA9_CONFIG(val) bfin_write16(DMA9_CONFIG, val) +#define bfin_read_DMA9_NEXT_DESC_PTR() bfin_read32(DMA9_NEXT_DESC_PTR) +#define bfin_write_DMA9_NEXT_DESC_PTR(val) bfin_write32(DMA9_NEXT_DESC_PTR, val) +#define bfin_read_DMA9_START_ADDR() bfin_read32(DMA9_START_ADDR) +#define bfin_write_DMA9_START_ADDR(val) bfin_write32(DMA9_START_ADDR, val) +#define bfin_read_DMA9_X_COUNT() bfin_read16(DMA9_X_COUNT) +#define bfin_write_DMA9_X_COUNT(val) bfin_write16(DMA9_X_COUNT, val) +#define bfin_read_DMA9_Y_COUNT() bfin_read16(DMA9_Y_COUNT) +#define bfin_write_DMA9_Y_COUNT(val) bfin_write16(DMA9_Y_COUNT, val) +#define bfin_read_DMA9_X_MODIFY() bfin_read16(DMA9_X_MODIFY) +#define bfin_write_DMA9_X_MODIFY(val) bfin_write16(DMA9_X_MODIFY, val) +#define bfin_read_DMA9_Y_MODIFY() bfin_read16(DMA9_Y_MODIFY) +#define bfin_write_DMA9_Y_MODIFY(val) bfin_write16(DMA9_Y_MODIFY, val) +#define bfin_read_DMA9_CURR_DESC_PTR() bfin_read32(DMA9_CURR_DESC_PTR) +#define bfin_write_DMA9_CURR_DESC_PTR(val) bfin_write32(DMA9_CURR_DESC_PTR, val) +#define bfin_read_DMA9_CURR_ADDR() bfin_read32(DMA9_CURR_ADDR) +#define bfin_write_DMA9_CURR_ADDR(val) bfin_write32(DMA9_CURR_ADDR, val) +#define bfin_read_DMA9_CURR_X_COUNT() bfin_read16(DMA9_CURR_X_COUNT) +#define bfin_write_DMA9_CURR_X_COUNT(val) bfin_write16(DMA9_CURR_X_COUNT, val) +#define bfin_read_DMA9_CURR_Y_COUNT() bfin_read16(DMA9_CURR_Y_COUNT) +#define bfin_write_DMA9_CURR_Y_COUNT(val) bfin_write16(DMA9_CURR_Y_COUNT, val) +#define bfin_read_DMA9_IRQ_STATUS() bfin_read16(DMA9_IRQ_STATUS) +#define bfin_write_DMA9_IRQ_STATUS(val) bfin_write16(DMA9_IRQ_STATUS, val) +#define bfin_read_DMA9_PERIPHERAL_MAP() bfin_read16(DMA9_PERIPHERAL_MAP) +#define bfin_write_DMA9_PERIPHERAL_MAP(val) bfin_write16(DMA9_PERIPHERAL_MAP, val) + +#define bfin_read_DMA10_CONFIG() bfin_read16(DMA10_CONFIG) +#define bfin_write_DMA10_CONFIG(val) bfin_write16(DMA10_CONFIG, val) +#define bfin_read_DMA10_NEXT_DESC_PTR() bfin_read32(DMA10_NEXT_DESC_PTR) +#define bfin_write_DMA10_NEXT_DESC_PTR(val) bfin_write32(DMA10_NEXT_DESC_PTR, val) +#define bfin_read_DMA10_START_ADDR() bfin_read32(DMA10_START_ADDR) +#define bfin_write_DMA10_START_ADDR(val) bfin_write32(DMA10_START_ADDR, val) +#define bfin_read_DMA10_X_COUNT() bfin_read16(DMA10_X_COUNT) +#define bfin_write_DMA10_X_COUNT(val) bfin_write16(DMA10_X_COUNT, val) +#define bfin_read_DMA10_Y_COUNT() bfin_read16(DMA10_Y_COUNT) +#define bfin_write_DMA10_Y_COUNT(val) bfin_write16(DMA10_Y_COUNT, val) +#define bfin_read_DMA10_X_MODIFY() bfin_read16(DMA10_X_MODIFY) +#define bfin_write_DMA10_X_MODIFY(val) bfin_write16(DMA10_X_MODIFY, val) +#define bfin_read_DMA10_Y_MODIFY() bfin_read16(DMA10_Y_MODIFY) +#define bfin_write_DMA10_Y_MODIFY(val) bfin_write16(DMA10_Y_MODIFY, val) +#define bfin_read_DMA10_CURR_DESC_PTR() bfin_read32(DMA10_CURR_DESC_PTR) +#define bfin_write_DMA10_CURR_DESC_PTR(val) bfin_write32(DMA10_CURR_DESC_PTR, val) +#define bfin_read_DMA10_CURR_ADDR() bfin_read32(DMA10_CURR_ADDR) +#define bfin_write_DMA10_CURR_ADDR(val) bfin_write32(DMA10_CURR_ADDR, val) +#define bfin_read_DMA10_CURR_X_COUNT() bfin_read16(DMA10_CURR_X_COUNT) +#define bfin_write_DMA10_CURR_X_COUNT(val) bfin_write16(DMA10_CURR_X_COUNT, val) +#define bfin_read_DMA10_CURR_Y_COUNT() bfin_read16(DMA10_CURR_Y_COUNT) +#define bfin_write_DMA10_CURR_Y_COUNT(val) bfin_write16(DMA10_CURR_Y_COUNT, val) +#define bfin_read_DMA10_IRQ_STATUS() bfin_read16(DMA10_IRQ_STATUS) +#define bfin_write_DMA10_IRQ_STATUS(val) bfin_write16(DMA10_IRQ_STATUS, val) +#define bfin_read_DMA10_PERIPHERAL_MAP() bfin_read16(DMA10_PERIPHERAL_MAP) +#define bfin_write_DMA10_PERIPHERAL_MAP(val) bfin_write16(DMA10_PERIPHERAL_MAP, val) + +#define bfin_read_DMA11_CONFIG() bfin_read16(DMA11_CONFIG) +#define bfin_write_DMA11_CONFIG(val) bfin_write16(DMA11_CONFIG, val) +#define bfin_read_DMA11_NEXT_DESC_PTR() bfin_read32(DMA11_NEXT_DESC_PTR) +#define bfin_write_DMA11_NEXT_DESC_PTR(val) bfin_write32(DMA11_NEXT_DESC_PTR, val) +#define bfin_read_DMA11_START_ADDR() bfin_read32(DMA11_START_ADDR) +#define bfin_write_DMA11_START_ADDR(val) bfin_write32(DMA11_START_ADDR, val) +#define bfin_read_DMA11_X_COUNT() bfin_read16(DMA11_X_COUNT) +#define bfin_write_DMA11_X_COUNT(val) bfin_write16(DMA11_X_COUNT, val) +#define bfin_read_DMA11_Y_COUNT() bfin_read16(DMA11_Y_COUNT) +#define bfin_write_DMA11_Y_COUNT(val) bfin_write16(DMA11_Y_COUNT, val) +#define bfin_read_DMA11_X_MODIFY() bfin_read16(DMA11_X_MODIFY) +#define bfin_write_DMA11_X_MODIFY(val) bfin_write16(DMA11_X_MODIFY, val) +#define bfin_read_DMA11_Y_MODIFY() bfin_read16(DMA11_Y_MODIFY) +#define bfin_write_DMA11_Y_MODIFY(val) bfin_write16(DMA11_Y_MODIFY, val) +#define bfin_read_DMA11_CURR_DESC_PTR() bfin_read32(DMA11_CURR_DESC_PTR) +#define bfin_write_DMA11_CURR_DESC_PTR(val) bfin_write32(DMA11_CURR_DESC_PTR, val) +#define bfin_read_DMA11_CURR_ADDR() bfin_read32(DMA11_CURR_ADDR) +#define bfin_write_DMA11_CURR_ADDR(val) bfin_write32(DMA11_CURR_ADDR, val) +#define bfin_read_DMA11_CURR_X_COUNT() bfin_read16(DMA11_CURR_X_COUNT) +#define bfin_write_DMA11_CURR_X_COUNT(val) bfin_write16(DMA11_CURR_X_COUNT, val) +#define bfin_read_DMA11_CURR_Y_COUNT() bfin_read16(DMA11_CURR_Y_COUNT) +#define bfin_write_DMA11_CURR_Y_COUNT(val) bfin_write16(DMA11_CURR_Y_COUNT, val) +#define bfin_read_DMA11_IRQ_STATUS() bfin_read16(DMA11_IRQ_STATUS) +#define bfin_write_DMA11_IRQ_STATUS(val) bfin_write16(DMA11_IRQ_STATUS, val) +#define bfin_read_DMA11_PERIPHERAL_MAP() bfin_read16(DMA11_PERIPHERAL_MAP) +#define bfin_write_DMA11_PERIPHERAL_MAP(val) bfin_write16(DMA11_PERIPHERAL_MAP, val) + +#define bfin_read_MDMA_D0_CONFIG() bfin_read16(MDMA_D0_CONFIG) +#define bfin_write_MDMA_D0_CONFIG(val) bfin_write16(MDMA_D0_CONFIG, val) +#define bfin_read_MDMA_D0_NEXT_DESC_PTR() bfin_read32(MDMA_D0_NEXT_DESC_PTR) +#define bfin_write_MDMA_D0_NEXT_DESC_PTR(val) bfin_write32(MDMA_D0_NEXT_DESC_PTR, val) +#define bfin_read_MDMA_D0_START_ADDR() bfin_read32(MDMA_D0_START_ADDR) +#define bfin_write_MDMA_D0_START_ADDR(val) bfin_write32(MDMA_D0_START_ADDR, val) +#define bfin_read_MDMA_D0_X_COUNT() bfin_read16(MDMA_D0_X_COUNT) +#define bfin_write_MDMA_D0_X_COUNT(val) bfin_write16(MDMA_D0_X_COUNT, val) +#define bfin_read_MDMA_D0_Y_COUNT() bfin_read16(MDMA_D0_Y_COUNT) +#define bfin_write_MDMA_D0_Y_COUNT(val) bfin_write16(MDMA_D0_Y_COUNT, val) +#define bfin_read_MDMA_D0_X_MODIFY() bfin_read16(MDMA_D0_X_MODIFY) +#define bfin_write_MDMA_D0_X_MODIFY(val) bfin_write16(MDMA_D0_X_MODIFY, val) +#define bfin_read_MDMA_D0_Y_MODIFY() bfin_read16(MDMA_D0_Y_MODIFY) +#define bfin_write_MDMA_D0_Y_MODIFY(val) bfin_write16(MDMA_D0_Y_MODIFY, val) +#define bfin_read_MDMA_D0_CURR_DESC_PTR() bfin_read32(MDMA_D0_CURR_DESC_PTR) +#define bfin_write_MDMA_D0_CURR_DESC_PTR(val) bfin_write32(MDMA_D0_CURR_DESC_PTR, val) +#define bfin_read_MDMA_D0_CURR_ADDR() bfin_read32(MDMA_D0_CURR_ADDR) +#define bfin_write_MDMA_D0_CURR_ADDR(val) bfin_write32(MDMA_D0_CURR_ADDR, val) +#define bfin_read_MDMA_D0_CURR_X_COUNT() bfin_read16(MDMA_D0_CURR_X_COUNT) +#define bfin_write_MDMA_D0_CURR_X_COUNT(val) bfin_write16(MDMA_D0_CURR_X_COUNT, val) +#define bfin_read_MDMA_D0_CURR_Y_COUNT() bfin_read16(MDMA_D0_CURR_Y_COUNT) +#define bfin_write_MDMA_D0_CURR_Y_COUNT(val) bfin_write16(MDMA_D0_CURR_Y_COUNT, val) +#define bfin_read_MDMA_D0_IRQ_STATUS() bfin_read16(MDMA_D0_IRQ_STATUS) +#define bfin_write_MDMA_D0_IRQ_STATUS(val) bfin_write16(MDMA_D0_IRQ_STATUS, val) +#define bfin_read_MDMA_D0_PERIPHERAL_MAP() bfin_read16(MDMA_D0_PERIPHERAL_MAP) +#define bfin_write_MDMA_D0_PERIPHERAL_MAP(val) bfin_write16(MDMA_D0_PERIPHERAL_MAP, val) + +#define bfin_read_MDMA_S0_CONFIG() bfin_read16(MDMA_S0_CONFIG) +#define bfin_write_MDMA_S0_CONFIG(val) bfin_write16(MDMA_S0_CONFIG, val) +#define bfin_read_MDMA_S0_NEXT_DESC_PTR() bfin_read32(MDMA_S0_NEXT_DESC_PTR) +#define bfin_write_MDMA_S0_NEXT_DESC_PTR(val) bfin_write32(MDMA_S0_NEXT_DESC_PTR, val) +#define bfin_read_MDMA_S0_START_ADDR() bfin_read32(MDMA_S0_START_ADDR) +#define bfin_write_MDMA_S0_START_ADDR(val) bfin_write32(MDMA_S0_START_ADDR, val) +#define bfin_read_MDMA_S0_X_COUNT() bfin_read16(MDMA_S0_X_COUNT) +#define bfin_write_MDMA_S0_X_COUNT(val) bfin_write16(MDMA_S0_X_COUNT, val) +#define bfin_read_MDMA_S0_Y_COUNT() bfin_read16(MDMA_S0_Y_COUNT) +#define bfin_write_MDMA_S0_Y_COUNT(val) bfin_write16(MDMA_S0_Y_COUNT, val) +#define bfin_read_MDMA_S0_X_MODIFY() bfin_read16(MDMA_S0_X_MODIFY) +#define bfin_write_MDMA_S0_X_MODIFY(val) bfin_write16(MDMA_S0_X_MODIFY, val) +#define bfin_read_MDMA_S0_Y_MODIFY() bfin_read16(MDMA_S0_Y_MODIFY) +#define bfin_write_MDMA_S0_Y_MODIFY(val) bfin_write16(MDMA_S0_Y_MODIFY, val) +#define bfin_read_MDMA_S0_CURR_DESC_PTR() bfin_read32(MDMA_S0_CURR_DESC_PTR) +#define bfin_write_MDMA_S0_CURR_DESC_PTR(val) bfin_write32(MDMA_S0_CURR_DESC_PTR, val) +#define bfin_read_MDMA_S0_CURR_ADDR() bfin_read32(MDMA_S0_CURR_ADDR) +#define bfin_write_MDMA_S0_CURR_ADDR(val) bfin_write32(MDMA_S0_CURR_ADDR, val) +#define bfin_read_MDMA_S0_CURR_X_COUNT() bfin_read16(MDMA_S0_CURR_X_COUNT) +#define bfin_write_MDMA_S0_CURR_X_COUNT(val) bfin_write16(MDMA_S0_CURR_X_COUNT, val) +#define bfin_read_MDMA_S0_CURR_Y_COUNT() bfin_read16(MDMA_S0_CURR_Y_COUNT) +#define bfin_write_MDMA_S0_CURR_Y_COUNT(val) bfin_write16(MDMA_S0_CURR_Y_COUNT, val) +#define bfin_read_MDMA_S0_IRQ_STATUS() bfin_read16(MDMA_S0_IRQ_STATUS) +#define bfin_write_MDMA_S0_IRQ_STATUS(val) bfin_write16(MDMA_S0_IRQ_STATUS, val) +#define bfin_read_MDMA_S0_PERIPHERAL_MAP() bfin_read16(MDMA_S0_PERIPHERAL_MAP) +#define bfin_write_MDMA_S0_PERIPHERAL_MAP(val) bfin_write16(MDMA_S0_PERIPHERAL_MAP, val) + +#define bfin_read_MDMA_D1_CONFIG() bfin_read16(MDMA_D1_CONFIG) +#define bfin_write_MDMA_D1_CONFIG(val) bfin_write16(MDMA_D1_CONFIG, val) +#define bfin_read_MDMA_D1_NEXT_DESC_PTR() bfin_read32(MDMA_D1_NEXT_DESC_PTR) +#define bfin_write_MDMA_D1_NEXT_DESC_PTR(val) bfin_write32(MDMA_D1_NEXT_DESC_PTR, val) +#define bfin_read_MDMA_D1_START_ADDR() bfin_read32(MDMA_D1_START_ADDR) +#define bfin_write_MDMA_D1_START_ADDR(val) bfin_write32(MDMA_D1_START_ADDR, val) +#define bfin_read_MDMA_D1_X_COUNT() bfin_read16(MDMA_D1_X_COUNT) +#define bfin_write_MDMA_D1_X_COUNT(val) bfin_write16(MDMA_D1_X_COUNT, val) +#define bfin_read_MDMA_D1_Y_COUNT() bfin_read16(MDMA_D1_Y_COUNT) +#define bfin_write_MDMA_D1_Y_COUNT(val) bfin_write16(MDMA_D1_Y_COUNT, val) +#define bfin_read_MDMA_D1_X_MODIFY() bfin_read16(MDMA_D1_X_MODIFY) +#define bfin_write_MDMA_D1_X_MODIFY(val) bfin_write16(MDMA_D1_X_MODIFY, val) +#define bfin_read_MDMA_D1_Y_MODIFY() bfin_read16(MDMA_D1_Y_MODIFY) +#define bfin_write_MDMA_D1_Y_MODIFY(val) bfin_write16(MDMA_D1_Y_MODIFY, val) +#define bfin_read_MDMA_D1_CURR_DESC_PTR() bfin_read32(MDMA_D1_CURR_DESC_PTR) +#define bfin_write_MDMA_D1_CURR_DESC_PTR(val) bfin_write32(MDMA_D1_CURR_DESC_PTR, val) +#define bfin_read_MDMA_D1_CURR_ADDR() bfin_read32(MDMA_D1_CURR_ADDR) +#define bfin_write_MDMA_D1_CURR_ADDR(val) bfin_write32(MDMA_D1_CURR_ADDR, val) +#define bfin_read_MDMA_D1_CURR_X_COUNT() bfin_read16(MDMA_D1_CURR_X_COUNT) +#define bfin_write_MDMA_D1_CURR_X_COUNT(val) bfin_write16(MDMA_D1_CURR_X_COUNT, val) +#define bfin_read_MDMA_D1_CURR_Y_COUNT() bfin_read16(MDMA_D1_CURR_Y_COUNT) +#define bfin_write_MDMA_D1_CURR_Y_COUNT(val) bfin_write16(MDMA_D1_CURR_Y_COUNT, val) +#define bfin_read_MDMA_D1_IRQ_STATUS() bfin_read16(MDMA_D1_IRQ_STATUS) +#define bfin_write_MDMA_D1_IRQ_STATUS(val) bfin_write16(MDMA_D1_IRQ_STATUS, val) +#define bfin_read_MDMA_D1_PERIPHERAL_MAP() bfin_read16(MDMA_D1_PERIPHERAL_MAP) +#define bfin_write_MDMA_D1_PERIPHERAL_MAP(val) bfin_write16(MDMA_D1_PERIPHERAL_MAP, val) + +#define bfin_read_MDMA_S1_CONFIG() bfin_read16(MDMA_S1_CONFIG) +#define bfin_write_MDMA_S1_CONFIG(val) bfin_write16(MDMA_S1_CONFIG, val) +#define bfin_read_MDMA_S1_NEXT_DESC_PTR() bfin_read32(MDMA_S1_NEXT_DESC_PTR) +#define bfin_write_MDMA_S1_NEXT_DESC_PTR(val) bfin_write32(MDMA_S1_NEXT_DESC_PTR, val) +#define bfin_read_MDMA_S1_START_ADDR() bfin_read32(MDMA_S1_START_ADDR) +#define bfin_write_MDMA_S1_START_ADDR(val) bfin_write32(MDMA_S1_START_ADDR, val) +#define bfin_read_MDMA_S1_X_COUNT() bfin_read16(MDMA_S1_X_COUNT) +#define bfin_write_MDMA_S1_X_COUNT(val) bfin_write16(MDMA_S1_X_COUNT, val) +#define bfin_read_MDMA_S1_Y_COUNT() bfin_read16(MDMA_S1_Y_COUNT) +#define bfin_write_MDMA_S1_Y_COUNT(val) bfin_write16(MDMA_S1_Y_COUNT, val) +#define bfin_read_MDMA_S1_X_MODIFY() bfin_read16(MDMA_S1_X_MODIFY) +#define bfin_write_MDMA_S1_X_MODIFY(val) bfin_write16(MDMA_S1_X_MODIFY, val) +#define bfin_read_MDMA_S1_Y_MODIFY() bfin_read16(MDMA_S1_Y_MODIFY) +#define bfin_write_MDMA_S1_Y_MODIFY(val) bfin_write16(MDMA_S1_Y_MODIFY, val) +#define bfin_read_MDMA_S1_CURR_DESC_PTR() bfin_read32(MDMA_S1_CURR_DESC_PTR) +#define bfin_write_MDMA_S1_CURR_DESC_PTR(val) bfin_write32(MDMA_S1_CURR_DESC_PTR, val) +#define bfin_read_MDMA_S1_CURR_ADDR() bfin_read32(MDMA_S1_CURR_ADDR) +#define bfin_write_MDMA_S1_CURR_ADDR(val) bfin_write32(MDMA_S1_CURR_ADDR, val) +#define bfin_read_MDMA_S1_CURR_X_COUNT() bfin_read16(MDMA_S1_CURR_X_COUNT) +#define bfin_write_MDMA_S1_CURR_X_COUNT(val) bfin_write16(MDMA_S1_CURR_X_COUNT, val) +#define bfin_read_MDMA_S1_CURR_Y_COUNT() bfin_read16(MDMA_S1_CURR_Y_COUNT) +#define bfin_write_MDMA_S1_CURR_Y_COUNT(val) bfin_write16(MDMA_S1_CURR_Y_COUNT, val) +#define bfin_read_MDMA_S1_IRQ_STATUS() bfin_read16(MDMA_S1_IRQ_STATUS) +#define bfin_write_MDMA_S1_IRQ_STATUS(val) bfin_write16(MDMA_S1_IRQ_STATUS, val) +#define bfin_read_MDMA_S1_PERIPHERAL_MAP() bfin_read16(MDMA_S1_PERIPHERAL_MAP) +#define bfin_write_MDMA_S1_PERIPHERAL_MAP(val) bfin_write16(MDMA_S1_PERIPHERAL_MAP, val) + + +/* Parallel Peripheral Interface (0xFFC01000 - 0xFFC010FF) */ +#define bfin_read_PPI_CONTROL() bfin_read16(PPI_CONTROL) +#define bfin_write_PPI_CONTROL(val) bfin_write16(PPI_CONTROL, val) +#define bfin_read_PPI_STATUS() bfin_read16(PPI_STATUS) +#define bfin_write_PPI_STATUS(val) bfin_write16(PPI_STATUS, val) +#define bfin_read_PPI_DELAY() bfin_read16(PPI_DELAY) +#define bfin_write_PPI_DELAY(val) bfin_write16(PPI_DELAY, val) +#define bfin_read_PPI_COUNT() bfin_read16(PPI_COUNT) +#define bfin_write_PPI_COUNT(val) bfin_write16(PPI_COUNT, val) +#define bfin_read_PPI_FRAME() bfin_read16(PPI_FRAME) +#define bfin_write_PPI_FRAME(val) bfin_write16(PPI_FRAME, val) + + +/* Two-Wire Interface (0xFFC01400 - 0xFFC014FF) */ + +/* General Purpose I/O Port G (0xFFC01500 - 0xFFC015FF) */ +#define bfin_read_PORTGIO() bfin_read16(PORTGIO) +#define bfin_write_PORTGIO(val) bfin_write16(PORTGIO, val) +#define bfin_read_PORTGIO_CLEAR() bfin_read16(PORTGIO_CLEAR) +#define bfin_write_PORTGIO_CLEAR(val) bfin_write16(PORTGIO_CLEAR, val) +#define bfin_read_PORTGIO_SET() bfin_read16(PORTGIO_SET) +#define bfin_write_PORTGIO_SET(val) bfin_write16(PORTGIO_SET, val) +#define bfin_read_PORTGIO_TOGGLE() bfin_read16(PORTGIO_TOGGLE) +#define bfin_write_PORTGIO_TOGGLE(val) bfin_write16(PORTGIO_TOGGLE, val) +#define bfin_read_PORTGIO_MASKA() bfin_read16(PORTGIO_MASKA) +#define bfin_write_PORTGIO_MASKA(val) bfin_write16(PORTGIO_MASKA, val) +#define bfin_read_PORTGIO_MASKA_CLEAR() bfin_read16(PORTGIO_MASKA_CLEAR) +#define bfin_write_PORTGIO_MASKA_CLEAR(val) bfin_write16(PORTGIO_MASKA_CLEAR, val) +#define bfin_read_PORTGIO_MASKA_SET() bfin_read16(PORTGIO_MASKA_SET) +#define bfin_write_PORTGIO_MASKA_SET(val) bfin_write16(PORTGIO_MASKA_SET, val) +#define bfin_read_PORTGIO_MASKA_TOGGLE() bfin_read16(PORTGIO_MASKA_TOGGLE) +#define bfin_write_PORTGIO_MASKA_TOGGLE(val) bfin_write16(PORTGIO_MASKA_TOGGLE, val) +#define bfin_read_PORTGIO_MASKB() bfin_read16(PORTGIO_MASKB) +#define bfin_write_PORTGIO_MASKB(val) bfin_write16(PORTGIO_MASKB, val) +#define bfin_read_PORTGIO_MASKB_CLEAR() bfin_read16(PORTGIO_MASKB_CLEAR) +#define bfin_write_PORTGIO_MASKB_CLEAR(val) bfin_write16(PORTGIO_MASKB_CLEAR, val) +#define bfin_read_PORTGIO_MASKB_SET() bfin_read16(PORTGIO_MASKB_SET) +#define bfin_write_PORTGIO_MASKB_SET(val) bfin_write16(PORTGIO_MASKB_SET, val) +#define bfin_read_PORTGIO_MASKB_TOGGLE() bfin_read16(PORTGIO_MASKB_TOGGLE) +#define bfin_write_PORTGIO_MASKB_TOGGLE(val) bfin_write16(PORTGIO_MASKB_TOGGLE, val) +#define bfin_read_PORTGIO_DIR() bfin_read16(PORTGIO_DIR) +#define bfin_write_PORTGIO_DIR(val) bfin_write16(PORTGIO_DIR, val) +#define bfin_read_PORTGIO_POLAR() bfin_read16(PORTGIO_POLAR) +#define bfin_write_PORTGIO_POLAR(val) bfin_write16(PORTGIO_POLAR, val) +#define bfin_read_PORTGIO_EDGE() bfin_read16(PORTGIO_EDGE) +#define bfin_write_PORTGIO_EDGE(val) bfin_write16(PORTGIO_EDGE, val) +#define bfin_read_PORTGIO_BOTH() bfin_read16(PORTGIO_BOTH) +#define bfin_write_PORTGIO_BOTH(val) bfin_write16(PORTGIO_BOTH, val) +#define bfin_read_PORTGIO_INEN() bfin_read16(PORTGIO_INEN) +#define bfin_write_PORTGIO_INEN(val) bfin_write16(PORTGIO_INEN, val) + + +/* General Purpose I/O Port H (0xFFC01700 - 0xFFC017FF) */ +#define bfin_read_PORTHIO() bfin_read16(PORTHIO) +#define bfin_write_PORTHIO(val) bfin_write16(PORTHIO, val) +#define bfin_read_PORTHIO_CLEAR() bfin_read16(PORTHIO_CLEAR) +#define bfin_write_PORTHIO_CLEAR(val) bfin_write16(PORTHIO_CLEAR, val) +#define bfin_read_PORTHIO_SET() bfin_read16(PORTHIO_SET) +#define bfin_write_PORTHIO_SET(val) bfin_write16(PORTHIO_SET, val) +#define bfin_read_PORTHIO_TOGGLE() bfin_read16(PORTHIO_TOGGLE) +#define bfin_write_PORTHIO_TOGGLE(val) bfin_write16(PORTHIO_TOGGLE, val) +#define bfin_read_PORTHIO_MASKA() bfin_read16(PORTHIO_MASKA) +#define bfin_write_PORTHIO_MASKA(val) bfin_write16(PORTHIO_MASKA, val) +#define bfin_read_PORTHIO_MASKA_CLEAR() bfin_read16(PORTHIO_MASKA_CLEAR) +#define bfin_write_PORTHIO_MASKA_CLEAR(val) bfin_write16(PORTHIO_MASKA_CLEAR, val) +#define bfin_read_PORTHIO_MASKA_SET() bfin_read16(PORTHIO_MASKA_SET) +#define bfin_write_PORTHIO_MASKA_SET(val) bfin_write16(PORTHIO_MASKA_SET, val) +#define bfin_read_PORTHIO_MASKA_TOGGLE() bfin_read16(PORTHIO_MASKA_TOGGLE) +#define bfin_write_PORTHIO_MASKA_TOGGLE(val) bfin_write16(PORTHIO_MASKA_TOGGLE, val) +#define bfin_read_PORTHIO_MASKB() bfin_read16(PORTHIO_MASKB) +#define bfin_write_PORTHIO_MASKB(val) bfin_write16(PORTHIO_MASKB, val) +#define bfin_read_PORTHIO_MASKB_CLEAR() bfin_read16(PORTHIO_MASKB_CLEAR) +#define bfin_write_PORTHIO_MASKB_CLEAR(val) bfin_write16(PORTHIO_MASKB_CLEAR, val) +#define bfin_read_PORTHIO_MASKB_SET() bfin_read16(PORTHIO_MASKB_SET) +#define bfin_write_PORTHIO_MASKB_SET(val) bfin_write16(PORTHIO_MASKB_SET, val) +#define bfin_read_PORTHIO_MASKB_TOGGLE() bfin_read16(PORTHIO_MASKB_TOGGLE) +#define bfin_write_PORTHIO_MASKB_TOGGLE(val) bfin_write16(PORTHIO_MASKB_TOGGLE, val) +#define bfin_read_PORTHIO_DIR() bfin_read16(PORTHIO_DIR) +#define bfin_write_PORTHIO_DIR(val) bfin_write16(PORTHIO_DIR, val) +#define bfin_read_PORTHIO_POLAR() bfin_read16(PORTHIO_POLAR) +#define bfin_write_PORTHIO_POLAR(val) bfin_write16(PORTHIO_POLAR, val) +#define bfin_read_PORTHIO_EDGE() bfin_read16(PORTHIO_EDGE) +#define bfin_write_PORTHIO_EDGE(val) bfin_write16(PORTHIO_EDGE, val) +#define bfin_read_PORTHIO_BOTH() bfin_read16(PORTHIO_BOTH) +#define bfin_write_PORTHIO_BOTH(val) bfin_write16(PORTHIO_BOTH, val) +#define bfin_read_PORTHIO_INEN() bfin_read16(PORTHIO_INEN) +#define bfin_write_PORTHIO_INEN(val) bfin_write16(PORTHIO_INEN, val) + + +/* UART1 Controller (0xFFC02000 - 0xFFC020FF) */ +#define bfin_read_UART1_THR() bfin_read16(UART1_THR) +#define bfin_write_UART1_THR(val) bfin_write16(UART1_THR, val) +#define bfin_read_UART1_RBR() bfin_read16(UART1_RBR) +#define bfin_write_UART1_RBR(val) bfin_write16(UART1_RBR, val) +#define bfin_read_UART1_DLL() bfin_read16(UART1_DLL) +#define bfin_write_UART1_DLL(val) bfin_write16(UART1_DLL, val) +#define bfin_read_UART1_IER() bfin_read16(UART1_IER) +#define bfin_write_UART1_IER(val) bfin_write16(UART1_IER, val) +#define bfin_read_UART1_DLH() bfin_read16(UART1_DLH) +#define bfin_write_UART1_DLH(val) bfin_write16(UART1_DLH, val) +#define bfin_read_UART1_IIR() bfin_read16(UART1_IIR) +#define bfin_write_UART1_IIR(val) bfin_write16(UART1_IIR, val) +#define bfin_read_UART1_LCR() bfin_read16(UART1_LCR) +#define bfin_write_UART1_LCR(val) bfin_write16(UART1_LCR, val) +#define bfin_read_UART1_MCR() bfin_read16(UART1_MCR) +#define bfin_write_UART1_MCR(val) bfin_write16(UART1_MCR, val) +#define bfin_read_UART1_LSR() bfin_read16(UART1_LSR) +#define bfin_write_UART1_LSR(val) bfin_write16(UART1_LSR, val) +#define bfin_read_UART1_MSR() bfin_read16(UART1_MSR) +#define bfin_write_UART1_MSR(val) bfin_write16(UART1_MSR, val) +#define bfin_read_UART1_SCR() bfin_read16(UART1_SCR) +#define bfin_write_UART1_SCR(val) bfin_write16(UART1_SCR, val) +#define bfin_read_UART1_GCTL() bfin_read16(UART1_GCTL) +#define bfin_write_UART1_GCTL(val) bfin_write16(UART1_GCTL, val) + +/* Omit CAN register sets from the cdefBF534.h (CAN is not in the ADSP-BF51x processor) */ + +/* Pin Control Registers (0xFFC03200 - 0xFFC032FF) */ +#define bfin_read_PORTF_FER() bfin_read16(PORTF_FER) +#define bfin_write_PORTF_FER(val) bfin_write16(PORTF_FER, val) +#define bfin_read_PORTG_FER() bfin_read16(PORTG_FER) +#define bfin_write_PORTG_FER(val) bfin_write16(PORTG_FER, val) +#define bfin_read_PORTH_FER() bfin_read16(PORTH_FER) +#define bfin_write_PORTH_FER(val) bfin_write16(PORTH_FER, val) +#define bfin_read_PORT_MUX() bfin_read16(PORT_MUX) +#define bfin_write_PORT_MUX(val) bfin_write16(PORT_MUX, val) + + +/* Handshake MDMA Registers (0xFFC03300 - 0xFFC033FF) */ +#define bfin_read_HMDMA0_CONTROL() bfin_read16(HMDMA0_CONTROL) +#define bfin_write_HMDMA0_CONTROL(val) bfin_write16(HMDMA0_CONTROL, val) +#define bfin_read_HMDMA0_ECINIT() bfin_read16(HMDMA0_ECINIT) +#define bfin_write_HMDMA0_ECINIT(val) bfin_write16(HMDMA0_ECINIT, val) +#define bfin_read_HMDMA0_BCINIT() bfin_read16(HMDMA0_BCINIT) +#define bfin_write_HMDMA0_BCINIT(val) bfin_write16(HMDMA0_BCINIT, val) +#define bfin_read_HMDMA0_ECURGENT() bfin_read16(HMDMA0_ECURGENT) +#define bfin_write_HMDMA0_ECURGENT(val) bfin_write16(HMDMA0_ECURGENT, val) +#define bfin_read_HMDMA0_ECOVERFLOW() bfin_read16(HMDMA0_ECOVERFLOW) +#define bfin_write_HMDMA0_ECOVERFLOW(val) bfin_write16(HMDMA0_ECOVERFLOW, val) +#define bfin_read_HMDMA0_ECOUNT() bfin_read16(HMDMA0_ECOUNT) +#define bfin_write_HMDMA0_ECOUNT(val) bfin_write16(HMDMA0_ECOUNT, val) +#define bfin_read_HMDMA0_BCOUNT() bfin_read16(HMDMA0_BCOUNT) +#define bfin_write_HMDMA0_BCOUNT(val) bfin_write16(HMDMA0_BCOUNT, val) + +#define bfin_read_HMDMA1_CONTROL() bfin_read16(HMDMA1_CONTROL) +#define bfin_write_HMDMA1_CONTROL(val) bfin_write16(HMDMA1_CONTROL, val) +#define bfin_read_HMDMA1_ECINIT() bfin_read16(HMDMA1_ECINIT) +#define bfin_write_HMDMA1_ECINIT(val) bfin_write16(HMDMA1_ECINIT, val) +#define bfin_read_HMDMA1_BCINIT() bfin_read16(HMDMA1_BCINIT) +#define bfin_write_HMDMA1_BCINIT(val) bfin_write16(HMDMA1_BCINIT, val) +#define bfin_read_HMDMA1_ECURGENT() bfin_read16(HMDMA1_ECURGENT) +#define bfin_write_HMDMA1_ECURGENT(val) bfin_write16(HMDMA1_ECURGENT, val) +#define bfin_read_HMDMA1_ECOVERFLOW() bfin_read16(HMDMA1_ECOVERFLOW) +#define bfin_write_HMDMA1_ECOVERFLOW(val) bfin_write16(HMDMA1_ECOVERFLOW, val) +#define bfin_read_HMDMA1_ECOUNT() bfin_read16(HMDMA1_ECOUNT) +#define bfin_write_HMDMA1_ECOUNT(val) bfin_write16(HMDMA1_ECOUNT, val) +#define bfin_read_HMDMA1_BCOUNT() bfin_read16(HMDMA1_BCOUNT) +#define bfin_write_HMDMA1_BCOUNT(val) bfin_write16(HMDMA1_BCOUNT, val) + +/* ==== end from cdefBF534.h ==== */ + +/* GPIO PIN mux (0xFFC03210 - OxFFC03288) */ + +#define bfin_read_PORTF_MUX() bfin_read16(PORTF_MUX) +#define bfin_write_PORTF_MUX(val) bfin_write16(PORTF_MUX, val) +#define bfin_read_PORTG_MUX() bfin_read16(PORTG_MUX) +#define bfin_write_PORTG_MUX(val) bfin_write16(PORTG_MUX, val) +#define bfin_read_PORTH_MUX() bfin_read16(PORTH_MUX) +#define bfin_write_PORTH_MUX(val) bfin_write16(PORTH_MUX, val) + +#define bfin_read_PORTF_DRIVE() bfin_read16(PORTF_DRIVE) +#define bfin_write_PORTF_DRIVE(val) bfin_write16(PORTF_DRIVE, val) +#define bfin_read_PORTG_DRIVE() bfin_read16(PORTG_DRIVE) +#define bfin_write_PORTG_DRIVE(val) bfin_write16(PORTG_DRIVE, val) +#define bfin_read_PORTH_DRIVE() bfin_read16(PORTH_DRIVE) +#define bfin_write_PORTH_DRIVE(val) bfin_write16(PORTH_DRIVE, val) +#define bfin_read_PORTF_SLEW() bfin_read16(PORTF_SLEW) +#define bfin_write_PORTF_SLEW(val) bfin_write16(PORTF_SLEW, val) +#define bfin_read_PORTG_SLEW() bfin_read16(PORTG_SLEW) +#define bfin_write_PORTG_SLEW(val) bfin_write16(PORTG_SLEW, val) +#define bfin_read_PORTH_SLEW() bfin_read16(PORTH_SLEW) +#define bfin_write_PORTH_SLEW(val) bfin_write16(PORTH_SLEW, val) +#define bfin_read_PORTF_HYSTERISIS() bfin_read16(PORTF_HYSTERISIS) +#define bfin_write_PORTF_HYSTERISIS(val) bfin_write16(PORTF_HYSTERISIS, val) +#define bfin_read_PORTG_HYSTERISIS() bfin_read16(PORTG_HYSTERISIS) +#define bfin_write_PORTG_HYSTERISIS(val) bfin_write16(PORTG_HYSTERISIS, val) +#define bfin_read_PORTH_HYSTERISIS() bfin_read16(PORTH_HYSTERISIS) +#define bfin_write_PORTH_HYSTERISIS(val) bfin_write16(PORTH_HYSTERISIS, val) +#define bfin_read_MISCPORT_DRIVE() bfin_read16(MISCPORT_DRIVE) +#define bfin_write_MISCPORT_DRIVE(val) bfin_write16(MISCPORT_DRIVE, val) +#define bfin_read_MISCPORT_SLEW() bfin_read16(MISCPORT_SLEW) +#define bfin_write_MISCPORT_SLEW(val) bfin_write16(MISCPORT_SLEW, val) +#define bfin_read_MISCPORT_HYSTERISIS() bfin_read16(MISCPORT_HYSTERISIS) +#define bfin_write_MISCPORT_HYSTERISIS(val) bfin_write16(MISCPORT_HYSTERISIS, val) + +/* HOST Port Registers */ + +#define bfin_read_HOST_CONTROL() bfin_read16(HOST_CONTROL) +#define bfin_write_HOST_CONTROL(val) bfin_write16(HOST_CONTROL, val) +#define bfin_read_HOST_STATUS() bfin_read16(HOST_STATUS) +#define bfin_write_HOST_STATUS(val) bfin_write16(HOST_STATUS, val) +#define bfin_read_HOST_TIMEOUT() bfin_read16(HOST_TIMEOUT) +#define bfin_write_HOST_TIMEOUT(val) bfin_write16(HOST_TIMEOUT, val) + +/* Counter Registers */ + +#define bfin_read_CNT_CONFIG() bfin_read16(CNT_CONFIG) +#define bfin_write_CNT_CONFIG(val) bfin_write16(CNT_CONFIG, val) +#define bfin_read_CNT_IMASK() bfin_read16(CNT_IMASK) +#define bfin_write_CNT_IMASK(val) bfin_write16(CNT_IMASK, val) +#define bfin_read_CNT_STATUS() bfin_read16(CNT_STATUS) +#define bfin_write_CNT_STATUS(val) bfin_write16(CNT_STATUS, val) +#define bfin_read_CNT_COMMAND() bfin_read16(CNT_COMMAND) +#define bfin_write_CNT_COMMAND(val) bfin_write16(CNT_COMMAND, val) +#define bfin_read_CNT_DEBOUNCE() bfin_read16(CNT_DEBOUNCE) +#define bfin_write_CNT_DEBOUNCE(val) bfin_write16(CNT_DEBOUNCE, val) +#define bfin_read_CNT_COUNTER() bfin_read32(CNT_COUNTER) +#define bfin_write_CNT_COUNTER(val) bfin_write32(CNT_COUNTER, val) +#define bfin_read_CNT_MAX() bfin_read32(CNT_MAX) +#define bfin_write_CNT_MAX(val) bfin_write32(CNT_MAX, val) +#define bfin_read_CNT_MIN() bfin_read32(CNT_MIN) +#define bfin_write_CNT_MIN(val) bfin_write32(CNT_MIN, val) + +/* OTP/FUSE Registers */ + +#define bfin_read_OTP_CONTROL() bfin_read16(OTP_CONTROL) +#define bfin_write_OTP_CONTROL(val) bfin_write16(OTP_CONTROL, val) +#define bfin_read_OTP_BEN() bfin_read16(OTP_BEN) +#define bfin_write_OTP_BEN(val) bfin_write16(OTP_BEN, val) +#define bfin_read_OTP_STATUS() bfin_read16(OTP_STATUS) +#define bfin_write_OTP_STATUS(val) bfin_write16(OTP_STATUS, val) +#define bfin_read_OTP_TIMING() bfin_read32(OTP_TIMING) +#define bfin_write_OTP_TIMING(val) bfin_write32(OTP_TIMING, val) + +/* Security Registers */ + +#define bfin_read_SECURE_SYSSWT() bfin_read32(SECURE_SYSSWT) +#define bfin_write_SECURE_SYSSWT(val) bfin_write32(SECURE_SYSSWT, val) +#define bfin_read_SECURE_CONTROL() bfin_read16(SECURE_CONTROL) +#define bfin_write_SECURE_CONTROL(val) bfin_write16(SECURE_CONTROL, val) +#define bfin_read_SECURE_STATUS() bfin_read16(SECURE_STATUS) +#define bfin_write_SECURE_STATUS(val) bfin_write16(SECURE_STATUS, val) + +/* OTP Read/Write Data Buffer Registers */ + +#define bfin_read_OTP_DATA0() bfin_read32(OTP_DATA0) +#define bfin_write_OTP_DATA0(val) bfin_write32(OTP_DATA0, val) +#define bfin_read_OTP_DATA1() bfin_read32(OTP_DATA1) +#define bfin_write_OTP_DATA1(val) bfin_write32(OTP_DATA1, val) +#define bfin_read_OTP_DATA2() bfin_read32(OTP_DATA2) +#define bfin_write_OTP_DATA2(val) bfin_write32(OTP_DATA2, val) +#define bfin_read_OTP_DATA3() bfin_read32(OTP_DATA3) +#define bfin_write_OTP_DATA3(val) bfin_write32(OTP_DATA3, val) + +/* NFC Registers */ + +#define bfin_read_NFC_CTL() bfin_read16(NFC_CTL) +#define bfin_write_NFC_CTL(val) bfin_write16(NFC_CTL, val) +#define bfin_read_NFC_STAT() bfin_read16(NFC_STAT) +#define bfin_write_NFC_STAT(val) bfin_write16(NFC_STAT, val) +#define bfin_read_NFC_IRQSTAT() bfin_read16(NFC_IRQSTAT) +#define bfin_write_NFC_IRQSTAT(val) bfin_write16(NFC_IRQSTAT, val) +#define bfin_read_NFC_IRQMASK() bfin_read16(NFC_IRQMASK) +#define bfin_write_NFC_IRQMASK(val) bfin_write16(NFC_IRQMASK, val) +#define bfin_read_NFC_ECC0() bfin_read16(NFC_ECC0) +#define bfin_write_NFC_ECC0(val) bfin_write16(NFC_ECC0, val) +#define bfin_read_NFC_ECC1() bfin_read16(NFC_ECC1) +#define bfin_write_NFC_ECC1(val) bfin_write16(NFC_ECC1, val) +#define bfin_read_NFC_ECC2() bfin_read16(NFC_ECC2) +#define bfin_write_NFC_ECC2(val) bfin_write16(NFC_ECC2, val) +#define bfin_read_NFC_ECC3() bfin_read16(NFC_ECC3) +#define bfin_write_NFC_ECC3(val) bfin_write16(NFC_ECC3, val) +#define bfin_read_NFC_COUNT() bfin_read16(NFC_COUNT) +#define bfin_write_NFC_COUNT(val) bfin_write16(NFC_COUNT, val) +#define bfin_read_NFC_RST() bfin_read16(NFC_RST) +#define bfin_write_NFC_RST(val) bfin_write16(NFC_RST, val) +#define bfin_read_NFC_PGCTL() bfin_read16(NFC_PGCTL) +#define bfin_write_NFC_PGCTL(val) bfin_write16(NFC_PGCTL, val) +#define bfin_read_NFC_READ() bfin_read16(NFC_READ) +#define bfin_write_NFC_READ(val) bfin_write16(NFC_READ, val) +#define bfin_read_NFC_ADDR() bfin_read16(NFC_ADDR) +#define bfin_write_NFC_ADDR(val) bfin_write16(NFC_ADDR, val) +#define bfin_read_NFC_CMD() bfin_read16(NFC_CMD) +#define bfin_write_NFC_CMD(val) bfin_write16(NFC_CMD, val) +#define bfin_read_NFC_DATA_WR() bfin_read16(NFC_DATA_WR) +#define bfin_write_NFC_DATA_WR(val) bfin_write16(NFC_DATA_WR, val) +#define bfin_read_NFC_DATA_RD() bfin_read16(NFC_DATA_RD) +#define bfin_write_NFC_DATA_RD(val) bfin_write16(NFC_DATA_RD, val) + +#endif /* _CDEF_BF52X_H */ diff --git a/arch/blackfin/mach-bf518/include/mach/defBF512.h b/arch/blackfin/mach-bf518/include/mach/defBF512.h new file mode 100644 index 000000000000..a96ca90154dd --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/defBF512.h @@ -0,0 +1,42 @@ +/* + * File: include/asm-blackfin/mach-bf518/defBF512.h + * Based on: + * Author: + * + * Created: + * Description: + * + * Rev: + * + * Modified: + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _DEF_BF512_H +#define _DEF_BF512_H + +/* Include all Core registers and bit definitions */ +#include + +/* SYSTEM & MMR ADDRESS DEFINITIONS FOR ADSP-BF512 */ + +/* Include defBF51x_base.h for the set of #defines that are common to all ADSP-BF51x processors */ +#include "defBF51x_base.h" + +#endif /* _DEF_BF512_H */ diff --git a/arch/blackfin/mach-bf518/include/mach/defBF514.h b/arch/blackfin/mach-bf518/include/mach/defBF514.h new file mode 100644 index 000000000000..543f2913b3f5 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/defBF514.h @@ -0,0 +1,113 @@ +/* + * File: include/asm-blackfin/mach-bf518/defBF514.h + * Based on: + * Author: + * + * Created: + * Description: + * + * Rev: + * + * Modified: + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _DEF_BF514_H +#define _DEF_BF514_H + +/* Include all Core registers and bit definitions */ +#include + +/* SYSTEM & MMR ADDRESS DEFINITIONS FOR ADSP-BF514 */ + +/* Include defBF51x_base.h for the set of #defines that are common to all ADSP-BF51x processors */ +#include "defBF51x_base.h" + +/* The following are the #defines needed by ADSP-BF514 that are not in the common header */ + +/* SDH Registers */ + +#define SDH_PWR_CTL 0xFFC03900 /* SDH Power Control */ +#define SDH_CLK_CTL 0xFFC03904 /* SDH Clock Control */ +#define SDH_ARGUMENT 0xFFC03908 /* SDH Argument */ +#define SDH_COMMAND 0xFFC0390C /* SDH Command */ +#define SDH_RESP_CMD 0xFFC03910 /* SDH Response Command */ +#define SDH_RESPONSE0 0xFFC03914 /* SDH Response0 */ +#define SDH_RESPONSE1 0xFFC03918 /* SDH Response1 */ +#define SDH_RESPONSE2 0xFFC0391C /* SDH Response2 */ +#define SDH_RESPONSE3 0xFFC03920 /* SDH Response3 */ +#define SDH_DATA_TIMER 0xFFC03924 /* SDH Data Timer */ +#define SDH_DATA_LGTH 0xFFC03928 /* SDH Data Length */ +#define SDH_DATA_CTL 0xFFC0392C /* SDH Data Control */ +#define SDH_DATA_CNT 0xFFC03930 /* SDH Data Counter */ +#define SDH_STATUS 0xFFC03934 /* SDH Status */ +#define SDH_STATUS_CLR 0xFFC03938 /* SDH Status Clear */ +#define SDH_MASK0 0xFFC0393C /* SDH Interrupt0 Mask */ +#define SDH_MASK1 0xFFC03940 /* SDH Interrupt1 Mask */ +#define SDH_FIFO_CNT 0xFFC03948 /* SDH FIFO Counter */ +#define SDH_FIFO 0xFFC03980 /* SDH Data FIFO */ +#define SDH_E_STATUS 0xFFC039C0 /* SDH Exception Status */ +#define SDH_E_MASK 0xFFC039C4 /* SDH Exception Mask */ +#define SDH_CFG 0xFFC039C8 /* SDH Configuration */ +#define SDH_RD_WAIT_EN 0xFFC039CC /* SDH Read Wait Enable */ +#define SDH_PID0 0xFFC039D0 /* SDH Peripheral Identification0 */ +#define SDH_PID1 0xFFC039D4 /* SDH Peripheral Identification1 */ +#define SDH_PID2 0xFFC039D8 /* SDH Peripheral Identification2 */ +#define SDH_PID3 0xFFC039DC /* SDH Peripheral Identification3 */ +#define SDH_PID4 0xFFC039E0 /* SDH Peripheral Identification4 */ +#define SDH_PID5 0xFFC039E4 /* SDH Peripheral Identification5 */ +#define SDH_PID6 0xFFC039E8 /* SDH Peripheral Identification6 */ +#define SDH_PID7 0xFFC039EC /* SDH Peripheral Identification7 */ + +/* Removable Storage Interface Registers */ + +#define RSI_PWR_CONTROL 0xFFC03800 /* RSI Power Control Register */ +#define RSI_CLK_CONTROL 0xFFC03804 /* RSI Clock Control Register */ +#define RSI_ARGUMENT 0xFFC03808 /* RSI Argument Register */ +#define RSI_COMMAND 0xFFC0380C /* RSI Command Register */ +#define RSI_RESP_CMD 0xFFC03810 /* RSI Response Command Register */ +#define RSI_RESPONSE0 0xFFC03814 /* RSI Response Register */ +#define RSI_RESPONSE1 0xFFC03818 /* RSI Response Register */ +#define RSI_RESPONSE2 0xFFC0381C /* RSI Response Register */ +#define RSI_RESPONSE3 0xFFC03820 /* RSI Response Register */ +#define RSI_DATA_TIMER 0xFFC03824 /* RSI Data Timer Register */ +#define RSI_DATA_LGTH 0xFFC03828 /* RSI Data Length Register */ +#define RSI_DATA_CONTROL 0xFFC0382C /* RSI Data Control Register */ +#define RSI_DATA_CNT 0xFFC03830 /* RSI Data Counter Register */ +#define RSI_STATUS 0xFFC03834 /* RSI Status Register */ +#define RSI_STATUSCL 0xFFC03838 /* RSI Status Clear Register */ +#define RSI_MASK0 0xFFC0383C /* RSI Interrupt 0 Mask Register */ +#define RSI_MASK1 0xFFC03840 /* RSI Interrupt 1 Mask Register */ +#define RSI_FIFO_CNT 0xFFC03848 /* RSI FIFO Counter Register */ +#define RSI_CEATA_CONTROL 0xFFC0384C /* RSI CEATA Register */ +#define RSI_FIFO 0xFFC03880 /* RSI Data FIFO Register */ +#define RSI_ESTAT 0xFFC038C0 /* RSI Exception Status Register */ +#define RSI_EMASK 0xFFC038C4 /* RSI Exception Mask Register */ +#define RSI_CONFIG 0xFFC038C8 /* RSI Configuration Register */ +#define RSI_RD_WAIT_EN 0xFFC038CC /* RSI Read Wait Enable Register */ +#define RSI_PID0 0xFFC03FE0 /* RSI Peripheral ID Register 0 */ +#define RSI_PID1 0xFFC03FE4 /* RSI Peripheral ID Register 1 */ +#define RSI_PID2 0xFFC03FE8 /* RSI Peripheral ID Register 2 */ +#define RSI_PID3 0xFFC03FEC /* RSI Peripheral ID Register 3 */ +#define RSI_PID4 0xFFC03FF0 /* RSI Peripheral ID Register 4 */ +#define RSI_PID5 0xFFC03FF4 /* RSI Peripheral ID Register 5 */ +#define RSI_PID6 0xFFC03FF8 /* RSI Peripheral ID Register 6 */ +#define RSI_PID7 0xFFC03FFC /* RSI Peripheral ID Register 7 */ + +#endif /* _DEF_BF514_H */ diff --git a/arch/blackfin/mach-bf518/include/mach/defBF516.h b/arch/blackfin/mach-bf518/include/mach/defBF516.h new file mode 100644 index 000000000000..149a269306c5 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/defBF516.h @@ -0,0 +1,490 @@ +/* + * File: include/asm-blackfin/mach-bf518/defBF516.h + * Based on: + * Author: + * + * Created: + * Description: + * + * Rev: + * + * Modified: + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _DEF_BF516_H +#define _DEF_BF516_H + +/* Include all Core registers and bit definitions */ +#include + +/* SYSTEM & MMR ADDRESS DEFINITIONS FOR ADSP-BF516 */ + +/* Include defBF51x_base.h for the set of #defines that are common to all ADSP-BF51x processors */ +#include "defBF51x_base.h" + +/* The following are the #defines needed by ADSP-BF516 that are not in the common header */ +/* 10/100 Ethernet Controller (0xFFC03000 - 0xFFC031FF) */ + +#define EMAC_OPMODE 0xFFC03000 /* Operating Mode Register */ +#define EMAC_ADDRLO 0xFFC03004 /* Address Low (32 LSBs) Register */ +#define EMAC_ADDRHI 0xFFC03008 /* Address High (16 MSBs) Register */ +#define EMAC_HASHLO 0xFFC0300C /* Multicast Hash Table Low (Bins 31-0) Register */ +#define EMAC_HASHHI 0xFFC03010 /* Multicast Hash Table High (Bins 63-32) Register */ +#define EMAC_STAADD 0xFFC03014 /* Station Management Address Register */ +#define EMAC_STADAT 0xFFC03018 /* Station Management Data Register */ +#define EMAC_FLC 0xFFC0301C /* Flow Control Register */ +#define EMAC_VLAN1 0xFFC03020 /* VLAN1 Tag Register */ +#define EMAC_VLAN2 0xFFC03024 /* VLAN2 Tag Register */ +#define EMAC_WKUP_CTL 0xFFC0302C /* Wake-Up Control/Status Register */ +#define EMAC_WKUP_FFMSK0 0xFFC03030 /* Wake-Up Frame Filter 0 Byte Mask Register */ +#define EMAC_WKUP_FFMSK1 0xFFC03034 /* Wake-Up Frame Filter 1 Byte Mask Register */ +#define EMAC_WKUP_FFMSK2 0xFFC03038 /* Wake-Up Frame Filter 2 Byte Mask Register */ +#define EMAC_WKUP_FFMSK3 0xFFC0303C /* Wake-Up Frame Filter 3 Byte Mask Register */ +#define EMAC_WKUP_FFCMD 0xFFC03040 /* Wake-Up Frame Filter Commands Register */ +#define EMAC_WKUP_FFOFF 0xFFC03044 /* Wake-Up Frame Filter Offsets Register */ +#define EMAC_WKUP_FFCRC0 0xFFC03048 /* Wake-Up Frame Filter 0,1 CRC-16 Register */ +#define EMAC_WKUP_FFCRC1 0xFFC0304C /* Wake-Up Frame Filter 2,3 CRC-16 Register */ + +#define EMAC_SYSCTL 0xFFC03060 /* EMAC System Control Register */ +#define EMAC_SYSTAT 0xFFC03064 /* EMAC System Status Register */ +#define EMAC_RX_STAT 0xFFC03068 /* RX Current Frame Status Register */ +#define EMAC_RX_STKY 0xFFC0306C /* RX Sticky Frame Status Register */ +#define EMAC_RX_IRQE 0xFFC03070 /* RX Frame Status Interrupt Enables Register */ +#define EMAC_TX_STAT 0xFFC03074 /* TX Current Frame Status Register */ +#define EMAC_TX_STKY 0xFFC03078 /* TX Sticky Frame Status Register */ +#define EMAC_TX_IRQE 0xFFC0307C /* TX Frame Status Interrupt Enables Register */ + +#define EMAC_MMC_CTL 0xFFC03080 /* MMC Counter Control Register */ +#define EMAC_MMC_RIRQS 0xFFC03084 /* MMC RX Interrupt Status Register */ +#define EMAC_MMC_RIRQE 0xFFC03088 /* MMC RX Interrupt Enables Register */ +#define EMAC_MMC_TIRQS 0xFFC0308C /* MMC TX Interrupt Status Register */ +#define EMAC_MMC_TIRQE 0xFFC03090 /* MMC TX Interrupt Enables Register */ + +#define EMAC_RXC_OK 0xFFC03100 /* RX Frame Successful Count */ +#define EMAC_RXC_FCS 0xFFC03104 /* RX Frame FCS Failure Count */ +#define EMAC_RXC_ALIGN 0xFFC03108 /* RX Alignment Error Count */ +#define EMAC_RXC_OCTET 0xFFC0310C /* RX Octets Successfully Received Count */ +#define EMAC_RXC_DMAOVF 0xFFC03110 /* Internal MAC Sublayer Error RX Frame Count */ +#define EMAC_RXC_UNICST 0xFFC03114 /* Unicast RX Frame Count */ +#define EMAC_RXC_MULTI 0xFFC03118 /* Multicast RX Frame Count */ +#define EMAC_RXC_BROAD 0xFFC0311C /* Broadcast RX Frame Count */ +#define EMAC_RXC_LNERRI 0xFFC03120 /* RX Frame In Range Error Count */ +#define EMAC_RXC_LNERRO 0xFFC03124 /* RX Frame Out Of Range Error Count */ +#define EMAC_RXC_LONG 0xFFC03128 /* RX Frame Too Long Count */ +#define EMAC_RXC_MACCTL 0xFFC0312C /* MAC Control RX Frame Count */ +#define EMAC_RXC_OPCODE 0xFFC03130 /* Unsupported Op-Code RX Frame Count */ +#define EMAC_RXC_PAUSE 0xFFC03134 /* MAC Control Pause RX Frame Count */ +#define EMAC_RXC_ALLFRM 0xFFC03138 /* Overall RX Frame Count */ +#define EMAC_RXC_ALLOCT 0xFFC0313C /* Overall RX Octet Count */ +#define EMAC_RXC_TYPED 0xFFC03140 /* Type/Length Consistent RX Frame Count */ +#define EMAC_RXC_SHORT 0xFFC03144 /* RX Frame Fragment Count - Byte Count x < 64 */ +#define EMAC_RXC_EQ64 0xFFC03148 /* Good RX Frame Count - Byte Count x = 64 */ +#define EMAC_RXC_LT128 0xFFC0314C /* Good RX Frame Count - Byte Count 64 < x < 128 */ +#define EMAC_RXC_LT256 0xFFC03150 /* Good RX Frame Count - Byte Count 128 <= x < 256 */ +#define EMAC_RXC_LT512 0xFFC03154 /* Good RX Frame Count - Byte Count 256 <= x < 512 */ +#define EMAC_RXC_LT1024 0xFFC03158 /* Good RX Frame Count - Byte Count 512 <= x < 1024 */ +#define EMAC_RXC_GE1024 0xFFC0315C /* Good RX Frame Count - Byte Count x >= 1024 */ + +#define EMAC_TXC_OK 0xFFC03180 /* TX Frame Successful Count */ +#define EMAC_TXC_1COL 0xFFC03184 /* TX Frames Successful After Single Collision Count */ +#define EMAC_TXC_GT1COL 0xFFC03188 /* TX Frames Successful After Multiple Collisions Count */ +#define EMAC_TXC_OCTET 0xFFC0318C /* TX Octets Successfully Received Count */ +#define EMAC_TXC_DEFER 0xFFC03190 /* TX Frame Delayed Due To Busy Count */ +#define EMAC_TXC_LATECL 0xFFC03194 /* Late TX Collisions Count */ +#define EMAC_TXC_XS_COL 0xFFC03198 /* TX Frame Failed Due To Excessive Collisions Count */ +#define EMAC_TXC_DMAUND 0xFFC0319C /* Internal MAC Sublayer Error TX Frame Count */ +#define EMAC_TXC_CRSERR 0xFFC031A0 /* Carrier Sense Deasserted During TX Frame Count */ +#define EMAC_TXC_UNICST 0xFFC031A4 /* Unicast TX Frame Count */ +#define EMAC_TXC_MULTI 0xFFC031A8 /* Multicast TX Frame Count */ +#define EMAC_TXC_BROAD 0xFFC031AC /* Broadcast TX Frame Count */ +#define EMAC_TXC_XS_DFR 0xFFC031B0 /* TX Frames With Excessive Deferral Count */ +#define EMAC_TXC_MACCTL 0xFFC031B4 /* MAC Control TX Frame Count */ +#define EMAC_TXC_ALLFRM 0xFFC031B8 /* Overall TX Frame Count */ +#define EMAC_TXC_ALLOCT 0xFFC031BC /* Overall TX Octet Count */ +#define EMAC_TXC_EQ64 0xFFC031C0 /* Good TX Frame Count - Byte Count x = 64 */ +#define EMAC_TXC_LT128 0xFFC031C4 /* Good TX Frame Count - Byte Count 64 < x < 128 */ +#define EMAC_TXC_LT256 0xFFC031C8 /* Good TX Frame Count - Byte Count 128 <= x < 256 */ +#define EMAC_TXC_LT512 0xFFC031CC /* Good TX Frame Count - Byte Count 256 <= x < 512 */ +#define EMAC_TXC_LT1024 0xFFC031D0 /* Good TX Frame Count - Byte Count 512 <= x < 1024 */ +#define EMAC_TXC_GE1024 0xFFC031D4 /* Good TX Frame Count - Byte Count x >= 1024 */ +#define EMAC_TXC_ABORT 0xFFC031D8 /* Total TX Frames Aborted Count */ + +/* Listing for IEEE-Supported Count Registers */ + +#define FramesReceivedOK EMAC_RXC_OK /* RX Frame Successful Count */ +#define FrameCheckSequenceErrors EMAC_RXC_FCS /* RX Frame FCS Failure Count */ +#define AlignmentErrors EMAC_RXC_ALIGN /* RX Alignment Error Count */ +#define OctetsReceivedOK EMAC_RXC_OCTET /* RX Octets Successfully Received Count */ +#define FramesLostDueToIntMACRcvError EMAC_RXC_DMAOVF /* Internal MAC Sublayer Error RX Frame Count */ +#define UnicastFramesReceivedOK EMAC_RXC_UNICST /* Unicast RX Frame Count */ +#define MulticastFramesReceivedOK EMAC_RXC_MULTI /* Multicast RX Frame Count */ +#define BroadcastFramesReceivedOK EMAC_RXC_BROAD /* Broadcast RX Frame Count */ +#define InRangeLengthErrors EMAC_RXC_LNERRI /* RX Frame In Range Error Count */ +#define OutOfRangeLengthField EMAC_RXC_LNERRO /* RX Frame Out Of Range Error Count */ +#define FrameTooLongErrors EMAC_RXC_LONG /* RX Frame Too Long Count */ +#define MACControlFramesReceived EMAC_RXC_MACCTL /* MAC Control RX Frame Count */ +#define UnsupportedOpcodesReceived EMAC_RXC_OPCODE /* Unsupported Op-Code RX Frame Count */ +#define PAUSEMACCtrlFramesReceived EMAC_RXC_PAUSE /* MAC Control Pause RX Frame Count */ +#define FramesReceivedAll EMAC_RXC_ALLFRM /* Overall RX Frame Count */ +#define OctetsReceivedAll EMAC_RXC_ALLOCT /* Overall RX Octet Count */ +#define TypedFramesReceived EMAC_RXC_TYPED /* Type/Length Consistent RX Frame Count */ +#define FramesLenLt64Received EMAC_RXC_SHORT /* RX Frame Fragment Count - Byte Count x < 64 */ +#define FramesLenEq64Received EMAC_RXC_EQ64 /* Good RX Frame Count - Byte Count x = 64 */ +#define FramesLen65_127Received EMAC_RXC_LT128 /* Good RX Frame Count - Byte Count 64 < x < 128 */ +#define FramesLen128_255Received EMAC_RXC_LT256 /* Good RX Frame Count - Byte Count 128 <= x < 256 */ +#define FramesLen256_511Received EMAC_RXC_LT512 /* Good RX Frame Count - Byte Count 256 <= x < 512 */ +#define FramesLen512_1023Received EMAC_RXC_LT1024 /* Good RX Frame Count - Byte Count 512 <= x < 1024 */ +#define FramesLen1024_MaxReceived EMAC_RXC_GE1024 /* Good RX Frame Count - Byte Count x >= 1024 */ + +#define FramesTransmittedOK EMAC_TXC_OK /* TX Frame Successful Count */ +#define SingleCollisionFrames EMAC_TXC_1COL /* TX Frames Successful After Single Collision Count */ +#define MultipleCollisionFrames EMAC_TXC_GT1COL /* TX Frames Successful After Multiple Collisions Count */ +#define OctetsTransmittedOK EMAC_TXC_OCTET /* TX Octets Successfully Received Count */ +#define FramesWithDeferredXmissions EMAC_TXC_DEFER /* TX Frame Delayed Due To Busy Count */ +#define LateCollisions EMAC_TXC_LATECL /* Late TX Collisions Count */ +#define FramesAbortedDueToXSColls EMAC_TXC_XS_COL /* TX Frame Failed Due To Excessive Collisions Count */ +#define FramesLostDueToIntMacXmitError EMAC_TXC_DMAUND /* Internal MAC Sublayer Error TX Frame Count */ +#define CarrierSenseErrors EMAC_TXC_CRSERR /* Carrier Sense Deasserted During TX Frame Count */ +#define UnicastFramesXmittedOK EMAC_TXC_UNICST /* Unicast TX Frame Count */ +#define MulticastFramesXmittedOK EMAC_TXC_MULTI /* Multicast TX Frame Count */ +#define BroadcastFramesXmittedOK EMAC_TXC_BROAD /* Broadcast TX Frame Count */ +#define FramesWithExcessiveDeferral EMAC_TXC_XS_DFR /* TX Frames With Excessive Deferral Count */ +#define MACControlFramesTransmitted EMAC_TXC_MACCTL /* MAC Control TX Frame Count */ +#define FramesTransmittedAll EMAC_TXC_ALLFRM /* Overall TX Frame Count */ +#define OctetsTransmittedAll EMAC_TXC_ALLOCT /* Overall TX Octet Count */ +#define FramesLenEq64Transmitted EMAC_TXC_EQ64 /* Good TX Frame Count - Byte Count x = 64 */ +#define FramesLen65_127Transmitted EMAC_TXC_LT128 /* Good TX Frame Count - Byte Count 64 < x < 128 */ +#define FramesLen128_255Transmitted EMAC_TXC_LT256 /* Good TX Frame Count - Byte Count 128 <= x < 256 */ +#define FramesLen256_511Transmitted EMAC_TXC_LT512 /* Good TX Frame Count - Byte Count 256 <= x < 512 */ +#define FramesLen512_1023Transmitted EMAC_TXC_LT1024 /* Good TX Frame Count - Byte Count 512 <= x < 1024 */ +#define FramesLen1024_MaxTransmitted EMAC_TXC_GE1024 /* Good TX Frame Count - Byte Count x >= 1024 */ +#define TxAbortedFrames EMAC_TXC_ABORT /* Total TX Frames Aborted Count */ + +/*********************************************************************************** +** System MMR Register Bits And Macros +** +** Disclaimer: All macros are intended to make C and Assembly code more readable. +** Use these macros carefully, as any that do left shifts for field +** depositing will result in the lower order bits being destroyed. Any +** macro that shifts left to properly position the bit-field should be +** used as part of an OR to initialize a register and NOT as a dynamic +** modifier UNLESS the lower order bits are saved and ORed back in when +** the macro is used. +*************************************************************************************/ + +/************************ ETHERNET 10/100 CONTROLLER MASKS ************************/ + +/* EMAC_OPMODE Masks */ + +#define RE 0x00000001 /* Receiver Enable */ +#define ASTP 0x00000002 /* Enable Automatic Pad Stripping On RX Frames */ +#define HU 0x00000010 /* Hash Filter Unicast Address */ +#define HM 0x00000020 /* Hash Filter Multicast Address */ +#define PAM 0x00000040 /* Pass-All-Multicast Mode Enable */ +#define PR 0x00000080 /* Promiscuous Mode Enable */ +#define IFE 0x00000100 /* Inverse Filtering Enable */ +#define DBF 0x00000200 /* Disable Broadcast Frame Reception */ +#define PBF 0x00000400 /* Pass Bad Frames Enable */ +#define PSF 0x00000800 /* Pass Short Frames Enable */ +#define RAF 0x00001000 /* Receive-All Mode */ +#define TE 0x00010000 /* Transmitter Enable */ +#define DTXPAD 0x00020000 /* Disable Automatic TX Padding */ +#define DTXCRC 0x00040000 /* Disable Automatic TX CRC Generation */ +#define DC 0x00080000 /* Deferral Check */ +#define BOLMT 0x00300000 /* Back-Off Limit */ +#define BOLMT_10 0x00000000 /* 10-bit range */ +#define BOLMT_8 0x00100000 /* 8-bit range */ +#define BOLMT_4 0x00200000 /* 4-bit range */ +#define BOLMT_1 0x00300000 /* 1-bit range */ +#define DRTY 0x00400000 /* Disable TX Retry On Collision */ +#define LCTRE 0x00800000 /* Enable TX Retry On Late Collision */ +#define RMII 0x01000000 /* RMII/MII* Mode */ +#define RMII_10 0x02000000 /* Speed Select for RMII Port (10MBit/100MBit*) */ +#define FDMODE 0x04000000 /* Duplex Mode Enable (Full/Half*) */ +#define LB 0x08000000 /* Internal Loopback Enable */ +#define DRO 0x10000000 /* Disable Receive Own Frames (Half-Duplex Mode) */ + +/* EMAC_STAADD Masks */ + +#define STABUSY 0x00000001 /* Initiate Station Mgt Reg Access / STA Busy Stat */ +#define STAOP 0x00000002 /* Station Management Operation Code (Write/Read*) */ +#define STADISPRE 0x00000004 /* Disable Preamble Generation */ +#define STAIE 0x00000008 /* Station Mgt. Transfer Done Interrupt Enable */ +#define REGAD 0x000007C0 /* STA Register Address */ +#define PHYAD 0x0000F800 /* PHY Device Address */ + +#define SET_REGAD(x) (((x)&0x1F)<< 6 ) /* Set STA Register Address */ +#define SET_PHYAD(x) (((x)&0x1F)<< 11 ) /* Set PHY Device Address */ + +/* EMAC_STADAT Mask */ + +#define STADATA 0x0000FFFF /* Station Management Data */ + +/* EMAC_FLC Masks */ + +#define FLCBUSY 0x00000001 /* Send Flow Ctrl Frame / Flow Ctrl Busy Status */ +#define FLCE 0x00000002 /* Flow Control Enable */ +#define PCF 0x00000004 /* Pass Control Frames */ +#define BKPRSEN 0x00000008 /* Enable Backpressure */ +#define FLCPAUSE 0xFFFF0000 /* Pause Time */ + +#define SET_FLCPAUSE(x) (((x)&0xFFFF)<< 16) /* Set Pause Time */ + +/* EMAC_WKUP_CTL Masks */ + +#define CAPWKFRM 0x00000001 /* Capture Wake-Up Frames */ +#define MPKE 0x00000002 /* Magic Packet Enable */ +#define RWKE 0x00000004 /* Remote Wake-Up Frame Enable */ +#define GUWKE 0x00000008 /* Global Unicast Wake Enable */ +#define MPKS 0x00000020 /* Magic Packet Received Status */ +#define RWKS 0x00000F00 /* Wake-Up Frame Received Status, Filters 3:0 */ + +/* EMAC_WKUP_FFCMD Masks */ + +#define WF0_E 0x00000001 /* Enable Wake-Up Filter 0 */ +#define WF0_T 0x00000008 /* Wake-Up Filter 0 Addr Type (Multicast/Unicast*) */ +#define WF1_E 0x00000100 /* Enable Wake-Up Filter 1 */ +#define WF1_T 0x00000800 /* Wake-Up Filter 1 Addr Type (Multicast/Unicast*) */ +#define WF2_E 0x00010000 /* Enable Wake-Up Filter 2 */ +#define WF2_T 0x00080000 /* Wake-Up Filter 2 Addr Type (Multicast/Unicast*) */ +#define WF3_E 0x01000000 /* Enable Wake-Up Filter 3 */ +#define WF3_T 0x08000000 /* Wake-Up Filter 3 Addr Type (Multicast/Unicast*) */ + +/* EMAC_WKUP_FFOFF Masks */ + +#define WF0_OFF 0x000000FF /* Wake-Up Filter 0 Pattern Offset */ +#define WF1_OFF 0x0000FF00 /* Wake-Up Filter 1 Pattern Offset */ +#define WF2_OFF 0x00FF0000 /* Wake-Up Filter 2 Pattern Offset */ +#define WF3_OFF 0xFF000000 /* Wake-Up Filter 3 Pattern Offset */ + +#define SET_WF0_OFF(x) (((x)&0xFF)<< 0 ) /* Set Wake-Up Filter 0 Byte Offset */ +#define SET_WF1_OFF(x) (((x)&0xFF)<< 8 ) /* Set Wake-Up Filter 1 Byte Offset */ +#define SET_WF2_OFF(x) (((x)&0xFF)<< 16 ) /* Set Wake-Up Filter 2 Byte Offset */ +#define SET_WF3_OFF(x) (((x)&0xFF)<< 24 ) /* Set Wake-Up Filter 3 Byte Offset */ +/* Set ALL Offsets */ +#define SET_WF_OFFS(x0,x1,x2,x3) (SET_WF0_OFF((x0))|SET_WF1_OFF((x1))|SET_WF2_OFF((x2))|SET_WF3_OFF((x3))) + +/* EMAC_WKUP_FFCRC0 Masks */ + +#define WF0_CRC 0x0000FFFF /* Wake-Up Filter 0 Pattern CRC */ +#define WF1_CRC 0xFFFF0000 /* Wake-Up Filter 1 Pattern CRC */ + +#define SET_WF0_CRC(x) (((x)&0xFFFF)<< 0 ) /* Set Wake-Up Filter 0 Target CRC */ +#define SET_WF1_CRC(x) (((x)&0xFFFF)<< 16 ) /* Set Wake-Up Filter 1 Target CRC */ + +/* EMAC_WKUP_FFCRC1 Masks */ + +#define WF2_CRC 0x0000FFFF /* Wake-Up Filter 2 Pattern CRC */ +#define WF3_CRC 0xFFFF0000 /* Wake-Up Filter 3 Pattern CRC */ + +#define SET_WF2_CRC(x) (((x)&0xFFFF)<< 0 ) /* Set Wake-Up Filter 2 Target CRC */ +#define SET_WF3_CRC(x) (((x)&0xFFFF)<< 16 ) /* Set Wake-Up Filter 3 Target CRC */ + +/* EMAC_SYSCTL Masks */ + +#define PHYIE 0x00000001 /* PHY_INT Interrupt Enable */ +#define RXDWA 0x00000002 /* Receive Frame DMA Word Alignment (Odd/Even*) */ +#define RXCKS 0x00000004 /* Enable RX Frame TCP/UDP Checksum Computation */ +#define TXDWA 0x00000010 /* Transmit Frame DMA Word Alignment (Odd/Even*) */ +#define MDCDIV 0x00003F00 /* SCLK:MDC Clock Divisor [MDC=SCLK/(2*(N+1))] */ + +#define SET_MDCDIV(x) (((x)&0x3F)<< 8) /* Set MDC Clock Divisor */ + +/* EMAC_SYSTAT Masks */ + +#define PHYINT 0x00000001 /* PHY_INT Interrupt Status */ +#define MMCINT 0x00000002 /* MMC Counter Interrupt Status */ +#define RXFSINT 0x00000004 /* RX Frame-Status Interrupt Status */ +#define TXFSINT 0x00000008 /* TX Frame-Status Interrupt Status */ +#define WAKEDET 0x00000010 /* Wake-Up Detected Status */ +#define RXDMAERR 0x00000020 /* RX DMA Direction Error Status */ +#define TXDMAERR 0x00000040 /* TX DMA Direction Error Status */ +#define STMDONE 0x00000080 /* Station Mgt. Transfer Done Interrupt Status */ + +/* EMAC_RX_STAT, EMAC_RX_STKY, and EMAC_RX_IRQE Masks */ + +#define RX_FRLEN 0x000007FF /* Frame Length In Bytes */ +#define RX_COMP 0x00001000 /* RX Frame Complete */ +#define RX_OK 0x00002000 /* RX Frame Received With No Errors */ +#define RX_LONG 0x00004000 /* RX Frame Too Long Error */ +#define RX_ALIGN 0x00008000 /* RX Frame Alignment Error */ +#define RX_CRC 0x00010000 /* RX Frame CRC Error */ +#define RX_LEN 0x00020000 /* RX Frame Length Error */ +#define RX_FRAG 0x00040000 /* RX Frame Fragment Error */ +#define RX_ADDR 0x00080000 /* RX Frame Address Filter Failed Error */ +#define RX_DMAO 0x00100000 /* RX Frame DMA Overrun Error */ +#define RX_PHY 0x00200000 /* RX Frame PHY Error */ +#define RX_LATE 0x00400000 /* RX Frame Late Collision Error */ +#define RX_RANGE 0x00800000 /* RX Frame Length Field Out of Range Error */ +#define RX_MULTI 0x01000000 /* RX Multicast Frame Indicator */ +#define RX_BROAD 0x02000000 /* RX Broadcast Frame Indicator */ +#define RX_CTL 0x04000000 /* RX Control Frame Indicator */ +#define RX_UCTL 0x08000000 /* Unsupported RX Control Frame Indicator */ +#define RX_TYPE 0x10000000 /* RX Typed Frame Indicator */ +#define RX_VLAN1 0x20000000 /* RX VLAN1 Frame Indicator */ +#define RX_VLAN2 0x40000000 /* RX VLAN2 Frame Indicator */ +#define RX_ACCEPT 0x80000000 /* RX Frame Accepted Indicator */ + +/* EMAC_TX_STAT, EMAC_TX_STKY, and EMAC_TX_IRQE Masks */ + +#define TX_COMP 0x00000001 /* TX Frame Complete */ +#define TX_OK 0x00000002 /* TX Frame Sent With No Errors */ +#define TX_ECOLL 0x00000004 /* TX Frame Excessive Collision Error */ +#define TX_LATE 0x00000008 /* TX Frame Late Collision Error */ +#define TX_DMAU 0x00000010 /* TX Frame DMA Underrun Error (STAT) */ +#define TX_MACE 0x00000010 /* Internal MAC Error Detected (STKY and IRQE) */ +#define TX_EDEFER 0x00000020 /* TX Frame Excessive Deferral Error */ +#define TX_BROAD 0x00000040 /* TX Broadcast Frame Indicator */ +#define TX_MULTI 0x00000080 /* TX Multicast Frame Indicator */ +#define TX_CCNT 0x00000F00 /* TX Frame Collision Count */ +#define TX_DEFER 0x00001000 /* TX Frame Deferred Indicator */ +#define TX_CRS 0x00002000 /* TX Frame Carrier Sense Not Asserted Error */ +#define TX_LOSS 0x00004000 /* TX Frame Carrier Lost During TX Error */ +#define TX_RETRY 0x00008000 /* TX Frame Successful After Retry */ +#define TX_FRLEN 0x07FF0000 /* TX Frame Length (Bytes) */ + +/* EMAC_MMC_CTL Masks */ +#define RSTC 0x00000001 /* Reset All Counters */ +#define CROLL 0x00000002 /* Counter Roll-Over Enable */ +#define CCOR 0x00000004 /* Counter Clear-On-Read Mode Enable */ +#define MMCE 0x00000008 /* Enable MMC Counter Operation */ + +/* EMAC_MMC_RIRQS and EMAC_MMC_RIRQE Masks */ +#define RX_OK_CNT 0x00000001 /* RX Frames Received With No Errors */ +#define RX_FCS_CNT 0x00000002 /* RX Frames W/Frame Check Sequence Errors */ +#define RX_ALIGN_CNT 0x00000004 /* RX Frames With Alignment Errors */ +#define RX_OCTET_CNT 0x00000008 /* RX Octets Received OK */ +#define RX_LOST_CNT 0x00000010 /* RX Frames Lost Due To Internal MAC RX Error */ +#define RX_UNI_CNT 0x00000020 /* Unicast RX Frames Received OK */ +#define RX_MULTI_CNT 0x00000040 /* Multicast RX Frames Received OK */ +#define RX_BROAD_CNT 0x00000080 /* Broadcast RX Frames Received OK */ +#define RX_IRL_CNT 0x00000100 /* RX Frames With In-Range Length Errors */ +#define RX_ORL_CNT 0x00000200 /* RX Frames With Out-Of-Range Length Errors */ +#define RX_LONG_CNT 0x00000400 /* RX Frames With Frame Too Long Errors */ +#define RX_MACCTL_CNT 0x00000800 /* MAC Control RX Frames Received */ +#define RX_OPCODE_CTL 0x00001000 /* Unsupported Op-Code RX Frames Received */ +#define RX_PAUSE_CNT 0x00002000 /* PAUSEMAC Control RX Frames Received */ +#define RX_ALLF_CNT 0x00004000 /* All RX Frames Received */ +#define RX_ALLO_CNT 0x00008000 /* All RX Octets Received */ +#define RX_TYPED_CNT 0x00010000 /* Typed RX Frames Received */ +#define RX_SHORT_CNT 0x00020000 /* RX Frame Fragments (< 64 Bytes) Received */ +#define RX_EQ64_CNT 0x00040000 /* 64-Byte RX Frames Received */ +#define RX_LT128_CNT 0x00080000 /* 65-127-Byte RX Frames Received */ +#define RX_LT256_CNT 0x00100000 /* 128-255-Byte RX Frames Received */ +#define RX_LT512_CNT 0x00200000 /* 256-511-Byte RX Frames Received */ +#define RX_LT1024_CNT 0x00400000 /* 512-1023-Byte RX Frames Received */ +#define RX_GE1024_CNT 0x00800000 /* 1024-Max-Byte RX Frames Received */ + +/* EMAC_MMC_TIRQS and EMAC_MMC_TIRQE Masks */ + +#define TX_OK_CNT 0x00000001 /* TX Frames Sent OK */ +#define TX_SCOLL_CNT 0x00000002 /* TX Frames With Single Collisions */ +#define TX_MCOLL_CNT 0x00000004 /* TX Frames With Multiple Collisions */ +#define TX_OCTET_CNT 0x00000008 /* TX Octets Sent OK */ +#define TX_DEFER_CNT 0x00000010 /* TX Frames With Deferred Transmission */ +#define TX_LATE_CNT 0x00000020 /* TX Frames With Late Collisions */ +#define TX_ABORTC_CNT 0x00000040 /* TX Frames Aborted Due To Excess Collisions */ +#define TX_LOST_CNT 0x00000080 /* TX Frames Lost Due To Internal MAC TX Error */ +#define TX_CRS_CNT 0x00000100 /* TX Frames With Carrier Sense Errors */ +#define TX_UNI_CNT 0x00000200 /* Unicast TX Frames Sent */ +#define TX_MULTI_CNT 0x00000400 /* Multicast TX Frames Sent */ +#define TX_BROAD_CNT 0x00000800 /* Broadcast TX Frames Sent */ +#define TX_EXDEF_CTL 0x00001000 /* TX Frames With Excessive Deferral */ +#define TX_MACCTL_CNT 0x00002000 /* MAC Control TX Frames Sent */ +#define TX_ALLF_CNT 0x00004000 /* All TX Frames Sent */ +#define TX_ALLO_CNT 0x00008000 /* All TX Octets Sent */ +#define TX_EQ64_CNT 0x00010000 /* 64-Byte TX Frames Sent */ +#define TX_LT128_CNT 0x00020000 /* 65-127-Byte TX Frames Sent */ +#define TX_LT256_CNT 0x00040000 /* 128-255-Byte TX Frames Sent */ +#define TX_LT512_CNT 0x00080000 /* 256-511-Byte TX Frames Sent */ +#define TX_LT1024_CNT 0x00100000 /* 512-1023-Byte TX Frames Sent */ +#define TX_GE1024_CNT 0x00200000 /* 1024-Max-Byte TX Frames Sent */ +#define TX_ABORT_CNT 0x00400000 /* TX Frames Aborted */ + +/* SDH Registers */ + +#define SDH_PWR_CTL 0xFFC03900 /* SDH Power Control */ +#define SDH_CLK_CTL 0xFFC03904 /* SDH Clock Control */ +#define SDH_ARGUMENT 0xFFC03908 /* SDH Argument */ +#define SDH_COMMAND 0xFFC0390C /* SDH Command */ +#define SDH_RESP_CMD 0xFFC03910 /* SDH Response Command */ +#define SDH_RESPONSE0 0xFFC03914 /* SDH Response0 */ +#define SDH_RESPONSE1 0xFFC03918 /* SDH Response1 */ +#define SDH_RESPONSE2 0xFFC0391C /* SDH Response2 */ +#define SDH_RESPONSE3 0xFFC03920 /* SDH Response3 */ +#define SDH_DATA_TIMER 0xFFC03924 /* SDH Data Timer */ +#define SDH_DATA_LGTH 0xFFC03928 /* SDH Data Length */ +#define SDH_DATA_CTL 0xFFC0392C /* SDH Data Control */ +#define SDH_DATA_CNT 0xFFC03930 /* SDH Data Counter */ +#define SDH_STATUS 0xFFC03934 /* SDH Status */ +#define SDH_STATUS_CLR 0xFFC03938 /* SDH Status Clear */ +#define SDH_MASK0 0xFFC0393C /* SDH Interrupt0 Mask */ +#define SDH_MASK1 0xFFC03940 /* SDH Interrupt1 Mask */ +#define SDH_FIFO_CNT 0xFFC03948 /* SDH FIFO Counter */ +#define SDH_FIFO 0xFFC03980 /* SDH Data FIFO */ +#define SDH_E_STATUS 0xFFC039C0 /* SDH Exception Status */ +#define SDH_E_MASK 0xFFC039C4 /* SDH Exception Mask */ +#define SDH_CFG 0xFFC039C8 /* SDH Configuration */ +#define SDH_RD_WAIT_EN 0xFFC039CC /* SDH Read Wait Enable */ +#define SDH_PID0 0xFFC039D0 /* SDH Peripheral Identification0 */ +#define SDH_PID1 0xFFC039D4 /* SDH Peripheral Identification1 */ +#define SDH_PID2 0xFFC039D8 /* SDH Peripheral Identification2 */ +#define SDH_PID3 0xFFC039DC /* SDH Peripheral Identification3 */ +#define SDH_PID4 0xFFC039E0 /* SDH Peripheral Identification4 */ +#define SDH_PID5 0xFFC039E4 /* SDH Peripheral Identification5 */ +#define SDH_PID6 0xFFC039E8 /* SDH Peripheral Identification6 */ +#define SDH_PID7 0xFFC039EC /* SDH Peripheral Identification7 */ + +/* Removable Storage Interface Registers */ + +#define RSI_PWR_CONTROL 0xFFC03800 /* RSI Power Control Register */ +#define RSI_CLK_CONTROL 0xFFC03804 /* RSI Clock Control Register */ +#define RSI_ARGUMENT 0xFFC03808 /* RSI Argument Register */ +#define RSI_COMMAND 0xFFC0380C /* RSI Command Register */ +#define RSI_RESP_CMD 0xFFC03810 /* RSI Response Command Register */ +#define RSI_RESPONSE0 0xFFC03814 /* RSI Response Register */ +#define RSI_RESPONSE1 0xFFC03818 /* RSI Response Register */ +#define RSI_RESPONSE2 0xFFC0381C /* RSI Response Register */ +#define RSI_RESPONSE3 0xFFC03820 /* RSI Response Register */ +#define RSI_DATA_TIMER 0xFFC03824 /* RSI Data Timer Register */ +#define RSI_DATA_LGTH 0xFFC03828 /* RSI Data Length Register */ +#define RSI_DATA_CONTROL 0xFFC0382C /* RSI Data Control Register */ +#define RSI_DATA_CNT 0xFFC03830 /* RSI Data Counter Register */ +#define RSI_STATUS 0xFFC03834 /* RSI Status Register */ +#define RSI_STATUSCL 0xFFC03838 /* RSI Status Clear Register */ +#define RSI_MASK0 0xFFC0383C /* RSI Interrupt 0 Mask Register */ +#define RSI_MASK1 0xFFC03840 /* RSI Interrupt 1 Mask Register */ +#define RSI_FIFO_CNT 0xFFC03848 /* RSI FIFO Counter Register */ +#define RSI_CEATA_CONTROL 0xFFC0384C /* RSI CEATA Register */ +#define RSI_FIFO 0xFFC03880 /* RSI Data FIFO Register */ +#define RSI_ESTAT 0xFFC038C0 /* RSI Exception Status Register */ +#define RSI_EMASK 0xFFC038C4 /* RSI Exception Mask Register */ +#define RSI_CONFIG 0xFFC038C8 /* RSI Configuration Register */ +#define RSI_RD_WAIT_EN 0xFFC038CC /* RSI Read Wait Enable Register */ +#define RSI_PID0 0xFFC03FE0 /* RSI Peripheral ID Register 0 */ +#define RSI_PID1 0xFFC03FE4 /* RSI Peripheral ID Register 1 */ +#define RSI_PID2 0xFFC03FE8 /* RSI Peripheral ID Register 2 */ +#define RSI_PID3 0xFFC03FEC /* RSI Peripheral ID Register 3 */ +#define RSI_PID4 0xFFC03FF0 /* RSI Peripheral ID Register 4 */ +#define RSI_PID5 0xFFC03FF4 /* RSI Peripheral ID Register 5 */ +#define RSI_PID6 0xFFC03FF8 /* RSI Peripheral ID Register 6 */ +#define RSI_PID7 0xFFC03FFC /* RSI Peripheral ID Register 7 */ + +#endif /* _DEF_BF516_H */ diff --git a/arch/blackfin/mach-bf518/include/mach/defBF518.h b/arch/blackfin/mach-bf518/include/mach/defBF518.h new file mode 100644 index 000000000000..4eaade15dd76 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/defBF518.h @@ -0,0 +1,516 @@ +/* + * File: include/asm-blackfin/mach-bf518/defBF518.h + * Based on: + * Author: + * + * Created: + * Description: + * + * Rev: + * + * Modified: + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _DEF_BF518_H +#define _DEF_BF518_H + +/* Include all Core registers and bit definitions */ +#include + +/* SYSTEM & MMR ADDRESS DEFINITIONS FOR ADSP-BF518 */ + +/* Include defBF51x_base.h for the set of #defines that are common to all ADSP-BF51x processors */ +#include "defBF51x_base.h" + +/* The following are the #defines needed by ADSP-BF518 that are not in the common header */ +/* 10/100 Ethernet Controller (0xFFC03000 - 0xFFC031FF) */ + +#define EMAC_OPMODE 0xFFC03000 /* Operating Mode Register */ +#define EMAC_ADDRLO 0xFFC03004 /* Address Low (32 LSBs) Register */ +#define EMAC_ADDRHI 0xFFC03008 /* Address High (16 MSBs) Register */ +#define EMAC_HASHLO 0xFFC0300C /* Multicast Hash Table Low (Bins 31-0) Register */ +#define EMAC_HASHHI 0xFFC03010 /* Multicast Hash Table High (Bins 63-32) Register */ +#define EMAC_STAADD 0xFFC03014 /* Station Management Address Register */ +#define EMAC_STADAT 0xFFC03018 /* Station Management Data Register */ +#define EMAC_FLC 0xFFC0301C /* Flow Control Register */ +#define EMAC_VLAN1 0xFFC03020 /* VLAN1 Tag Register */ +#define EMAC_VLAN2 0xFFC03024 /* VLAN2 Tag Register */ +#define EMAC_WKUP_CTL 0xFFC0302C /* Wake-Up Control/Status Register */ +#define EMAC_WKUP_FFMSK0 0xFFC03030 /* Wake-Up Frame Filter 0 Byte Mask Register */ +#define EMAC_WKUP_FFMSK1 0xFFC03034 /* Wake-Up Frame Filter 1 Byte Mask Register */ +#define EMAC_WKUP_FFMSK2 0xFFC03038 /* Wake-Up Frame Filter 2 Byte Mask Register */ +#define EMAC_WKUP_FFMSK3 0xFFC0303C /* Wake-Up Frame Filter 3 Byte Mask Register */ +#define EMAC_WKUP_FFCMD 0xFFC03040 /* Wake-Up Frame Filter Commands Register */ +#define EMAC_WKUP_FFOFF 0xFFC03044 /* Wake-Up Frame Filter Offsets Register */ +#define EMAC_WKUP_FFCRC0 0xFFC03048 /* Wake-Up Frame Filter 0,1 CRC-16 Register */ +#define EMAC_WKUP_FFCRC1 0xFFC0304C /* Wake-Up Frame Filter 2,3 CRC-16 Register */ + +#define EMAC_SYSCTL 0xFFC03060 /* EMAC System Control Register */ +#define EMAC_SYSTAT 0xFFC03064 /* EMAC System Status Register */ +#define EMAC_RX_STAT 0xFFC03068 /* RX Current Frame Status Register */ +#define EMAC_RX_STKY 0xFFC0306C /* RX Sticky Frame Status Register */ +#define EMAC_RX_IRQE 0xFFC03070 /* RX Frame Status Interrupt Enables Register */ +#define EMAC_TX_STAT 0xFFC03074 /* TX Current Frame Status Register */ +#define EMAC_TX_STKY 0xFFC03078 /* TX Sticky Frame Status Register */ +#define EMAC_TX_IRQE 0xFFC0307C /* TX Frame Status Interrupt Enables Register */ + +#define EMAC_MMC_CTL 0xFFC03080 /* MMC Counter Control Register */ +#define EMAC_MMC_RIRQS 0xFFC03084 /* MMC RX Interrupt Status Register */ +#define EMAC_MMC_RIRQE 0xFFC03088 /* MMC RX Interrupt Enables Register */ +#define EMAC_MMC_TIRQS 0xFFC0308C /* MMC TX Interrupt Status Register */ +#define EMAC_MMC_TIRQE 0xFFC03090 /* MMC TX Interrupt Enables Register */ + +#define EMAC_RXC_OK 0xFFC03100 /* RX Frame Successful Count */ +#define EMAC_RXC_FCS 0xFFC03104 /* RX Frame FCS Failure Count */ +#define EMAC_RXC_ALIGN 0xFFC03108 /* RX Alignment Error Count */ +#define EMAC_RXC_OCTET 0xFFC0310C /* RX Octets Successfully Received Count */ +#define EMAC_RXC_DMAOVF 0xFFC03110 /* Internal MAC Sublayer Error RX Frame Count */ +#define EMAC_RXC_UNICST 0xFFC03114 /* Unicast RX Frame Count */ +#define EMAC_RXC_MULTI 0xFFC03118 /* Multicast RX Frame Count */ +#define EMAC_RXC_BROAD 0xFFC0311C /* Broadcast RX Frame Count */ +#define EMAC_RXC_LNERRI 0xFFC03120 /* RX Frame In Range Error Count */ +#define EMAC_RXC_LNERRO 0xFFC03124 /* RX Frame Out Of Range Error Count */ +#define EMAC_RXC_LONG 0xFFC03128 /* RX Frame Too Long Count */ +#define EMAC_RXC_MACCTL 0xFFC0312C /* MAC Control RX Frame Count */ +#define EMAC_RXC_OPCODE 0xFFC03130 /* Unsupported Op-Code RX Frame Count */ +#define EMAC_RXC_PAUSE 0xFFC03134 /* MAC Control Pause RX Frame Count */ +#define EMAC_RXC_ALLFRM 0xFFC03138 /* Overall RX Frame Count */ +#define EMAC_RXC_ALLOCT 0xFFC0313C /* Overall RX Octet Count */ +#define EMAC_RXC_TYPED 0xFFC03140 /* Type/Length Consistent RX Frame Count */ +#define EMAC_RXC_SHORT 0xFFC03144 /* RX Frame Fragment Count - Byte Count x < 64 */ +#define EMAC_RXC_EQ64 0xFFC03148 /* Good RX Frame Count - Byte Count x = 64 */ +#define EMAC_RXC_LT128 0xFFC0314C /* Good RX Frame Count - Byte Count 64 < x < 128 */ +#define EMAC_RXC_LT256 0xFFC03150 /* Good RX Frame Count - Byte Count 128 <= x < 256 */ +#define EMAC_RXC_LT512 0xFFC03154 /* Good RX Frame Count - Byte Count 256 <= x < 512 */ +#define EMAC_RXC_LT1024 0xFFC03158 /* Good RX Frame Count - Byte Count 512 <= x < 1024 */ +#define EMAC_RXC_GE1024 0xFFC0315C /* Good RX Frame Count - Byte Count x >= 1024 */ + +#define EMAC_TXC_OK 0xFFC03180 /* TX Frame Successful Count */ +#define EMAC_TXC_1COL 0xFFC03184 /* TX Frames Successful After Single Collision Count */ +#define EMAC_TXC_GT1COL 0xFFC03188 /* TX Frames Successful After Multiple Collisions Count */ +#define EMAC_TXC_OCTET 0xFFC0318C /* TX Octets Successfully Received Count */ +#define EMAC_TXC_DEFER 0xFFC03190 /* TX Frame Delayed Due To Busy Count */ +#define EMAC_TXC_LATECL 0xFFC03194 /* Late TX Collisions Count */ +#define EMAC_TXC_XS_COL 0xFFC03198 /* TX Frame Failed Due To Excessive Collisions Count */ +#define EMAC_TXC_DMAUND 0xFFC0319C /* Internal MAC Sublayer Error TX Frame Count */ +#define EMAC_TXC_CRSERR 0xFFC031A0 /* Carrier Sense Deasserted During TX Frame Count */ +#define EMAC_TXC_UNICST 0xFFC031A4 /* Unicast TX Frame Count */ +#define EMAC_TXC_MULTI 0xFFC031A8 /* Multicast TX Frame Count */ +#define EMAC_TXC_BROAD 0xFFC031AC /* Broadcast TX Frame Count */ +#define EMAC_TXC_XS_DFR 0xFFC031B0 /* TX Frames With Excessive Deferral Count */ +#define EMAC_TXC_MACCTL 0xFFC031B4 /* MAC Control TX Frame Count */ +#define EMAC_TXC_ALLFRM 0xFFC031B8 /* Overall TX Frame Count */ +#define EMAC_TXC_ALLOCT 0xFFC031BC /* Overall TX Octet Count */ +#define EMAC_TXC_EQ64 0xFFC031C0 /* Good TX Frame Count - Byte Count x = 64 */ +#define EMAC_TXC_LT128 0xFFC031C4 /* Good TX Frame Count - Byte Count 64 < x < 128 */ +#define EMAC_TXC_LT256 0xFFC031C8 /* Good TX Frame Count - Byte Count 128 <= x < 256 */ +#define EMAC_TXC_LT512 0xFFC031CC /* Good TX Frame Count - Byte Count 256 <= x < 512 */ +#define EMAC_TXC_LT1024 0xFFC031D0 /* Good TX Frame Count - Byte Count 512 <= x < 1024 */ +#define EMAC_TXC_GE1024 0xFFC031D4 /* Good TX Frame Count - Byte Count x >= 1024 */ +#define EMAC_TXC_ABORT 0xFFC031D8 /* Total TX Frames Aborted Count */ + +/* Listing for IEEE-Supported Count Registers */ + +#define FramesReceivedOK EMAC_RXC_OK /* RX Frame Successful Count */ +#define FrameCheckSequenceErrors EMAC_RXC_FCS /* RX Frame FCS Failure Count */ +#define AlignmentErrors EMAC_RXC_ALIGN /* RX Alignment Error Count */ +#define OctetsReceivedOK EMAC_RXC_OCTET /* RX Octets Successfully Received Count */ +#define FramesLostDueToIntMACRcvError EMAC_RXC_DMAOVF /* Internal MAC Sublayer Error RX Frame Count */ +#define UnicastFramesReceivedOK EMAC_RXC_UNICST /* Unicast RX Frame Count */ +#define MulticastFramesReceivedOK EMAC_RXC_MULTI /* Multicast RX Frame Count */ +#define BroadcastFramesReceivedOK EMAC_RXC_BROAD /* Broadcast RX Frame Count */ +#define InRangeLengthErrors EMAC_RXC_LNERRI /* RX Frame In Range Error Count */ +#define OutOfRangeLengthField EMAC_RXC_LNERRO /* RX Frame Out Of Range Error Count */ +#define FrameTooLongErrors EMAC_RXC_LONG /* RX Frame Too Long Count */ +#define MACControlFramesReceived EMAC_RXC_MACCTL /* MAC Control RX Frame Count */ +#define UnsupportedOpcodesReceived EMAC_RXC_OPCODE /* Unsupported Op-Code RX Frame Count */ +#define PAUSEMACCtrlFramesReceived EMAC_RXC_PAUSE /* MAC Control Pause RX Frame Count */ +#define FramesReceivedAll EMAC_RXC_ALLFRM /* Overall RX Frame Count */ +#define OctetsReceivedAll EMAC_RXC_ALLOCT /* Overall RX Octet Count */ +#define TypedFramesReceived EMAC_RXC_TYPED /* Type/Length Consistent RX Frame Count */ +#define FramesLenLt64Received EMAC_RXC_SHORT /* RX Frame Fragment Count - Byte Count x < 64 */ +#define FramesLenEq64Received EMAC_RXC_EQ64 /* Good RX Frame Count - Byte Count x = 64 */ +#define FramesLen65_127Received EMAC_RXC_LT128 /* Good RX Frame Count - Byte Count 64 < x < 128 */ +#define FramesLen128_255Received EMAC_RXC_LT256 /* Good RX Frame Count - Byte Count 128 <= x < 256 */ +#define FramesLen256_511Received EMAC_RXC_LT512 /* Good RX Frame Count - Byte Count 256 <= x < 512 */ +#define FramesLen512_1023Received EMAC_RXC_LT1024 /* Good RX Frame Count - Byte Count 512 <= x < 1024 */ +#define FramesLen1024_MaxReceived EMAC_RXC_GE1024 /* Good RX Frame Count - Byte Count x >= 1024 */ + +#define FramesTransmittedOK EMAC_TXC_OK /* TX Frame Successful Count */ +#define SingleCollisionFrames EMAC_TXC_1COL /* TX Frames Successful After Single Collision Count */ +#define MultipleCollisionFrames EMAC_TXC_GT1COL /* TX Frames Successful After Multiple Collisions Count */ +#define OctetsTransmittedOK EMAC_TXC_OCTET /* TX Octets Successfully Received Count */ +#define FramesWithDeferredXmissions EMAC_TXC_DEFER /* TX Frame Delayed Due To Busy Count */ +#define LateCollisions EMAC_TXC_LATECL /* Late TX Collisions Count */ +#define FramesAbortedDueToXSColls EMAC_TXC_XS_COL /* TX Frame Failed Due To Excessive Collisions Count */ +#define FramesLostDueToIntMacXmitError EMAC_TXC_DMAUND /* Internal MAC Sublayer Error TX Frame Count */ +#define CarrierSenseErrors EMAC_TXC_CRSERR /* Carrier Sense Deasserted During TX Frame Count */ +#define UnicastFramesXmittedOK EMAC_TXC_UNICST /* Unicast TX Frame Count */ +#define MulticastFramesXmittedOK EMAC_TXC_MULTI /* Multicast TX Frame Count */ +#define BroadcastFramesXmittedOK EMAC_TXC_BROAD /* Broadcast TX Frame Count */ +#define FramesWithExcessiveDeferral EMAC_TXC_XS_DFR /* TX Frames With Excessive Deferral Count */ +#define MACControlFramesTransmitted EMAC_TXC_MACCTL /* MAC Control TX Frame Count */ +#define FramesTransmittedAll EMAC_TXC_ALLFRM /* Overall TX Frame Count */ +#define OctetsTransmittedAll EMAC_TXC_ALLOCT /* Overall TX Octet Count */ +#define FramesLenEq64Transmitted EMAC_TXC_EQ64 /* Good TX Frame Count - Byte Count x = 64 */ +#define FramesLen65_127Transmitted EMAC_TXC_LT128 /* Good TX Frame Count - Byte Count 64 < x < 128 */ +#define FramesLen128_255Transmitted EMAC_TXC_LT256 /* Good TX Frame Count - Byte Count 128 <= x < 256 */ +#define FramesLen256_511Transmitted EMAC_TXC_LT512 /* Good TX Frame Count - Byte Count 256 <= x < 512 */ +#define FramesLen512_1023Transmitted EMAC_TXC_LT1024 /* Good TX Frame Count - Byte Count 512 <= x < 1024 */ +#define FramesLen1024_MaxTransmitted EMAC_TXC_GE1024 /* Good TX Frame Count - Byte Count x >= 1024 */ +#define TxAbortedFrames EMAC_TXC_ABORT /* Total TX Frames Aborted Count */ + +/*********************************************************************************** +** System MMR Register Bits And Macros +** +** Disclaimer: All macros are intended to make C and Assembly code more readable. +** Use these macros carefully, as any that do left shifts for field +** depositing will result in the lower order bits being destroyed. Any +** macro that shifts left to properly position the bit-field should be +** used as part of an OR to initialize a register and NOT as a dynamic +** modifier UNLESS the lower order bits are saved and ORed back in when +** the macro is used. +*************************************************************************************/ + +/************************ ETHERNET 10/100 CONTROLLER MASKS ************************/ + +/* EMAC_OPMODE Masks */ + +#define RE 0x00000001 /* Receiver Enable */ +#define ASTP 0x00000002 /* Enable Automatic Pad Stripping On RX Frames */ +#define HU 0x00000010 /* Hash Filter Unicast Address */ +#define HM 0x00000020 /* Hash Filter Multicast Address */ +#define PAM 0x00000040 /* Pass-All-Multicast Mode Enable */ +#define PR 0x00000080 /* Promiscuous Mode Enable */ +#define IFE 0x00000100 /* Inverse Filtering Enable */ +#define DBF 0x00000200 /* Disable Broadcast Frame Reception */ +#define PBF 0x00000400 /* Pass Bad Frames Enable */ +#define PSF 0x00000800 /* Pass Short Frames Enable */ +#define RAF 0x00001000 /* Receive-All Mode */ +#define TE 0x00010000 /* Transmitter Enable */ +#define DTXPAD 0x00020000 /* Disable Automatic TX Padding */ +#define DTXCRC 0x00040000 /* Disable Automatic TX CRC Generation */ +#define DC 0x00080000 /* Deferral Check */ +#define BOLMT 0x00300000 /* Back-Off Limit */ +#define BOLMT_10 0x00000000 /* 10-bit range */ +#define BOLMT_8 0x00100000 /* 8-bit range */ +#define BOLMT_4 0x00200000 /* 4-bit range */ +#define BOLMT_1 0x00300000 /* 1-bit range */ +#define DRTY 0x00400000 /* Disable TX Retry On Collision */ +#define LCTRE 0x00800000 /* Enable TX Retry On Late Collision */ +#define RMII 0x01000000 /* RMII/MII* Mode */ +#define RMII_10 0x02000000 /* Speed Select for RMII Port (10MBit/100MBit*) */ +#define FDMODE 0x04000000 /* Duplex Mode Enable (Full/Half*) */ +#define LB 0x08000000 /* Internal Loopback Enable */ +#define DRO 0x10000000 /* Disable Receive Own Frames (Half-Duplex Mode) */ + +/* EMAC_STAADD Masks */ + +#define STABUSY 0x00000001 /* Initiate Station Mgt Reg Access / STA Busy Stat */ +#define STAOP 0x00000002 /* Station Management Operation Code (Write/Read*) */ +#define STADISPRE 0x00000004 /* Disable Preamble Generation */ +#define STAIE 0x00000008 /* Station Mgt. Transfer Done Interrupt Enable */ +#define REGAD 0x000007C0 /* STA Register Address */ +#define PHYAD 0x0000F800 /* PHY Device Address */ + +#define SET_REGAD(x) (((x)&0x1F)<< 6 ) /* Set STA Register Address */ +#define SET_PHYAD(x) (((x)&0x1F)<< 11 ) /* Set PHY Device Address */ + +/* EMAC_STADAT Mask */ + +#define STADATA 0x0000FFFF /* Station Management Data */ + +/* EMAC_FLC Masks */ + +#define FLCBUSY 0x00000001 /* Send Flow Ctrl Frame / Flow Ctrl Busy Status */ +#define FLCE 0x00000002 /* Flow Control Enable */ +#define PCF 0x00000004 /* Pass Control Frames */ +#define BKPRSEN 0x00000008 /* Enable Backpressure */ +#define FLCPAUSE 0xFFFF0000 /* Pause Time */ + +#define SET_FLCPAUSE(x) (((x)&0xFFFF)<< 16) /* Set Pause Time */ + +/* EMAC_WKUP_CTL Masks */ + +#define CAPWKFRM 0x00000001 /* Capture Wake-Up Frames */ +#define MPKE 0x00000002 /* Magic Packet Enable */ +#define RWKE 0x00000004 /* Remote Wake-Up Frame Enable */ +#define GUWKE 0x00000008 /* Global Unicast Wake Enable */ +#define MPKS 0x00000020 /* Magic Packet Received Status */ +#define RWKS 0x00000F00 /* Wake-Up Frame Received Status, Filters 3:0 */ + +/* EMAC_WKUP_FFCMD Masks */ + +#define WF0_E 0x00000001 /* Enable Wake-Up Filter 0 */ +#define WF0_T 0x00000008 /* Wake-Up Filter 0 Addr Type (Multicast/Unicast*) */ +#define WF1_E 0x00000100 /* Enable Wake-Up Filter 1 */ +#define WF1_T 0x00000800 /* Wake-Up Filter 1 Addr Type (Multicast/Unicast*) */ +#define WF2_E 0x00010000 /* Enable Wake-Up Filter 2 */ +#define WF2_T 0x00080000 /* Wake-Up Filter 2 Addr Type (Multicast/Unicast*) */ +#define WF3_E 0x01000000 /* Enable Wake-Up Filter 3 */ +#define WF3_T 0x08000000 /* Wake-Up Filter 3 Addr Type (Multicast/Unicast*) */ + +/* EMAC_WKUP_FFOFF Masks */ + +#define WF0_OFF 0x000000FF /* Wake-Up Filter 0 Pattern Offset */ +#define WF1_OFF 0x0000FF00 /* Wake-Up Filter 1 Pattern Offset */ +#define WF2_OFF 0x00FF0000 /* Wake-Up Filter 2 Pattern Offset */ +#define WF3_OFF 0xFF000000 /* Wake-Up Filter 3 Pattern Offset */ + +#define SET_WF0_OFF(x) (((x)&0xFF)<< 0 ) /* Set Wake-Up Filter 0 Byte Offset */ +#define SET_WF1_OFF(x) (((x)&0xFF)<< 8 ) /* Set Wake-Up Filter 1 Byte Offset */ +#define SET_WF2_OFF(x) (((x)&0xFF)<< 16 ) /* Set Wake-Up Filter 2 Byte Offset */ +#define SET_WF3_OFF(x) (((x)&0xFF)<< 24 ) /* Set Wake-Up Filter 3 Byte Offset */ +/* Set ALL Offsets */ +#define SET_WF_OFFS(x0,x1,x2,x3) (SET_WF0_OFF((x0))|SET_WF1_OFF((x1))|SET_WF2_OFF((x2))|SET_WF3_OFF((x3))) + +/* EMAC_WKUP_FFCRC0 Masks */ + +#define WF0_CRC 0x0000FFFF /* Wake-Up Filter 0 Pattern CRC */ +#define WF1_CRC 0xFFFF0000 /* Wake-Up Filter 1 Pattern CRC */ + +#define SET_WF0_CRC(x) (((x)&0xFFFF)<< 0 ) /* Set Wake-Up Filter 0 Target CRC */ +#define SET_WF1_CRC(x) (((x)&0xFFFF)<< 16 ) /* Set Wake-Up Filter 1 Target CRC */ + +/* EMAC_WKUP_FFCRC1 Masks */ + +#define WF2_CRC 0x0000FFFF /* Wake-Up Filter 2 Pattern CRC */ +#define WF3_CRC 0xFFFF0000 /* Wake-Up Filter 3 Pattern CRC */ + +#define SET_WF2_CRC(x) (((x)&0xFFFF)<< 0 ) /* Set Wake-Up Filter 2 Target CRC */ +#define SET_WF3_CRC(x) (((x)&0xFFFF)<< 16 ) /* Set Wake-Up Filter 3 Target CRC */ + +/* EMAC_SYSCTL Masks */ + +#define PHYIE 0x00000001 /* PHY_INT Interrupt Enable */ +#define RXDWA 0x00000002 /* Receive Frame DMA Word Alignment (Odd/Even*) */ +#define RXCKS 0x00000004 /* Enable RX Frame TCP/UDP Checksum Computation */ +#define TXDWA 0x00000010 /* Transmit Frame DMA Word Alignment (Odd/Even*) */ +#define MDCDIV 0x00003F00 /* SCLK:MDC Clock Divisor [MDC=SCLK/(2*(N+1))] */ + +#define SET_MDCDIV(x) (((x)&0x3F)<< 8) /* Set MDC Clock Divisor */ + +/* EMAC_SYSTAT Masks */ + +#define PHYINT 0x00000001 /* PHY_INT Interrupt Status */ +#define MMCINT 0x00000002 /* MMC Counter Interrupt Status */ +#define RXFSINT 0x00000004 /* RX Frame-Status Interrupt Status */ +#define TXFSINT 0x00000008 /* TX Frame-Status Interrupt Status */ +#define WAKEDET 0x00000010 /* Wake-Up Detected Status */ +#define RXDMAERR 0x00000020 /* RX DMA Direction Error Status */ +#define TXDMAERR 0x00000040 /* TX DMA Direction Error Status */ +#define STMDONE 0x00000080 /* Station Mgt. Transfer Done Interrupt Status */ + +/* EMAC_RX_STAT, EMAC_RX_STKY, and EMAC_RX_IRQE Masks */ + +#define RX_FRLEN 0x000007FF /* Frame Length In Bytes */ +#define RX_COMP 0x00001000 /* RX Frame Complete */ +#define RX_OK 0x00002000 /* RX Frame Received With No Errors */ +#define RX_LONG 0x00004000 /* RX Frame Too Long Error */ +#define RX_ALIGN 0x00008000 /* RX Frame Alignment Error */ +#define RX_CRC 0x00010000 /* RX Frame CRC Error */ +#define RX_LEN 0x00020000 /* RX Frame Length Error */ +#define RX_FRAG 0x00040000 /* RX Frame Fragment Error */ +#define RX_ADDR 0x00080000 /* RX Frame Address Filter Failed Error */ +#define RX_DMAO 0x00100000 /* RX Frame DMA Overrun Error */ +#define RX_PHY 0x00200000 /* RX Frame PHY Error */ +#define RX_LATE 0x00400000 /* RX Frame Late Collision Error */ +#define RX_RANGE 0x00800000 /* RX Frame Length Field Out of Range Error */ +#define RX_MULTI 0x01000000 /* RX Multicast Frame Indicator */ +#define RX_BROAD 0x02000000 /* RX Broadcast Frame Indicator */ +#define RX_CTL 0x04000000 /* RX Control Frame Indicator */ +#define RX_UCTL 0x08000000 /* Unsupported RX Control Frame Indicator */ +#define RX_TYPE 0x10000000 /* RX Typed Frame Indicator */ +#define RX_VLAN1 0x20000000 /* RX VLAN1 Frame Indicator */ +#define RX_VLAN2 0x40000000 /* RX VLAN2 Frame Indicator */ +#define RX_ACCEPT 0x80000000 /* RX Frame Accepted Indicator */ + +/* EMAC_TX_STAT, EMAC_TX_STKY, and EMAC_TX_IRQE Masks */ + +#define TX_COMP 0x00000001 /* TX Frame Complete */ +#define TX_OK 0x00000002 /* TX Frame Sent With No Errors */ +#define TX_ECOLL 0x00000004 /* TX Frame Excessive Collision Error */ +#define TX_LATE 0x00000008 /* TX Frame Late Collision Error */ +#define TX_DMAU 0x00000010 /* TX Frame DMA Underrun Error (STAT) */ +#define TX_MACE 0x00000010 /* Internal MAC Error Detected (STKY and IRQE) */ +#define TX_EDEFER 0x00000020 /* TX Frame Excessive Deferral Error */ +#define TX_BROAD 0x00000040 /* TX Broadcast Frame Indicator */ +#define TX_MULTI 0x00000080 /* TX Multicast Frame Indicator */ +#define TX_CCNT 0x00000F00 /* TX Frame Collision Count */ +#define TX_DEFER 0x00001000 /* TX Frame Deferred Indicator */ +#define TX_CRS 0x00002000 /* TX Frame Carrier Sense Not Asserted Error */ +#define TX_LOSS 0x00004000 /* TX Frame Carrier Lost During TX Error */ +#define TX_RETRY 0x00008000 /* TX Frame Successful After Retry */ +#define TX_FRLEN 0x07FF0000 /* TX Frame Length (Bytes) */ + +/* EMAC_MMC_CTL Masks */ +#define RSTC 0x00000001 /* Reset All Counters */ +#define CROLL 0x00000002 /* Counter Roll-Over Enable */ +#define CCOR 0x00000004 /* Counter Clear-On-Read Mode Enable */ +#define MMCE 0x00000008 /* Enable MMC Counter Operation */ + +/* EMAC_MMC_RIRQS and EMAC_MMC_RIRQE Masks */ +#define RX_OK_CNT 0x00000001 /* RX Frames Received With No Errors */ +#define RX_FCS_CNT 0x00000002 /* RX Frames W/Frame Check Sequence Errors */ +#define RX_ALIGN_CNT 0x00000004 /* RX Frames With Alignment Errors */ +#define RX_OCTET_CNT 0x00000008 /* RX Octets Received OK */ +#define RX_LOST_CNT 0x00000010 /* RX Frames Lost Due To Internal MAC RX Error */ +#define RX_UNI_CNT 0x00000020 /* Unicast RX Frames Received OK */ +#define RX_MULTI_CNT 0x00000040 /* Multicast RX Frames Received OK */ +#define RX_BROAD_CNT 0x00000080 /* Broadcast RX Frames Received OK */ +#define RX_IRL_CNT 0x00000100 /* RX Frames With In-Range Length Errors */ +#define RX_ORL_CNT 0x00000200 /* RX Frames With Out-Of-Range Length Errors */ +#define RX_LONG_CNT 0x00000400 /* RX Frames With Frame Too Long Errors */ +#define RX_MACCTL_CNT 0x00000800 /* MAC Control RX Frames Received */ +#define RX_OPCODE_CTL 0x00001000 /* Unsupported Op-Code RX Frames Received */ +#define RX_PAUSE_CNT 0x00002000 /* PAUSEMAC Control RX Frames Received */ +#define RX_ALLF_CNT 0x00004000 /* All RX Frames Received */ +#define RX_ALLO_CNT 0x00008000 /* All RX Octets Received */ +#define RX_TYPED_CNT 0x00010000 /* Typed RX Frames Received */ +#define RX_SHORT_CNT 0x00020000 /* RX Frame Fragments (< 64 Bytes) Received */ +#define RX_EQ64_CNT 0x00040000 /* 64-Byte RX Frames Received */ +#define RX_LT128_CNT 0x00080000 /* 65-127-Byte RX Frames Received */ +#define RX_LT256_CNT 0x00100000 /* 128-255-Byte RX Frames Received */ +#define RX_LT512_CNT 0x00200000 /* 256-511-Byte RX Frames Received */ +#define RX_LT1024_CNT 0x00400000 /* 512-1023-Byte RX Frames Received */ +#define RX_GE1024_CNT 0x00800000 /* 1024-Max-Byte RX Frames Received */ + +/* EMAC_MMC_TIRQS and EMAC_MMC_TIRQE Masks */ + +#define TX_OK_CNT 0x00000001 /* TX Frames Sent OK */ +#define TX_SCOLL_CNT 0x00000002 /* TX Frames With Single Collisions */ +#define TX_MCOLL_CNT 0x00000004 /* TX Frames With Multiple Collisions */ +#define TX_OCTET_CNT 0x00000008 /* TX Octets Sent OK */ +#define TX_DEFER_CNT 0x00000010 /* TX Frames With Deferred Transmission */ +#define TX_LATE_CNT 0x00000020 /* TX Frames With Late Collisions */ +#define TX_ABORTC_CNT 0x00000040 /* TX Frames Aborted Due To Excess Collisions */ +#define TX_LOST_CNT 0x00000080 /* TX Frames Lost Due To Internal MAC TX Error */ +#define TX_CRS_CNT 0x00000100 /* TX Frames With Carrier Sense Errors */ +#define TX_UNI_CNT 0x00000200 /* Unicast TX Frames Sent */ +#define TX_MULTI_CNT 0x00000400 /* Multicast TX Frames Sent */ +#define TX_BROAD_CNT 0x00000800 /* Broadcast TX Frames Sent */ +#define TX_EXDEF_CTL 0x00001000 /* TX Frames With Excessive Deferral */ +#define TX_MACCTL_CNT 0x00002000 /* MAC Control TX Frames Sent */ +#define TX_ALLF_CNT 0x00004000 /* All TX Frames Sent */ +#define TX_ALLO_CNT 0x00008000 /* All TX Octets Sent */ +#define TX_EQ64_CNT 0x00010000 /* 64-Byte TX Frames Sent */ +#define TX_LT128_CNT 0x00020000 /* 65-127-Byte TX Frames Sent */ +#define TX_LT256_CNT 0x00040000 /* 128-255-Byte TX Frames Sent */ +#define TX_LT512_CNT 0x00080000 /* 256-511-Byte TX Frames Sent */ +#define TX_LT1024_CNT 0x00100000 /* 512-1023-Byte TX Frames Sent */ +#define TX_GE1024_CNT 0x00200000 /* 1024-Max-Byte TX Frames Sent */ +#define TX_ABORT_CNT 0x00400000 /* TX Frames Aborted */ + +/* SDH Registers */ + +#define SDH_PWR_CTL 0xFFC03900 /* SDH Power Control */ +#define SDH_CLK_CTL 0xFFC03904 /* SDH Clock Control */ +#define SDH_ARGUMENT 0xFFC03908 /* SDH Argument */ +#define SDH_COMMAND 0xFFC0390C /* SDH Command */ +#define SDH_RESP_CMD 0xFFC03910 /* SDH Response Command */ +#define SDH_RESPONSE0 0xFFC03914 /* SDH Response0 */ +#define SDH_RESPONSE1 0xFFC03918 /* SDH Response1 */ +#define SDH_RESPONSE2 0xFFC0391C /* SDH Response2 */ +#define SDH_RESPONSE3 0xFFC03920 /* SDH Response3 */ +#define SDH_DATA_TIMER 0xFFC03924 /* SDH Data Timer */ +#define SDH_DATA_LGTH 0xFFC03928 /* SDH Data Length */ +#define SDH_DATA_CTL 0xFFC0392C /* SDH Data Control */ +#define SDH_DATA_CNT 0xFFC03930 /* SDH Data Counter */ +#define SDH_STATUS 0xFFC03934 /* SDH Status */ +#define SDH_STATUS_CLR 0xFFC03938 /* SDH Status Clear */ +#define SDH_MASK0 0xFFC0393C /* SDH Interrupt0 Mask */ +#define SDH_MASK1 0xFFC03940 /* SDH Interrupt1 Mask */ +#define SDH_FIFO_CNT 0xFFC03948 /* SDH FIFO Counter */ +#define SDH_FIFO 0xFFC03980 /* SDH Data FIFO */ +#define SDH_E_STATUS 0xFFC039C0 /* SDH Exception Status */ +#define SDH_E_MASK 0xFFC039C4 /* SDH Exception Mask */ +#define SDH_CFG 0xFFC039C8 /* SDH Configuration */ +#define SDH_RD_WAIT_EN 0xFFC039CC /* SDH Read Wait Enable */ +#define SDH_PID0 0xFFC039D0 /* SDH Peripheral Identification0 */ +#define SDH_PID1 0xFFC039D4 /* SDH Peripheral Identification1 */ +#define SDH_PID2 0xFFC039D8 /* SDH Peripheral Identification2 */ +#define SDH_PID3 0xFFC039DC /* SDH Peripheral Identification3 */ +#define SDH_PID4 0xFFC039E0 /* SDH Peripheral Identification4 */ +#define SDH_PID5 0xFFC039E4 /* SDH Peripheral Identification5 */ +#define SDH_PID6 0xFFC039E8 /* SDH Peripheral Identification6 */ +#define SDH_PID7 0xFFC039EC /* SDH Peripheral Identification7 */ + +/* Removable Storage Interface Registers */ + +#define RSI_PWR_CONTROL 0xFFC03800 /* RSI Power Control Register */ +#define RSI_CLK_CONTROL 0xFFC03804 /* RSI Clock Control Register */ +#define RSI_ARGUMENT 0xFFC03808 /* RSI Argument Register */ +#define RSI_COMMAND 0xFFC0380C /* RSI Command Register */ +#define RSI_RESP_CMD 0xFFC03810 /* RSI Response Command Register */ +#define RSI_RESPONSE0 0xFFC03814 /* RSI Response Register */ +#define RSI_RESPONSE1 0xFFC03818 /* RSI Response Register */ +#define RSI_RESPONSE2 0xFFC0381C /* RSI Response Register */ +#define RSI_RESPONSE3 0xFFC03820 /* RSI Response Register */ +#define RSI_DATA_TIMER 0xFFC03824 /* RSI Data Timer Register */ +#define RSI_DATA_LGTH 0xFFC03828 /* RSI Data Length Register */ +#define RSI_DATA_CONTROL 0xFFC0382C /* RSI Data Control Register */ +#define RSI_DATA_CNT 0xFFC03830 /* RSI Data Counter Register */ +#define RSI_STATUS 0xFFC03834 /* RSI Status Register */ +#define RSI_STATUSCL 0xFFC03838 /* RSI Status Clear Register */ +#define RSI_MASK0 0xFFC0383C /* RSI Interrupt 0 Mask Register */ +#define RSI_MASK1 0xFFC03840 /* RSI Interrupt 1 Mask Register */ +#define RSI_FIFO_CNT 0xFFC03848 /* RSI FIFO Counter Register */ +#define RSI_CEATA_CONTROL 0xFFC0384C /* RSI CEATA Register */ +#define RSI_FIFO 0xFFC03880 /* RSI Data FIFO Register */ +#define RSI_ESTAT 0xFFC038C0 /* RSI Exception Status Register */ +#define RSI_EMASK 0xFFC038C4 /* RSI Exception Mask Register */ +#define RSI_CONFIG 0xFFC038C8 /* RSI Configuration Register */ +#define RSI_RD_WAIT_EN 0xFFC038CC /* RSI Read Wait Enable Register */ +#define RSI_PID0 0xFFC03FE0 /* RSI Peripheral ID Register 0 */ +#define RSI_PID1 0xFFC03FE4 /* RSI Peripheral ID Register 1 */ +#define RSI_PID2 0xFFC03FE8 /* RSI Peripheral ID Register 2 */ +#define RSI_PID3 0xFFC03FEC /* RSI Peripheral ID Register 3 */ +#define RSI_PID4 0xFFC03FF0 /* RSI Peripheral ID Register 4 */ +#define RSI_PID5 0xFFC03FF4 /* RSI Peripheral ID Register 5 */ +#define RSI_PID6 0xFFC03FF8 /* RSI Peripheral ID Register 6 */ +#define RSI_PID7 0xFFC03FFC /* RSI Peripheral ID Register 7 */ + +/* PTP TSYNC Registers */ + +#define EMAC_PTP_CTL 0xFFC030A0 /* PTP Block Control */ +#define EMAC_PTP_IE 0xFFC030A4 /* PTP Block Interrupt Enable */ +#define EMAC_PTP_ISTAT 0xFFC030A8 /* PTP Block Interrupt Status */ +#define EMAC_PTP_FOFF 0xFFC030AC /* PTP Filter offset Register */ +#define EMAC_PTP_FV1 0xFFC030B0 /* PTP Filter Value Register 1 */ +#define EMAC_PTP_FV2 0xFFC030B4 /* PTP Filter Value Register 2 */ +#define EMAC_PTP_FV3 0xFFC030B8 /* PTP Filter Value Register 3 */ +#define EMAC_PTP_ADDEND 0xFFC030BC /* PTP Addend for Frequency Compensation */ +#define EMAC_PTP_ACCR 0xFFC030C0 /* PTP Accumulator for Frequency Compensation */ +#define EMAC_PTP_OFFSET 0xFFC030C4 /* PTP Time Offset Register */ +#define EMAC_PTP_TIMELO 0xFFC030C8 /* PTP Precision Clock Time Low */ +#define EMAC_PTP_TIMEHI 0xFFC030CC /* PTP Precision Clock Time High */ +#define EMAC_PTP_RXSNAPLO 0xFFC030D0 /* PTP Receive Snapshot Register Low */ +#define EMAC_PTP_RXSNAPHI 0xFFC030D4 /* PTP Receive Snapshot Register High */ +#define EMAC_PTP_TXSNAPLO 0xFFC030D8 /* PTP Transmit Snapshot Register Low */ +#define EMAC_PTP_TXSNAPHI 0xFFC030DC /* PTP Transmit Snapshot Register High */ +#define EMAC_PTP_ALARMLO 0xFFC030E0 /* PTP Alarm time Low */ +#define EMAC_PTP_ALARMHI 0xFFC030E4 /* PTP Alarm time High */ +#define EMAC_PTP_ID_OFF 0xFFC030E8 /* PTP Capture ID offset register */ +#define EMAC_PTP_ID_SNAP 0xFFC030EC /* PTP Capture ID register */ +#define EMAC_PTP_PPS_STARTLO 0xFFC030F0 /* PPS Start Time Low */ +#define EMAC_PTP_PPS_STARTHI 0xFFC030F4 /* PPS Start Time High */ +#define EMAC_PTP_PPS_PERIOD 0xFFC030F8 /* PPS Count Register */ + +#endif /* _DEF_BF518_H */ diff --git a/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h b/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h new file mode 100644 index 000000000000..1bec8d1c2a73 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h @@ -0,0 +1,1940 @@ +/* + * File: include/asm-blackfin/mach-bf518/defBF51x_base.h + * Based on: + * Author: + * + * Created: + * Description: + * + * Rev: + * + * Modified: + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _DEF_BF51X_H +#define _DEF_BF51X_H + + +/* ************************************************************** */ +/* SYSTEM & MMR ADDRESS DEFINITIONS COMMON TO ALL ADSP-BF51x */ +/* ************************************************************** */ + +/* Clock and System Control (0xFFC00000 - 0xFFC000FF) */ +#define PLL_CTL 0xFFC00000 /* PLL Control Register */ +#define PLL_DIV 0xFFC00004 /* PLL Divide Register */ +#define VR_CTL 0xFFC00008 /* Voltage Regulator Control Register */ +#define PLL_STAT 0xFFC0000C /* PLL Status Register */ +#define PLL_LOCKCNT 0xFFC00010 /* PLL Lock Count Register */ +#define CHIPID 0xFFC00014 /* Device ID Register */ + +/* System Interrupt Controller (0xFFC00100 - 0xFFC001FF) */ +#define SWRST 0xFFC00100 /* Software Reset Register */ +#define SYSCR 0xFFC00104 /* System Configuration Register */ +#define SIC_RVECT 0xFFC00108 /* Interrupt Reset Vector Address Register */ + +#define SIC_IMASK0 0xFFC0010C /* Interrupt Mask Register */ +#define SIC_IAR0 0xFFC00110 /* Interrupt Assignment Register 0 */ +#define SIC_IAR1 0xFFC00114 /* Interrupt Assignment Register 1 */ +#define SIC_IAR2 0xFFC00118 /* Interrupt Assignment Register 2 */ +#define SIC_IAR3 0xFFC0011C /* Interrupt Assignment Register 3 */ +#define SIC_ISR0 0xFFC00120 /* Interrupt Status Register */ +#define SIC_IWR0 0xFFC00124 /* Interrupt Wakeup Register */ + +/* SIC Additions to ADSP-BF51x (0xFFC0014C - 0xFFC00162) */ +#define SIC_IMASK1 0xFFC0014C /* Interrupt Mask register of SIC2 */ +#define SIC_IAR4 0xFFC00150 /* Interrupt Assignment register4 */ +#define SIC_IAR5 0xFFC00154 /* Interrupt Assignment register5 */ +#define SIC_IAR6 0xFFC00158 /* Interrupt Assignment register6 */ +#define SIC_IAR7 0xFFC0015C /* Interrupt Assignment register7 */ +#define SIC_ISR1 0xFFC00160 /* Interrupt Statur register */ +#define SIC_IWR1 0xFFC00164 /* Interrupt Wakeup register */ + + +/* Watchdog Timer (0xFFC00200 - 0xFFC002FF) */ +#define WDOG_CTL 0xFFC00200 /* Watchdog Control Register */ +#define WDOG_CNT 0xFFC00204 /* Watchdog Count Register */ +#define WDOG_STAT 0xFFC00208 /* Watchdog Status Register */ + + +/* Real Time Clock (0xFFC00300 - 0xFFC003FF) */ +#define RTC_STAT 0xFFC00300 /* RTC Status Register */ +#define RTC_ICTL 0xFFC00304 /* RTC Interrupt Control Register */ +#define RTC_ISTAT 0xFFC00308 /* RTC Interrupt Status Register */ +#define RTC_SWCNT 0xFFC0030C /* RTC Stopwatch Count Register */ +#define RTC_ALARM 0xFFC00310 /* RTC Alarm Time Register */ +#define RTC_FAST 0xFFC00314 /* RTC Prescaler Enable Register */ +#define RTC_PREN 0xFFC00314 /* RTC Prescaler Enable Alternate Macro */ + + +/* UART0 Controller (0xFFC00400 - 0xFFC004FF) */ +#define UART0_THR 0xFFC00400 /* Transmit Holding register */ +#define UART0_RBR 0xFFC00400 /* Receive Buffer register */ +#define UART0_DLL 0xFFC00400 /* Divisor Latch (Low-Byte) */ +#define UART0_IER 0xFFC00404 /* Interrupt Enable Register */ +#define UART0_DLH 0xFFC00404 /* Divisor Latch (High-Byte) */ +#define UART0_IIR 0xFFC00408 /* Interrupt Identification Register */ +#define UART0_LCR 0xFFC0040C /* Line Control Register */ +#define UART0_MCR 0xFFC00410 /* Modem Control Register */ +#define UART0_LSR 0xFFC00414 /* Line Status Register */ +#define UART0_MSR 0xFFC00418 /* Modem Status Register */ +#define UART0_SCR 0xFFC0041C /* SCR Scratch Register */ +#define UART0_GCTL 0xFFC00424 /* Global Control Register */ + +/* SPI0 Controller (0xFFC00500 - 0xFFC005FF) */ +#define SPI0_REGBASE 0xFFC00500 +#define SPI0_CTL 0xFFC00500 /* SPI Control Register */ +#define SPI0_FLG 0xFFC00504 /* SPI Flag register */ +#define SPI0_STAT 0xFFC00508 /* SPI Status register */ +#define SPI0_TDBR 0xFFC0050C /* SPI Transmit Data Buffer Register */ +#define SPI0_RDBR 0xFFC00510 /* SPI Receive Data Buffer Register */ +#define SPI0_BAUD 0xFFC00514 /* SPI Baud rate Register */ +#define SPI0_SHADOW 0xFFC00518 /* SPI_RDBR Shadow Register */ + +/* SPI1 Controller (0xFFC03400 - 0xFFC034FF) */ +#define SPI1_REGBASE 0xFFC03400 +#define SPI1_CTL 0xFFC03400 /* SPI Control Register */ +#define SPI1_FLG 0xFFC03404 /* SPI Flag register */ +#define SPI1_STAT 0xFFC03408 /* SPI Status register */ +#define SPI1_TDBR 0xFFC0340C /* SPI Transmit Data Buffer Register */ +#define SPI1_RDBR 0xFFC03410 /* SPI Receive Data Buffer Register */ +#define SPI1_BAUD 0xFFC03414 /* SPI Baud rate Register */ +#define SPI1_SHADOW 0xFFC03418 /* SPI_RDBR Shadow Register */ + +/* TIMER0-7 Registers (0xFFC00600 - 0xFFC006FF) */ +#define TIMER0_CONFIG 0xFFC00600 /* Timer 0 Configuration Register */ +#define TIMER0_COUNTER 0xFFC00604 /* Timer 0 Counter Register */ +#define TIMER0_PERIOD 0xFFC00608 /* Timer 0 Period Register */ +#define TIMER0_WIDTH 0xFFC0060C /* Timer 0 Width Register */ + +#define TIMER1_CONFIG 0xFFC00610 /* Timer 1 Configuration Register */ +#define TIMER1_COUNTER 0xFFC00614 /* Timer 1 Counter Register */ +#define TIMER1_PERIOD 0xFFC00618 /* Timer 1 Period Register */ +#define TIMER1_WIDTH 0xFFC0061C /* Timer 1 Width Register */ + +#define TIMER2_CONFIG 0xFFC00620 /* Timer 2 Configuration Register */ +#define TIMER2_COUNTER 0xFFC00624 /* Timer 2 Counter Register */ +#define TIMER2_PERIOD 0xFFC00628 /* Timer 2 Period Register */ +#define TIMER2_WIDTH 0xFFC0062C /* Timer 2 Width Register */ + +#define TIMER3_CONFIG 0xFFC00630 /* Timer 3 Configuration Register */ +#define TIMER3_COUNTER 0xFFC00634 /* Timer 3 Counter Register */ +#define TIMER3_PERIOD 0xFFC00638 /* Timer 3 Period Register */ +#define TIMER3_WIDTH 0xFFC0063C /* Timer 3 Width Register */ + +#define TIMER4_CONFIG 0xFFC00640 /* Timer 4 Configuration Register */ +#define TIMER4_COUNTER 0xFFC00644 /* Timer 4 Counter Register */ +#define TIMER4_PERIOD 0xFFC00648 /* Timer 4 Period Register */ +#define TIMER4_WIDTH 0xFFC0064C /* Timer 4 Width Register */ + +#define TIMER5_CONFIG 0xFFC00650 /* Timer 5 Configuration Register */ +#define TIMER5_COUNTER 0xFFC00654 /* Timer 5 Counter Register */ +#define TIMER5_PERIOD 0xFFC00658 /* Timer 5 Period Register */ +#define TIMER5_WIDTH 0xFFC0065C /* Timer 5 Width Register */ + +#define TIMER6_CONFIG 0xFFC00660 /* Timer 6 Configuration Register */ +#define TIMER6_COUNTER 0xFFC00664 /* Timer 6 Counter Register */ +#define TIMER6_PERIOD 0xFFC00668 /* Timer 6 Period Register */ +#define TIMER6_WIDTH 0xFFC0066C /* Timer 6 Width Register */ + +#define TIMER7_CONFIG 0xFFC00670 /* Timer 7 Configuration Register */ +#define TIMER7_COUNTER 0xFFC00674 /* Timer 7 Counter Register */ +#define TIMER7_PERIOD 0xFFC00678 /* Timer 7 Period Register */ +#define TIMER7_WIDTH 0xFFC0067C /* Timer 7 Width Register */ + +#define TIMER_ENABLE 0xFFC00680 /* Timer Enable Register */ +#define TIMER_DISABLE 0xFFC00684 /* Timer Disable Register */ +#define TIMER_STATUS 0xFFC00688 /* Timer Status Register */ + +/* General Purpose I/O Port F (0xFFC00700 - 0xFFC007FF) */ +#define PORTFIO 0xFFC00700 /* Port F I/O Pin State Specify Register */ +#define PORTFIO_CLEAR 0xFFC00704 /* Port F I/O Peripheral Interrupt Clear Register */ +#define PORTFIO_SET 0xFFC00708 /* Port F I/O Peripheral Interrupt Set Register */ +#define PORTFIO_TOGGLE 0xFFC0070C /* Port F I/O Pin State Toggle Register */ +#define PORTFIO_MASKA 0xFFC00710 /* Port F I/O Mask State Specify Interrupt A Register */ +#define PORTFIO_MASKA_CLEAR 0xFFC00714 /* Port F I/O Mask Disable Interrupt A Register */ +#define PORTFIO_MASKA_SET 0xFFC00718 /* Port F I/O Mask Enable Interrupt A Register */ +#define PORTFIO_MASKA_TOGGLE 0xFFC0071C /* Port F I/O Mask Toggle Enable Interrupt A Register */ +#define PORTFIO_MASKB 0xFFC00720 /* Port F I/O Mask State Specify Interrupt B Register */ +#define PORTFIO_MASKB_CLEAR 0xFFC00724 /* Port F I/O Mask Disable Interrupt B Register */ +#define PORTFIO_MASKB_SET 0xFFC00728 /* Port F I/O Mask Enable Interrupt B Register */ +#define PORTFIO_MASKB_TOGGLE 0xFFC0072C /* Port F I/O Mask Toggle Enable Interrupt B Register */ +#define PORTFIO_DIR 0xFFC00730 /* Port F I/O Direction Register */ +#define PORTFIO_POLAR 0xFFC00734 /* Port F I/O Source Polarity Register */ +#define PORTFIO_EDGE 0xFFC00738 /* Port F I/O Source Sensitivity Register */ +#define PORTFIO_BOTH 0xFFC0073C /* Port F I/O Set on BOTH Edges Register */ +#define PORTFIO_INEN 0xFFC00740 /* Port F I/O Input Enable Register */ + +/* SPORT0 Controller (0xFFC00800 - 0xFFC008FF) */ +#define SPORT0_TCR1 0xFFC00800 /* SPORT0 Transmit Configuration 1 Register */ +#define SPORT0_TCR2 0xFFC00804 /* SPORT0 Transmit Configuration 2 Register */ +#define SPORT0_TCLKDIV 0xFFC00808 /* SPORT0 Transmit Clock Divider */ +#define SPORT0_TFSDIV 0xFFC0080C /* SPORT0 Transmit Frame Sync Divider */ +#define SPORT0_TX 0xFFC00810 /* SPORT0 TX Data Register */ +#define SPORT0_RX 0xFFC00818 /* SPORT0 RX Data Register */ +#define SPORT0_RCR1 0xFFC00820 /* SPORT0 Transmit Configuration 1 Register */ +#define SPORT0_RCR2 0xFFC00824 /* SPORT0 Transmit Configuration 2 Register */ +#define SPORT0_RCLKDIV 0xFFC00828 /* SPORT0 Receive Clock Divider */ +#define SPORT0_RFSDIV 0xFFC0082C /* SPORT0 Receive Frame Sync Divider */ +#define SPORT0_STAT 0xFFC00830 /* SPORT0 Status Register */ +#define SPORT0_CHNL 0xFFC00834 /* SPORT0 Current Channel Register */ +#define SPORT0_MCMC1 0xFFC00838 /* SPORT0 Multi-Channel Configuration Register 1 */ +#define SPORT0_MCMC2 0xFFC0083C /* SPORT0 Multi-Channel Configuration Register 2 */ +#define SPORT0_MTCS0 0xFFC00840 /* SPORT0 Multi-Channel Transmit Select Register 0 */ +#define SPORT0_MTCS1 0xFFC00844 /* SPORT0 Multi-Channel Transmit Select Register 1 */ +#define SPORT0_MTCS2 0xFFC00848 /* SPORT0 Multi-Channel Transmit Select Register 2 */ +#define SPORT0_MTCS3 0xFFC0084C /* SPORT0 Multi-Channel Transmit Select Register 3 */ +#define SPORT0_MRCS0 0xFFC00850 /* SPORT0 Multi-Channel Receive Select Register 0 */ +#define SPORT0_MRCS1 0xFFC00854 /* SPORT0 Multi-Channel Receive Select Register 1 */ +#define SPORT0_MRCS2 0xFFC00858 /* SPORT0 Multi-Channel Receive Select Register 2 */ +#define SPORT0_MRCS3 0xFFC0085C /* SPORT0 Multi-Channel Receive Select Register 3 */ + +/* SPORT1 Controller (0xFFC00900 - 0xFFC009FF) */ +#define SPORT1_TCR1 0xFFC00900 /* SPORT1 Transmit Configuration 1 Register */ +#define SPORT1_TCR2 0xFFC00904 /* SPORT1 Transmit Configuration 2 Register */ +#define SPORT1_TCLKDIV 0xFFC00908 /* SPORT1 Transmit Clock Divider */ +#define SPORT1_TFSDIV 0xFFC0090C /* SPORT1 Transmit Frame Sync Divider */ +#define SPORT1_TX 0xFFC00910 /* SPORT1 TX Data Register */ +#define SPORT1_RX 0xFFC00918 /* SPORT1 RX Data Register */ +#define SPORT1_RCR1 0xFFC00920 /* SPORT1 Transmit Configuration 1 Register */ +#define SPORT1_RCR2 0xFFC00924 /* SPORT1 Transmit Configuration 2 Register */ +#define SPORT1_RCLKDIV 0xFFC00928 /* SPORT1 Receive Clock Divider */ +#define SPORT1_RFSDIV 0xFFC0092C /* SPORT1 Receive Frame Sync Divider */ +#define SPORT1_STAT 0xFFC00930 /* SPORT1 Status Register */ +#define SPORT1_CHNL 0xFFC00934 /* SPORT1 Current Channel Register */ +#define SPORT1_MCMC1 0xFFC00938 /* SPORT1 Multi-Channel Configuration Register 1 */ +#define SPORT1_MCMC2 0xFFC0093C /* SPORT1 Multi-Channel Configuration Register 2 */ +#define SPORT1_MTCS0 0xFFC00940 /* SPORT1 Multi-Channel Transmit Select Register 0 */ +#define SPORT1_MTCS1 0xFFC00944 /* SPORT1 Multi-Channel Transmit Select Register 1 */ +#define SPORT1_MTCS2 0xFFC00948 /* SPORT1 Multi-Channel Transmit Select Register 2 */ +#define SPORT1_MTCS3 0xFFC0094C /* SPORT1 Multi-Channel Transmit Select Register 3 */ +#define SPORT1_MRCS0 0xFFC00950 /* SPORT1 Multi-Channel Receive Select Register 0 */ +#define SPORT1_MRCS1 0xFFC00954 /* SPORT1 Multi-Channel Receive Select Register 1 */ +#define SPORT1_MRCS2 0xFFC00958 /* SPORT1 Multi-Channel Receive Select Register 2 */ +#define SPORT1_MRCS3 0xFFC0095C /* SPORT1 Multi-Channel Receive Select Register 3 */ + +/* External Bus Interface Unit (0xFFC00A00 - 0xFFC00AFF) */ +#define EBIU_AMGCTL 0xFFC00A00 /* Asynchronous Memory Global Control Register */ +#define EBIU_AMBCTL0 0xFFC00A04 /* Asynchronous Memory Bank Control Register 0 */ +#define EBIU_AMBCTL1 0xFFC00A08 /* Asynchronous Memory Bank Control Register 1 */ +#define EBIU_SDGCTL 0xFFC00A10 /* SDRAM Global Control Register */ +#define EBIU_SDBCTL 0xFFC00A14 /* SDRAM Bank Control Register */ +#define EBIU_SDRRC 0xFFC00A18 /* SDRAM Refresh Rate Control Register */ +#define EBIU_SDSTAT 0xFFC00A1C /* SDRAM Status Register */ + +/* DMA Traffic Control Registers */ +#define DMA_TC_PER 0xFFC00B0C /* Traffic Control Periods Register */ +#define DMA_TC_CNT 0xFFC00B10 /* Traffic Control Current Counts Register */ + +/* Alternate deprecated register names (below) provided for backwards code compatibility */ +#define DMA_TCPER 0xFFC00B0C /* Traffic Control Periods Register */ +#define DMA_TCCNT 0xFFC00B10 /* Traffic Control Current Counts Register */ + +/* DMA Controller (0xFFC00C00 - 0xFFC00FFF) */ +#define DMA0_NEXT_DESC_PTR 0xFFC00C00 /* DMA Channel 0 Next Descriptor Pointer Register */ +#define DMA0_START_ADDR 0xFFC00C04 /* DMA Channel 0 Start Address Register */ +#define DMA0_CONFIG 0xFFC00C08 /* DMA Channel 0 Configuration Register */ +#define DMA0_X_COUNT 0xFFC00C10 /* DMA Channel 0 X Count Register */ +#define DMA0_X_MODIFY 0xFFC00C14 /* DMA Channel 0 X Modify Register */ +#define DMA0_Y_COUNT 0xFFC00C18 /* DMA Channel 0 Y Count Register */ +#define DMA0_Y_MODIFY 0xFFC00C1C /* DMA Channel 0 Y Modify Register */ +#define DMA0_CURR_DESC_PTR 0xFFC00C20 /* DMA Channel 0 Current Descriptor Pointer Register */ +#define DMA0_CURR_ADDR 0xFFC00C24 /* DMA Channel 0 Current Address Register */ +#define DMA0_IRQ_STATUS 0xFFC00C28 /* DMA Channel 0 Interrupt/Status Register */ +#define DMA0_PERIPHERAL_MAP 0xFFC00C2C /* DMA Channel 0 Peripheral Map Register */ +#define DMA0_CURR_X_COUNT 0xFFC00C30 /* DMA Channel 0 Current X Count Register */ +#define DMA0_CURR_Y_COUNT 0xFFC00C38 /* DMA Channel 0 Current Y Count Register */ + +#define DMA1_NEXT_DESC_PTR 0xFFC00C40 /* DMA Channel 1 Next Descriptor Pointer Register */ +#define DMA1_START_ADDR 0xFFC00C44 /* DMA Channel 1 Start Address Register */ +#define DMA1_CONFIG 0xFFC00C48 /* DMA Channel 1 Configuration Register */ +#define DMA1_X_COUNT 0xFFC00C50 /* DMA Channel 1 X Count Register */ +#define DMA1_X_MODIFY 0xFFC00C54 /* DMA Channel 1 X Modify Register */ +#define DMA1_Y_COUNT 0xFFC00C58 /* DMA Channel 1 Y Count Register */ +#define DMA1_Y_MODIFY 0xFFC00C5C /* DMA Channel 1 Y Modify Register */ +#define DMA1_CURR_DESC_PTR 0xFFC00C60 /* DMA Channel 1 Current Descriptor Pointer Register */ +#define DMA1_CURR_ADDR 0xFFC00C64 /* DMA Channel 1 Current Address Register */ +#define DMA1_IRQ_STATUS 0xFFC00C68 /* DMA Channel 1 Interrupt/Status Register */ +#define DMA1_PERIPHERAL_MAP 0xFFC00C6C /* DMA Channel 1 Peripheral Map Register */ +#define DMA1_CURR_X_COUNT 0xFFC00C70 /* DMA Channel 1 Current X Count Register */ +#define DMA1_CURR_Y_COUNT 0xFFC00C78 /* DMA Channel 1 Current Y Count Register */ + +#define DMA2_NEXT_DESC_PTR 0xFFC00C80 /* DMA Channel 2 Next Descriptor Pointer Register */ +#define DMA2_START_ADDR 0xFFC00C84 /* DMA Channel 2 Start Address Register */ +#define DMA2_CONFIG 0xFFC00C88 /* DMA Channel 2 Configuration Register */ +#define DMA2_X_COUNT 0xFFC00C90 /* DMA Channel 2 X Count Register */ +#define DMA2_X_MODIFY 0xFFC00C94 /* DMA Channel 2 X Modify Register */ +#define DMA2_Y_COUNT 0xFFC00C98 /* DMA Channel 2 Y Count Register */ +#define DMA2_Y_MODIFY 0xFFC00C9C /* DMA Channel 2 Y Modify Register */ +#define DMA2_CURR_DESC_PTR 0xFFC00CA0 /* DMA Channel 2 Current Descriptor Pointer Register */ +#define DMA2_CURR_ADDR 0xFFC00CA4 /* DMA Channel 2 Current Address Register */ +#define DMA2_IRQ_STATUS 0xFFC00CA8 /* DMA Channel 2 Interrupt/Status Register */ +#define DMA2_PERIPHERAL_MAP 0xFFC00CAC /* DMA Channel 2 Peripheral Map Register */ +#define DMA2_CURR_X_COUNT 0xFFC00CB0 /* DMA Channel 2 Current X Count Register */ +#define DMA2_CURR_Y_COUNT 0xFFC00CB8 /* DMA Channel 2 Current Y Count Register */ + +#define DMA3_NEXT_DESC_PTR 0xFFC00CC0 /* DMA Channel 3 Next Descriptor Pointer Register */ +#define DMA3_START_ADDR 0xFFC00CC4 /* DMA Channel 3 Start Address Register */ +#define DMA3_CONFIG 0xFFC00CC8 /* DMA Channel 3 Configuration Register */ +#define DMA3_X_COUNT 0xFFC00CD0 /* DMA Channel 3 X Count Register */ +#define DMA3_X_MODIFY 0xFFC00CD4 /* DMA Channel 3 X Modify Register */ +#define DMA3_Y_COUNT 0xFFC00CD8 /* DMA Channel 3 Y Count Register */ +#define DMA3_Y_MODIFY 0xFFC00CDC /* DMA Channel 3 Y Modify Register */ +#define DMA3_CURR_DESC_PTR 0xFFC00CE0 /* DMA Channel 3 Current Descriptor Pointer Register */ +#define DMA3_CURR_ADDR 0xFFC00CE4 /* DMA Channel 3 Current Address Register */ +#define DMA3_IRQ_STATUS 0xFFC00CE8 /* DMA Channel 3 Interrupt/Status Register */ +#define DMA3_PERIPHERAL_MAP 0xFFC00CEC /* DMA Channel 3 Peripheral Map Register */ +#define DMA3_CURR_X_COUNT 0xFFC00CF0 /* DMA Channel 3 Current X Count Register */ +#define DMA3_CURR_Y_COUNT 0xFFC00CF8 /* DMA Channel 3 Current Y Count Register */ + +#define DMA4_NEXT_DESC_PTR 0xFFC00D00 /* DMA Channel 4 Next Descriptor Pointer Register */ +#define DMA4_START_ADDR 0xFFC00D04 /* DMA Channel 4 Start Address Register */ +#define DMA4_CONFIG 0xFFC00D08 /* DMA Channel 4 Configuration Register */ +#define DMA4_X_COUNT 0xFFC00D10 /* DMA Channel 4 X Count Register */ +#define DMA4_X_MODIFY 0xFFC00D14 /* DMA Channel 4 X Modify Register */ +#define DMA4_Y_COUNT 0xFFC00D18 /* DMA Channel 4 Y Count Register */ +#define DMA4_Y_MODIFY 0xFFC00D1C /* DMA Channel 4 Y Modify Register */ +#define DMA4_CURR_DESC_PTR 0xFFC00D20 /* DMA Channel 4 Current Descriptor Pointer Register */ +#define DMA4_CURR_ADDR 0xFFC00D24 /* DMA Channel 4 Current Address Register */ +#define DMA4_IRQ_STATUS 0xFFC00D28 /* DMA Channel 4 Interrupt/Status Register */ +#define DMA4_PERIPHERAL_MAP 0xFFC00D2C /* DMA Channel 4 Peripheral Map Register */ +#define DMA4_CURR_X_COUNT 0xFFC00D30 /* DMA Channel 4 Current X Count Register */ +#define DMA4_CURR_Y_COUNT 0xFFC00D38 /* DMA Channel 4 Current Y Count Register */ + +#define DMA5_NEXT_DESC_PTR 0xFFC00D40 /* DMA Channel 5 Next Descriptor Pointer Register */ +#define DMA5_START_ADDR 0xFFC00D44 /* DMA Channel 5 Start Address Register */ +#define DMA5_CONFIG 0xFFC00D48 /* DMA Channel 5 Configuration Register */ +#define DMA5_X_COUNT 0xFFC00D50 /* DMA Channel 5 X Count Register */ +#define DMA5_X_MODIFY 0xFFC00D54 /* DMA Channel 5 X Modify Register */ +#define DMA5_Y_COUNT 0xFFC00D58 /* DMA Channel 5 Y Count Register */ +#define DMA5_Y_MODIFY 0xFFC00D5C /* DMA Channel 5 Y Modify Register */ +#define DMA5_CURR_DESC_PTR 0xFFC00D60 /* DMA Channel 5 Current Descriptor Pointer Register */ +#define DMA5_CURR_ADDR 0xFFC00D64 /* DMA Channel 5 Current Address Register */ +#define DMA5_IRQ_STATUS 0xFFC00D68 /* DMA Channel 5 Interrupt/Status Register */ +#define DMA5_PERIPHERAL_MAP 0xFFC00D6C /* DMA Channel 5 Peripheral Map Register */ +#define DMA5_CURR_X_COUNT 0xFFC00D70 /* DMA Channel 5 Current X Count Register */ +#define DMA5_CURR_Y_COUNT 0xFFC00D78 /* DMA Channel 5 Current Y Count Register */ + +#define DMA6_NEXT_DESC_PTR 0xFFC00D80 /* DMA Channel 6 Next Descriptor Pointer Register */ +#define DMA6_START_ADDR 0xFFC00D84 /* DMA Channel 6 Start Address Register */ +#define DMA6_CONFIG 0xFFC00D88 /* DMA Channel 6 Configuration Register */ +#define DMA6_X_COUNT 0xFFC00D90 /* DMA Channel 6 X Count Register */ +#define DMA6_X_MODIFY 0xFFC00D94 /* DMA Channel 6 X Modify Register */ +#define DMA6_Y_COUNT 0xFFC00D98 /* DMA Channel 6 Y Count Register */ +#define DMA6_Y_MODIFY 0xFFC00D9C /* DMA Channel 6 Y Modify Register */ +#define DMA6_CURR_DESC_PTR 0xFFC00DA0 /* DMA Channel 6 Current Descriptor Pointer Register */ +#define DMA6_CURR_ADDR 0xFFC00DA4 /* DMA Channel 6 Current Address Register */ +#define DMA6_IRQ_STATUS 0xFFC00DA8 /* DMA Channel 6 Interrupt/Status Register */ +#define DMA6_PERIPHERAL_MAP 0xFFC00DAC /* DMA Channel 6 Peripheral Map Register */ +#define DMA6_CURR_X_COUNT 0xFFC00DB0 /* DMA Channel 6 Current X Count Register */ +#define DMA6_CURR_Y_COUNT 0xFFC00DB8 /* DMA Channel 6 Current Y Count Register */ + +#define DMA7_NEXT_DESC_PTR 0xFFC00DC0 /* DMA Channel 7 Next Descriptor Pointer Register */ +#define DMA7_START_ADDR 0xFFC00DC4 /* DMA Channel 7 Start Address Register */ +#define DMA7_CONFIG 0xFFC00DC8 /* DMA Channel 7 Configuration Register */ +#define DMA7_X_COUNT 0xFFC00DD0 /* DMA Channel 7 X Count Register */ +#define DMA7_X_MODIFY 0xFFC00DD4 /* DMA Channel 7 X Modify Register */ +#define DMA7_Y_COUNT 0xFFC00DD8 /* DMA Channel 7 Y Count Register */ +#define DMA7_Y_MODIFY 0xFFC00DDC /* DMA Channel 7 Y Modify Register */ +#define DMA7_CURR_DESC_PTR 0xFFC00DE0 /* DMA Channel 7 Current Descriptor Pointer Register */ +#define DMA7_CURR_ADDR 0xFFC00DE4 /* DMA Channel 7 Current Address Register */ +#define DMA7_IRQ_STATUS 0xFFC00DE8 /* DMA Channel 7 Interrupt/Status Register */ +#define DMA7_PERIPHERAL_MAP 0xFFC00DEC /* DMA Channel 7 Peripheral Map Register */ +#define DMA7_CURR_X_COUNT 0xFFC00DF0 /* DMA Channel 7 Current X Count Register */ +#define DMA7_CURR_Y_COUNT 0xFFC00DF8 /* DMA Channel 7 Current Y Count Register */ + +#define DMA8_NEXT_DESC_PTR 0xFFC00E00 /* DMA Channel 8 Next Descriptor Pointer Register */ +#define DMA8_START_ADDR 0xFFC00E04 /* DMA Channel 8 Start Address Register */ +#define DMA8_CONFIG 0xFFC00E08 /* DMA Channel 8 Configuration Register */ +#define DMA8_X_COUNT 0xFFC00E10 /* DMA Channel 8 X Count Register */ +#define DMA8_X_MODIFY 0xFFC00E14 /* DMA Channel 8 X Modify Register */ +#define DMA8_Y_COUNT 0xFFC00E18 /* DMA Channel 8 Y Count Register */ +#define DMA8_Y_MODIFY 0xFFC00E1C /* DMA Channel 8 Y Modify Register */ +#define DMA8_CURR_DESC_PTR 0xFFC00E20 /* DMA Channel 8 Current Descriptor Pointer Register */ +#define DMA8_CURR_ADDR 0xFFC00E24 /* DMA Channel 8 Current Address Register */ +#define DMA8_IRQ_STATUS 0xFFC00E28 /* DMA Channel 8 Interrupt/Status Register */ +#define DMA8_PERIPHERAL_MAP 0xFFC00E2C /* DMA Channel 8 Peripheral Map Register */ +#define DMA8_CURR_X_COUNT 0xFFC00E30 /* DMA Channel 8 Current X Count Register */ +#define DMA8_CURR_Y_COUNT 0xFFC00E38 /* DMA Channel 8 Current Y Count Register */ + +#define DMA9_NEXT_DESC_PTR 0xFFC00E40 /* DMA Channel 9 Next Descriptor Pointer Register */ +#define DMA9_START_ADDR 0xFFC00E44 /* DMA Channel 9 Start Address Register */ +#define DMA9_CONFIG 0xFFC00E48 /* DMA Channel 9 Configuration Register */ +#define DMA9_X_COUNT 0xFFC00E50 /* DMA Channel 9 X Count Register */ +#define DMA9_X_MODIFY 0xFFC00E54 /* DMA Channel 9 X Modify Register */ +#define DMA9_Y_COUNT 0xFFC00E58 /* DMA Channel 9 Y Count Register */ +#define DMA9_Y_MODIFY 0xFFC00E5C /* DMA Channel 9 Y Modify Register */ +#define DMA9_CURR_DESC_PTR 0xFFC00E60 /* DMA Channel 9 Current Descriptor Pointer Register */ +#define DMA9_CURR_ADDR 0xFFC00E64 /* DMA Channel 9 Current Address Register */ +#define DMA9_IRQ_STATUS 0xFFC00E68 /* DMA Channel 9 Interrupt/Status Register */ +#define DMA9_PERIPHERAL_MAP 0xFFC00E6C /* DMA Channel 9 Peripheral Map Register */ +#define DMA9_CURR_X_COUNT 0xFFC00E70 /* DMA Channel 9 Current X Count Register */ +#define DMA9_CURR_Y_COUNT 0xFFC00E78 /* DMA Channel 9 Current Y Count Register */ + +#define DMA10_NEXT_DESC_PTR 0xFFC00E80 /* DMA Channel 10 Next Descriptor Pointer Register */ +#define DMA10_START_ADDR 0xFFC00E84 /* DMA Channel 10 Start Address Register */ +#define DMA10_CONFIG 0xFFC00E88 /* DMA Channel 10 Configuration Register */ +#define DMA10_X_COUNT 0xFFC00E90 /* DMA Channel 10 X Count Register */ +#define DMA10_X_MODIFY 0xFFC00E94 /* DMA Channel 10 X Modify Register */ +#define DMA10_Y_COUNT 0xFFC00E98 /* DMA Channel 10 Y Count Register */ +#define DMA10_Y_MODIFY 0xFFC00E9C /* DMA Channel 10 Y Modify Register */ +#define DMA10_CURR_DESC_PTR 0xFFC00EA0 /* DMA Channel 10 Current Descriptor Pointer Register */ +#define DMA10_CURR_ADDR 0xFFC00EA4 /* DMA Channel 10 Current Address Register */ +#define DMA10_IRQ_STATUS 0xFFC00EA8 /* DMA Channel 10 Interrupt/Status Register */ +#define DMA10_PERIPHERAL_MAP 0xFFC00EAC /* DMA Channel 10 Peripheral Map Register */ +#define DMA10_CURR_X_COUNT 0xFFC00EB0 /* DMA Channel 10 Current X Count Register */ +#define DMA10_CURR_Y_COUNT 0xFFC00EB8 /* DMA Channel 10 Current Y Count Register */ + +#define DMA11_NEXT_DESC_PTR 0xFFC00EC0 /* DMA Channel 11 Next Descriptor Pointer Register */ +#define DMA11_START_ADDR 0xFFC00EC4 /* DMA Channel 11 Start Address Register */ +#define DMA11_CONFIG 0xFFC00EC8 /* DMA Channel 11 Configuration Register */ +#define DMA11_X_COUNT 0xFFC00ED0 /* DMA Channel 11 X Count Register */ +#define DMA11_X_MODIFY 0xFFC00ED4 /* DMA Channel 11 X Modify Register */ +#define DMA11_Y_COUNT 0xFFC00ED8 /* DMA Channel 11 Y Count Register */ +#define DMA11_Y_MODIFY 0xFFC00EDC /* DMA Channel 11 Y Modify Register */ +#define DMA11_CURR_DESC_PTR 0xFFC00EE0 /* DMA Channel 11 Current Descriptor Pointer Register */ +#define DMA11_CURR_ADDR 0xFFC00EE4 /* DMA Channel 11 Current Address Register */ +#define DMA11_IRQ_STATUS 0xFFC00EE8 /* DMA Channel 11 Interrupt/Status Register */ +#define DMA11_PERIPHERAL_MAP 0xFFC00EEC /* DMA Channel 11 Peripheral Map Register */ +#define DMA11_CURR_X_COUNT 0xFFC00EF0 /* DMA Channel 11 Current X Count Register */ +#define DMA11_CURR_Y_COUNT 0xFFC00EF8 /* DMA Channel 11 Current Y Count Register */ + +#define MDMA_D0_NEXT_DESC_PTR 0xFFC00F00 /* MemDMA Stream 0 Destination Next Descriptor Pointer Register */ +#define MDMA_D0_START_ADDR 0xFFC00F04 /* MemDMA Stream 0 Destination Start Address Register */ +#define MDMA_D0_CONFIG 0xFFC00F08 /* MemDMA Stream 0 Destination Configuration Register */ +#define MDMA_D0_X_COUNT 0xFFC00F10 /* MemDMA Stream 0 Destination X Count Register */ +#define MDMA_D0_X_MODIFY 0xFFC00F14 /* MemDMA Stream 0 Destination X Modify Register */ +#define MDMA_D0_Y_COUNT 0xFFC00F18 /* MemDMA Stream 0 Destination Y Count Register */ +#define MDMA_D0_Y_MODIFY 0xFFC00F1C /* MemDMA Stream 0 Destination Y Modify Register */ +#define MDMA_D0_CURR_DESC_PTR 0xFFC00F20 /* MemDMA Stream 0 Destination Current Descriptor Pointer Register */ +#define MDMA_D0_CURR_ADDR 0xFFC00F24 /* MemDMA Stream 0 Destination Current Address Register */ +#define MDMA_D0_IRQ_STATUS 0xFFC00F28 /* MemDMA Stream 0 Destination Interrupt/Status Register */ +#define MDMA_D0_PERIPHERAL_MAP 0xFFC00F2C /* MemDMA Stream 0 Destination Peripheral Map Register */ +#define MDMA_D0_CURR_X_COUNT 0xFFC00F30 /* MemDMA Stream 0 Destination Current X Count Register */ +#define MDMA_D0_CURR_Y_COUNT 0xFFC00F38 /* MemDMA Stream 0 Destination Current Y Count Register */ + +#define MDMA_S0_NEXT_DESC_PTR 0xFFC00F40 /* MemDMA Stream 0 Source Next Descriptor Pointer Register */ +#define MDMA_S0_START_ADDR 0xFFC00F44 /* MemDMA Stream 0 Source Start Address Register */ +#define MDMA_S0_CONFIG 0xFFC00F48 /* MemDMA Stream 0 Source Configuration Register */ +#define MDMA_S0_X_COUNT 0xFFC00F50 /* MemDMA Stream 0 Source X Count Register */ +#define MDMA_S0_X_MODIFY 0xFFC00F54 /* MemDMA Stream 0 Source X Modify Register */ +#define MDMA_S0_Y_COUNT 0xFFC00F58 /* MemDMA Stream 0 Source Y Count Register */ +#define MDMA_S0_Y_MODIFY 0xFFC00F5C /* MemDMA Stream 0 Source Y Modify Register */ +#define MDMA_S0_CURR_DESC_PTR 0xFFC00F60 /* MemDMA Stream 0 Source Current Descriptor Pointer Register */ +#define MDMA_S0_CURR_ADDR 0xFFC00F64 /* MemDMA Stream 0 Source Current Address Register */ +#define MDMA_S0_IRQ_STATUS 0xFFC00F68 /* MemDMA Stream 0 Source Interrupt/Status Register */ +#define MDMA_S0_PERIPHERAL_MAP 0xFFC00F6C /* MemDMA Stream 0 Source Peripheral Map Register */ +#define MDMA_S0_CURR_X_COUNT 0xFFC00F70 /* MemDMA Stream 0 Source Current X Count Register */ +#define MDMA_S0_CURR_Y_COUNT 0xFFC00F78 /* MemDMA Stream 0 Source Current Y Count Register */ + +#define MDMA_D1_NEXT_DESC_PTR 0xFFC00F80 /* MemDMA Stream 1 Destination Next Descriptor Pointer Register */ +#define MDMA_D1_START_ADDR 0xFFC00F84 /* MemDMA Stream 1 Destination Start Address Register */ +#define MDMA_D1_CONFIG 0xFFC00F88 /* MemDMA Stream 1 Destination Configuration Register */ +#define MDMA_D1_X_COUNT 0xFFC00F90 /* MemDMA Stream 1 Destination X Count Register */ +#define MDMA_D1_X_MODIFY 0xFFC00F94 /* MemDMA Stream 1 Destination X Modify Register */ +#define MDMA_D1_Y_COUNT 0xFFC00F98 /* MemDMA Stream 1 Destination Y Count Register */ +#define MDMA_D1_Y_MODIFY 0xFFC00F9C /* MemDMA Stream 1 Destination Y Modify Register */ +#define MDMA_D1_CURR_DESC_PTR 0xFFC00FA0 /* MemDMA Stream 1 Destination Current Descriptor Pointer Register */ +#define MDMA_D1_CURR_ADDR 0xFFC00FA4 /* MemDMA Stream 1 Destination Current Address Register */ +#define MDMA_D1_IRQ_STATUS 0xFFC00FA8 /* MemDMA Stream 1 Destination Interrupt/Status Register */ +#define MDMA_D1_PERIPHERAL_MAP 0xFFC00FAC /* MemDMA Stream 1 Destination Peripheral Map Register */ +#define MDMA_D1_CURR_X_COUNT 0xFFC00FB0 /* MemDMA Stream 1 Destination Current X Count Register */ +#define MDMA_D1_CURR_Y_COUNT 0xFFC00FB8 /* MemDMA Stream 1 Destination Current Y Count Register */ + +#define MDMA_S1_NEXT_DESC_PTR 0xFFC00FC0 /* MemDMA Stream 1 Source Next Descriptor Pointer Register */ +#define MDMA_S1_START_ADDR 0xFFC00FC4 /* MemDMA Stream 1 Source Start Address Register */ +#define MDMA_S1_CONFIG 0xFFC00FC8 /* MemDMA Stream 1 Source Configuration Register */ +#define MDMA_S1_X_COUNT 0xFFC00FD0 /* MemDMA Stream 1 Source X Count Register */ +#define MDMA_S1_X_MODIFY 0xFFC00FD4 /* MemDMA Stream 1 Source X Modify Register */ +#define MDMA_S1_Y_COUNT 0xFFC00FD8 /* MemDMA Stream 1 Source Y Count Register */ +#define MDMA_S1_Y_MODIFY 0xFFC00FDC /* MemDMA Stream 1 Source Y Modify Register */ +#define MDMA_S1_CURR_DESC_PTR 0xFFC00FE0 /* MemDMA Stream 1 Source Current Descriptor Pointer Register */ +#define MDMA_S1_CURR_ADDR 0xFFC00FE4 /* MemDMA Stream 1 Source Current Address Register */ +#define MDMA_S1_IRQ_STATUS 0xFFC00FE8 /* MemDMA Stream 1 Source Interrupt/Status Register */ +#define MDMA_S1_PERIPHERAL_MAP 0xFFC00FEC /* MemDMA Stream 1 Source Peripheral Map Register */ +#define MDMA_S1_CURR_X_COUNT 0xFFC00FF0 /* MemDMA Stream 1 Source Current X Count Register */ +#define MDMA_S1_CURR_Y_COUNT 0xFFC00FF8 /* MemDMA Stream 1 Source Current Y Count Register */ + + +/* Parallel Peripheral Interface (0xFFC01000 - 0xFFC010FF) */ +#define PPI_CONTROL 0xFFC01000 /* PPI Control Register */ +#define PPI_STATUS 0xFFC01004 /* PPI Status Register */ +#define PPI_COUNT 0xFFC01008 /* PPI Transfer Count Register */ +#define PPI_DELAY 0xFFC0100C /* PPI Delay Count Register */ +#define PPI_FRAME 0xFFC01010 /* PPI Frame Length Register */ + + +/* Two-Wire Interface (0xFFC01400 - 0xFFC014FF) */ +#define TWI0_REGBASE 0xFFC01400 +#define TWI_CLKDIV 0xFFC01400 /* Serial Clock Divider Register */ +#define TWI_CONTROL 0xFFC01404 /* TWI Control Register */ +#define TWI_SLAVE_CTL 0xFFC01408 /* Slave Mode Control Register */ +#define TWI_SLAVE_STAT 0xFFC0140C /* Slave Mode Status Register */ +#define TWI_SLAVE_ADDR 0xFFC01410 /* Slave Mode Address Register */ +#define TWI_MASTER_CTL 0xFFC01414 /* Master Mode Control Register */ +#define TWI_MASTER_STAT 0xFFC01418 /* Master Mode Status Register */ +#define TWI_MASTER_ADDR 0xFFC0141C /* Master Mode Address Register */ +#define TWI_INT_STAT 0xFFC01420 /* TWI Interrupt Status Register */ +#define TWI_INT_MASK 0xFFC01424 /* TWI Master Interrupt Mask Register */ +#define TWI_FIFO_CTL 0xFFC01428 /* FIFO Control Register */ +#define TWI_FIFO_STAT 0xFFC0142C /* FIFO Status Register */ +#define TWI_XMT_DATA8 0xFFC01480 /* FIFO Transmit Data Single Byte Register */ +#define TWI_XMT_DATA16 0xFFC01484 /* FIFO Transmit Data Double Byte Register */ +#define TWI_RCV_DATA8 0xFFC01488 /* FIFO Receive Data Single Byte Register */ +#define TWI_RCV_DATA16 0xFFC0148C /* FIFO Receive Data Double Byte Register */ + + +/* General Purpose I/O Port G (0xFFC01500 - 0xFFC015FF) */ +#define PORTGIO 0xFFC01500 /* Port G I/O Pin State Specify Register */ +#define PORTGIO_CLEAR 0xFFC01504 /* Port G I/O Peripheral Interrupt Clear Register */ +#define PORTGIO_SET 0xFFC01508 /* Port G I/O Peripheral Interrupt Set Register */ +#define PORTGIO_TOGGLE 0xFFC0150C /* Port G I/O Pin State Toggle Register */ +#define PORTGIO_MASKA 0xFFC01510 /* Port G I/O Mask State Specify Interrupt A Register */ +#define PORTGIO_MASKA_CLEAR 0xFFC01514 /* Port G I/O Mask Disable Interrupt A Register */ +#define PORTGIO_MASKA_SET 0xFFC01518 /* Port G I/O Mask Enable Interrupt A Register */ +#define PORTGIO_MASKA_TOGGLE 0xFFC0151C /* Port G I/O Mask Toggle Enable Interrupt A Register */ +#define PORTGIO_MASKB 0xFFC01520 /* Port G I/O Mask State Specify Interrupt B Register */ +#define PORTGIO_MASKB_CLEAR 0xFFC01524 /* Port G I/O Mask Disable Interrupt B Register */ +#define PORTGIO_MASKB_SET 0xFFC01528 /* Port G I/O Mask Enable Interrupt B Register */ +#define PORTGIO_MASKB_TOGGLE 0xFFC0152C /* Port G I/O Mask Toggle Enable Interrupt B Register */ +#define PORTGIO_DIR 0xFFC01530 /* Port G I/O Direction Register */ +#define PORTGIO_POLAR 0xFFC01534 /* Port G I/O Source Polarity Register */ +#define PORTGIO_EDGE 0xFFC01538 /* Port G I/O Source Sensitivity Register */ +#define PORTGIO_BOTH 0xFFC0153C /* Port G I/O Set on BOTH Edges Register */ +#define PORTGIO_INEN 0xFFC01540 /* Port G I/O Input Enable Register */ + + +/* General Purpose I/O Port H (0xFFC01700 - 0xFFC017FF) */ +#define PORTHIO 0xFFC01700 /* Port H I/O Pin State Specify Register */ +#define PORTHIO_CLEAR 0xFFC01704 /* Port H I/O Peripheral Interrupt Clear Register */ +#define PORTHIO_SET 0xFFC01708 /* Port H I/O Peripheral Interrupt Set Register */ +#define PORTHIO_TOGGLE 0xFFC0170C /* Port H I/O Pin State Toggle Register */ +#define PORTHIO_MASKA 0xFFC01710 /* Port H I/O Mask State Specify Interrupt A Register */ +#define PORTHIO_MASKA_CLEAR 0xFFC01714 /* Port H I/O Mask Disable Interrupt A Register */ +#define PORTHIO_MASKA_SET 0xFFC01718 /* Port H I/O Mask Enable Interrupt A Register */ +#define PORTHIO_MASKA_TOGGLE 0xFFC0171C /* Port H I/O Mask Toggle Enable Interrupt A Register */ +#define PORTHIO_MASKB 0xFFC01720 /* Port H I/O Mask State Specify Interrupt B Register */ +#define PORTHIO_MASKB_CLEAR 0xFFC01724 /* Port H I/O Mask Disable Interrupt B Register */ +#define PORTHIO_MASKB_SET 0xFFC01728 /* Port H I/O Mask Enable Interrupt B Register */ +#define PORTHIO_MASKB_TOGGLE 0xFFC0172C /* Port H I/O Mask Toggle Enable Interrupt B Register */ +#define PORTHIO_DIR 0xFFC01730 /* Port H I/O Direction Register */ +#define PORTHIO_POLAR 0xFFC01734 /* Port H I/O Source Polarity Register */ +#define PORTHIO_EDGE 0xFFC01738 /* Port H I/O Source Sensitivity Register */ +#define PORTHIO_BOTH 0xFFC0173C /* Port H I/O Set on BOTH Edges Register */ +#define PORTHIO_INEN 0xFFC01740 /* Port H I/O Input Enable Register */ + + +/* UART1 Controller (0xFFC02000 - 0xFFC020FF) */ +#define UART1_THR 0xFFC02000 /* Transmit Holding register */ +#define UART1_RBR 0xFFC02000 /* Receive Buffer register */ +#define UART1_DLL 0xFFC02000 /* Divisor Latch (Low-Byte) */ +#define UART1_IER 0xFFC02004 /* Interrupt Enable Register */ +#define UART1_DLH 0xFFC02004 /* Divisor Latch (High-Byte) */ +#define UART1_IIR 0xFFC02008 /* Interrupt Identification Register */ +#define UART1_LCR 0xFFC0200C /* Line Control Register */ +#define UART1_MCR 0xFFC02010 /* Modem Control Register */ +#define UART1_LSR 0xFFC02014 /* Line Status Register */ +#define UART1_MSR 0xFFC02018 /* Modem Status Register */ +#define UART1_SCR 0xFFC0201C /* SCR Scratch Register */ +#define UART1_GCTL 0xFFC02024 /* Global Control Register */ + + +/* Pin Control Registers (0xFFC03200 - 0xFFC032FF) */ +#define PORTF_FER 0xFFC03200 /* Port F Function Enable Register (Alternate/Flag*) */ +#define PORTG_FER 0xFFC03204 /* Port G Function Enable Register (Alternate/Flag*) */ +#define PORTH_FER 0xFFC03208 /* Port H Function Enable Register (Alternate/Flag*) */ +#define BFIN_PORT_MUX 0xFFC0320C /* Port Multiplexer Control Register */ + + +/* Handshake MDMA Registers (0xFFC03300 - 0xFFC033FF) */ +#define HMDMA0_CONTROL 0xFFC03300 /* Handshake MDMA0 Control Register */ +#define HMDMA0_ECINIT 0xFFC03304 /* HMDMA0 Initial Edge Count Register */ +#define HMDMA0_BCINIT 0xFFC03308 /* HMDMA0 Initial Block Count Register */ +#define HMDMA0_ECURGENT 0xFFC0330C /* HMDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECOVERFLOW 0xFFC03310 /* HMDMA0 Edge Count Overflow Interrupt Register */ +#define HMDMA0_ECOUNT 0xFFC03314 /* HMDMA0 Current Edge Count Register */ +#define HMDMA0_BCOUNT 0xFFC03318 /* HMDMA0 Current Block Count Register */ + +#define HMDMA1_CONTROL 0xFFC03340 /* Handshake MDMA1 Control Register */ +#define HMDMA1_ECINIT 0xFFC03344 /* HMDMA1 Initial Edge Count Register */ +#define HMDMA1_BCINIT 0xFFC03348 /* HMDMA1 Initial Block Count Register */ +#define HMDMA1_ECURGENT 0xFFC0334C /* HMDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECOVERFLOW 0xFFC03350 /* HMDMA1 Edge Count Overflow Interrupt Register */ +#define HMDMA1_ECOUNT 0xFFC03354 /* HMDMA1 Current Edge Count Register */ +#define HMDMA1_BCOUNT 0xFFC03358 /* HMDMA1 Current Block Count Register */ + + +/* GPIO PIN mux (0xFFC03210 - OxFFC03288) */ +#define PORTF_MUX 0xFFC03210 /* Port F mux control */ +#define PORTG_MUX 0xFFC03214 /* Port G mux control */ +#define PORTH_MUX 0xFFC03218 /* Port H mux control */ +#define PORTF_DRIVE 0xFFC03220 /* Port F drive strength control */ +#define PORTG_DRIVE 0xFFC03224 /* Port G drive strength control */ +#define PORTH_DRIVE 0xFFC03228 /* Port H drive strength control */ +#define PORTF_SLEW 0xFFC03230 /* Port F slew control */ +#define PORTG_SLEW 0xFFC03234 /* Port G slew control */ +#define PORTH_SLEW 0xFFC03238 /* Port H slew control */ +#define PORTF_HYSTERISIS 0xFFC03240 /* Port F Schmitt trigger control */ +#define PORTG_HYSTERISIS 0xFFC03244 /* Port G Schmitt trigger control */ +#define PORTH_HYSTERISIS 0xFFC03248 /* Port H Schmitt trigger control */ +#define MISCPORT_DRIVE 0xFFC03280 /* Misc Port drive strength control */ +#define MISCPORT_SLEW 0xFFC03284 /* Misc Port slew control */ +#define MISCPORT_HYSTERISIS 0xFFC03288 /* Misc Port Schmitt trigger control */ + + +/*********************************************************************************** +** System MMR Register Bits And Macros +** +** Disclaimer: All macros are intended to make C and Assembly code more readable. +** Use these macros carefully, as any that do left shifts for field +** depositing will result in the lower order bits being destroyed. Any +** macro that shifts left to properly position the bit-field should be +** used as part of an OR to initialize a register and NOT as a dynamic +** modifier UNLESS the lower order bits are saved and ORed back in when +** the macro is used. +*************************************************************************************/ +/* +** ********************* PLL AND RESET MASKS ****************************************/ +/* PLL_CTL Masks */ +#define DF 0x0001 /* 0: PLL = CLKIN, 1: PLL = CLKIN/2 */ +#define PLL_OFF 0x0002 /* PLL Not Powered */ +#define STOPCK 0x0008 /* Core Clock Off */ +#define PDWN 0x0020 /* Enter Deep Sleep Mode */ +#define IN_DELAY 0x0040 /* Add 200ps Delay To EBIU Input Latches */ +#define OUT_DELAY 0x0080 /* Add 200ps Delay To EBIU Output Signals */ +#define BYPASS 0x0100 /* Bypass the PLL */ +#define MSEL 0x7E00 /* Multiplier Select For CCLK/VCO Factors */ +/* PLL_CTL Macros (Only Use With Logic OR While Setting Lower Order Bits) */ +#define SET_MSEL(x) (((x)&0x3F) << 0x9) /* Set MSEL = 0-63 --> VCO = CLKIN*MSEL */ + +/* PLL_DIV Masks */ +#define SSEL 0x000F /* System Select */ +#define CSEL 0x0030 /* Core Select */ +#define CSEL_DIV1 0x0000 /* CCLK = VCO / 1 */ +#define CSEL_DIV2 0x0010 /* CCLK = VCO / 2 */ +#define CSEL_DIV4 0x0020 /* CCLK = VCO / 4 */ +#define CSEL_DIV8 0x0030 /* CCLK = VCO / 8 */ +/* PLL_DIV Macros */ +#define SET_SSEL(x) ((x)&0xF) /* Set SSEL = 0-15 --> SCLK = VCO/SSEL */ + +/* VR_CTL Masks */ +#define FREQ 0x3000 /* Switching Oscillator Frequency For Regulator */ +#define HIBERNATE 0x0000 /* Powerdown/Bypass On-Board Regulation */ + +#define VLEV 0x00F0 /* Internal Voltage Level */ +#define VLEV_085 0x0060 /* VLEV = 0.85 V (-5% - +10% Accuracy) */ +#define VLEV_090 0x0070 /* VLEV = 0.90 V (-5% - +10% Accuracy) */ +#define VLEV_095 0x0080 /* VLEV = 0.95 V (-5% - +10% Accuracy) */ +#define VLEV_100 0x0090 /* VLEV = 1.00 V (-5% - +10% Accuracy) */ +#define VLEV_105 0x00A0 /* VLEV = 1.05 V (-5% - +10% Accuracy) */ +#define VLEV_110 0x00B0 /* VLEV = 1.10 V (-5% - +10% Accuracy) */ +#define VLEV_115 0x00C0 /* VLEV = 1.15 V (-5% - +10% Accuracy) */ +#define VLEV_120 0x00D0 /* VLEV = 1.20 V (-5% - +10% Accuracy) */ +#define VLEV_125 0x00E0 /* VLEV = 1.25 V (-5% - +10% Accuracy) */ +#define VLEV_130 0x00F0 /* VLEV = 1.30 V (-5% - +10% Accuracy) */ + +#define WAKE 0x0100 /* Enable RTC/Reset Wakeup From Hibernate */ +#define USBWE 0x0200 /* Enable USB Wakeup From Hibernate */ +#define PHYWE 0x0400 /* Enable PHY Wakeup From Hibernate */ +#define CLKBUFOE 0x4000 /* CLKIN Buffer Output Enable */ +#define PHYCLKOE CLKBUFOE /* Alternative legacy name for the above */ +#define SCKELOW 0x8000 /* Enable Drive CKE Low During Reset */ + +/* PLL_STAT Masks */ +#define ACTIVE_PLLENABLED 0x0001 /* Processor In Active Mode With PLL Enabled */ +#define FULL_ON 0x0002 /* Processor In Full On Mode */ +#define ACTIVE_PLLDISABLED 0x0004 /* Processor In Active Mode With PLL Disabled */ +#define PLL_LOCKED 0x0020 /* PLL_LOCKCNT Has Been Reached */ + +/* CHIPID Masks */ +#define CHIPID_VERSION 0xF0000000 +#define CHIPID_FAMILY 0x0FFFF000 +#define CHIPID_MANUFACTURE 0x00000FFE + +/* SWRST Masks */ +#define SYSTEM_RESET 0x0007 /* Initiates A System Software Reset */ +#define DOUBLE_FAULT 0x0008 /* Core Double Fault Causes Reset */ +#define RESET_DOUBLE 0x2000 /* SW Reset Generated By Core Double-Fault */ +#define RESET_WDOG 0x4000 /* SW Reset Generated By Watchdog Timer */ +#define RESET_SOFTWARE 0x8000 /* SW Reset Occurred Since Last Read Of SWRST */ + +/* SYSCR Masks */ +#define BMODE 0x0007 /* Boot Mode - Latched During HW Reset From Mode Pins */ +#define NOBOOT 0x0010 /* Execute From L1 or ASYNC Bank 0 When BMODE = 0 */ + + +/* ************* SYSTEM INTERRUPT CONTROLLER MASKS *************************************/ +/* Peripheral Masks For SIC_ISR, SIC_IWR, SIC_IMASK */ + +#if 0 +#define IRQ_PLL_WAKEUP 0x00000001 /* PLL Wakeup Interrupt */ + +#define IRQ_ERROR1 0x00000002 /* Error Interrupt (DMA, DMARx Block, DMARx Overflow) */ +#define IRQ_ERROR2 0x00000004 /* Error Interrupt (CAN, Ethernet, SPORTx, PPI, SPI, UARTx) */ +#define IRQ_RTC 0x00000008 /* Real Time Clock Interrupt */ +#define IRQ_DMA0 0x00000010 /* DMA Channel 0 (PPI) Interrupt */ +#define IRQ_DMA3 0x00000020 /* DMA Channel 3 (SPORT0 RX) Interrupt */ +#define IRQ_DMA4 0x00000040 /* DMA Channel 4 (SPORT0 TX) Interrupt */ +#define IRQ_DMA5 0x00000080 /* DMA Channel 5 (SPORT1 RX) Interrupt */ + +#define IRQ_DMA6 0x00000100 /* DMA Channel 6 (SPORT1 TX) Interrupt */ +#define IRQ_TWI 0x00000200 /* TWI Interrupt */ +#define IRQ_DMA7 0x00000400 /* DMA Channel 7 (SPI) Interrupt */ +#define IRQ_DMA8 0x00000800 /* DMA Channel 8 (UART0 RX) Interrupt */ +#define IRQ_DMA9 0x00001000 /* DMA Channel 9 (UART0 TX) Interrupt */ +#define IRQ_DMA10 0x00002000 /* DMA Channel 10 (UART1 RX) Interrupt */ +#define IRQ_DMA11 0x00004000 /* DMA Channel 11 (UART1 TX) Interrupt */ +#define IRQ_CAN_RX 0x00008000 /* CAN Receive Interrupt */ + +#define IRQ_CAN_TX 0x00010000 /* CAN Transmit Interrupt */ +#define IRQ_DMA1 0x00020000 /* DMA Channel 1 (Ethernet RX) Interrupt */ +#define IRQ_PFA_PORTH 0x00020000 /* PF Port H (PF47:32) Interrupt A */ +#define IRQ_DMA2 0x00040000 /* DMA Channel 2 (Ethernet TX) Interrupt */ +#define IRQ_PFB_PORTH 0x00040000 /* PF Port H (PF47:32) Interrupt B */ +#define IRQ_TIMER0 0x00080000 /* Timer 0 Interrupt */ +#define IRQ_TIMER1 0x00100000 /* Timer 1 Interrupt */ +#define IRQ_TIMER2 0x00200000 /* Timer 2 Interrupt */ +#define IRQ_TIMER3 0x00400000 /* Timer 3 Interrupt */ +#define IRQ_TIMER4 0x00800000 /* Timer 4 Interrupt */ + +#define IRQ_TIMER5 0x01000000 /* Timer 5 Interrupt */ +#define IRQ_TIMER6 0x02000000 /* Timer 6 Interrupt */ +#define IRQ_TIMER7 0x04000000 /* Timer 7 Interrupt */ +#define IRQ_PFA_PORTFG 0x08000000 /* PF Ports F&G (PF31:0) Interrupt A */ +#define IRQ_PFB_PORTF 0x80000000 /* PF Port F (PF15:0) Interrupt B */ +#define IRQ_DMA12 0x20000000 /* DMA Channels 12 (MDMA1 Source) RX Interrupt */ +#define IRQ_DMA13 0x20000000 /* DMA Channels 13 (MDMA1 Destination) TX Interrupt */ +#define IRQ_DMA14 0x40000000 /* DMA Channels 14 (MDMA0 Source) RX Interrupt */ +#define IRQ_DMA15 0x40000000 /* DMA Channels 15 (MDMA0 Destination) TX Interrupt */ +#define IRQ_WDOG 0x80000000 /* Software Watchdog Timer Interrupt */ +#define IRQ_PFB_PORTG 0x10000000 /* PF Port G (PF31:16) Interrupt B */ +#endif + +/* SIC_IAR0 Macros */ +#define P0_IVG(x) (((x)&0xF)-7) /* Peripheral #0 assigned IVG #x */ +#define P1_IVG(x) (((x)&0xF)-7) << 0x4 /* Peripheral #1 assigned IVG #x */ +#define P2_IVG(x) (((x)&0xF)-7) << 0x8 /* Peripheral #2 assigned IVG #x */ +#define P3_IVG(x) (((x)&0xF)-7) << 0xC /* Peripheral #3 assigned IVG #x */ +#define P4_IVG(x) (((x)&0xF)-7) << 0x10 /* Peripheral #4 assigned IVG #x */ +#define P5_IVG(x) (((x)&0xF)-7) << 0x14 /* Peripheral #5 assigned IVG #x */ +#define P6_IVG(x) (((x)&0xF)-7) << 0x18 /* Peripheral #6 assigned IVG #x */ +#define P7_IVG(x) (((x)&0xF)-7) << 0x1C /* Peripheral #7 assigned IVG #x */ + +/* SIC_IAR1 Macros */ +#define P8_IVG(x) (((x)&0xF)-7) /* Peripheral #8 assigned IVG #x */ +#define P9_IVG(x) (((x)&0xF)-7) << 0x4 /* Peripheral #9 assigned IVG #x */ +#define P10_IVG(x) (((x)&0xF)-7) << 0x8 /* Peripheral #10 assigned IVG #x */ +#define P11_IVG(x) (((x)&0xF)-7) << 0xC /* Peripheral #11 assigned IVG #x */ +#define P12_IVG(x) (((x)&0xF)-7) << 0x10 /* Peripheral #12 assigned IVG #x */ +#define P13_IVG(x) (((x)&0xF)-7) << 0x14 /* Peripheral #13 assigned IVG #x */ +#define P14_IVG(x) (((x)&0xF)-7) << 0x18 /* Peripheral #14 assigned IVG #x */ +#define P15_IVG(x) (((x)&0xF)-7) << 0x1C /* Peripheral #15 assigned IVG #x */ + +/* SIC_IAR2 Macros */ +#define P16_IVG(x) (((x)&0xF)-7) /* Peripheral #16 assigned IVG #x */ +#define P17_IVG(x) (((x)&0xF)-7) << 0x4 /* Peripheral #17 assigned IVG #x */ +#define P18_IVG(x) (((x)&0xF)-7) << 0x8 /* Peripheral #18 assigned IVG #x */ +#define P19_IVG(x) (((x)&0xF)-7) << 0xC /* Peripheral #19 assigned IVG #x */ +#define P20_IVG(x) (((x)&0xF)-7) << 0x10 /* Peripheral #20 assigned IVG #x */ +#define P21_IVG(x) (((x)&0xF)-7) << 0x14 /* Peripheral #21 assigned IVG #x */ +#define P22_IVG(x) (((x)&0xF)-7) << 0x18 /* Peripheral #22 assigned IVG #x */ +#define P23_IVG(x) (((x)&0xF)-7) << 0x1C /* Peripheral #23 assigned IVG #x */ + +/* SIC_IAR3 Macros */ +#define P24_IVG(x) (((x)&0xF)-7) /* Peripheral #24 assigned IVG #x */ +#define P25_IVG(x) (((x)&0xF)-7) << 0x4 /* Peripheral #25 assigned IVG #x */ +#define P26_IVG(x) (((x)&0xF)-7) << 0x8 /* Peripheral #26 assigned IVG #x */ +#define P27_IVG(x) (((x)&0xF)-7) << 0xC /* Peripheral #27 assigned IVG #x */ +#define P28_IVG(x) (((x)&0xF)-7) << 0x10 /* Peripheral #28 assigned IVG #x */ +#define P29_IVG(x) (((x)&0xF)-7) << 0x14 /* Peripheral #29 assigned IVG #x */ +#define P30_IVG(x) (((x)&0xF)-7) << 0x18 /* Peripheral #30 assigned IVG #x */ +#define P31_IVG(x) (((x)&0xF)-7) << 0x1C /* Peripheral #31 assigned IVG #x */ + + +/* SIC_IMASK Masks */ +#define SIC_UNMASK_ALL 0x00000000 /* Unmask all peripheral interrupts */ +#define SIC_MASK_ALL 0xFFFFFFFF /* Mask all peripheral interrupts */ +#define SIC_MASK(x) (1 << ((x)&0x1F)) /* Mask Peripheral #x interrupt */ +#define SIC_UNMASK(x) (0xFFFFFFFF ^ (1 << ((x)&0x1F))) /* Unmask Peripheral #x interrupt */ + +/* SIC_IWR Masks */ +#define IWR_DISABLE_ALL 0x00000000 /* Wakeup Disable all peripherals */ +#define IWR_ENABLE_ALL 0xFFFFFFFF /* Wakeup Enable all peripherals */ +#define IWR_ENABLE(x) (1 << ((x)&0x1F)) /* Wakeup Enable Peripheral #x */ +#define IWR_DISABLE(x) (0xFFFFFFFF ^ (1 << ((x)&0x1F))) /* Wakeup Disable Peripheral #x */ + + +/* ********* WATCHDOG TIMER MASKS ******************** */ + +/* Watchdog Timer WDOG_CTL Register Masks */ + +#define WDEV(x) (((x)<<1) & 0x0006) /* event generated on roll over */ +#define WDEV_RESET 0x0000 /* generate reset event on roll over */ +#define WDEV_NMI 0x0002 /* generate NMI event on roll over */ +#define WDEV_GPI 0x0004 /* generate GP IRQ on roll over */ +#define WDEV_NONE 0x0006 /* no event on roll over */ +#define WDEN 0x0FF0 /* enable watchdog */ +#define WDDIS 0x0AD0 /* disable watchdog */ +#define WDRO 0x8000 /* watchdog rolled over latch */ + +/* depreciated WDOG_CTL Register Masks for legacy code */ + + +#define ICTL WDEV +#define ENABLE_RESET WDEV_RESET +#define WDOG_RESET WDEV_RESET +#define ENABLE_NMI WDEV_NMI +#define WDOG_NMI WDEV_NMI +#define ENABLE_GPI WDEV_GPI +#define WDOG_GPI WDEV_GPI +#define DISABLE_EVT WDEV_NONE +#define WDOG_NONE WDEV_NONE + +#define TMR_EN WDEN +#define TMR_DIS WDDIS +#define TRO WDRO +#define ICTL_P0 0x01 + #define ICTL_P1 0x02 +#define TRO_P 0x0F + + + +/* *************** REAL TIME CLOCK MASKS **************************/ +/* RTC_STAT and RTC_ALARM Masks */ +#define RTC_SEC 0x0000003F /* Real-Time Clock Seconds */ +#define RTC_MIN 0x00000FC0 /* Real-Time Clock Minutes */ +#define RTC_HR 0x0001F000 /* Real-Time Clock Hours */ +#define RTC_DAY 0xFFFE0000 /* Real-Time Clock Days */ + +/* RTC_ALARM Macro z=day y=hr x=min w=sec */ +#define SET_ALARM(z,y,x,w) ((((z)&0x7FFF)<<0x11)|(((y)&0x1F)<<0xC)|(((x)&0x3F)<<0x6)|((w)&0x3F)) + +/* RTC_ICTL and RTC_ISTAT Masks */ +#define STOPWATCH 0x0001 /* Stopwatch Interrupt Enable */ +#define ALARM 0x0002 /* Alarm Interrupt Enable */ +#define SECOND 0x0004 /* Seconds (1 Hz) Interrupt Enable */ +#define MINUTE 0x0008 /* Minutes Interrupt Enable */ +#define HOUR 0x0010 /* Hours Interrupt Enable */ +#define DAY 0x0020 /* 24 Hours (Days) Interrupt Enable */ +#define DAY_ALARM 0x0040 /* Day Alarm (Day, Hour, Minute, Second) Interrupt Enable */ +#define WRITE_PENDING 0x4000 /* Write Pending Status */ +#define WRITE_COMPLETE 0x8000 /* Write Complete Interrupt Enable */ + +/* RTC_FAST / RTC_PREN Mask */ +#define PREN 0x0001 /* Enable Prescaler, RTC Runs @1 Hz */ + + +/* ************** UART CONTROLLER MASKS *************************/ +/* UARTx_LCR Masks */ +#define WLS(x) (((x)-5) & 0x03) /* Word Length Select */ +#define STB 0x04 /* Stop Bits */ +#define PEN 0x08 /* Parity Enable */ +#define EPS 0x10 /* Even Parity Select */ +#define STP 0x20 /* Stick Parity */ +#define SB 0x40 /* Set Break */ +#define DLAB 0x80 /* Divisor Latch Access */ + +/* UARTx_MCR Mask */ +#define LOOP_ENA 0x10 /* Loopback Mode Enable */ +#define LOOP_ENA_P 0x04 + +/* UARTx_LSR Masks */ +#define DR 0x01 /* Data Ready */ +#define OE 0x02 /* Overrun Error */ +#define PE 0x04 /* Parity Error */ +#define FE 0x08 /* Framing Error */ +#define BI 0x10 /* Break Interrupt */ +#define THRE 0x20 /* THR Empty */ +#define TEMT 0x40 /* TSR and UART_THR Empty */ + +/* UARTx_IER Masks */ +#define ERBFI 0x01 /* Enable Receive Buffer Full Interrupt */ +#define ETBEI 0x02 /* Enable Transmit Buffer Empty Interrupt */ +#define ELSI 0x04 /* Enable RX Status Interrupt */ + +/* UARTx_IIR Masks */ +#define NINT 0x01 /* Pending Interrupt */ +#define IIR_TX_READY 0x02 /* UART_THR empty */ +#define IIR_RX_READY 0x04 /* Receive data ready */ +#define IIR_LINE_CHANGE 0x06 /* Receive line status */ +#define IIR_STATUS 0x06 /* Highest Priority Pending Interrupt */ + +/* UARTx_GCTL Masks */ +#define UCEN 0x01 /* Enable UARTx Clocks */ +#define IREN 0x02 /* Enable IrDA Mode */ +#define TPOLC 0x04 /* IrDA TX Polarity Change */ +#define RPOLC 0x08 /* IrDA RX Polarity Change */ +#define FPE 0x10 /* Force Parity Error On Transmit */ +#define FFE 0x20 /* Force Framing Error On Transmit */ + + +/* *********** SERIAL PERIPHERAL INTERFACE (SPI) MASKS ****************************/ +/* SPI_CTL Masks */ +#define TIMOD 0x0003 /* Transfer Initiate Mode */ +#define RDBR_CORE 0x0000 /* RDBR Read Initiates, IRQ When RDBR Full */ +#define TDBR_CORE 0x0001 /* TDBR Write Initiates, IRQ When TDBR Empty */ +#define RDBR_DMA 0x0002 /* DMA Read, DMA Until FIFO Empty */ +#define TDBR_DMA 0x0003 /* DMA Write, DMA Until FIFO Full */ +#define SZ 0x0004 /* Send Zero (When TDBR Empty, Send Zero/Last*) */ +#define GM 0x0008 /* Get More (When RDBR Full, Overwrite/Discard*) */ +#define PSSE 0x0010 /* Slave-Select Input Enable */ +#define EMISO 0x0020 /* Enable MISO As Output */ +#define SIZE 0x0100 /* Size of Words (16/8* Bits) */ +#define LSBF 0x0200 /* LSB First */ +#define CPHA 0x0400 /* Clock Phase */ +#define CPOL 0x0800 /* Clock Polarity */ +#define MSTR 0x1000 /* Master/Slave* */ +#define WOM 0x2000 /* Write Open Drain Master */ +#define SPE 0x4000 /* SPI Enable */ + +/* SPI_FLG Masks */ +#define FLS1 0x0002 /* Enables SPI_FLOUT1 as SPI Slave-Select Output */ +#define FLS2 0x0004 /* Enables SPI_FLOUT2 as SPI Slave-Select Output */ +#define FLS3 0x0008 /* Enables SPI_FLOUT3 as SPI Slave-Select Output */ +#define FLS4 0x0010 /* Enables SPI_FLOUT4 as SPI Slave-Select Output */ +#define FLS5 0x0020 /* Enables SPI_FLOUT5 as SPI Slave-Select Output */ +#define FLS6 0x0040 /* Enables SPI_FLOUT6 as SPI Slave-Select Output */ +#define FLS7 0x0080 /* Enables SPI_FLOUT7 as SPI Slave-Select Output */ +#define FLG1 0xFDFF /* Activates SPI_FLOUT1 */ +#define FLG2 0xFBFF /* Activates SPI_FLOUT2 */ +#define FLG3 0xF7FF /* Activates SPI_FLOUT3 */ +#define FLG4 0xEFFF /* Activates SPI_FLOUT4 */ +#define FLG5 0xDFFF /* Activates SPI_FLOUT5 */ +#define FLG6 0xBFFF /* Activates SPI_FLOUT6 */ +#define FLG7 0x7FFF /* Activates SPI_FLOUT7 */ + +/* SPI_STAT Masks */ +#define SPIF 0x0001 /* SPI Finished (Single-Word Transfer Complete) */ +#define MODF 0x0002 /* Mode Fault Error (Another Device Tried To Become Master) */ +#define TXE 0x0004 /* Transmission Error (Data Sent With No New Data In TDBR) */ +#define TXS 0x0008 /* SPI_TDBR Data Buffer Status (Full/Empty*) */ +#define RBSY 0x0010 /* Receive Error (Data Received With RDBR Full) */ +#define RXS 0x0020 /* SPI_RDBR Data Buffer Status (Full/Empty*) */ +#define TXCOL 0x0040 /* Transmit Collision Error (Corrupt Data May Have Been Sent) */ + + +/* **************** GENERAL PURPOSE TIMER MASKS **********************/ +/* TIMER_ENABLE Masks */ +#define TIMEN0 0x0001 /* Enable Timer 0 */ +#define TIMEN1 0x0002 /* Enable Timer 1 */ +#define TIMEN2 0x0004 /* Enable Timer 2 */ +#define TIMEN3 0x0008 /* Enable Timer 3 */ +#define TIMEN4 0x0010 /* Enable Timer 4 */ +#define TIMEN5 0x0020 /* Enable Timer 5 */ +#define TIMEN6 0x0040 /* Enable Timer 6 */ +#define TIMEN7 0x0080 /* Enable Timer 7 */ + +/* TIMER_DISABLE Masks */ +#define TIMDIS0 TIMEN0 /* Disable Timer 0 */ +#define TIMDIS1 TIMEN1 /* Disable Timer 1 */ +#define TIMDIS2 TIMEN2 /* Disable Timer 2 */ +#define TIMDIS3 TIMEN3 /* Disable Timer 3 */ +#define TIMDIS4 TIMEN4 /* Disable Timer 4 */ +#define TIMDIS5 TIMEN5 /* Disable Timer 5 */ +#define TIMDIS6 TIMEN6 /* Disable Timer 6 */ +#define TIMDIS7 TIMEN7 /* Disable Timer 7 */ + +/* TIMER_STATUS Masks */ +#define TIMIL0 0x00000001 /* Timer 0 Interrupt */ +#define TIMIL1 0x00000002 /* Timer 1 Interrupt */ +#define TIMIL2 0x00000004 /* Timer 2 Interrupt */ +#define TIMIL3 0x00000008 /* Timer 3 Interrupt */ +#define TOVF_ERR0 0x00000010 /* Timer 0 Counter Overflow */ +#define TOVF_ERR1 0x00000020 /* Timer 1 Counter Overflow */ +#define TOVF_ERR2 0x00000040 /* Timer 2 Counter Overflow */ +#define TOVF_ERR3 0x00000080 /* Timer 3 Counter Overflow */ +#define TRUN0 0x00001000 /* Timer 0 Slave Enable Status */ +#define TRUN1 0x00002000 /* Timer 1 Slave Enable Status */ +#define TRUN2 0x00004000 /* Timer 2 Slave Enable Status */ +#define TRUN3 0x00008000 /* Timer 3 Slave Enable Status */ +#define TIMIL4 0x00010000 /* Timer 4 Interrupt */ +#define TIMIL5 0x00020000 /* Timer 5 Interrupt */ +#define TIMIL6 0x00040000 /* Timer 6 Interrupt */ +#define TIMIL7 0x00080000 /* Timer 7 Interrupt */ +#define TOVF_ERR4 0x00100000 /* Timer 4 Counter Overflow */ +#define TOVF_ERR5 0x00200000 /* Timer 5 Counter Overflow */ +#define TOVF_ERR6 0x00400000 /* Timer 6 Counter Overflow */ +#define TOVF_ERR7 0x00800000 /* Timer 7 Counter Overflow */ +#define TRUN4 0x10000000 /* Timer 4 Slave Enable Status */ +#define TRUN5 0x20000000 /* Timer 5 Slave Enable Status */ +#define TRUN6 0x40000000 /* Timer 6 Slave Enable Status */ +#define TRUN7 0x80000000 /* Timer 7 Slave Enable Status */ + +/* Alternate Deprecated Macros Provided For Backwards Code Compatibility */ +#define TOVL_ERR0 TOVF_ERR0 +#define TOVL_ERR1 TOVF_ERR1 +#define TOVL_ERR2 TOVF_ERR2 +#define TOVL_ERR3 TOVF_ERR3 +#define TOVL_ERR4 TOVF_ERR4 +#define TOVL_ERR5 TOVF_ERR5 +#define TOVL_ERR6 TOVF_ERR6 +#define TOVL_ERR7 TOVF_ERR7 + +/* TIMERx_CONFIG Masks */ +#define PWM_OUT 0x0001 /* Pulse-Width Modulation Output Mode */ +#define WDTH_CAP 0x0002 /* Width Capture Input Mode */ +#define EXT_CLK 0x0003 /* External Clock Mode */ +#define PULSE_HI 0x0004 /* Action Pulse (Positive/Negative*) */ +#define PERIOD_CNT 0x0008 /* Period Count */ +#define IRQ_ENA 0x0010 /* Interrupt Request Enable */ +#define TIN_SEL 0x0020 /* Timer Input Select */ +#define OUT_DIS 0x0040 /* Output Pad Disable */ +#define CLK_SEL 0x0080 /* Timer Clock Select */ +#define TOGGLE_HI 0x0100 /* PWM_OUT PULSE_HI Toggle Mode */ +#define EMU_RUN 0x0200 /* Emulation Behavior Select */ +#define ERR_TYP 0xC000 /* Error Type */ + + +/* ****************** GPIO PORTS F, G, H MASKS ***********************/ +/* General Purpose IO (0xFFC00700 - 0xFFC007FF) Masks */ +/* Port F Masks */ +#define PF0 0x0001 +#define PF1 0x0002 +#define PF2 0x0004 +#define PF3 0x0008 +#define PF4 0x0010 +#define PF5 0x0020 +#define PF6 0x0040 +#define PF7 0x0080 +#define PF8 0x0100 +#define PF9 0x0200 +#define PF10 0x0400 +#define PF11 0x0800 +#define PF12 0x1000 +#define PF13 0x2000 +#define PF14 0x4000 +#define PF15 0x8000 + +/* Port G Masks */ +#define PG0 0x0001 +#define PG1 0x0002 +#define PG2 0x0004 +#define PG3 0x0008 +#define PG4 0x0010 +#define PG5 0x0020 +#define PG6 0x0040 +#define PG7 0x0080 +#define PG8 0x0100 +#define PG9 0x0200 +#define PG10 0x0400 +#define PG11 0x0800 +#define PG12 0x1000 +#define PG13 0x2000 +#define PG14 0x4000 +#define PG15 0x8000 + +/* Port H Masks */ +#define PH0 0x0001 +#define PH1 0x0002 +#define PH2 0x0004 +#define PH3 0x0008 +#define PH4 0x0010 +#define PH5 0x0020 +#define PH6 0x0040 +#define PH7 0x0080 + + +/* ******************* SERIAL PORT MASKS **************************************/ +/* SPORTx_TCR1 Masks */ +#define TSPEN 0x0001 /* Transmit Enable */ +#define ITCLK 0x0002 /* Internal Transmit Clock Select */ +#define DTYPE_NORM 0x0004 /* Data Format Normal */ +#define DTYPE_ULAW 0x0008 /* Compand Using u-Law */ +#define DTYPE_ALAW 0x000C /* Compand Using A-Law */ +#define TLSBIT 0x0010 /* Transmit Bit Order */ +#define ITFS 0x0200 /* Internal Transmit Frame Sync Select */ +#define TFSR 0x0400 /* Transmit Frame Sync Required Select */ +#define DITFS 0x0800 /* Data-Independent Transmit Frame Sync Select */ +#define LTFS 0x1000 /* Low Transmit Frame Sync Select */ +#define LATFS 0x2000 /* Late Transmit Frame Sync Select */ +#define TCKFE 0x4000 /* Clock Falling Edge Select */ + +/* SPORTx_TCR2 Masks and Macro */ +#define SLEN(x) ((x)&0x1F) /* SPORT TX Word Length (2 - 31) */ +#define TXSE 0x0100 /* TX Secondary Enable */ +#define TSFSE 0x0200 /* Transmit Stereo Frame Sync Enable */ +#define TRFST 0x0400 /* Left/Right Order (1 = Right Channel 1st) */ + +/* SPORTx_RCR1 Masks */ +#define RSPEN 0x0001 /* Receive Enable */ +#define IRCLK 0x0002 /* Internal Receive Clock Select */ +#define DTYPE_NORM 0x0004 /* Data Format Normal */ +#define DTYPE_ULAW 0x0008 /* Compand Using u-Law */ +#define DTYPE_ALAW 0x000C /* Compand Using A-Law */ +#define RLSBIT 0x0010 /* Receive Bit Order */ +#define IRFS 0x0200 /* Internal Receive Frame Sync Select */ +#define RFSR 0x0400 /* Receive Frame Sync Required Select */ +#define LRFS 0x1000 /* Low Receive Frame Sync Select */ +#define LARFS 0x2000 /* Late Receive Frame Sync Select */ +#define RCKFE 0x4000 /* Clock Falling Edge Select */ + +/* SPORTx_RCR2 Masks */ +#define SLEN(x) ((x)&0x1F) /* SPORT RX Word Length (2 - 31) */ +#define RXSE 0x0100 /* RX Secondary Enable */ +#define RSFSE 0x0200 /* RX Stereo Frame Sync Enable */ +#define RRFST 0x0400 /* Right-First Data Order */ + +/* SPORTx_STAT Masks */ +#define RXNE 0x0001 /* Receive FIFO Not Empty Status */ +#define RUVF 0x0002 /* Sticky Receive Underflow Status */ +#define ROVF 0x0004 /* Sticky Receive Overflow Status */ +#define TXF 0x0008 /* Transmit FIFO Full Status */ +#define TUVF 0x0010 /* Sticky Transmit Underflow Status */ +#define TOVF 0x0020 /* Sticky Transmit Overflow Status */ +#define TXHRE 0x0040 /* Transmit Hold Register Empty */ + +/* SPORTx_MCMC1 Macros */ +#define SP_WOFF(x) ((x) & 0x3FF) /* Multichannel Window Offset Field */ + +/* Only use WSIZE Macro With Logic OR While Setting Lower Order Bits */ +#define SP_WSIZE(x) (((((x)>>0x3)-1)&0xF) << 0xC) /* Multichannel Window Size = (x/8)-1 */ + +/* SPORTx_MCMC2 Masks */ +#define REC_BYPASS 0x0000 /* Bypass Mode (No Clock Recovery) */ +#define REC_2FROM4 0x0002 /* Recover 2 MHz Clock from 4 MHz Clock */ +#define REC_8FROM16 0x0003 /* Recover 8 MHz Clock from 16 MHz Clock */ +#define MCDTXPE 0x0004 /* Multichannel DMA Transmit Packing */ +#define MCDRXPE 0x0008 /* Multichannel DMA Receive Packing */ +#define MCMEN 0x0010 /* Multichannel Frame Mode Enable */ +#define FSDR 0x0080 /* Multichannel Frame Sync to Data Relationship */ +#define MFD_0 0x0000 /* Multichannel Frame Delay = 0 */ +#define MFD_1 0x1000 /* Multichannel Frame Delay = 1 */ +#define MFD_2 0x2000 /* Multichannel Frame Delay = 2 */ +#define MFD_3 0x3000 /* Multichannel Frame Delay = 3 */ +#define MFD_4 0x4000 /* Multichannel Frame Delay = 4 */ +#define MFD_5 0x5000 /* Multichannel Frame Delay = 5 */ +#define MFD_6 0x6000 /* Multichannel Frame Delay = 6 */ +#define MFD_7 0x7000 /* Multichannel Frame Delay = 7 */ +#define MFD_8 0x8000 /* Multichannel Frame Delay = 8 */ +#define MFD_9 0x9000 /* Multichannel Frame Delay = 9 */ +#define MFD_10 0xA000 /* Multichannel Frame Delay = 10 */ +#define MFD_11 0xB000 /* Multichannel Frame Delay = 11 */ +#define MFD_12 0xC000 /* Multichannel Frame Delay = 12 */ +#define MFD_13 0xD000 /* Multichannel Frame Delay = 13 */ +#define MFD_14 0xE000 /* Multichannel Frame Delay = 14 */ +#define MFD_15 0xF000 /* Multichannel Frame Delay = 15 */ + + +/* ********************* ASYNCHRONOUS MEMORY CONTROLLER MASKS *************************/ +/* EBIU_AMGCTL Masks */ +#define AMCKEN 0x0001 /* Enable CLKOUT */ +#define AMBEN_NONE 0x0000 /* All Banks Disabled */ +#define AMBEN_B0 0x0002 /* Enable Async Memory Bank 0 only */ +#define AMBEN_B0_B1 0x0004 /* Enable Async Memory Banks 0 & 1 only */ +#define AMBEN_B0_B1_B2 0x0006 /* Enable Async Memory Banks 0, 1, and 2 */ +#define AMBEN_ALL 0x0008 /* Enable Async Memory Banks (all) 0, 1, 2, and 3 */ + +/* EBIU_AMBCTL0 Masks */ +#define B0RDYEN 0x00000001 /* Bank 0 (B0) RDY Enable */ +#define B0RDYPOL 0x00000002 /* B0 RDY Active High */ +#define B0TT_1 0x00000004 /* B0 Transition Time (Read to Write) = 1 cycle */ +#define B0TT_2 0x00000008 /* B0 Transition Time (Read to Write) = 2 cycles */ +#define B0TT_3 0x0000000C /* B0 Transition Time (Read to Write) = 3 cycles */ +#define B0TT_4 0x00000000 /* B0 Transition Time (Read to Write) = 4 cycles */ +#define B0ST_1 0x00000010 /* B0 Setup Time (AOE to Read/Write) = 1 cycle */ +#define B0ST_2 0x00000020 /* B0 Setup Time (AOE to Read/Write) = 2 cycles */ +#define B0ST_3 0x00000030 /* B0 Setup Time (AOE to Read/Write) = 3 cycles */ +#define B0ST_4 0x00000000 /* B0 Setup Time (AOE to Read/Write) = 4 cycles */ +#define B0HT_1 0x00000040 /* B0 Hold Time (~Read/Write to ~AOE) = 1 cycle */ +#define B0HT_2 0x00000080 /* B0 Hold Time (~Read/Write to ~AOE) = 2 cycles */ +#define B0HT_3 0x000000C0 /* B0 Hold Time (~Read/Write to ~AOE) = 3 cycles */ +#define B0HT_0 0x00000000 /* B0 Hold Time (~Read/Write to ~AOE) = 0 cycles */ +#define B0RAT_1 0x00000100 /* B0 Read Access Time = 1 cycle */ +#define B0RAT_2 0x00000200 /* B0 Read Access Time = 2 cycles */ +#define B0RAT_3 0x00000300 /* B0 Read Access Time = 3 cycles */ +#define B0RAT_4 0x00000400 /* B0 Read Access Time = 4 cycles */ +#define B0RAT_5 0x00000500 /* B0 Read Access Time = 5 cycles */ +#define B0RAT_6 0x00000600 /* B0 Read Access Time = 6 cycles */ +#define B0RAT_7 0x00000700 /* B0 Read Access Time = 7 cycles */ +#define B0RAT_8 0x00000800 /* B0 Read Access Time = 8 cycles */ +#define B0RAT_9 0x00000900 /* B0 Read Access Time = 9 cycles */ +#define B0RAT_10 0x00000A00 /* B0 Read Access Time = 10 cycles */ +#define B0RAT_11 0x00000B00 /* B0 Read Access Time = 11 cycles */ +#define B0RAT_12 0x00000C00 /* B0 Read Access Time = 12 cycles */ +#define B0RAT_13 0x00000D00 /* B0 Read Access Time = 13 cycles */ +#define B0RAT_14 0x00000E00 /* B0 Read Access Time = 14 cycles */ +#define B0RAT_15 0x00000F00 /* B0 Read Access Time = 15 cycles */ +#define B0WAT_1 0x00001000 /* B0 Write Access Time = 1 cycle */ +#define B0WAT_2 0x00002000 /* B0 Write Access Time = 2 cycles */ +#define B0WAT_3 0x00003000 /* B0 Write Access Time = 3 cycles */ +#define B0WAT_4 0x00004000 /* B0 Write Access Time = 4 cycles */ +#define B0WAT_5 0x00005000 /* B0 Write Access Time = 5 cycles */ +#define B0WAT_6 0x00006000 /* B0 Write Access Time = 6 cycles */ +#define B0WAT_7 0x00007000 /* B0 Write Access Time = 7 cycles */ +#define B0WAT_8 0x00008000 /* B0 Write Access Time = 8 cycles */ +#define B0WAT_9 0x00009000 /* B0 Write Access Time = 9 cycles */ +#define B0WAT_10 0x0000A000 /* B0 Write Access Time = 10 cycles */ +#define B0WAT_11 0x0000B000 /* B0 Write Access Time = 11 cycles */ +#define B0WAT_12 0x0000C000 /* B0 Write Access Time = 12 cycles */ +#define B0WAT_13 0x0000D000 /* B0 Write Access Time = 13 cycles */ +#define B0WAT_14 0x0000E000 /* B0 Write Access Time = 14 cycles */ +#define B0WAT_15 0x0000F000 /* B0 Write Access Time = 15 cycles */ + +#define B1RDYEN 0x00010000 /* Bank 1 (B1) RDY Enable */ +#define B1RDYPOL 0x00020000 /* B1 RDY Active High */ +#define B1TT_1 0x00040000 /* B1 Transition Time (Read to Write) = 1 cycle */ +#define B1TT_2 0x00080000 /* B1 Transition Time (Read to Write) = 2 cycles */ +#define B1TT_3 0x000C0000 /* B1 Transition Time (Read to Write) = 3 cycles */ +#define B1TT_4 0x00000000 /* B1 Transition Time (Read to Write) = 4 cycles */ +#define B1ST_1 0x00100000 /* B1 Setup Time (AOE to Read/Write) = 1 cycle */ +#define B1ST_2 0x00200000 /* B1 Setup Time (AOE to Read/Write) = 2 cycles */ +#define B1ST_3 0x00300000 /* B1 Setup Time (AOE to Read/Write) = 3 cycles */ +#define B1ST_4 0x00000000 /* B1 Setup Time (AOE to Read/Write) = 4 cycles */ +#define B1HT_1 0x00400000 /* B1 Hold Time (~Read/Write to ~AOE) = 1 cycle */ +#define B1HT_2 0x00800000 /* B1 Hold Time (~Read/Write to ~AOE) = 2 cycles */ +#define B1HT_3 0x00C00000 /* B1 Hold Time (~Read/Write to ~AOE) = 3 cycles */ +#define B1HT_0 0x00000000 /* B1 Hold Time (~Read/Write to ~AOE) = 0 cycles */ +#define B1RAT_1 0x01000000 /* B1 Read Access Time = 1 cycle */ +#define B1RAT_2 0x02000000 /* B1 Read Access Time = 2 cycles */ +#define B1RAT_3 0x03000000 /* B1 Read Access Time = 3 cycles */ +#define B1RAT_4 0x04000000 /* B1 Read Access Time = 4 cycles */ +#define B1RAT_5 0x05000000 /* B1 Read Access Time = 5 cycles */ +#define B1RAT_6 0x06000000 /* B1 Read Access Time = 6 cycles */ +#define B1RAT_7 0x07000000 /* B1 Read Access Time = 7 cycles */ +#define B1RAT_8 0x08000000 /* B1 Read Access Time = 8 cycles */ +#define B1RAT_9 0x09000000 /* B1 Read Access Time = 9 cycles */ +#define B1RAT_10 0x0A000000 /* B1 Read Access Time = 10 cycles */ +#define B1RAT_11 0x0B000000 /* B1 Read Access Time = 11 cycles */ +#define B1RAT_12 0x0C000000 /* B1 Read Access Time = 12 cycles */ +#define B1RAT_13 0x0D000000 /* B1 Read Access Time = 13 cycles */ +#define B1RAT_14 0x0E000000 /* B1 Read Access Time = 14 cycles */ +#define B1RAT_15 0x0F000000 /* B1 Read Access Time = 15 cycles */ +#define B1WAT_1 0x10000000 /* B1 Write Access Time = 1 cycle */ +#define B1WAT_2 0x20000000 /* B1 Write Access Time = 2 cycles */ +#define B1WAT_3 0x30000000 /* B1 Write Access Time = 3 cycles */ +#define B1WAT_4 0x40000000 /* B1 Write Access Time = 4 cycles */ +#define B1WAT_5 0x50000000 /* B1 Write Access Time = 5 cycles */ +#define B1WAT_6 0x60000000 /* B1 Write Access Time = 6 cycles */ +#define B1WAT_7 0x70000000 /* B1 Write Access Time = 7 cycles */ +#define B1WAT_8 0x80000000 /* B1 Write Access Time = 8 cycles */ +#define B1WAT_9 0x90000000 /* B1 Write Access Time = 9 cycles */ +#define B1WAT_10 0xA0000000 /* B1 Write Access Time = 10 cycles */ +#define B1WAT_11 0xB0000000 /* B1 Write Access Time = 11 cycles */ +#define B1WAT_12 0xC0000000 /* B1 Write Access Time = 12 cycles */ +#define B1WAT_13 0xD0000000 /* B1 Write Access Time = 13 cycles */ +#define B1WAT_14 0xE0000000 /* B1 Write Access Time = 14 cycles */ +#define B1WAT_15 0xF0000000 /* B1 Write Access Time = 15 cycles */ + +/* EBIU_AMBCTL1 Masks */ +#define B2RDYEN 0x00000001 /* Bank 2 (B2) RDY Enable */ +#define B2RDYPOL 0x00000002 /* B2 RDY Active High */ +#define B2TT_1 0x00000004 /* B2 Transition Time (Read to Write) = 1 cycle */ +#define B2TT_2 0x00000008 /* B2 Transition Time (Read to Write) = 2 cycles */ +#define B2TT_3 0x0000000C /* B2 Transition Time (Read to Write) = 3 cycles */ +#define B2TT_4 0x00000000 /* B2 Transition Time (Read to Write) = 4 cycles */ +#define B2ST_1 0x00000010 /* B2 Setup Time (AOE to Read/Write) = 1 cycle */ +#define B2ST_2 0x00000020 /* B2 Setup Time (AOE to Read/Write) = 2 cycles */ +#define B2ST_3 0x00000030 /* B2 Setup Time (AOE to Read/Write) = 3 cycles */ +#define B2ST_4 0x00000000 /* B2 Setup Time (AOE to Read/Write) = 4 cycles */ +#define B2HT_1 0x00000040 /* B2 Hold Time (~Read/Write to ~AOE) = 1 cycle */ +#define B2HT_2 0x00000080 /* B2 Hold Time (~Read/Write to ~AOE) = 2 cycles */ +#define B2HT_3 0x000000C0 /* B2 Hold Time (~Read/Write to ~AOE) = 3 cycles */ +#define B2HT_0 0x00000000 /* B2 Hold Time (~Read/Write to ~AOE) = 0 cycles */ +#define B2RAT_1 0x00000100 /* B2 Read Access Time = 1 cycle */ +#define B2RAT_2 0x00000200 /* B2 Read Access Time = 2 cycles */ +#define B2RAT_3 0x00000300 /* B2 Read Access Time = 3 cycles */ +#define B2RAT_4 0x00000400 /* B2 Read Access Time = 4 cycles */ +#define B2RAT_5 0x00000500 /* B2 Read Access Time = 5 cycles */ +#define B2RAT_6 0x00000600 /* B2 Read Access Time = 6 cycles */ +#define B2RAT_7 0x00000700 /* B2 Read Access Time = 7 cycles */ +#define B2RAT_8 0x00000800 /* B2 Read Access Time = 8 cycles */ +#define B2RAT_9 0x00000900 /* B2 Read Access Time = 9 cycles */ +#define B2RAT_10 0x00000A00 /* B2 Read Access Time = 10 cycles */ +#define B2RAT_11 0x00000B00 /* B2 Read Access Time = 11 cycles */ +#define B2RAT_12 0x00000C00 /* B2 Read Access Time = 12 cycles */ +#define B2RAT_13 0x00000D00 /* B2 Read Access Time = 13 cycles */ +#define B2RAT_14 0x00000E00 /* B2 Read Access Time = 14 cycles */ +#define B2RAT_15 0x00000F00 /* B2 Read Access Time = 15 cycles */ +#define B2WAT_1 0x00001000 /* B2 Write Access Time = 1 cycle */ +#define B2WAT_2 0x00002000 /* B2 Write Access Time = 2 cycles */ +#define B2WAT_3 0x00003000 /* B2 Write Access Time = 3 cycles */ +#define B2WAT_4 0x00004000 /* B2 Write Access Time = 4 cycles */ +#define B2WAT_5 0x00005000 /* B2 Write Access Time = 5 cycles */ +#define B2WAT_6 0x00006000 /* B2 Write Access Time = 6 cycles */ +#define B2WAT_7 0x00007000 /* B2 Write Access Time = 7 cycles */ +#define B2WAT_8 0x00008000 /* B2 Write Access Time = 8 cycles */ +#define B2WAT_9 0x00009000 /* B2 Write Access Time = 9 cycles */ +#define B2WAT_10 0x0000A000 /* B2 Write Access Time = 10 cycles */ +#define B2WAT_11 0x0000B000 /* B2 Write Access Time = 11 cycles */ +#define B2WAT_12 0x0000C000 /* B2 Write Access Time = 12 cycles */ +#define B2WAT_13 0x0000D000 /* B2 Write Access Time = 13 cycles */ +#define B2WAT_14 0x0000E000 /* B2 Write Access Time = 14 cycles */ +#define B2WAT_15 0x0000F000 /* B2 Write Access Time = 15 cycles */ + +#define B3RDYEN 0x00010000 /* Bank 3 (B3) RDY Enable */ +#define B3RDYPOL 0x00020000 /* B3 RDY Active High */ +#define B3TT_1 0x00040000 /* B3 Transition Time (Read to Write) = 1 cycle */ +#define B3TT_2 0x00080000 /* B3 Transition Time (Read to Write) = 2 cycles */ +#define B3TT_3 0x000C0000 /* B3 Transition Time (Read to Write) = 3 cycles */ +#define B3TT_4 0x00000000 /* B3 Transition Time (Read to Write) = 4 cycles */ +#define B3ST_1 0x00100000 /* B3 Setup Time (AOE to Read/Write) = 1 cycle */ +#define B3ST_2 0x00200000 /* B3 Setup Time (AOE to Read/Write) = 2 cycles */ +#define B3ST_3 0x00300000 /* B3 Setup Time (AOE to Read/Write) = 3 cycles */ +#define B3ST_4 0x00000000 /* B3 Setup Time (AOE to Read/Write) = 4 cycles */ +#define B3HT_1 0x00400000 /* B3 Hold Time (~Read/Write to ~AOE) = 1 cycle */ +#define B3HT_2 0x00800000 /* B3 Hold Time (~Read/Write to ~AOE) = 2 cycles */ +#define B3HT_3 0x00C00000 /* B3 Hold Time (~Read/Write to ~AOE) = 3 cycles */ +#define B3HT_0 0x00000000 /* B3 Hold Time (~Read/Write to ~AOE) = 0 cycles */ +#define B3RAT_1 0x01000000 /* B3 Read Access Time = 1 cycle */ +#define B3RAT_2 0x02000000 /* B3 Read Access Time = 2 cycles */ +#define B3RAT_3 0x03000000 /* B3 Read Access Time = 3 cycles */ +#define B3RAT_4 0x04000000 /* B3 Read Access Time = 4 cycles */ +#define B3RAT_5 0x05000000 /* B3 Read Access Time = 5 cycles */ +#define B3RAT_6 0x06000000 /* B3 Read Access Time = 6 cycles */ +#define B3RAT_7 0x07000000 /* B3 Read Access Time = 7 cycles */ +#define B3RAT_8 0x08000000 /* B3 Read Access Time = 8 cycles */ +#define B3RAT_9 0x09000000 /* B3 Read Access Time = 9 cycles */ +#define B3RAT_10 0x0A000000 /* B3 Read Access Time = 10 cycles */ +#define B3RAT_11 0x0B000000 /* B3 Read Access Time = 11 cycles */ +#define B3RAT_12 0x0C000000 /* B3 Read Access Time = 12 cycles */ +#define B3RAT_13 0x0D000000 /* B3 Read Access Time = 13 cycles */ +#define B3RAT_14 0x0E000000 /* B3 Read Access Time = 14 cycles */ +#define B3RAT_15 0x0F000000 /* B3 Read Access Time = 15 cycles */ +#define B3WAT_1 0x10000000 /* B3 Write Access Time = 1 cycle */ +#define B3WAT_2 0x20000000 /* B3 Write Access Time = 2 cycles */ +#define B3WAT_3 0x30000000 /* B3 Write Access Time = 3 cycles */ +#define B3WAT_4 0x40000000 /* B3 Write Access Time = 4 cycles */ +#define B3WAT_5 0x50000000 /* B3 Write Access Time = 5 cycles */ +#define B3WAT_6 0x60000000 /* B3 Write Access Time = 6 cycles */ +#define B3WAT_7 0x70000000 /* B3 Write Access Time = 7 cycles */ +#define B3WAT_8 0x80000000 /* B3 Write Access Time = 8 cycles */ +#define B3WAT_9 0x90000000 /* B3 Write Access Time = 9 cycles */ +#define B3WAT_10 0xA0000000 /* B3 Write Access Time = 10 cycles */ +#define B3WAT_11 0xB0000000 /* B3 Write Access Time = 11 cycles */ +#define B3WAT_12 0xC0000000 /* B3 Write Access Time = 12 cycles */ +#define B3WAT_13 0xD0000000 /* B3 Write Access Time = 13 cycles */ +#define B3WAT_14 0xE0000000 /* B3 Write Access Time = 14 cycles */ +#define B3WAT_15 0xF0000000 /* B3 Write Access Time = 15 cycles */ + + +/* ********************** SDRAM CONTROLLER MASKS **********************************************/ +/* EBIU_SDGCTL Masks */ +#define SCTLE 0x00000001 /* Enable SDRAM Signals */ +#define CL_2 0x00000008 /* SDRAM CAS Latency = 2 cycles */ +#define CL_3 0x0000000C /* SDRAM CAS Latency = 3 cycles */ +#define PASR_ALL 0x00000000 /* All 4 SDRAM Banks Refreshed In Self-Refresh */ +#define PASR_B0_B1 0x00000010 /* SDRAM Banks 0 and 1 Are Refreshed In Self-Refresh */ +#define PASR_B0 0x00000020 /* Only SDRAM Bank 0 Is Refreshed In Self-Refresh */ +#define TRAS_1 0x00000040 /* SDRAM tRAS = 1 cycle */ +#define TRAS_2 0x00000080 /* SDRAM tRAS = 2 cycles */ +#define TRAS_3 0x000000C0 /* SDRAM tRAS = 3 cycles */ +#define TRAS_4 0x00000100 /* SDRAM tRAS = 4 cycles */ +#define TRAS_5 0x00000140 /* SDRAM tRAS = 5 cycles */ +#define TRAS_6 0x00000180 /* SDRAM tRAS = 6 cycles */ +#define TRAS_7 0x000001C0 /* SDRAM tRAS = 7 cycles */ +#define TRAS_8 0x00000200 /* SDRAM tRAS = 8 cycles */ +#define TRAS_9 0x00000240 /* SDRAM tRAS = 9 cycles */ +#define TRAS_10 0x00000280 /* SDRAM tRAS = 10 cycles */ +#define TRAS_11 0x000002C0 /* SDRAM tRAS = 11 cycles */ +#define TRAS_12 0x00000300 /* SDRAM tRAS = 12 cycles */ +#define TRAS_13 0x00000340 /* SDRAM tRAS = 13 cycles */ +#define TRAS_14 0x00000380 /* SDRAM tRAS = 14 cycles */ +#define TRAS_15 0x000003C0 /* SDRAM tRAS = 15 cycles */ +#define TRP_1 0x00000800 /* SDRAM tRP = 1 cycle */ +#define TRP_2 0x00001000 /* SDRAM tRP = 2 cycles */ +#define TRP_3 0x00001800 /* SDRAM tRP = 3 cycles */ +#define TRP_4 0x00002000 /* SDRAM tRP = 4 cycles */ +#define TRP_5 0x00002800 /* SDRAM tRP = 5 cycles */ +#define TRP_6 0x00003000 /* SDRAM tRP = 6 cycles */ +#define TRP_7 0x00003800 /* SDRAM tRP = 7 cycles */ +#define TRCD_1 0x00008000 /* SDRAM tRCD = 1 cycle */ +#define TRCD_2 0x00010000 /* SDRAM tRCD = 2 cycles */ +#define TRCD_3 0x00018000 /* SDRAM tRCD = 3 cycles */ +#define TRCD_4 0x00020000 /* SDRAM tRCD = 4 cycles */ +#define TRCD_5 0x00028000 /* SDRAM tRCD = 5 cycles */ +#define TRCD_6 0x00030000 /* SDRAM tRCD = 6 cycles */ +#define TRCD_7 0x00038000 /* SDRAM tRCD = 7 cycles */ +#define TWR_1 0x00080000 /* SDRAM tWR = 1 cycle */ +#define TWR_2 0x00100000 /* SDRAM tWR = 2 cycles */ +#define TWR_3 0x00180000 /* SDRAM tWR = 3 cycles */ +#define PUPSD 0x00200000 /* Power-Up Start Delay (15 SCLK Cycles Delay) */ +#define PSM 0x00400000 /* Power-Up Sequence (Mode Register Before/After* Refresh) */ +#define PSS 0x00800000 /* Enable Power-Up Sequence on Next SDRAM Access */ +#define SRFS 0x01000000 /* Enable SDRAM Self-Refresh Mode */ +#define EBUFE 0x02000000 /* Enable External Buffering Timing */ +#define FBBRW 0x04000000 /* Enable Fast Back-To-Back Read To Write */ +#define EMREN 0x10000000 /* Extended Mode Register Enable */ +#define TCSR 0x20000000 /* Temp-Compensated Self-Refresh Value (85/45* Deg C) */ +#define CDDBG 0x40000000 /* Tristate SDRAM Controls During Bus Grant */ + +/* EBIU_SDBCTL Masks */ +#define EBE 0x0001 /* Enable SDRAM External Bank */ +#define EBSZ_16 0x0000 /* SDRAM External Bank Size = 16MB */ +#define EBSZ_32 0x0002 /* SDRAM External Bank Size = 32MB */ +#define EBSZ_64 0x0004 /* SDRAM External Bank Size = 64MB */ +#define EBSZ_128 0x0006 /* SDRAM External Bank Size = 128MB */ +#define EBSZ_256 0x0008 /* SDRAM External Bank Size = 256MB */ +#define EBSZ_512 0x000A /* SDRAM External Bank Size = 512MB */ +#define EBCAW_8 0x0000 /* SDRAM External Bank Column Address Width = 8 Bits */ +#define EBCAW_9 0x0010 /* SDRAM External Bank Column Address Width = 9 Bits */ +#define EBCAW_10 0x0020 /* SDRAM External Bank Column Address Width = 10 Bits */ +#define EBCAW_11 0x0030 /* SDRAM External Bank Column Address Width = 11 Bits */ + +/* EBIU_SDSTAT Masks */ +#define SDCI 0x0001 /* SDRAM Controller Idle */ +#define SDSRA 0x0002 /* SDRAM Self-Refresh Active */ +#define SDPUA 0x0004 /* SDRAM Power-Up Active */ +#define SDRS 0x0008 /* SDRAM Will Power-Up On Next Access */ +#define SDEASE 0x0010 /* SDRAM EAB Sticky Error Status */ +#define BGSTAT 0x0020 /* Bus Grant Status */ + + +/* ************************** DMA CONTROLLER MASKS ********************************/ +/* DMAx_CONFIG, MDMA_yy_CONFIG Masks */ +#define DMAEN 0x0001 /* DMA Channel Enable */ +#define WNR 0x0002 /* Channel Direction (W/R*) */ +#define WDSIZE_8 0x0000 /* Transfer Word Size = 8 */ +#define WDSIZE_16 0x0004 /* Transfer Word Size = 16 */ +#define WDSIZE_32 0x0008 /* Transfer Word Size = 32 */ +#define DMA2D 0x0010 /* DMA Mode (2D/1D*) */ +#define RESTART 0x0020 /* DMA Buffer Clear */ +#define DI_SEL 0x0040 /* Data Interrupt Timing Select */ +#define DI_EN 0x0080 /* Data Interrupt Enable */ +#define NDSIZE_0 0x0000 /* Next Descriptor Size = 0 (Stop/Autobuffer) */ +#define NDSIZE_1 0x0100 /* Next Descriptor Size = 1 */ +#define NDSIZE_2 0x0200 /* Next Descriptor Size = 2 */ +#define NDSIZE_3 0x0300 /* Next Descriptor Size = 3 */ +#define NDSIZE_4 0x0400 /* Next Descriptor Size = 4 */ +#define NDSIZE_5 0x0500 /* Next Descriptor Size = 5 */ +#define NDSIZE_6 0x0600 /* Next Descriptor Size = 6 */ +#define NDSIZE_7 0x0700 /* Next Descriptor Size = 7 */ +#define NDSIZE_8 0x0800 /* Next Descriptor Size = 8 */ +#define NDSIZE_9 0x0900 /* Next Descriptor Size = 9 */ +#define NDSIZE 0x0900 /* Next Descriptor Size */ +#define DMAFLOW 0x7000 /* Flow Control */ +#define DMAFLOW_STOP 0x0000 /* Stop Mode */ +#define DMAFLOW_AUTO 0x1000 /* Autobuffer Mode */ +#define DMAFLOW_ARRAY 0x4000 /* Descriptor Array Mode */ +#define DMAFLOW_SMALL 0x6000 /* Small Model Descriptor List Mode */ +#define DMAFLOW_LARGE 0x7000 /* Large Model Descriptor List Mode */ + +/* DMAx_PERIPHERAL_MAP, MDMA_yy_PERIPHERAL_MAP Masks */ +#define CTYPE 0x0040 /* DMA Channel Type Indicator (Memory/Peripheral*) */ +#define PMAP 0xF000 /* Peripheral Mapped To This Channel */ +#define PMAP_PPI 0x0000 /* PPI Port DMA */ +#define PMAP_EMACRX 0x1000 /* Ethernet Receive DMA */ +#define PMAP_EMACTX 0x2000 /* Ethernet Transmit DMA */ +#define PMAP_SPORT0RX 0x3000 /* SPORT0 Receive DMA */ +#define PMAP_SPORT0TX 0x4000 /* SPORT0 Transmit DMA */ +#define PMAP_SPORT1RX 0x5000 /* SPORT1 Receive DMA */ +#define PMAP_SPORT1TX 0x6000 /* SPORT1 Transmit DMA */ +#define PMAP_SPI 0x7000 /* SPI Port DMA */ +#define PMAP_UART0RX 0x8000 /* UART0 Port Receive DMA */ +#define PMAP_UART0TX 0x9000 /* UART0 Port Transmit DMA */ +#define PMAP_UART1RX 0xA000 /* UART1 Port Receive DMA */ +#define PMAP_UART1TX 0xB000 /* UART1 Port Transmit DMA */ + +/* DMAx_IRQ_STATUS, MDMA_yy_IRQ_STATUS Masks */ +#define DMA_DONE 0x0001 /* DMA Completion Interrupt Status */ +#define DMA_ERR 0x0002 /* DMA Error Interrupt Status */ +#define DFETCH 0x0004 /* DMA Descriptor Fetch Indicator */ +#define DMA_RUN 0x0008 /* DMA Channel Running Indicator */ + + +/* ************ PARALLEL PERIPHERAL INTERFACE (PPI) MASKS *************/ +/* PPI_CONTROL Masks */ +#define PORT_EN 0x0001 /* PPI Port Enable */ +#define PORT_DIR 0x0002 /* PPI Port Direction */ +#define XFR_TYPE 0x000C /* PPI Transfer Type */ +#define PORT_CFG 0x0030 /* PPI Port Configuration */ +#define FLD_SEL 0x0040 /* PPI Active Field Select */ +#define PACK_EN 0x0080 /* PPI Packing Mode */ +#define DMA32 0x0100 /* PPI 32-bit DMA Enable */ +#define SKIP_EN 0x0200 /* PPI Skip Element Enable */ +#define SKIP_EO 0x0400 /* PPI Skip Even/Odd Elements */ +#define DLEN_8 0x0000 /* Data Length = 8 Bits */ +#define DLEN_10 0x0800 /* Data Length = 10 Bits */ +#define DLEN_11 0x1000 /* Data Length = 11 Bits */ +#define DLEN_12 0x1800 /* Data Length = 12 Bits */ +#define DLEN_13 0x2000 /* Data Length = 13 Bits */ +#define DLEN_14 0x2800 /* Data Length = 14 Bits */ +#define DLEN_15 0x3000 /* Data Length = 15 Bits */ +#define DLEN_16 0x3800 /* Data Length = 16 Bits */ +#define DLENGTH 0x3800 /* PPI Data Length */ +#define POLC 0x4000 /* PPI Clock Polarity */ +#define POLS 0x8000 /* PPI Frame Sync Polarity */ + +/* PPI_STATUS Masks */ +#define FLD 0x0400 /* Field Indicator */ +#define FT_ERR 0x0800 /* Frame Track Error */ +#define OVR 0x1000 /* FIFO Overflow Error */ +#define UNDR 0x2000 /* FIFO Underrun Error */ +#define ERR_DET 0x4000 /* Error Detected Indicator */ +#define ERR_NCOR 0x8000 /* Error Not Corrected Indicator */ + + +/* ******************** TWO-WIRE INTERFACE (TWI) MASKS ***********************/ +/* TWI_CLKDIV Macros (Use: *pTWI_CLKDIV = CLKLOW(x)|CLKHI(y); ) */ +#define CLKLOW(x) ((x) & 0xFF) /* Periods Clock Is Held Low */ +#define CLKHI(y) (((y)&0xFF)<<0x8) /* Periods Before New Clock Low */ + +/* TWI_PRESCALE Masks */ +#define PRESCALE 0x007F /* SCLKs Per Internal Time Reference (10MHz) */ +#define TWI_ENA 0x0080 /* TWI Enable */ +#define SCCB 0x0200 /* SCCB Compatibility Enable */ + +/* TWI_SLAVE_CTRL Masks */ +#define SEN 0x0001 /* Slave Enable */ +#define SADD_LEN 0x0002 /* Slave Address Length */ +#define STDVAL 0x0004 /* Slave Transmit Data Valid */ +#define NAK 0x0008 /* NAK/ACK* Generated At Conclusion Of Transfer */ +#define GEN 0x0010 /* General Call Adrress Matching Enabled */ + +/* TWI_SLAVE_STAT Masks */ +#define SDIR 0x0001 /* Slave Transfer Direction (Transmit/Receive*) */ +#define GCALL 0x0002 /* General Call Indicator */ + +/* TWI_MASTER_CTRL Masks */ +#define MEN 0x0001 /* Master Mode Enable */ +#define MADD_LEN 0x0002 /* Master Address Length */ +#define MDIR 0x0004 /* Master Transmit Direction (RX/TX*) */ +#define FAST 0x0008 /* Use Fast Mode Timing Specs */ +#define STOP 0x0010 /* Issue Stop Condition */ +#define RSTART 0x0020 /* Repeat Start or Stop* At End Of Transfer */ +#define DCNT 0x3FC0 /* Data Bytes To Transfer */ +#define SDAOVR 0x4000 /* Serial Data Override */ +#define SCLOVR 0x8000 /* Serial Clock Override */ + +/* TWI_MASTER_STAT Masks */ +#define MPROG 0x0001 /* Master Transfer In Progress */ +#define LOSTARB 0x0002 /* Lost Arbitration Indicator (Xfer Aborted) */ +#define ANAK 0x0004 /* Address Not Acknowledged */ +#define DNAK 0x0008 /* Data Not Acknowledged */ +#define BUFRDERR 0x0010 /* Buffer Read Error */ +#define BUFWRERR 0x0020 /* Buffer Write Error */ +#define SDASEN 0x0040 /* Serial Data Sense */ +#define SCLSEN 0x0080 /* Serial Clock Sense */ +#define BUSBUSY 0x0100 /* Bus Busy Indicator */ + +/* TWI_INT_SRC and TWI_INT_ENABLE Masks */ +#define SINIT 0x0001 /* Slave Transfer Initiated */ +#define SCOMP 0x0002 /* Slave Transfer Complete */ +#define SERR 0x0004 /* Slave Transfer Error */ +#define SOVF 0x0008 /* Slave Overflow */ +#define MCOMP 0x0010 /* Master Transfer Complete */ +#define MERR 0x0020 /* Master Transfer Error */ +#define XMTSERV 0x0040 /* Transmit FIFO Service */ +#define RCVSERV 0x0080 /* Receive FIFO Service */ + +/* TWI_FIFO_CTRL Masks */ +#define XMTFLUSH 0x0001 /* Transmit Buffer Flush */ +#define RCVFLUSH 0x0002 /* Receive Buffer Flush */ +#define XMTINTLEN 0x0004 /* Transmit Buffer Interrupt Length */ +#define RCVINTLEN 0x0008 /* Receive Buffer Interrupt Length */ + +/* TWI_FIFO_STAT Masks */ +#define XMTSTAT 0x0003 /* Transmit FIFO Status */ +#define XMT_EMPTY 0x0000 /* Transmit FIFO Empty */ +#define XMT_HALF 0x0001 /* Transmit FIFO Has 1 Byte To Write */ +#define XMT_FULL 0x0003 /* Transmit FIFO Full (2 Bytes To Write) */ + +#define RCVSTAT 0x000C /* Receive FIFO Status */ +#define RCV_EMPTY 0x0000 /* Receive FIFO Empty */ +#define RCV_HALF 0x0004 /* Receive FIFO Has 1 Byte To Read */ +#define RCV_FULL 0x000C /* Receive FIFO Full (2 Bytes To Read) */ + + +/* ******************* PIN CONTROL REGISTER MASKS ************************/ +/* PORT_MUX Masks */ +#define PJSE 0x0001 /* Port J SPI/SPORT Enable */ +#define PJSE_SPORT 0x0000 /* Enable TFS0/DT0PRI */ +#define PJSE_SPI 0x0001 /* Enable SPI_SSEL3:2 */ + +#define PJCE(x) (((x)&0x3)<<1) /* Port J CAN/SPI/SPORT Enable */ +#define PJCE_SPORT 0x0000 /* Enable DR0SEC/DT0SEC */ +#define PJCE_CAN 0x0002 /* Enable CAN RX/TX */ +#define PJCE_SPI 0x0004 /* Enable SPI_SSEL7 */ + +#define PFDE 0x0008 /* Port F DMA Request Enable */ +#define PFDE_UART 0x0000 /* Enable UART0 RX/TX */ +#define PFDE_DMA 0x0008 /* Enable DMAR1:0 */ + +#define PFTE 0x0010 /* Port F Timer Enable */ +#define PFTE_UART 0x0000 /* Enable UART1 RX/TX */ +#define PFTE_TIMER 0x0010 /* Enable TMR7:6 */ + +#define PFS6E 0x0020 /* Port F SPI SSEL 6 Enable */ +#define PFS6E_TIMER 0x0000 /* Enable TMR5 */ +#define PFS6E_SPI 0x0020 /* Enable SPI_SSEL6 */ + +#define PFS5E 0x0040 /* Port F SPI SSEL 5 Enable */ +#define PFS5E_TIMER 0x0000 /* Enable TMR4 */ +#define PFS5E_SPI 0x0040 /* Enable SPI_SSEL5 */ + +#define PFS4E 0x0080 /* Port F SPI SSEL 4 Enable */ +#define PFS4E_TIMER 0x0000 /* Enable TMR3 */ +#define PFS4E_SPI 0x0080 /* Enable SPI_SSEL4 */ + +#define PFFE 0x0100 /* Port F PPI Frame Sync Enable */ +#define PFFE_TIMER 0x0000 /* Enable TMR2 */ +#define PFFE_PPI 0x0100 /* Enable PPI FS3 */ + +#define PGSE 0x0200 /* Port G SPORT1 Secondary Enable */ +#define PGSE_PPI 0x0000 /* Enable PPI D9:8 */ +#define PGSE_SPORT 0x0200 /* Enable DR1SEC/DT1SEC */ + +#define PGRE 0x0400 /* Port G SPORT1 Receive Enable */ +#define PGRE_PPI 0x0000 /* Enable PPI D12:10 */ +#define PGRE_SPORT 0x0400 /* Enable DR1PRI/RFS1/RSCLK1 */ + +#define PGTE 0x0800 /* Port G SPORT1 Transmit Enable */ +#define PGTE_PPI 0x0000 /* Enable PPI D15:13 */ +#define PGTE_SPORT 0x0800 /* Enable DT1PRI/TFS1/TSCLK1 */ + + +/* ****************** HANDSHAKE DMA (HDMA) MASKS *********************/ +/* HDMAx_CTL Masks */ +#define HMDMAEN 0x0001 /* Enable Handshake DMA 0/1 */ +#define REP 0x0002 /* HDMA Request Polarity */ +#define UTE 0x0004 /* Urgency Threshold Enable */ +#define OIE 0x0010 /* Overflow Interrupt Enable */ +#define BDIE 0x0020 /* Block Done Interrupt Enable */ +#define MBDI 0x0040 /* Mask Block Done IRQ If Pending ECNT */ +#define DRQ 0x0300 /* HDMA Request Type */ +#define DRQ_NONE 0x0000 /* No Request */ +#define DRQ_SINGLE 0x0100 /* Channels Request Single */ +#define DRQ_MULTI 0x0200 /* Channels Request Multi (Default) */ +#define DRQ_URGENT 0x0300 /* Channels Request Multi Urgent */ +#define RBC 0x1000 /* Reload BCNT With IBCNT */ +#define PS 0x2000 /* HDMA Pin Status */ +#define OI 0x4000 /* Overflow Interrupt Generated */ +#define BDI 0x8000 /* Block Done Interrupt Generated */ + +/* entry addresses of the user-callable Boot ROM functions */ + +#define _BOOTROM_RESET 0xEF000000 +#define _BOOTROM_FINAL_INIT 0xEF000002 +#define _BOOTROM_DO_MEMORY_DMA 0xEF000006 +#define _BOOTROM_BOOT_DXE_FLASH 0xEF000008 +#define _BOOTROM_BOOT_DXE_SPI 0xEF00000A +#define _BOOTROM_BOOT_DXE_TWI 0xEF00000C +#define _BOOTROM_GET_DXE_ADDRESS_FLASH 0xEF000010 +#define _BOOTROM_GET_DXE_ADDRESS_SPI 0xEF000012 +#define _BOOTROM_GET_DXE_ADDRESS_TWI 0xEF000014 + +/* Alternate Deprecated Macros Provided For Backwards Code Compatibility */ +#define PGDE_UART PFDE_UART +#define PGDE_DMA PFDE_DMA +#define CKELOW SCKELOW + +/* HOST Port Registers */ + +#define HOST_CONTROL 0xffc03400 /* HOST Control Register */ +#define HOST_STATUS 0xffc03404 /* HOST Status Register */ +#define HOST_TIMEOUT 0xffc03408 /* HOST Acknowledge Mode Timeout Register */ + +/* Counter Registers */ + +#define CNT_CONFIG 0xffc03500 /* Configuration Register */ +#define CNT_IMASK 0xffc03504 /* Interrupt Mask Register */ +#define CNT_STATUS 0xffc03508 /* Status Register */ +#define CNT_COMMAND 0xffc0350c /* Command Register */ +#define CNT_DEBOUNCE 0xffc03510 /* Debounce Register */ +#define CNT_COUNTER 0xffc03514 /* Counter Register */ +#define CNT_MAX 0xffc03518 /* Maximal Count Register */ +#define CNT_MIN 0xffc0351c /* Minimal Count Register */ + +/* OTP/FUSE Registers */ + +#define OTP_CONTROL 0xffc03600 /* OTP/Fuse Control Register */ +#define OTP_BEN 0xffc03604 /* OTP/Fuse Byte Enable */ +#define OTP_STATUS 0xffc03608 /* OTP/Fuse Status */ +#define OTP_TIMING 0xffc0360c /* OTP/Fuse Access Timing */ + +/* Security Registers */ + +#define SECURE_SYSSWT 0xffc03620 /* Secure System Switches */ +#define SECURE_CONTROL 0xffc03624 /* Secure Control */ +#define SECURE_STATUS 0xffc03628 /* Secure Status */ + +/* OTP Read/Write Data Buffer Registers */ + +#define OTP_DATA0 0xffc03680 /* OTP/Fuse Data (OTP_DATA0-3) accesses the fuse read write buffer */ +#define OTP_DATA1 0xffc03684 /* OTP/Fuse Data (OTP_DATA0-3) accesses the fuse read write buffer */ +#define OTP_DATA2 0xffc03688 /* OTP/Fuse Data (OTP_DATA0-3) accesses the fuse read write buffer */ +#define OTP_DATA3 0xffc0368c /* OTP/Fuse Data (OTP_DATA0-3) accesses the fuse read write buffer */ + +/* Motor Control PWM Registers */ + +#define PWM_CTRL 0xffc03700 /* PWM Control Register */ +#define PWM_STAT 0xffc03704 /* PWM Status Register */ +#define PWM_TM 0xffc03708 /* PWM Period Register */ +#define PWM_DT 0xffc0370c /* PWM Dead Time Register */ +#define PWM_GATE 0xffc03710 /* PWM Chopping Control */ +#define PWM_CHA 0xffc03714 /* PWM Channel A Duty Control */ +#define PWM_CHB 0xffc03718 /* PWM Channel B Duty Control */ +#define PWM_CHC 0xffc0371c /* PWM Channel C Duty Control */ +#define PWM_SEG 0xffc03720 /* PWM Crossover and Output Enable */ +#define PWM_SYNCWT 0xffc03724 /* PWM Sync Pluse Width Control */ +#define PWM_CHAL 0xffc03728 /* PWM Channel AL Duty Control (SR mode only) */ +#define PWM_CHBL 0xffc0372c /* PWM Channel BL Duty Control (SR mode only) */ +#define PWM_CHCL 0xffc03730 /* PWM Channel CL Duty Control (SR mode only) */ +#define PWM_LSI 0xffc03734 /* PWM Low Side Invert (SR mode only) */ +#define PWM_STAT2 0xffc03738 /* PWM Status Register 2 */ + + +/* ********************************************************** */ +/* SINGLE BIT MACRO PAIRS (bit mask and negated one) */ +/* and MULTI BIT READ MACROS */ +/* ********************************************************** */ + +/* Bit masks for HOST_CONTROL */ + +#define HOST_CNTR_HOST_EN 0x1 /* Host Enable */ +#define HOST_CNTR_nHOST_EN 0x0 +#define HOST_CNTR_HOST_END 0x2 /* Host Endianess */ +#define HOST_CNTR_nHOST_END 0x0 +#define HOST_CNTR_DATA_SIZE 0x4 /* Data Size */ +#define HOST_CNTR_nDATA_SIZE 0x0 +#define HOST_CNTR_HOST_RST 0x8 /* Host Reset */ +#define HOST_CNTR_nHOST_RST 0x0 +#define HOST_CNTR_HRDY_OVR 0x20 /* Host Ready Override */ +#define HOST_CNTR_nHRDY_OVR 0x0 +#define HOST_CNTR_INT_MODE 0x40 /* Interrupt Mode */ +#define HOST_CNTR_nINT_MODE 0x0 +#define HOST_CNTR_BT_EN 0x80 /* Bus Timeout Enable */ +#define HOST_CNTR_ nBT_EN 0x0 +#define HOST_CNTR_EHW 0x100 /* Enable Host Write */ +#define HOST_CNTR_nEHW 0x0 +#define HOST_CNTR_EHR 0x200 /* Enable Host Read */ +#define HOST_CNTR_nEHR 0x0 +#define HOST_CNTR_BDR 0x400 /* Burst DMA Requests */ +#define HOST_CNTR_nBDR 0x0 + +/* Bit masks for HOST_STATUS */ + +#define HOST_STAT_READY 0x1 /* DMA Ready */ +#define HOST_STAT_nREADY 0x0 +#define HOST_STAT_FIFOFULL 0x2 /* FIFO Full */ +#define HOST_STAT_nFIFOFULL 0x0 +#define HOST_STAT_FIFOEMPTY 0x4 /* FIFO Empty */ +#define HOST_STAT_nFIFOEMPTY 0x0 +#define HOST_STAT_COMPLETE 0x8 /* DMA Complete */ +#define HOST_STAT_nCOMPLETE 0x0 +#define HOST_STAT_HSHK 0x10 /* Host Handshake */ +#define HOST_STAT_nHSHK 0x0 +#define HOST_STAT_TIMEOUT 0x20 /* Host Timeout */ +#define HOST_STAT_nTIMEOUT 0x0 +#define HOST_STAT_HIRQ 0x40 /* Host Interrupt Request */ +#define HOST_STAT_nHIRQ 0x0 +#define HOST_STAT_ALLOW_CNFG 0x80 /* Allow New Configuration */ +#define HOST_STAT_nALLOW_CNFG 0x0 +#define HOST_STAT_DMA_DIR 0x100 /* DMA Direction */ +#define HOST_STAT_nDMA_DIR 0x0 +#define HOST_STAT_BTE 0x200 /* Bus Timeout Enabled */ +#define HOST_STAT_nBTE 0x0 +#define HOST_STAT_HOSTRD_DONE 0x8000 /* Host Read Completion Interrupt */ +#define HOST_STAT_nHOSTRD_DONE 0x0 + +/* Bit masks for HOST_TIMEOUT */ + +#define HOST_COUNT_TIMEOUT 0x7ff /* Host Timeout count */ + +/* Bit masks for CNT_CONFIG */ + +#define CNTE 0x1 /* Counter Enable */ +#define nCNTE 0x0 +#define DEBE 0x2 /* Debounce Enable */ +#define nDEBE 0x0 +#define CDGINV 0x10 /* CDG Pin Polarity Invert */ +#define nCDGINV 0x0 +#define CUDINV 0x20 /* CUD Pin Polarity Invert */ +#define nCUDINV 0x0 +#define CZMINV 0x40 /* CZM Pin Polarity Invert */ +#define nCZMINV 0x0 +#define CNTMODE 0x700 /* Counter Operating Mode */ +#define ZMZC 0x800 /* CZM Zeroes Counter Enable */ +#define nZMZC 0x0 +#define BNDMODE 0x3000 /* Boundary register Mode */ +#define INPDIS 0x8000 /* CUG and CDG Input Disable */ +#define nINPDIS 0x0 + +/* Bit masks for CNT_IMASK */ + +#define ICIE 0x1 /* Illegal Gray/Binary Code Interrupt Enable */ +#define nICIE 0x0 +#define UCIE 0x2 /* Up count Interrupt Enable */ +#define nUCIE 0x0 +#define DCIE 0x4 /* Down count Interrupt Enable */ +#define nDCIE 0x0 +#define MINCIE 0x8 /* Min Count Interrupt Enable */ +#define nMINCIE 0x0 +#define MAXCIE 0x10 /* Max Count Interrupt Enable */ +#define nMAXCIE 0x0 +#define COV31IE 0x20 /* Bit 31 Overflow Interrupt Enable */ +#define nCOV31IE 0x0 +#define COV15IE 0x40 /* Bit 15 Overflow Interrupt Enable */ +#define nCOV15IE 0x0 +#define CZEROIE 0x80 /* Count to Zero Interrupt Enable */ +#define nCZEROIE 0x0 +#define CZMIE 0x100 /* CZM Pin Interrupt Enable */ +#define nCZMIE 0x0 +#define CZMEIE 0x200 /* CZM Error Interrupt Enable */ +#define nCZMEIE 0x0 +#define CZMZIE 0x400 /* CZM Zeroes Counter Interrupt Enable */ +#define nCZMZIE 0x0 + +/* Bit masks for CNT_STATUS */ + +#define ICII 0x1 /* Illegal Gray/Binary Code Interrupt Identifier */ +#define nICII 0x0 +#define UCII 0x2 /* Up count Interrupt Identifier */ +#define nUCII 0x0 +#define DCII 0x4 /* Down count Interrupt Identifier */ +#define nDCII 0x0 +#define MINCII 0x8 /* Min Count Interrupt Identifier */ +#define nMINCII 0x0 +#define MAXCII 0x10 /* Max Count Interrupt Identifier */ +#define nMAXCII 0x0 +#define COV31II 0x20 /* Bit 31 Overflow Interrupt Identifier */ +#define nCOV31II 0x0 +#define COV15II 0x40 /* Bit 15 Overflow Interrupt Identifier */ +#define nCOV15II 0x0 +#define CZEROII 0x80 /* Count to Zero Interrupt Identifier */ +#define nCZEROII 0x0 +#define CZMII 0x100 /* CZM Pin Interrupt Identifier */ +#define nCZMII 0x0 +#define CZMEII 0x200 /* CZM Error Interrupt Identifier */ +#define nCZMEII 0x0 +#define CZMZII 0x400 /* CZM Zeroes Counter Interrupt Identifier */ +#define nCZMZII 0x0 + +/* Bit masks for CNT_COMMAND */ + +#define W1LCNT 0xf /* Load Counter Register */ +#define W1LMIN 0xf0 /* Load Min Register */ +#define W1LMAX 0xf00 /* Load Max Register */ +#define W1ZMONCE 0x1000 /* Enable CZM Clear Counter Once */ +#define nW1ZMONCE 0x0 + +/* Bit masks for CNT_DEBOUNCE */ + +#define DPRESCALE 0xf /* Load Counter Register */ + +/* CNT_COMMAND bit field options */ + +#define W1LCNT_ZERO 0x0001 /* write 1 to load CNT_COUNTER with zero */ +#define W1LCNT_MIN 0x0004 /* write 1 to load CNT_COUNTER from CNT_MIN */ +#define W1LCNT_MAX 0x0008 /* write 1 to load CNT_COUNTER from CNT_MAX */ + +#define W1LMIN_ZERO 0x0010 /* write 1 to load CNT_MIN with zero */ +#define W1LMIN_CNT 0x0020 /* write 1 to load CNT_MIN from CNT_COUNTER */ +#define W1LMIN_MAX 0x0080 /* write 1 to load CNT_MIN from CNT_MAX */ + +#define W1LMAX_ZERO 0x0100 /* write 1 to load CNT_MAX with zero */ +#define W1LMAX_CNT 0x0200 /* write 1 to load CNT_MAX from CNT_COUNTER */ +#define W1LMAX_MIN 0x0400 /* write 1 to load CNT_MAX from CNT_MIN */ + +/* CNT_CONFIG bit field options */ + +#define CNTMODE_QUADENC 0x0000 /* quadrature encoder mode */ +#define CNTMODE_BINENC 0x0100 /* binary encoder mode */ +#define CNTMODE_UDCNT 0x0200 /* up/down counter mode */ +#define CNTMODE_DIRCNT 0x0400 /* direction counter mode */ +#define CNTMODE_DIRTMR 0x0500 /* direction timer mode */ + +#define BNDMODE_COMP 0x0000 /* boundary compare mode */ +#define BNDMODE_ZERO 0x1000 /* boundary compare and zero mode */ +#define BNDMODE_CAPT 0x2000 /* boundary capture mode */ +#define BNDMODE_AEXT 0x3000 /* boundary auto-extend mode */ + +/* Bit masks for OTP_CONTROL */ + +#define FUSE_FADDR 0x1ff /* OTP/Fuse Address */ +#define FIEN 0x800 /* OTP/Fuse Interrupt Enable */ +#define nFIEN 0x0 +#define FTESTDEC 0x1000 /* OTP/Fuse Test Decoder */ +#define nFTESTDEC 0x0 +#define FWRTEST 0x2000 /* OTP/Fuse Write Test */ +#define nFWRTEST 0x0 +#define FRDEN 0x4000 /* OTP/Fuse Read Enable */ +#define nFRDEN 0x0 +#define FWREN 0x8000 /* OTP/Fuse Write Enable */ +#define nFWREN 0x0 + +/* Bit masks for OTP_BEN */ + +#define FBEN 0xffff /* OTP/Fuse Byte Enable */ + +/* Bit masks for OTP_STATUS */ + +#define FCOMP 0x1 /* OTP/Fuse Access Complete */ +#define nFCOMP 0x0 +#define FERROR 0x2 /* OTP/Fuse Access Error */ +#define nFERROR 0x0 +#define MMRGLOAD 0x10 /* Memory Mapped Register Gasket Load */ +#define nMMRGLOAD 0x0 +#define MMRGLOCK 0x20 /* Memory Mapped Register Gasket Lock */ +#define nMMRGLOCK 0x0 +#define FPGMEN 0x40 /* OTP/Fuse Program Enable */ +#define nFPGMEN 0x0 + +/* Bit masks for OTP_TIMING */ + +#define USECDIV 0xff /* Micro Second Divider */ +#define READACC 0x7f00 /* Read Access Time */ +#define CPUMPRL 0x38000 /* Charge Pump Release Time */ +#define CPUMPSU 0xc0000 /* Charge Pump Setup Time */ +#define CPUMPHD 0xf00000 /* Charge Pump Hold Time */ +#define PGMTIME 0xff000000 /* Program Time */ + +/* Bit masks for SECURE_SYSSWT */ + +#define EMUDABL 0x1 /* Emulation Disable. */ +#define nEMUDABL 0x0 +#define RSTDABL 0x2 /* Reset Disable */ +#define nRSTDABL 0x0 +#define L1IDABL 0x1c /* L1 Instruction Memory Disable. */ +#define L1DADABL 0xe0 /* L1 Data Bank A Memory Disable. */ +#define L1DBDABL 0x700 /* L1 Data Bank B Memory Disable. */ +#define DMA0OVR 0x800 /* DMA0 Memory Access Override */ +#define nDMA0OVR 0x0 +#define DMA1OVR 0x1000 /* DMA1 Memory Access Override */ +#define nDMA1OVR 0x0 +#define EMUOVR 0x4000 /* Emulation Override */ +#define nEMUOVR 0x0 +#define OTPSEN 0x8000 /* OTP Secrets Enable. */ +#define nOTPSEN 0x0 +#define L2DABL 0x70000 /* L2 Memory Disable. */ + +/* Bit masks for SECURE_CONTROL */ + +#define SECURE0 0x1 /* SECURE 0 */ +#define nSECURE0 0x0 +#define SECURE1 0x2 /* SECURE 1 */ +#define nSECURE1 0x0 +#define SECURE2 0x4 /* SECURE 2 */ +#define nSECURE2 0x0 +#define SECURE3 0x8 /* SECURE 3 */ +#define nSECURE3 0x0 + +/* Bit masks for SECURE_STATUS */ + +#define SECMODE 0x3 /* Secured Mode Control State */ +#define NMI 0x4 /* Non Maskable Interrupt */ +#define nNMI 0x0 +#define AFVALID 0x8 /* Authentication Firmware Valid */ +#define nAFVALID 0x0 +#define AFEXIT 0x10 /* Authentication Firmware Exit */ +#define nAFEXIT 0x0 +#define SECSTAT 0xe0 /* Secure Status */ + + + +#endif /* _DEF_BF51X_H */ diff --git a/arch/blackfin/mach-bf518/include/mach/dma.h b/arch/blackfin/mach-bf518/include/mach/dma.h new file mode 100644 index 000000000000..e2a71ba907e9 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/dma.h @@ -0,0 +1,57 @@ +/* + * file: include/asm-blackfin/mach-bf518/dma.h + * based on: include/asm-blackfin/mach-bf527/dma.h + * author: Michael Hennerich (michael.hennerich@analog.com) + * + * created: + * description: + * system DMA map + * rev: + * + * modified: + * + * + * bugs: enter bugs at http://blackfin.uclinux.org/ + * + * this program is free software; you can redistribute it and/or modify + * it under the terms of the gnu general public license as published by + * the free software foundation; either version 2, or (at your option) + * any later version. + * + * this program is distributed in the hope that it will be useful, + * but without any warranty; without even the implied warranty of + * merchantability or fitness for a particular purpose. see the + * gnu general public license for more details. + * + * you should have received a copy of the gnu general public license + * along with this program; see the file copying. + * if not, write to the free software foundation, + * 59 temple place - suite 330, boston, ma 02111-1307, usa. + */ + +#ifndef _MACH_DMA_H_ +#define _MACH_DMA_H_ + +#define MAX_BLACKFIN_DMA_CHANNEL 16 + +#define CH_PPI 0 /* PPI receive/transmit */ +#define CH_EMAC_RX 1 /* Ethernet MAC receive */ +#define CH_EMAC_TX 2 /* Ethernet MAC transmit */ +#define CH_SPORT0_RX 3 /* SPORT0 receive */ +#define CH_SPORT0_TX 4 /* SPORT0 transmit */ +#define CH_RSI 4 /* RSI */ +#define CH_SPORT1_RX 5 /* SPORT1 receive */ +#define CH_SPI1 5 /* SPI1 transmit/receive */ +#define CH_SPORT1_TX 6 /* SPORT1 transmit */ +#define CH_SPI0 7 /* SPI0 transmit/receive */ +#define CH_UART0_RX 8 /* UART0 receive */ +#define CH_UART0_TX 9 /* UART0 transmit */ +#define CH_UART1_RX 10 /* UART1 receive */ +#define CH_UART1_TX 11 /* UART1 transmit */ + +#define CH_MEM_STREAM0_SRC 12 /* RX */ +#define CH_MEM_STREAM0_DEST 13 /* TX */ +#define CH_MEM_STREAM1_SRC 14 /* RX */ +#define CH_MEM_STREAM1_DEST 15 /* TX */ + +#endif diff --git a/arch/blackfin/mach-bf518/include/mach/irq.h b/arch/blackfin/mach-bf518/include/mach/irq.h new file mode 100644 index 000000000000..e5062f107ae2 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/irq.h @@ -0,0 +1,260 @@ +/* + * file: include/asm-blackfin/mach-bf518/irq.h + * based on: include/asm-blackfin/mach-bf527/irq.h + * author: Michael Hennerich (michael.hennerich@analog.com) + * + * created: + * description: + * system mmr register map + * rev: + * + * modified: + * + * + * bugs: enter bugs at http://blackfin.uclinux.org/ + * + * this program is free software; you can redistribute it and/or modify + * it under the terms of the gnu general public license as published by + * the free software foundation; either version 2, or (at your option) + * any later version. + * + * this program is distributed in the hope that it will be useful, + * but without any warranty; without even the implied warranty of + * merchantability or fitness for a particular purpose. see the + * gnu general public license for more details. + * + * you should have received a copy of the gnu general public license + * along with this program; see the file copying. + * if not, write to the free software foundation, + * 59 temple place - suite 330, boston, ma 02111-1307, usa. + */ + +#ifndef _BF518_IRQ_H_ +#define _BF518_IRQ_H_ + +/* + * Interrupt source definitions + Event Source Core Event Name + Core Emulation ** + Events (highest priority) EMU 0 + Reset RST 1 + NMI NMI 2 + Exception EVX 3 + Reserved -- 4 + Hardware Error IVHW 5 + Core Timer IVTMR 6 * + + ..... + + Software Interrupt 1 IVG14 31 + Software Interrupt 2 -- + (lowest priority) IVG15 32 * +*/ + +#define NR_PERI_INTS (2 * 32) + +/* The ABSTRACT IRQ definitions */ +/** the first seven of the following are fixed, the rest you change if you need to **/ +#define IRQ_EMU 0 /* Emulation */ +#define IRQ_RST 1 /* reset */ +#define IRQ_NMI 2 /* Non Maskable */ +#define IRQ_EVX 3 /* Exception */ +#define IRQ_UNUSED 4 /* - unused interrupt */ +#define IRQ_HWERR 5 /* Hardware Error */ +#define IRQ_CORETMR 6 /* Core timer */ + +#define BFIN_IRQ(x) ((x) + 7) + +#define IRQ_PLL_WAKEUP BFIN_IRQ(0) /* PLL Wakeup Interrupt */ +#define IRQ_DMA0_ERROR BFIN_IRQ(1) /* DMA Error 0 (generic) */ +#define IRQ_DMAR0_BLK BFIN_IRQ(2) /* DMAR0 Block Interrupt */ +#define IRQ_DMAR1_BLK BFIN_IRQ(3) /* DMAR1 Block Interrupt */ +#define IRQ_DMAR0_OVR BFIN_IRQ(4) /* DMAR0 Overflow Error */ +#define IRQ_DMAR1_OVR BFIN_IRQ(5) /* DMAR1 Overflow Error */ +#define IRQ_PPI_ERROR BFIN_IRQ(6) /* PPI Error */ +#define IRQ_MAC_ERROR BFIN_IRQ(7) /* MAC Status */ +#define IRQ_SPORT0_ERROR BFIN_IRQ(8) /* SPORT0 Status */ +#define IRQ_SPORT1_ERROR BFIN_IRQ(9) /* SPORT1 Status */ +#define IRQ_PTP_ERROR BFIN_IRQ(10) /* PTP Error Interrupt */ +#define IRQ_UART0_ERROR BFIN_IRQ(12) /* UART0 Status */ +#define IRQ_UART1_ERROR BFIN_IRQ(13) /* UART1 Status */ +#define IRQ_RTC BFIN_IRQ(14) /* RTC */ +#define IRQ_PPI BFIN_IRQ(15) /* DMA Channel 0 (PPI) */ +#define IRQ_SPORT0_RX BFIN_IRQ(16) /* DMA 3 Channel (SPORT0 RX) */ +#define IRQ_SPORT0_TX BFIN_IRQ(17) /* DMA 4 Channel (SPORT0 TX) */ +#define IRQ_RSI BFIN_IRQ(17) /* DMA 4 Channel (RSI) */ +#define IRQ_SPORT1_RX BFIN_IRQ(18) /* DMA 5 Channel (SPORT1 RX/SPI) */ +#define IRQ_SPI1 BFIN_IRQ(18) /* DMA 5 Channel (SPI1) */ +#define IRQ_SPORT1_TX BFIN_IRQ(19) /* DMA 6 Channel (SPORT1 TX) */ +#define IRQ_TWI BFIN_IRQ(20) /* TWI */ +#define IRQ_SPI0 BFIN_IRQ(21) /* DMA 7 Channel (SPI0) */ +#define IRQ_UART0_RX BFIN_IRQ(22) /* DMA8 Channel (UART0 RX) */ +#define IRQ_UART0_TX BFIN_IRQ(23) /* DMA9 Channel (UART0 TX) */ +#define IRQ_UART1_RX BFIN_IRQ(24) /* DMA10 Channel (UART1 RX) */ +#define IRQ_UART1_TX BFIN_IRQ(25) /* DMA11 Channel (UART1 TX) */ +#define IRQ_OPTSEC BFIN_IRQ(26) /* OTPSEC Interrupt */ +#define IRQ_CNT BFIN_IRQ(27) /* GP Counter */ +#define IRQ_MAC_RX BFIN_IRQ(28) /* DMA1 Channel (MAC RX) */ +#define IRQ_PORTH_INTA BFIN_IRQ(29) /* Port H Interrupt A */ +#define IRQ_MAC_TX BFIN_IRQ(30) /* DMA2 Channel (MAC TX) */ +#define IRQ_PORTH_INTB BFIN_IRQ(31) /* Port H Interrupt B */ +#define IRQ_TMR0 BFIN_IRQ(32) /* Timer 0 */ +#define IRQ_TMR1 BFIN_IRQ(33) /* Timer 1 */ +#define IRQ_TMR2 BFIN_IRQ(34) /* Timer 2 */ +#define IRQ_TMR3 BFIN_IRQ(35) /* Timer 3 */ +#define IRQ_TMR4 BFIN_IRQ(36) /* Timer 4 */ +#define IRQ_TMR5 BFIN_IRQ(37) /* Timer 5 */ +#define IRQ_TMR6 BFIN_IRQ(38) /* Timer 6 */ +#define IRQ_TMR7 BFIN_IRQ(39) /* Timer 7 */ +#define IRQ_PORTG_INTA BFIN_IRQ(40) /* Port G Interrupt A */ +#define IRQ_PORTG_INTB BFIN_IRQ(41) /* Port G Interrupt B */ +#define IRQ_MEM_DMA0 BFIN_IRQ(42) /* MDMA Stream 0 */ +#define IRQ_MEM_DMA1 BFIN_IRQ(43) /* MDMA Stream 1 */ +#define IRQ_WATCH BFIN_IRQ(44) /* Software Watchdog Timer */ +#define IRQ_PORTF_INTA BFIN_IRQ(45) /* Port F Interrupt A */ +#define IRQ_PORTF_INTB BFIN_IRQ(46) /* Port F Interrupt B */ +#define IRQ_SPI0_ERROR BFIN_IRQ(47) /* SPI0 Status */ +#define IRQ_SPI1_ERROR BFIN_IRQ(48) /* SPI1 Error */ +#define IRQ_RSI_INT0 BFIN_IRQ(51) /* RSI Interrupt0 */ +#define IRQ_RSI_INT1 BFIN_IRQ(52) /* RSI Interrupt1 */ +#define IRQ_PWM_TRIP BFIN_IRQ(53) /* PWM Trip Interrupt */ +#define IRQ_PWM_SYNC BFIN_IRQ(54) /* PWM Sync Interrupt */ +#define IRQ_PTP_STAT BFIN_IRQ(55) /* PTP Stat Interrupt */ + +#define SYS_IRQS BFIN_IRQ(63) /* 70 */ + +#define IRQ_PF0 71 +#define IRQ_PF1 72 +#define IRQ_PF2 73 +#define IRQ_PF3 74 +#define IRQ_PF4 75 +#define IRQ_PF5 76 +#define IRQ_PF6 77 +#define IRQ_PF7 78 +#define IRQ_PF8 79 +#define IRQ_PF9 80 +#define IRQ_PF10 81 +#define IRQ_PF11 82 +#define IRQ_PF12 83 +#define IRQ_PF13 84 +#define IRQ_PF14 85 +#define IRQ_PF15 86 + +#define IRQ_PG0 87 +#define IRQ_PG1 88 +#define IRQ_PG2 89 +#define IRQ_PG3 90 +#define IRQ_PG4 91 +#define IRQ_PG5 92 +#define IRQ_PG6 93 +#define IRQ_PG7 94 +#define IRQ_PG8 95 +#define IRQ_PG9 96 +#define IRQ_PG10 97 +#define IRQ_PG11 98 +#define IRQ_PG12 99 +#define IRQ_PG13 100 +#define IRQ_PG14 101 +#define IRQ_PG15 102 + +#define IRQ_PH0 103 +#define IRQ_PH1 104 +#define IRQ_PH2 105 +#define IRQ_PH3 106 +#define IRQ_PH4 107 +#define IRQ_PH5 108 +#define IRQ_PH6 109 +#define IRQ_PH7 110 +#define IRQ_PH8 111 +#define IRQ_PH9 112 +#define IRQ_PH10 113 +#define IRQ_PH11 114 +#define IRQ_PH12 115 +#define IRQ_PH13 116 +#define IRQ_PH14 117 +#define IRQ_PH15 118 + +#define GPIO_IRQ_BASE IRQ_PF0 + +#define NR_IRQS (IRQ_PH15 + 1) + +#define IVG7 7 +#define IVG8 8 +#define IVG9 9 +#define IVG10 10 +#define IVG11 11 +#define IVG12 12 +#define IVG13 13 +#define IVG14 14 +#define IVG15 15 + +/* IAR0 BIT FIELDS */ +#define IRQ_PLL_WAKEUP_POS 0 +#define IRQ_DMA0_ERROR_POS 4 +#define IRQ_DMAR0_BLK_POS 8 +#define IRQ_DMAR1_BLK_POS 12 +#define IRQ_DMAR0_OVR_POS 16 +#define IRQ_DMAR1_OVR_POS 20 +#define IRQ_PPI_ERROR_POS 24 +#define IRQ_MAC_ERROR_POS 28 + +/* IAR1 BIT FIELDS */ +#define IRQ_SPORT0_ERROR_POS 0 +#define IRQ_SPORT1_ERROR_POS 4 +#define IRQ_PTP_ERROR_POS 8 +#define IRQ_UART0_ERROR_POS 16 +#define IRQ_UART1_ERROR_POS 20 +#define IRQ_RTC_POS 24 +#define IRQ_PPI_POS 28 + +/* IAR2 BIT FIELDS */ +#define IRQ_SPORT0_RX_POS 0 +#define IRQ_SPORT0_TX_POS 4 +#define IRQ_RSI_POS 4 +#define IRQ_SPORT1_RX_POS 8 +#define IRQ_SPI1_POS 8 +#define IRQ_SPORT1_TX_POS 12 +#define IRQ_TWI_POS 16 +#define IRQ_SPI0_POS 20 +#define IRQ_UART0_RX_POS 24 +#define IRQ_UART0_TX_POS 28 + +/* IAR3 BIT FIELDS */ +#define IRQ_UART1_RX_POS 0 +#define IRQ_UART1_TX_POS 4 +#define IRQ_OPTSEC_POS 8 +#define IRQ_CNT_POS 12 +#define IRQ_MAC_RX_POS 16 +#define IRQ_PORTH_INTA_POS 20 +#define IRQ_MAC_TX_POS 24 +#define IRQ_PORTH_INTB_POS 28 + +/* IAR4 BIT FIELDS */ +#define IRQ_TMR0_POS 0 +#define IRQ_TMR1_POS 4 +#define IRQ_TMR2_POS 8 +#define IRQ_TMR3_POS 12 +#define IRQ_TMR4_POS 16 +#define IRQ_TMR5_POS 20 +#define IRQ_TMR6_POS 24 +#define IRQ_TMR7_POS 28 + +/* IAR5 BIT FIELDS */ +#define IRQ_PORTG_INTA_POS 0 +#define IRQ_PORTG_INTB_POS 4 +#define IRQ_MEM_DMA0_POS 8 +#define IRQ_MEM_DMA1_POS 12 +#define IRQ_WATCH_POS 16 +#define IRQ_PORTF_INTA_POS 20 +#define IRQ_PORTF_INTB_POS 24 +#define IRQ_SPI0_ERROR_POS 28 + +/* IAR6 BIT FIELDS */ +#define IRQ_SPI1_ERROR_POS 0 +#define IRQ_RSI_INT0_POS 12 +#define IRQ_RSI_INT1_POS 16 +#define IRQ_PWM_TRIP_POS 20 +#define IRQ_PWM_SYNC_POS 24 +#define IRQ_PTP_STAT_POS 28 + +#endif /* _BF518_IRQ_H_ */ diff --git a/arch/blackfin/mach-bf518/include/mach/mem_init.h b/arch/blackfin/mach-bf518/include/mach/mem_init.h new file mode 100644 index 000000000000..2f4f4092fb54 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/mem_init.h @@ -0,0 +1,310 @@ +/* + * File: include/asm-blackfin/mach-bf518/mem_init.h + * Based on: + * Author: + * + * Created: + * Description: + * + * Rev: + * + * Modified: + * Copyright 2004-2007 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#if (CONFIG_MEM_MT48LC16M16A2TG_75 || CONFIG_MEM_MT48LC64M4A2FB_7E || CONFIG_MEM_MT48LC16M8A2TG_75 || CONFIG_MEM_GENERIC_BOARD || CONFIG_MEM_MT48LC32M8A2_75 || CONFIG_MEM_MT48LC32M16A2TG_75) +#if (CONFIG_SCLK_HZ > 119402985) +#define SDRAM_tRP TRP_2 +#define SDRAM_tRP_num 2 +#define SDRAM_tRAS TRAS_7 +#define SDRAM_tRAS_num 7 +#define SDRAM_tRCD TRCD_2 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 104477612) && (CONFIG_SCLK_HZ <= 119402985) +#define SDRAM_tRP TRP_2 +#define SDRAM_tRP_num 2 +#define SDRAM_tRAS TRAS_6 +#define SDRAM_tRAS_num 6 +#define SDRAM_tRCD TRCD_2 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 89552239) && (CONFIG_SCLK_HZ <= 104477612) +#define SDRAM_tRP TRP_2 +#define SDRAM_tRP_num 2 +#define SDRAM_tRAS TRAS_5 +#define SDRAM_tRAS_num 5 +#define SDRAM_tRCD TRCD_2 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 74626866) && (CONFIG_SCLK_HZ <= 89552239) +#define SDRAM_tRP TRP_2 +#define SDRAM_tRP_num 2 +#define SDRAM_tRAS TRAS_4 +#define SDRAM_tRAS_num 4 +#define SDRAM_tRCD TRCD_2 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 66666667) && (CONFIG_SCLK_HZ <= 74626866) +#define SDRAM_tRP TRP_2 +#define SDRAM_tRP_num 2 +#define SDRAM_tRAS TRAS_3 +#define SDRAM_tRAS_num 3 +#define SDRAM_tRCD TRCD_2 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 59701493) && (CONFIG_SCLK_HZ <= 66666667) +#define SDRAM_tRP TRP_1 +#define SDRAM_tRP_num 1 +#define SDRAM_tRAS TRAS_4 +#define SDRAM_tRAS_num 3 +#define SDRAM_tRCD TRCD_1 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 44776119) && (CONFIG_SCLK_HZ <= 59701493) +#define SDRAM_tRP TRP_1 +#define SDRAM_tRP_num 1 +#define SDRAM_tRAS TRAS_3 +#define SDRAM_tRAS_num 3 +#define SDRAM_tRCD TRCD_1 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ > 29850746) && (CONFIG_SCLK_HZ <= 44776119) +#define SDRAM_tRP TRP_1 +#define SDRAM_tRP_num 1 +#define SDRAM_tRAS TRAS_2 +#define SDRAM_tRAS_num 2 +#define SDRAM_tRCD TRCD_1 +#define SDRAM_tWR TWR_2 +#endif +#if (CONFIG_SCLK_HZ <= 29850746) +#define SDRAM_tRP TRP_1 +#define SDRAM_tRP_num 1 +#define SDRAM_tRAS TRAS_1 +#define SDRAM_tRAS_num 1 +#define SDRAM_tRCD TRCD_1 +#define SDRAM_tWR TWR_2 +#endif +#endif + +#if (CONFIG_MEM_MT48LC16M16A2TG_75) + /*SDRAM INFORMATION: */ +#define SDRAM_Tref 64 /* Refresh period in milliseconds */ +#define SDRAM_NRA 8192 /* Number of row addresses in SDRAM */ +#define SDRAM_CL CL_3 +#endif + +#if (CONFIG_MEM_MT48LC16M8A2TG_75) + /*SDRAM INFORMATION: */ +#define SDRAM_Tref 64 /* Refresh period in milliseconds */ +#define SDRAM_NRA 4096 /* Number of row addresses in SDRAM */ +#define SDRAM_CL CL_3 +#endif + +#if (CONFIG_MEM_MT48LC32M8A2_75) + /*SDRAM INFORMATION: */ +#define SDRAM_Tref 64 /* Refresh period in milliseconds */ +#define SDRAM_NRA 8192 /* Number of row addresses in SDRAM */ +#define SDRAM_CL CL_3 +#endif + +#if (CONFIG_MEM_MT48LC64M4A2FB_7E) + /*SDRAM INFORMATION: */ +#define SDRAM_Tref 64 /* Refresh period in milliseconds */ +#define SDRAM_NRA 8192 /* Number of row addresses in SDRAM */ +#define SDRAM_CL CL_3 +#endif + +#if (CONFIG_MEM_GENERIC_BOARD) + /*SDRAM INFORMATION: Modify this for your board */ +#define SDRAM_Tref 64 /* Refresh period in milliseconds */ +#define SDRAM_NRA 8192 /* Number of row addresses in SDRAM */ +#define SDRAM_CL CL_3 +#endif + +#if (CONFIG_MEM_MT48LC32M16A2TG_75) + /*SDRAM INFORMATION: */ +#define SDRAM_Tref 64 /* Refresh period in milliseconds */ +#define SDRAM_NRA 8192 /* Number of row addresses in SDRAM */ +#define SDRAM_CL CL_3 +#endif + +/* Equation from section 17 (p17-46) of BF533 HRM */ +#define mem_SDRRC (((CONFIG_SCLK_HZ / 1000) * SDRAM_Tref) / SDRAM_NRA) - (SDRAM_tRAS_num + SDRAM_tRP_num) + +/* Enable SCLK Out */ +#define mem_SDGCTL (SCTLE | SDRAM_CL | SDRAM_tRAS | SDRAM_tRP | SDRAM_tRCD | SDRAM_tWR | PSS) + +#if defined CONFIG_CLKIN_HALF +#define CLKIN_HALF 1 +#else +#define CLKIN_HALF 0 +#endif + +#if defined CONFIG_PLL_BYPASS +#define PLL_BYPASS 1 +#else +#define PLL_BYPASS 0 +#endif + +/***************************************Currently Not Being Used *********************************/ +#define flash_EBIU_AMBCTL_WAT ((CONFIG_FLASH_SPEED_BWAT * 4) / (4000000000 / CONFIG_SCLK_HZ)) + 1 +#define flash_EBIU_AMBCTL_RAT ((CONFIG_FLASH_SPEED_BRAT * 4) / (4000000000 / CONFIG_SCLK_HZ)) + 1 +#define flash_EBIU_AMBCTL_HT ((CONFIG_FLASH_SPEED_BHT * 4) / (4000000000 / CONFIG_SCLK_HZ)) +#define flash_EBIU_AMBCTL_ST ((CONFIG_FLASH_SPEED_BST * 4) / (4000000000 / CONFIG_SCLK_HZ)) + 1 +#define flash_EBIU_AMBCTL_TT ((CONFIG_FLASH_SPEED_BTT * 4) / (4000000000 / CONFIG_SCLK_HZ)) + 1 + +#if (flash_EBIU_AMBCTL_TT > 3) +#define flash_EBIU_AMBCTL0_TT B0TT_4 +#endif +#if (flash_EBIU_AMBCTL_TT == 3) +#define flash_EBIU_AMBCTL0_TT B0TT_3 +#endif +#if (flash_EBIU_AMBCTL_TT == 2) +#define flash_EBIU_AMBCTL0_TT B0TT_2 +#endif +#if (flash_EBIU_AMBCTL_TT < 2) +#define flash_EBIU_AMBCTL0_TT B0TT_1 +#endif + +#if (flash_EBIU_AMBCTL_ST > 3) +#define flash_EBIU_AMBCTL0_ST B0ST_4 +#endif +#if (flash_EBIU_AMBCTL_ST == 3) +#define flash_EBIU_AMBCTL0_ST B0ST_3 +#endif +#if (flash_EBIU_AMBCTL_ST == 2) +#define flash_EBIU_AMBCTL0_ST B0ST_2 +#endif +#if (flash_EBIU_AMBCTL_ST < 2) +#define flash_EBIU_AMBCTL0_ST B0ST_1 +#endif + +#if (flash_EBIU_AMBCTL_HT > 2) +#define flash_EBIU_AMBCTL0_HT B0HT_3 +#endif +#if (flash_EBIU_AMBCTL_HT == 2) +#define flash_EBIU_AMBCTL0_HT B0HT_2 +#endif +#if (flash_EBIU_AMBCTL_HT == 1) +#define flash_EBIU_AMBCTL0_HT B0HT_1 +#endif +#if (flash_EBIU_AMBCTL_HT == 0 && CONFIG_FLASH_SPEED_BHT == 0) +#define flash_EBIU_AMBCTL0_HT B0HT_0 +#endif +#if (flash_EBIU_AMBCTL_HT == 0 && CONFIG_FLASH_SPEED_BHT != 0) +#define flash_EBIU_AMBCTL0_HT B0HT_1 +#endif + +#if (flash_EBIU_AMBCTL_WAT > 14) +#define flash_EBIU_AMBCTL0_WAT B0WAT_15 +#endif +#if (flash_EBIU_AMBCTL_WAT == 14) +#define flash_EBIU_AMBCTL0_WAT B0WAT_14 +#endif +#if (flash_EBIU_AMBCTL_WAT == 13) +#define flash_EBIU_AMBCTL0_WAT B0WAT_13 +#endif +#if (flash_EBIU_AMBCTL_WAT == 12) +#define flash_EBIU_AMBCTL0_WAT B0WAT_12 +#endif +#if (flash_EBIU_AMBCTL_WAT == 11) +#define flash_EBIU_AMBCTL0_WAT B0WAT_11 +#endif +#if (flash_EBIU_AMBCTL_WAT == 10) +#define flash_EBIU_AMBCTL0_WAT B0WAT_10 +#endif +#if (flash_EBIU_AMBCTL_WAT == 9) +#define flash_EBIU_AMBCTL0_WAT B0WAT_9 +#endif +#if (flash_EBIU_AMBCTL_WAT == 8) +#define flash_EBIU_AMBCTL0_WAT B0WAT_8 +#endif +#if (flash_EBIU_AMBCTL_WAT == 7) +#define flash_EBIU_AMBCTL0_WAT B0WAT_7 +#endif +#if (flash_EBIU_AMBCTL_WAT == 6) +#define flash_EBIU_AMBCTL0_WAT B0WAT_6 +#endif +#if (flash_EBIU_AMBCTL_WAT == 5) +#define flash_EBIU_AMBCTL0_WAT B0WAT_5 +#endif +#if (flash_EBIU_AMBCTL_WAT == 4) +#define flash_EBIU_AMBCTL0_WAT B0WAT_4 +#endif +#if (flash_EBIU_AMBCTL_WAT == 3) +#define flash_EBIU_AMBCTL0_WAT B0WAT_3 +#endif +#if (flash_EBIU_AMBCTL_WAT == 2) +#define flash_EBIU_AMBCTL0_WAT B0WAT_2 +#endif +#if (flash_EBIU_AMBCTL_WAT == 1) +#define flash_EBIU_AMBCTL0_WAT B0WAT_1 +#endif + +#if (flash_EBIU_AMBCTL_RAT > 14) +#define flash_EBIU_AMBCTL0_RAT B0RAT_15 +#endif +#if (flash_EBIU_AMBCTL_RAT == 14) +#define flash_EBIU_AMBCTL0_RAT B0RAT_14 +#endif +#if (flash_EBIU_AMBCTL_RAT == 13) +#define flash_EBIU_AMBCTL0_RAT B0RAT_13 +#endif +#if (flash_EBIU_AMBCTL_RAT == 12) +#define flash_EBIU_AMBCTL0_RAT B0RAT_12 +#endif +#if (flash_EBIU_AMBCTL_RAT == 11) +#define flash_EBIU_AMBCTL0_RAT B0RAT_11 +#endif +#if (flash_EBIU_AMBCTL_RAT == 10) +#define flash_EBIU_AMBCTL0_RAT B0RAT_10 +#endif +#if (flash_EBIU_AMBCTL_RAT == 9) +#define flash_EBIU_AMBCTL0_RAT B0RAT_9 +#endif +#if (flash_EBIU_AMBCTL_RAT == 8) +#define flash_EBIU_AMBCTL0_RAT B0RAT_8 +#endif +#if (flash_EBIU_AMBCTL_RAT == 7) +#define flash_EBIU_AMBCTL0_RAT B0RAT_7 +#endif +#if (flash_EBIU_AMBCTL_RAT == 6) +#define flash_EBIU_AMBCTL0_RAT B0RAT_6 +#endif +#if (flash_EBIU_AMBCTL_RAT == 5) +#define flash_EBIU_AMBCTL0_RAT B0RAT_5 +#endif +#if (flash_EBIU_AMBCTL_RAT == 4) +#define flash_EBIU_AMBCTL0_RAT B0RAT_4 +#endif +#if (flash_EBIU_AMBCTL_RAT == 3) +#define flash_EBIU_AMBCTL0_RAT B0RAT_3 +#endif +#if (flash_EBIU_AMBCTL_RAT == 2) +#define flash_EBIU_AMBCTL0_RAT B0RAT_2 +#endif +#if (flash_EBIU_AMBCTL_RAT == 1) +#define flash_EBIU_AMBCTL0_RAT B0RAT_1 +#endif + +#define flash_EBIU_AMBCTL0 \ + (flash_EBIU_AMBCTL0_WAT | flash_EBIU_AMBCTL0_RAT | flash_EBIU_AMBCTL0_HT | \ + flash_EBIU_AMBCTL0_ST | flash_EBIU_AMBCTL0_TT | CONFIG_FLASH_SPEED_RDYEN) diff --git a/arch/blackfin/mach-bf518/include/mach/mem_map.h b/arch/blackfin/mach-bf518/include/mach/mem_map.h new file mode 100644 index 000000000000..10f678f3c5c0 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/mem_map.h @@ -0,0 +1,102 @@ +/* + * file: include/asm-blackfin/mach-bf518/mem_map.h + * based on: include/asm-blackfin/mach-bf527/mem_map.h + * author: Bryan Wu + * + * created: + * description: + * Memory MAP Common header file for blackfin BF518/6/4/2 of processors. + * rev: + * + * modified: + * + * bugs: enter bugs at http://blackfin.uclinux.org/ + * + * this program is free software; you can redistribute it and/or modify + * it under the terms of the gnu general public license as published by + * the free software foundation; either version 2, or (at your option) + * any later version. + * + * this program is distributed in the hope that it will be useful, + * but without any warranty; without even the implied warranty of + * merchantability or fitness for a particular purpose. see the + * gnu general public license for more details. + * + * you should have received a copy of the gnu general public license + * along with this program; see the file copying. + * if not, write to the free software foundation, + * 59 temple place - suite 330, boston, ma 02111-1307, usa. + */ + +#ifndef _MEM_MAP_518_H_ +#define _MEM_MAP_518_H_ + +#define COREMMR_BASE 0xFFE00000 /* Core MMRs */ +#define SYSMMR_BASE 0xFFC00000 /* System MMRs */ + +/* Async Memory Banks */ +#define ASYNC_BANK3_BASE 0x20300000 /* Async Bank 3 */ +#define ASYNC_BANK3_SIZE 0x00100000 /* 1M */ +#define ASYNC_BANK2_BASE 0x20200000 /* Async Bank 2 */ +#define ASYNC_BANK2_SIZE 0x00100000 /* 1M */ +#define ASYNC_BANK1_BASE 0x20100000 /* Async Bank 1 */ +#define ASYNC_BANK1_SIZE 0x00100000 /* 1M */ +#define ASYNC_BANK0_BASE 0x20000000 /* Async Bank 0 */ +#define ASYNC_BANK0_SIZE 0x00100000 /* 1M */ + +/* Boot ROM Memory */ + +#define BOOT_ROM_START 0xEF000000 +#define BOOT_ROM_LENGTH 0x8000 + +/* Level 1 Memory */ + +/* Memory Map for ADSP-BF518/6/4/2 processors */ + +#ifdef CONFIG_BFIN_ICACHE +#define BFIN_ICACHESIZE (16 * 1024) +#else +#define BFIN_ICACHESIZE (0) +#endif + +#define L1_CODE_START 0xFFA00000 +#define L1_DATA_A_START 0xFF800000 +#define L1_DATA_B_START 0xFF900000 + +#define L1_CODE_LENGTH 0xC000 + +#ifdef CONFIG_BFIN_DCACHE + +#ifdef CONFIG_BFIN_DCACHE_BANKA +#define DMEM_CNTR (ACACHE_BSRAM | ENDCPLB | PORT_PREF0) +#define L1_DATA_A_LENGTH (0x8000 - 0x4000) +#define L1_DATA_B_LENGTH 0x8000 +#define BFIN_DCACHESIZE (16 * 1024) +#define BFIN_DSUPBANKS 1 +#else +#define DMEM_CNTR (ACACHE_BCACHE | ENDCPLB | PORT_PREF0) +#define L1_DATA_A_LENGTH (0x8000 - 0x4000) +#define L1_DATA_B_LENGTH (0x8000 - 0x4000) +#define BFIN_DCACHESIZE (32 * 1024) +#define BFIN_DSUPBANKS 2 +#endif + +#else +#define DMEM_CNTR (ASRAM_BSRAM | ENDCPLB | PORT_PREF0) +#define L1_DATA_A_LENGTH 0x8000 +#define L1_DATA_B_LENGTH 0x8000 +#define BFIN_DCACHESIZE 0 +#define BFIN_DSUPBANKS 0 +#endif /*CONFIG_BFIN_DCACHE */ + +/* Level 2 Memory - none */ + +#define L2_START 0 +#define L2_LENGTH 0 + +/* Scratch Pad Memory */ + +#define L1_SCRATCH_START 0xFFB00000 +#define L1_SCRATCH_LENGTH 0x1000 + +#endif /* _MEM_MAP_518_H_ */ diff --git a/arch/blackfin/mach-bf518/include/mach/portmux.h b/arch/blackfin/mach-bf518/include/mach/portmux.h new file mode 100644 index 000000000000..5af30c8ecfe7 --- /dev/null +++ b/arch/blackfin/mach-bf518/include/mach/portmux.h @@ -0,0 +1,188 @@ +#ifndef _MACH_PORTMUX_H_ +#define _MACH_PORTMUX_H_ + +#define MAX_RESOURCES MAX_BLACKFIN_GPIOS + +/* EMAC MII/RMII Port Mux */ +#define P_MII0_ETxD2 (P_DEFINED | P_IDENT(GPIO_PF0) | P_FUNCT(0)) +#define P_MII0_ERxD2 (P_DEFINED | P_IDENT(GPIO_PF1) | P_FUNCT(0)) +#define P_MII0_ETxD3 (P_DEFINED | P_IDENT(GPIO_PF2) | P_FUNCT(0)) +#define P_MII0_ERxD3 (P_DEFINED | P_IDENT(GPIO_PF3) | P_FUNCT(0)) +#define P_MII0_ERxCLK (P_DEFINED | P_IDENT(GPIO_PF4) | P_FUNCT(0)) +#define P_MII0_ERxDV (P_DEFINED | P_IDENT(GPIO_PF5) | P_FUNCT(0)) +#define P_MII0_COL (P_DEFINED | P_IDENT(GPIO_PF6) | P_FUNCT(0)) + +#define P_MII0_MDC (P_DEFINED | P_IDENT(GPIO_PF8) | P_FUNCT(0)) +#define P_MII0_MDIO (P_DEFINED | P_IDENT(GPIO_PF9) | P_FUNCT(0)) +#define P_MII0_ETxD0 (P_DEFINED | P_IDENT(GPIO_PF10) | P_FUNCT(0)) +#define P_MII0_ERxD0 (P_DEFINED | P_IDENT(GPIO_PF11) | P_FUNCT(0)) +#define P_MII0_ETxD1 (P_DEFINED | P_IDENT(GPIO_PF12) | P_FUNCT(0)) +#define P_MII0_ERxD1 (P_DEFINED | P_IDENT(GPIO_PF13) | P_FUNCT(0)) +#define P_MII0_ETxEn (P_DEFINED | P_IDENT(GPIO_PF14) | P_FUNCT(0)) +#define P_MII0_PHYINT (P_DEFINED | P_IDENT(GPIO_PF15) | P_FUNCT(0)) +#define P_MII0_CRS (P_DEFINED | P_IDENT(GPIO_PG0) | P_FUNCT(0)) +#define P_MII0_ERxER (P_DEFINED | P_IDENT(GPIO_PG1) | P_FUNCT(0)) +#define P_MII0_ETxCLK (P_DEFINED | P_IDENT(GPIO_PG2) | P_FUNCT(0)) + +#define P_MII {\ + P_MII0_ETxD0, \ + P_MII0_ETxD1, \ + P_MII0_ETxD2, \ + P_MII0_ETxD3, \ + P_MII0_ETxEN, \ + P_MII0_TxCLK, \ + P_MII0_PHYINT, \ + P_MII0_COL, \ + P_MII0_ERxD0, \ + P_MII0_ERxD1, \ + P_MII0_ERxD2, \ + P_MII0_ERxD3, \ + P_MII0_ERxDV, \ + P_MII0_ERxCLK, \ + P_MII0_ERxER, \ + P_MII0_CRS, \ + P_MII0_MDC, \ + P_MII0_MDIO, 0} + +#define P_RMII {\ + P_MII0_ETxD0, \ + P_MII0_ETxD1, \ + P_MII0_ETxEN, \ + P_MII0_ERxD0, \ + P_MII0_ERxD1, \ + P_MII0_ERxER, \ + P_MII0_TxCLK, \ + P_MII0_PHYINT, \ + P_MII0_CRS, \ + P_MII0_MDC, \ + P_MII0_MDIO, 0} + +/* PPI Port Mux */ +#define P_PPI0_D0 (P_DEFINED | P_IDENT(GPIO_PF0) | P_FUNCT(1)) +#define P_PPI0_D1 (P_DEFINED | P_IDENT(GPIO_PF1) | P_FUNCT(1)) +#define P_PPI0_D2 (P_DEFINED | P_IDENT(GPIO_PF2) | P_FUNCT(1)) +#define P_PPI0_D3 (P_DEFINED | P_IDENT(GPIO_PF3) | P_FUNCT(1)) +#define P_PPI0_D4 (P_DEFINED | P_IDENT(GPIO_PF4) | P_FUNCT(1)) +#define P_PPI0_D5 (P_DEFINED | P_IDENT(GPIO_PF5) | P_FUNCT(1)) +#define P_PPI0_D6 (P_DEFINED | P_IDENT(GPIO_PF6) | P_FUNCT(1)) +#define P_PPI0_D7 (P_DEFINED | P_IDENT(GPIO_PF7) | P_FUNCT(1)) +#define P_PPI0_D8 (P_DEFINED | P_IDENT(GPIO_PF8) | P_FUNCT(1)) +#define P_PPI0_D9 (P_DEFINED | P_IDENT(GPIO_PF9) | P_FUNCT(1)) +#define P_PPI0_D10 (P_DEFINED | P_IDENT(GPIO_PF10) | P_FUNCT(1)) +#define P_PPI0_D11 (P_DEFINED | P_IDENT(GPIO_PF11) | P_FUNCT(1)) +#define P_PPI0_D12 (P_DEFINED | P_IDENT(GPIO_PF12) | P_FUNCT(1)) +#define P_PPI0_D13 (P_DEFINED | P_IDENT(GPIO_PF13) | P_FUNCT(1)) +#define P_PPI0_D14 (P_DEFINED | P_IDENT(GPIO_PF14) | P_FUNCT(1)) +#define P_PPI0_D15 (P_DEFINED | P_IDENT(GPIO_PF15) | P_FUNCT(1)) + +#define P_PPI0_CLK (P_DEFINED | P_IDENT(GPIO_PG12) | P_FUNCT(1)) +#define P_PPI0_FS1 (P_DEFINED | P_IDENT(GPIO_PG13) | P_FUNCT(1)) +#define P_PPI0_FS2 (P_DEFINED | P_IDENT(GPIO_PG14) | P_FUNCT(1)) +#define P_PPI0_FS3 (P_DEFINED | P_IDENT(GPIO_PG15) | P_FUNCT(1)) + +/* SPI Port Mux */ +#define P_SPI0_SS (P_DEFINED | P_IDENT(GPIO_PG11) | P_FUNCT(0)) +#define P_SPI0_SCK (P_DEFINED | P_IDENT(GPIO_PG12) | P_FUNCT(0)) +#define P_SPI0_MISO (P_DEFINED | P_IDENT(GPIO_PG13) | P_FUNCT(0)) +#define P_SPI0_MOSI (P_DEFINED | P_IDENT(GPIO_PG14) | P_FUNCT(0)) + +#define P_SPI0_SSEL1 (P_DEFINED | P_IDENT(GPIO_PF7) | P_FUNCT(0)) +#define P_SPI0_SSEL2 (P_DEFINED | P_IDENT(GPIO_PG15) | P_FUNCT(0)) +#define P_SPI0_SSEL3 (P_DEFINED | P_IDENT(GPIO_PH4) | P_FUNCT(2)) +#define P_SPI0_SSEL4 (P_DEFINED | P_IDENT(GPIO_PG10) | P_FUNCT(2)) +#define P_SPI0_SSEL5 (P_DEFINED | P_IDENT(GPIO_PG3) | P_FUNCT(2)) + +#define P_SPI1_SS (P_DEFINED | P_IDENT(GPIO_PH0) | P_FUNCT(1)) +#define P_SPI1_SCK (P_DEFINED | P_IDENT(GPIO_PH1) | P_FUNCT(1)) +#define P_SPI1_MISO (P_DEFINED | P_IDENT(GPIO_PH2) | P_FUNCT(1)) +#define P_SPI1_MOSI (P_DEFINED | P_IDENT(GPIO_PH3) | P_FUNCT(1)) + +#define P_SPI1_SSEL1 (P_DEFINED | P_IDENT(GPIO_PH6) | P_FUNCT(2)) +#define P_SPI1_SSEL2 (P_DEFINED | P_IDENT(GPIO_PF0) | P_FUNCT(2)) +#define P_SPI1_SSEL3 (P_DEFINED | P_IDENT(GPIO_PG0) | P_FUNCT(2)) +#define P_SPI1_SSEL4 (P_DEFINED | P_IDENT(GPIO_PF8) | P_FUNCT(2)) +#define P_SPI1_SSEL5 (P_DEFINED | P_IDENT(GPIO_PG11) | P_FUNCT(2)) + +/* SPORT Port Mux */ +#define P_SPORT0_DRPRI (P_DEFINED | P_IDENT(GPIO_PG3) | P_FUNCT(0)) +#define P_SPORT0_RSCLK (P_DEFINED | P_IDENT(GPIO_PG4) | P_FUNCT(0)) +#define P_SPORT0_RFS (P_DEFINED | P_IDENT(GPIO_PG5) | P_FUNCT(0)) +#define P_SPORT0_TFS (P_DEFINED | P_IDENT(GPIO_PG6) | P_FUNCT(0)) +#define P_SPORT0_DTPRI (P_DEFINED | P_IDENT(GPIO_PG7) | P_FUNCT(0)) +#define P_SPORT0_TSCLK (P_DEFINED | P_IDENT(GPIO_PG8) | P_FUNCT(0)) +#define P_SPORT0_DTSEC (P_DEFINED | P_IDENT(GPIO_PG9) | P_FUNCT(0)) +#define P_SPORT0_DRSEC (P_DEFINED | P_IDENT(GPIO_PG10) | P_FUNCT(0)) + +#define P_SPORT1_DRPRI (P_DEFINED | P_IDENT(GPIO_PH0) | P_FUNCT(0)) +#define P_SPORT1_RFS (P_DEFINED | P_IDENT(GPIO_PH1) | P_FUNCT(0)) +#define P_SPORT1_RSCLK (P_DEFINED | P_IDENT(GPIO_PH2) | P_FUNCT(0)) +#define P_SPORT1_DTPRI (P_DEFINED | P_IDENT(GPIO_PH3) | P_FUNCT(0)) +#define P_SPORT1_TFS (P_DEFINED | P_IDENT(GPIO_PH4) | P_FUNCT(0)) +#define P_SPORT1_TSCLK (P_DEFINED | P_IDENT(GPIO_PH5) | P_FUNCT(0)) +#define P_SPORT1_DTSEC (P_DEFINED | P_IDENT(GPIO_PH6) | P_FUNCT(0)) +#define P_SPORT1_DRSEC (P_DEFINED | P_IDENT(GPIO_PH7) | P_FUNCT(0)) + +/* UART Port Mux */ +#define P_UART0_TX (P_DEFINED | P_IDENT(GPIO_PG9) | P_FUNCT(1)) +#define P_UART0_RX (P_DEFINED | P_IDENT(GPIO_PG10) | P_FUNCT(1)) + +#define P_UART1_TX (P_DEFINED | P_IDENT(GPIO_PH6) | P_FUNCT(1)) +#define P_UART1_RX (P_DEFINED | P_IDENT(GPIO_PH7) | P_FUNCT(1)) + +/* Timer */ +#define P_TMRCLK (P_DEFINED | P_IDENT(GPIO_PG5) | P_FUNCT(2)) +#define P_TMR0 (P_DEFINED | P_IDENT(GPIO_PG6) | P_FUNCT(2)) +#define P_TMR1 (P_DEFINED | P_IDENT(GPIO_PG7) | P_FUNCT(2)) +#define P_TMR2 (P_DEFINED | P_IDENT(GPIO_PF9) | P_FUNCT(2)) +#define P_TMR3 (P_DEFINED | P_IDENT(GPIO_PF10) | P_FUNCT(2)) +#define P_TMR4 (P_DEFINED | P_IDENT(GPIO_PG9) | P_FUNCT(2)) +#define P_TMR5 (P_DEFINED | P_IDENT(GPIO_PG4) | P_FUNCT(2)) +#define P_TMR6 (P_DEFINED | P_IDENT(GPIO_PG8) | P_FUNCT(2)) +#define P_TMR7 (P_DEFINED | P_IDENT(GPIO_PH7) | P_FUNCT(2)) + +/* DMA */ +#define P_DMAR1 (P_DEFINED | P_IDENT(GPIO_PG1) | P_FUNCT(1)) +#define P_DMAR0 (P_DEFINED | P_IDENT(GPIO_PG2) | P_FUNCT(1)) + +/* TWI */ +#define P_TWI0_SCL (P_DONTCARE) +#define P_TWI0_SDA (P_DONTCARE) + +/* PWM */ +#define P_PWM0_AH (P_DEFINED | P_IDENT(GPIO_PF1) | P_FUNCT(2)) +#define P_PWM0_AL (P_DEFINED | P_IDENT(GPIO_PF2) | P_FUNCT(2)) +#define P_PWM0_BH (P_DEFINED | P_IDENT(GPIO_PF3) | P_FUNCT(2)) +#define P_PWM0_BL (P_DEFINED | P_IDENT(GPIO_PF4) | P_FUNCT(2)) +#define P_PWM0_CH (P_DEFINED | P_IDENT(GPIO_PF5) | P_FUNCT(2)) +#define P_PWM0_CL (P_DEFINED | P_IDENT(GPIO_PF6) | P_FUNCT(2)) +#define P_PWM0_SYNC (P_DEFINED | P_IDENT(GPIO_PF7) | P_FUNCT(2)) + +#define P_PWM1_AH (P_DEFINED | P_IDENT(GPIO_PF11) | P_FUNCT(2)) +#define P_PWM1_AL (P_DEFINED | P_IDENT(GPIO_PF12) | P_FUNCT(2)) +#define P_PWM1_BH (P_DEFINED | P_IDENT(GPIO_PF13) | P_FUNCT(2)) +#define P_PWM1_BL (P_DEFINED | P_IDENT(GPIO_PF14) | P_FUNCT(2)) +#define P_PWM1_CH (P_DEFINED | P_IDENT(GPIO_PG1) | P_FUNCT(2)) +#define P_PWM1_CL (P_DEFINED | P_IDENT(GPIO_PG2) | P_FUNCT(2)) +#define P_PWM1_SYNC (P_DEFINED | P_IDENT(GPIO_PF15) | P_FUNCT(2)) + +#define P_PWM_TRIPB (P_DEFINED | P_IDENT(GPIO_PG14) | P_FUNCT(2)) + +/* RSI */ +#define P_RSI_DATA0 (P_DEFINED | P_IDENT(GPIO_PG3) | P_FUNCT(1)) +#define P_RSI_DATA1 (P_DEFINED | P_IDENT(GPIO_PG4) | P_FUNCT(1)) +#define P_RSI_DATA2 (P_DEFINED | P_IDENT(GPIO_PG5) | P_FUNCT(1)) +#define P_RSI_DATA3 (P_DEFINED | P_IDENT(GPIO_PG6) | P_FUNCT(1)) +#define P_RSI_DATA4 (P_DEFINED | P_IDENT(GPIO_PH0) | P_FUNCT(2)) +#define P_RSI_DATA5 (P_DEFINED | P_IDENT(GPIO_PH1) | P_FUNCT(2)) +#define P_RSI_DATA6 (P_DEFINED | P_IDENT(GPIO_PH2) | P_FUNCT(2)) +#define P_RSI_DATA7 (P_DEFINED | P_IDENT(GPIO_PH3) | P_FUNCT(2)) +#define P_RSI_CMD (P_DEFINED | P_IDENT(GPIO_PG7) | P_FUNCT(1)) +#define P_RSI_CLK (P_DEFINED | P_IDENT(GPIO_PG8) | P_FUNCT(1)) + +/* PTP */ +#define P_PTP_PPS (P_DEFINED | P_IDENT(GPIO_PG12) | P_FUNCT(2)) +#define P_PTP_CLKOUT (P_DEFINED | P_IDENT(GPIO_PG13) | P_FUNCT(2)) + +#define P_HWAIT (P_DEFINED | P_IDENT(GPIO_PG000000000) | P_FUNCT(1)) + +#endif /* _MACH_PORTMUX_H_ */ diff --git a/arch/blackfin/mach-bf518/ints-priority.c b/arch/blackfin/mach-bf518/ints-priority.c new file mode 100644 index 000000000000..c490c79194c0 --- /dev/null +++ b/arch/blackfin/mach-bf518/ints-priority.c @@ -0,0 +1,99 @@ +/* + * File: arch/blackfin/mach-bf518/ints-priority.c + * Based on: arch/blackfin/mach-bf527/ints-priority.c + * Author: Bryan Wu + * + * Created: + * Description: Set up the interrupt priorities + * + * Modified: + * Copyright 2004-2007 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include + +void __init program_IAR(void) +{ + /* Program the IAR0 Register with the configured priority */ + bfin_write_SIC_IAR0(((CONFIG_IRQ_PLL_WAKEUP - 7) << IRQ_PLL_WAKEUP_POS) | + ((CONFIG_IRQ_DMA0_ERROR - 7) << IRQ_DMA0_ERROR_POS) | + ((CONFIG_IRQ_DMAR0_BLK - 7) << IRQ_DMAR0_BLK_POS) | + ((CONFIG_IRQ_DMAR1_BLK - 7) << IRQ_DMAR1_BLK_POS) | + ((CONFIG_IRQ_DMAR0_OVR - 7) << IRQ_DMAR0_OVR_POS) | + ((CONFIG_IRQ_DMAR1_OVR - 7) << IRQ_DMAR1_OVR_POS) | + ((CONFIG_IRQ_PPI_ERROR - 7) << IRQ_PPI_ERROR_POS) | + ((CONFIG_IRQ_MAC_ERROR - 7) << IRQ_MAC_ERROR_POS)); + + + bfin_write_SIC_IAR1(((CONFIG_IRQ_SPORT0_ERROR - 7) << IRQ_SPORT0_ERROR_POS) | + ((CONFIG_IRQ_SPORT1_ERROR - 7) << IRQ_SPORT1_ERROR_POS) | + ((CONFIG_IRQ_PTP_ERROR - 7) << IRQ_PTP_ERROR_POS) | + ((CONFIG_IRQ_UART0_ERROR - 7) << IRQ_UART0_ERROR_POS) | + ((CONFIG_IRQ_UART1_ERROR - 7) << IRQ_UART1_ERROR_POS) | + ((CONFIG_IRQ_RTC - 7) << IRQ_RTC_POS) | + ((CONFIG_IRQ_PPI - 7) << IRQ_PPI_POS)); + + bfin_write_SIC_IAR2(((CONFIG_IRQ_SPORT0_RX - 7) << IRQ_SPORT0_RX_POS) | + ((CONFIG_IRQ_SPORT0_TX - 7) << IRQ_SPORT0_TX_POS) | + ((CONFIG_IRQ_SPORT1_RX - 7) << IRQ_SPORT1_RX_POS) | + ((CONFIG_IRQ_SPORT1_TX - 7) << IRQ_SPORT1_TX_POS) | + ((CONFIG_IRQ_TWI - 7) << IRQ_TWI_POS) | + ((CONFIG_IRQ_SPI0 - 7) << IRQ_SPI0_POS) | + ((CONFIG_IRQ_UART0_RX - 7) << IRQ_UART0_RX_POS) | + ((CONFIG_IRQ_UART0_TX - 7) << IRQ_UART0_TX_POS)); + + bfin_write_SIC_IAR3(((CONFIG_IRQ_UART1_RX - 7) << IRQ_UART1_RX_POS) | + ((CONFIG_IRQ_UART1_TX - 7) << IRQ_UART1_TX_POS) | + ((CONFIG_IRQ_OPTSEC - 7) << IRQ_OPTSEC_POS) | + ((CONFIG_IRQ_CNT - 7) << IRQ_CNT_POS) | + ((CONFIG_IRQ_MAC_RX - 7) << IRQ_MAC_RX_POS) | + ((CONFIG_IRQ_PORTH_INTA - 7) << IRQ_PORTH_INTA_POS) | + ((CONFIG_IRQ_MAC_TX - 7) << IRQ_MAC_TX_POS) | + ((CONFIG_IRQ_PORTH_INTB - 7) << IRQ_PORTH_INTB_POS)); + + bfin_write_SIC_IAR4(((CONFIG_IRQ_TMR0 - 7) << IRQ_TMR0_POS) | + ((CONFIG_IRQ_TMR1 - 7) << IRQ_TMR1_POS) | + ((CONFIG_IRQ_TMR2 - 7) << IRQ_TMR2_POS) | + ((CONFIG_IRQ_TMR3 - 7) << IRQ_TMR3_POS) | + ((CONFIG_IRQ_TMR4 - 7) << IRQ_TMR4_POS) | + ((CONFIG_IRQ_TMR5 - 7) << IRQ_TMR5_POS) | + ((CONFIG_IRQ_TMR6 - 7) << IRQ_TMR6_POS) | + ((CONFIG_IRQ_TMR7 - 7) << IRQ_TMR7_POS)); + + bfin_write_SIC_IAR5(((CONFIG_IRQ_PORTG_INTA - 7) << IRQ_PORTG_INTA_POS) | + ((CONFIG_IRQ_PORTG_INTB - 7) << IRQ_PORTG_INTB_POS) | + ((CONFIG_IRQ_MEM_DMA0 - 7) << IRQ_MEM_DMA0_POS) | + ((CONFIG_IRQ_MEM_DMA1 - 7) << IRQ_MEM_DMA1_POS) | + ((CONFIG_IRQ_WATCH - 7) << IRQ_WATCH_POS) | + ((CONFIG_IRQ_PORTF_INTA - 7) << IRQ_PORTF_INTA_POS) | + ((CONFIG_IRQ_PORTF_INTB - 7) << IRQ_PORTF_INTB_POS) | + ((CONFIG_IRQ_SPI0_ERROR - 7) << IRQ_SPI0_ERROR_POS)); + + bfin_write_SIC_IAR6(((CONFIG_IRQ_SPI1_ERROR - 7) << IRQ_SPI1_ERROR_POS) | + ((CONFIG_IRQ_RSI_INT0 - 7) << IRQ_RSI_INT0_POS) | + ((CONFIG_IRQ_RSI_INT1 - 7) << IRQ_RSI_INT1_POS) | + ((CONFIG_IRQ_PWM_TRIP - 7) << IRQ_PWM_TRIP_POS) | + ((CONFIG_IRQ_PWM_SYNC - 7) << IRQ_PWM_SYNC_POS) | + ((CONFIG_IRQ_PTP_STAT - 7) << IRQ_PTP_STAT_POS)); + + SSYNC(); +} diff --git a/arch/blackfin/mach-common/dpmc_modes.S b/arch/blackfin/mach-common/dpmc_modes.S index 21f61605f1f3..4da50bcd9300 100644 --- a/arch/blackfin/mach-common/dpmc_modes.S +++ b/arch/blackfin/mach-common/dpmc_modes.S @@ -248,7 +248,7 @@ ENDPROC(_unset_dram_srfs) ENTRY(_set_sic_iwr) #if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) || \ - defined(CONFIG_BF538) || defined(CONFIG_BF539) + defined(CONFIG_BF538) || defined(CONFIG_BF539) || defined(CONFIG_BF51x) P0.H = hi(SIC_IWR0); P0.L = lo(SIC_IWR0); P1.H = hi(SIC_IWR1); diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index 5ae507f59885..c32fa695f8cc 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -104,7 +104,8 @@ static void __init search_IAR(void) for (irqn = 0; irqn < NR_PERI_INTS; irqn++) { int iar_shift = (irqn & 7) * 4; if (ivg == (0xf & -#if defined(CONFIG_BF52x) || defined(CONFIG_BF538) || defined(CONFIG_BF539) +#if defined(CONFIG_BF52x) || defined(CONFIG_BF538) \ + || defined(CONFIG_BF539) || defined(CONFIG_BF51x) bfin_read32((unsigned long *)SIC_IAR0 + ((irqn % 32) >> 3) + ((irqn / 32) * ((SIC_IAR4 - SIC_IAR0) / 4))) >> iar_shift)) { @@ -543,7 +544,7 @@ static void bfin_demux_gpio_irq(unsigned int inta_irq, case IRQ_PORTF_INTA: irq = IRQ_PF0; break; -#elif defined(CONFIG_BF52x) +#elif defined(CONFIG_BF52x) || defined(CONFIG_BF51x) case IRQ_PORTF_INTA: irq = IRQ_PF0; break; @@ -990,7 +991,8 @@ int __init init_arch_irq(void) int irq; unsigned long ilat = 0; /* Disable all the peripheral intrs - page 4-29 HW Ref manual */ -#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) || defined(BF538_FAMILY) +#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) \ + || defined(BF538_FAMILY) || defined(CONFIG_BF51x) bfin_write_SIC_IMASK0(SIC_UNMASK_ALL); bfin_write_SIC_IMASK1(SIC_UNMASK_ALL); # ifdef CONFIG_BF54x @@ -1035,7 +1037,7 @@ int __init init_arch_irq(void) case IRQ_PINT1: case IRQ_PINT2: case IRQ_PINT3: -#elif defined(CONFIG_BF52x) +#elif defined(CONFIG_BF52x) || defined(CONFIG_BF51x) case IRQ_PORTF_INTA: case IRQ_PORTG_INTA: case IRQ_PORTH_INTA: @@ -1094,10 +1096,11 @@ int __init init_arch_irq(void) IMASK_IVG14 | IMASK_IVG13 | IMASK_IVG12 | IMASK_IVG11 | IMASK_IVG10 | IMASK_IVG9 | IMASK_IVG8 | IMASK_IVG7 | IMASK_IVGHW; -#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) || defined(BF538_FAMILY) +#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) \ + || defined(BF538_FAMILY) || defined(CONFIG_BF51x) bfin_write_SIC_IWR0(IWR_DISABLE_ALL); -#if defined(CONFIG_BF52x) - /* BF52x system reset does not properly reset SIC_IWR1 which +#if defined(CONFIG_BF52x) || defined(CONFIG_BF51x) + /* BF52x/BF51x system reset does not properly reset SIC_IWR1 which * will screw up the bootrom as it relies on MDMA0/1 waking it * up from IDLE instructions. See this report for more info: * http://blackfin.uclinux.org/gf/tracker/4323 @@ -1126,7 +1129,8 @@ void do_irq(int vec, struct pt_regs *fp) } else { struct ivgx *ivg = ivg7_13[vec - IVG7].ifirst; struct ivgx *ivg_stop = ivg7_13[vec - IVG7].istop; -#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) || defined(BF538_FAMILY) +#if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) \ + || defined(BF538_FAMILY) || defined(CONFIG_BF51x) unsigned long sic_status[3]; sic_status[0] = bfin_read_SIC_ISR0() & bfin_read_SIC_IMASK0(); diff --git a/arch/blackfin/mach-common/pm.c b/arch/blackfin/mach-common/pm.c index f774d8aa5b03..ee33a8a988bd 100644 --- a/arch/blackfin/mach-common/pm.c +++ b/arch/blackfin/mach-common/pm.c @@ -83,9 +83,9 @@ void bfin_pm_suspend_standby_enter(void) bfin_pm_standby_restore(); #if defined(CONFIG_BF54x) || defined(CONFIG_BF52x) || defined(CONFIG_BF561) || \ - defined(CONFIG_BF538) || defined(CONFIG_BF539) + defined(CONFIG_BF538) || defined(CONFIG_BF539) || defined(CONFIG_BF51x) bfin_write_SIC_IWR0(IWR_DISABLE_ALL); -#if defined(CONFIG_BF52x) +#if defined(CONFIG_BF52x) || defined(CONFIG_BF51x) /* BF52x system reset does not properly reset SIC_IWR1 which * will screw up the bootrom as it relies on MDMA0/1 waking it * up from IDLE instructions. See this report for more info: From 588ba8199e06e4d558114093d0b5812920035c72 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 28 Oct 2008 14:38:51 +0800 Subject: [PATCH 0757/2985] Blackfin arch: remove unused local define Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/kernel/bfin_dma_5xx.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/blackfin/kernel/bfin_dma_5xx.c b/arch/blackfin/kernel/bfin_dma_5xx.c index 339293d677cc..ca3a26a78893 100644 --- a/arch/blackfin/kernel/bfin_dma_5xx.c +++ b/arch/blackfin/kernel/bfin_dma_5xx.c @@ -38,9 +38,6 @@ #include #include -/* Remove unused code not exported by symbol or internally called */ -#define REMOVE_DEAD_CODE - /************************************************************************** * Global Variables ***************************************************************************/ From 27228b2e4c1726a376b32f8b12242718ebf5b8a4 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 28 Oct 2008 15:45:42 +0800 Subject: [PATCH 0758/2985] Blackfin arch: unify check_gpio() to reduce arch differences Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/kernel/bfin_gpio.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/blackfin/kernel/bfin_gpio.c b/arch/blackfin/kernel/bfin_gpio.c index 5556e13993bf..a808baf5d309 100644 --- a/arch/blackfin/kernel/bfin_gpio.c +++ b/arch/blackfin/kernel/bfin_gpio.c @@ -216,24 +216,18 @@ static unsigned int sic_iwr_irqs[gpio_bank(MAX_BLACKFIN_GPIOS)] = {IRQ_PROG0_INT #endif #endif /* CONFIG_PM */ -#if defined(BF548_FAMILY) inline int check_gpio(unsigned gpio) { +#if defined(BF548_FAMILY) if (gpio == GPIO_PB15 || gpio == GPIO_PC14 || gpio == GPIO_PC15 || gpio == GPIO_PH14 || gpio == GPIO_PH15 - || gpio == GPIO_PJ14 || gpio == GPIO_PJ15 - || gpio >= MAX_BLACKFIN_GPIOS) + || gpio == GPIO_PJ14 || gpio == GPIO_PJ15) return -EINVAL; - return 0; -} -#else -inline int check_gpio(unsigned gpio) -{ +#endif if (gpio >= MAX_BLACKFIN_GPIOS) return -EINVAL; return 0; } -#endif static void gpio_error(unsigned gpio) { From 1f7d373f4773eca06978446f677b4de5a4814095 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 28 Oct 2008 15:47:11 +0800 Subject: [PATCH 0759/2985] Blackfin arch: fix cmp_label() so it doesnt incorrectly accept partial leading matches Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/kernel/bfin_gpio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/blackfin/kernel/bfin_gpio.c b/arch/blackfin/kernel/bfin_gpio.c index a808baf5d309..4aa3c053297f 100644 --- a/arch/blackfin/kernel/bfin_gpio.c +++ b/arch/blackfin/kernel/bfin_gpio.c @@ -256,8 +256,7 @@ static int cmp_label(unsigned short ident, const char *label) } if (label) - return strncmp(str_ident[ident].name, - label, strlen(label)); + return strcmp(str_ident[ident].name, label); else return -EINVAL; } From 6c7ec0ec93d3f8e661a1d11221d869654dd3fcd9 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 28 Oct 2008 15:49:59 +0800 Subject: [PATCH 0760/2985] Blackfin arch: unify peripheral_request() to reduce arch differences Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/kernel/bfin_gpio.c | 89 ++++---------------------------- 1 file changed, 11 insertions(+), 78 deletions(-) diff --git a/arch/blackfin/kernel/bfin_gpio.c b/arch/blackfin/kernel/bfin_gpio.c index 4aa3c053297f..8d12853a64fd 100644 --- a/arch/blackfin/kernel/bfin_gpio.c +++ b/arch/blackfin/kernel/bfin_gpio.c @@ -870,7 +870,6 @@ EXPORT_SYMBOL(get_gpio_dir); * MODIFICATION HISTORY : **************************************************************/ -#ifdef BF548_FAMILY int peripheral_request(unsigned short per, const char *label) { unsigned long flags; @@ -886,15 +885,16 @@ int peripheral_request(unsigned short per, const char *label) if (!(per & P_DEFINED)) return -ENODEV; - if (check_gpio(ident) < 0) + if (check_gpio(ident)) return -EINVAL; local_irq_save(flags); + /* Can't do GPIO and peripheral at the same time */ if (unlikely(reserved_gpio_map[gpio_bank(ident)] & gpio_bit(ident))) { dump_stack(); printk(KERN_ERR - "%s: Peripheral %d is already reserved as GPIO by %s !\n", + "%s: Peripheral %d is already reserved as GPIO by %s !\n", __func__, ident, get_label(ident)); local_irq_restore(flags); return -EBUSY; @@ -902,15 +902,18 @@ int peripheral_request(unsigned short per, const char *label) if (unlikely(reserved_peri_map[gpio_bank(ident)] & gpio_bit(ident))) { - u16 funct = get_portmux(ident); - /* * Pin functions like AMC address strobes my * be requested and used by several drivers */ - if (!((per & P_MAYSHARE) && (funct == P_FUNCT2MUX(per)))) { +#ifdef BF548_FAMILY + u16 funct = get_portmux(ident); + if (!((per & P_MAYSHARE) && (funct == P_FUNCT2MUX(per)))) { +#else + if (!(per & P_MAYSHARE)) { +#endif /* * Allow that the identical pin function can * be requested from the same driver twice @@ -931,89 +934,19 @@ int peripheral_request(unsigned short per, const char *label) anyway: reserved_peri_map[gpio_bank(ident)] |= gpio_bit(ident); +#ifdef BF548_FAMILY portmux_setup(ident, P_FUNCT2MUX(per)); - port_setup(ident, PERIPHERAL_USAGE); - - local_irq_restore(flags); - set_label(ident, label); - - return 0; -} -EXPORT_SYMBOL(peripheral_request); #else - -int peripheral_request(unsigned short per, const char *label) -{ - unsigned long flags; - unsigned short ident = P_IDENT(per); - - /* - * Don't cares are pins with only one dedicated function - */ - - if (per & P_DONTCARE) - return 0; - - if (!(per & P_DEFINED)) - return -ENODEV; - - local_irq_save(flags); - - if (!check_gpio(ident)) { - - if (unlikely(reserved_gpio_map[gpio_bank(ident)] & gpio_bit(ident))) { - dump_stack(); - printk(KERN_ERR - "%s: Peripheral %d is already reserved as GPIO by %s !\n", - __func__, ident, get_label(ident)); - local_irq_restore(flags); - return -EBUSY; - } - - } - - if (unlikely(reserved_peri_map[gpio_bank(ident)] & gpio_bit(ident))) { - - /* - * Pin functions like AMC address strobes my - * be requested and used by several drivers - */ - - if (!(per & P_MAYSHARE)) { - - /* - * Allow that the identical pin function can - * be requested from the same driver twice - */ - - if (cmp_label(ident, label) == 0) - goto anyway; - - dump_stack(); - printk(KERN_ERR - "%s: Peripheral %d function %d is already" - " reserved by %s !\n", - __func__, ident, P_FUNCT2MUX(per), - get_label(ident)); - local_irq_restore(flags); - return -EBUSY; - } - - } - - anyway: portmux_setup(per, P_FUNCT2MUX(per)); - +#endif port_setup(ident, PERIPHERAL_USAGE); - reserved_peri_map[gpio_bank(ident)] |= gpio_bit(ident); local_irq_restore(flags); set_label(ident, label); return 0; } EXPORT_SYMBOL(peripheral_request); -#endif int peripheral_request_list(const unsigned short per[], const char *label) { From a2d03a1d8e2562cc64a223485c06db9840ac3b2b Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 28 Oct 2008 15:53:37 +0800 Subject: [PATCH 0761/2985] Blackfin arch: unify port_setup() to reduce arch differences Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/kernel/bfin_gpio.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/arch/blackfin/kernel/bfin_gpio.c b/arch/blackfin/kernel/bfin_gpio.c index 8d12853a64fd..96090268e007 100644 --- a/arch/blackfin/kernel/bfin_gpio.c +++ b/arch/blackfin/kernel/bfin_gpio.c @@ -261,29 +261,25 @@ static int cmp_label(unsigned short ident, const char *label) return -EINVAL; } +static void port_setup(unsigned gpio, unsigned short usage) +{ + if (check_gpio(gpio)) + return; + #if defined(BF527_FAMILY) || defined(BF537_FAMILY) || defined(BF518_FAMILY) -static void port_setup(unsigned gpio, unsigned short usage) -{ - if (!check_gpio(gpio)) { - if (usage == GPIO_USAGE) - *port_fer[gpio_bank(gpio)] &= ~gpio_bit(gpio); - else - *port_fer[gpio_bank(gpio)] |= gpio_bit(gpio); - SSYNC(); - } -} + if (usage == GPIO_USAGE) + *port_fer[gpio_bank(gpio)] &= ~gpio_bit(gpio); + else + *port_fer[gpio_bank(gpio)] |= gpio_bit(gpio); + SSYNC(); #elif defined(BF548_FAMILY) -static void port_setup(unsigned gpio, unsigned short usage) -{ if (usage == GPIO_USAGE) gpio_array[gpio_bank(gpio)]->port_fer &= ~gpio_bit(gpio); else gpio_array[gpio_bank(gpio)]->port_fer |= gpio_bit(gpio); SSYNC(); -} -#else -# define port_setup(...) do { } while (0) #endif +} #ifdef BF537_FAMILY static struct { From 6a87d29bc684d845fe8338a8ce279f743d343250 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 28 Oct 2008 16:16:29 +0800 Subject: [PATCH 0762/2985] Blackfin arch: refine the gpio check refine the gpio check in peripheral_request() so that it only checks pins that can be used as both GPIO and a peripheral Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/kernel/bfin_gpio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/blackfin/kernel/bfin_gpio.c b/arch/blackfin/kernel/bfin_gpio.c index 96090268e007..3e698d651f17 100644 --- a/arch/blackfin/kernel/bfin_gpio.c +++ b/arch/blackfin/kernel/bfin_gpio.c @@ -881,13 +881,13 @@ int peripheral_request(unsigned short per, const char *label) if (!(per & P_DEFINED)) return -ENODEV; - if (check_gpio(ident)) - return -EINVAL; - local_irq_save(flags); - /* Can't do GPIO and peripheral at the same time */ - if (unlikely(reserved_gpio_map[gpio_bank(ident)] & gpio_bit(ident))) { + /* If a pin can be muxed as either GPIO or peripheral, make + * sure it is not already a GPIO pin when we request it. + */ + if (unlikely(!check_gpio(ident) && + reserved_gpio_map[gpio_bank(ident)] & gpio_bit(ident))) { dump_stack(); printk(KERN_ERR "%s: Peripheral %d is already reserved as GPIO by %s !\n", From 3529e0414b600faa1b6d822569b3343131235813 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 28 Oct 2008 16:22:41 +0800 Subject: [PATCH 0763/2985] Blackfin arch: update anomaly lists to match latest sheets Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/mach-bf527/include/mach/anomaly.h | 7 ++++--- arch/blackfin/mach-bf533/include/mach/anomaly.h | 2 ++ arch/blackfin/mach-bf537/include/mach/anomaly.h | 2 ++ arch/blackfin/mach-bf538/include/mach/anomaly.h | 14 ++++++++------ arch/blackfin/mach-bf548/include/mach/anomaly.h | 2 ++ arch/blackfin/mach-bf561/include/mach/anomaly.h | 2 ++ 6 files changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/blackfin/mach-bf527/include/mach/anomaly.h b/arch/blackfin/mach-bf527/include/mach/anomaly.h index 62373e61c585..8d09e6d5c9cd 100644 --- a/arch/blackfin/mach-bf527/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf527/include/mach/anomaly.h @@ -28,7 +28,7 @@ /* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot2 Not Supported */ #define ANOMALY_05000074 (1) /* DMA_RUN Bit Is Not Valid after a Peripheral Receive Channel DMA Stops */ -#define ANOMALY_05000119 (1) +#define ANOMALY_05000119 (1) /* note: brokenness is noted in documentation, not anomaly sheet */ /* Rx.H Cannot Be Used to Access 16-bit System MMR Registers */ #define ANOMALY_05000122 (1) /* Spurious Hardware Error from an Access in the Shadow of a Conditional Branch */ @@ -37,8 +37,6 @@ #define ANOMALY_05000265 (1) /* False Hardware Errors Caused by Fetches at the Boundary of Reserved Memory */ #define ANOMALY_05000310 (1) -/* Errors when SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted */ -#define ANOMALY_05000312 (ANOMALY_BF527) /* PPI Is Level-Sensitive on First Transfer In Single Frame Sync Modes */ #define ANOMALY_05000313 (__SILICON_REVISION__ < 2) /* Incorrect Access of OTP_STATUS During otp_write() Function */ @@ -153,6 +151,8 @@ #define ANOMALY_05000430 (ANOMALY_BF527 && __SILICON_REVISION__ > 1) /* bfrom_SysControl() Does Not Clear SIC_IWR1 Before Executing PLL Programming Sequence */ #define ANOMALY_05000432 (ANOMALY_BF526) +/* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */ +#define ANOMALY_05000443 (1) /* Anomalies that don't exist on this proc */ #define ANOMALY_05000125 (0) @@ -168,6 +168,7 @@ #define ANOMALY_05000285 (0) #define ANOMALY_05000307 (0) #define ANOMALY_05000311 (0) +#define ANOMALY_05000312 (0) #define ANOMALY_05000323 (0) #define ANOMALY_05000363 (0) diff --git a/arch/blackfin/mach-bf533/include/mach/anomaly.h b/arch/blackfin/mach-bf533/include/mach/anomaly.h index f544fc56959a..e34bc72aa96f 100644 --- a/arch/blackfin/mach-bf533/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf533/include/mach/anomaly.h @@ -194,6 +194,8 @@ #define ANOMALY_05000403 (1) /* Speculative Fetches Can Cause Undesired External FIFO Operations */ #define ANOMALY_05000416 (1) +/* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */ +#define ANOMALY_05000443 (1) /* These anomalies have been "phased" out of analog.com anomaly sheets and are * here to show running on older silicon just isn't feasible. diff --git a/arch/blackfin/mach-bf537/include/mach/anomaly.h b/arch/blackfin/mach-bf537/include/mach/anomaly.h index c68992494f9e..c6c18f8644c7 100644 --- a/arch/blackfin/mach-bf537/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf537/include/mach/anomaly.h @@ -148,6 +148,8 @@ #define ANOMALY_05000402 (__SILICON_REVISION__ >= 5) /* Level-Sensitive External GPIO Wakeups May Cause Indefinite Stall */ #define ANOMALY_05000403 (1) +/* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */ +#define ANOMALY_05000443 (1) /* Anomalies that don't exist on this proc */ #define ANOMALY_05000125 (0) diff --git a/arch/blackfin/mach-bf538/include/mach/anomaly.h b/arch/blackfin/mach-bf538/include/mach/anomaly.h index 4df618ce2a6d..80b3bd98e309 100644 --- a/arch/blackfin/mach-bf538/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf538/include/mach/anomaly.h @@ -15,7 +15,7 @@ #define _MACH_ANOMALY_H_ #if __SILICON_REVISION__ < 4 -# error will not work on BF538 silicon version 0.0, 0.1, 0.2 or 0.3 +# error will not work on BF538 silicon version 0.0, 0.1, 0.2, or 0.3 #endif /* Multi-Issue Instruction with dsp32shiftimm in slot1 and P-reg Store in slot2 Not Supported */ @@ -106,16 +106,18 @@ #define ANOMALY_05000403 (1) /* Speculative Fetches Can Cause Undesired External FIFO Operations */ #define ANOMALY_05000416 (1) +/* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */ +#define ANOMALY_05000443 (1) /* Anomalies that don't exist on this proc */ -#define ANOMALY_05000230 (0) -#define ANOMALY_05000353 (1) -#define ANOMALY_05000386 (1) -#define ANOMALY_05000198 (0) #define ANOMALY_05000158 (0) +#define ANOMALY_05000198 (0) +#define ANOMALY_05000230 (0) +#define ANOMALY_05000263 (0) #define ANOMALY_05000311 (0) #define ANOMALY_05000323 (0) -#define ANOMALY_05000263 (0) +#define ANOMALY_05000353 (1) #define ANOMALY_05000363 (0) +#define ANOMALY_05000386 (1) #endif diff --git a/arch/blackfin/mach-bf548/include/mach/anomaly.h b/arch/blackfin/mach-bf548/include/mach/anomaly.h index 816b09278f62..98f973299b87 100644 --- a/arch/blackfin/mach-bf548/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf548/include/mach/anomaly.h @@ -157,6 +157,8 @@ #define ANOMALY_05000429 (__SILICON_REVISION__ < 2) /* Software System Reset Corrupts PLL_LOCKCNT Register */ #define ANOMALY_05000430 (__SILICON_REVISION__ >= 2) +/* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */ +#define ANOMALY_05000443 (1) /* Anomalies that don't exist on this proc */ #define ANOMALY_05000125 (0) diff --git a/arch/blackfin/mach-bf561/include/mach/anomaly.h b/arch/blackfin/mach-bf561/include/mach/anomaly.h index 22990df04ae1..a1ff7c40238f 100644 --- a/arch/blackfin/mach-bf561/include/mach/anomaly.h +++ b/arch/blackfin/mach-bf561/include/mach/anomaly.h @@ -264,6 +264,8 @@ #define ANOMALY_05000371 (1) /* Level-Sensitive External GPIO Wakeups May Cause Indefinite Stall */ #define ANOMALY_05000403 (1) +/* IFLUSH Instruction at End of Hardware Loop Causes Infinite Stall */ +#define ANOMALY_05000443 (1) /* Anomalies that don't exist on this proc */ #define ANOMALY_05000158 (0) From a2ba8b19989e038bdf1a9fcc25e860d5077d2474 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Tue, 28 Oct 2008 18:19:29 +0800 Subject: [PATCH 0764/2985] Blackfin arch: lookup channel2irq() only once Add irq to struct dma_channel lookup channel2irq() only once, since channel2irq() is fairly large on some Blackfin derivatives. Signed-off-by: Michael Hennerich Signed-off-by: Bryan Wu --- arch/blackfin/include/asm/dma.h | 1 + arch/blackfin/kernel/bfin_dma_5xx.c | 24 ++++++------------------ 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/arch/blackfin/include/asm/dma.h b/arch/blackfin/include/asm/dma.h index 6509733bb0f6..9befcbae00b9 100644 --- a/arch/blackfin/include/asm/dma.h +++ b/arch/blackfin/include/asm/dma.h @@ -140,6 +140,7 @@ struct dma_channel { struct dma_register *regs; struct dmasg *sg; /* large mode descriptor */ unsigned int ctrl_num; /* controller number */ + unsigned int irq; dma_interrupt_t irq_callback; void *data; unsigned int dma_enable_flag; diff --git a/arch/blackfin/kernel/bfin_dma_5xx.c b/arch/blackfin/kernel/bfin_dma_5xx.c index ca3a26a78893..add58d219361 100644 --- a/arch/blackfin/kernel/bfin_dma_5xx.c +++ b/arch/blackfin/kernel/bfin_dma_5xx.c @@ -139,19 +139,16 @@ EXPORT_SYMBOL(request_dma); int set_dma_callback(unsigned int channel, dma_interrupt_t callback, void *data) { - int ret_irq = 0; - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE && channel < MAX_BLACKFIN_DMA_CHANNEL)); if (callback != NULL) { int ret_val; - ret_irq = channel2irq(channel); - + dma_ch[channel].irq = channel2irq(channel); dma_ch[channel].data = data; ret_val = - request_irq(ret_irq, (void *)callback, IRQF_DISABLED, + request_irq(dma_ch[channel].irq, callback, IRQF_DISABLED, dma_ch[channel].device_id, data); if (ret_val) { printk(KERN_NOTICE @@ -166,7 +163,6 @@ EXPORT_SYMBOL(set_dma_callback); void free_dma(unsigned int channel) { - int ret_irq; pr_debug("freedma() : BEGIN \n"); BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE @@ -176,10 +172,8 @@ void free_dma(unsigned int channel) disable_dma(channel); clear_dma_buffer(channel); - if (dma_ch[channel].irq_callback != NULL) { - ret_irq = channel2irq(channel); - free_irq(ret_irq, dma_ch[channel].data); - } + if (dma_ch[channel].irq_callback != NULL) + free_irq(dma_ch[channel].irq, dma_ch[channel].data); /* Clear the DMA Variable in the Channel */ mutex_lock(&(dma_ch[channel].dmalock)); @@ -192,27 +186,21 @@ EXPORT_SYMBOL(free_dma); void dma_enable_irq(unsigned int channel) { - int ret_irq; - pr_debug("dma_enable_irq() : BEGIN \n"); BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE && channel < MAX_BLACKFIN_DMA_CHANNEL)); - ret_irq = channel2irq(channel); - enable_irq(ret_irq); + enable_irq(dma_ch[channel].irq); } EXPORT_SYMBOL(dma_enable_irq); void dma_disable_irq(unsigned int channel) { - int ret_irq; - pr_debug("dma_disable_irq() : BEGIN \n"); BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE && channel < MAX_BLACKFIN_DMA_CHANNEL)); - ret_irq = channel2irq(channel); - disable_irq(ret_irq); + disable_irq(dma_ch[channel].irq); } EXPORT_SYMBOL(dma_disable_irq); From e04f9f427bca526d7752879a5b3d341628c0cc0d Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Tue, 28 Oct 2008 18:18:47 +0800 Subject: [PATCH 0765/2985] Blackfin arch: Remove useless SSYNCs in DMA code Tons of SSYNC operation will impact the DMA performance Signed-off-by: Bryan Wu --- arch/blackfin/kernel/bfin_dma_5xx.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/arch/blackfin/kernel/bfin_dma_5xx.c b/arch/blackfin/kernel/bfin_dma_5xx.c index add58d219361..35d51ac3a060 100644 --- a/arch/blackfin/kernel/bfin_dma_5xx.c +++ b/arch/blackfin/kernel/bfin_dma_5xx.c @@ -53,7 +53,6 @@ static void clear_dma_buffer(unsigned int channel) dma_ch[channel].regs->cfg |= RESTART; SSYNC(); dma_ch[channel].regs->cfg &= ~RESTART; - SSYNC(); } static int __init blackfin_dma_init(void) @@ -245,7 +244,6 @@ void enable_dma(unsigned int channel) dma_ch[channel].regs->curr_y_count = 0; dma_ch[channel].regs->cfg |= DMAEN; /* Set the enable bit */ - SSYNC(); pr_debug("enable_dma() : END \n"); return; } @@ -265,7 +263,6 @@ void set_dma_start_addr(unsigned int channel, unsigned long addr) && channel < MAX_BLACKFIN_DMA_CHANNEL)); dma_ch[channel].regs->start_addr = addr; - SSYNC(); pr_debug("set_dma_start_addr() : END\n"); } EXPORT_SYMBOL(set_dma_start_addr); @@ -278,7 +275,6 @@ void set_dma_next_desc_addr(unsigned int channel, unsigned long addr) && channel < MAX_BLACKFIN_DMA_CHANNEL)); dma_ch[channel].regs->next_desc_ptr = addr; - SSYNC(); pr_debug("set_dma_next_desc_addr() : END\n"); } EXPORT_SYMBOL(set_dma_next_desc_addr); @@ -291,7 +287,6 @@ void set_dma_curr_desc_addr(unsigned int channel, unsigned long addr) && channel < MAX_BLACKFIN_DMA_CHANNEL)); dma_ch[channel].regs->curr_desc_ptr = addr; - SSYNC(); pr_debug("set_dma_curr_desc_addr() : END\n"); } EXPORT_SYMBOL(set_dma_curr_desc_addr); @@ -302,7 +297,6 @@ void set_dma_x_count(unsigned int channel, unsigned short x_count) && channel < MAX_BLACKFIN_DMA_CHANNEL)); dma_ch[channel].regs->x_count = x_count; - SSYNC(); } EXPORT_SYMBOL(set_dma_x_count); @@ -312,7 +306,6 @@ void set_dma_y_count(unsigned int channel, unsigned short y_count) && channel < MAX_BLACKFIN_DMA_CHANNEL)); dma_ch[channel].regs->y_count = y_count; - SSYNC(); } EXPORT_SYMBOL(set_dma_y_count); @@ -322,7 +315,6 @@ void set_dma_x_modify(unsigned int channel, short x_modify) && channel < MAX_BLACKFIN_DMA_CHANNEL)); dma_ch[channel].regs->x_modify = x_modify; - SSYNC(); } EXPORT_SYMBOL(set_dma_x_modify); @@ -332,7 +324,6 @@ void set_dma_y_modify(unsigned int channel, short y_modify) && channel < MAX_BLACKFIN_DMA_CHANNEL)); dma_ch[channel].regs->y_modify = y_modify; - SSYNC(); } EXPORT_SYMBOL(set_dma_y_modify); @@ -342,7 +333,7 @@ void set_dma_config(unsigned int channel, unsigned short config) && channel < MAX_BLACKFIN_DMA_CHANNEL)); dma_ch[channel].regs->cfg = config; - SSYNC(); + } EXPORT_SYMBOL(set_dma_config); @@ -367,8 +358,6 @@ void set_dma_sg(unsigned int channel, struct dmasg *sg, int nr_sg) dma_ch[channel].regs->cfg |= ((nr_sg & 0x0F) << 8); dma_ch[channel].regs->next_desc_ptr = (unsigned int)sg; - - SSYNC(); } EXPORT_SYMBOL(set_dma_sg); @@ -378,7 +367,6 @@ void set_dma_curr_addr(unsigned int channel, unsigned long addr) && channel < MAX_BLACKFIN_DMA_CHANNEL)); dma_ch[channel].regs->curr_addr_ptr = addr; - SSYNC(); } EXPORT_SYMBOL(set_dma_curr_addr); From 2cf851137b55cd0c49fd9e005cd01ac4761c005e Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 28 Oct 2008 16:34:42 +0800 Subject: [PATCH 0766/2985] Blackfin arch: only add IFLUSH nop padding when anomaly 443 is enabled Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/mach-common/cache.S | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/blackfin/mach-common/cache.S b/arch/blackfin/mach-common/cache.S index a028e9450419..3c98dacbf289 100644 --- a/arch/blackfin/mach-common/cache.S +++ b/arch/blackfin/mach-common/cache.S @@ -49,13 +49,17 @@ .ifnb \optflushins \optflushins [P0]; .endif +#if ANOMALY_05000443 .ifb \optnopins 2: .endif \flushins [P0++]; .ifnb \optnopins -2: \optnopins; +2: \optnopins; .endif +#else +2: \flushins [P0++]; +#endif RTS; .endm From 8f72fbdf0d92e6127583cc548bf043c60cd4720f Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Wed, 29 Oct 2008 17:13:08 -0400 Subject: [PATCH 0767/2985] ext4: fix printk format warning fs/ext4/balloc.c:607: warning: format '%lld' expects type 'long long int', but argument 2 has type 's64' fs/ext4/inode.c:1822: warning: format '%lld' expects type 'long long int', but argument 2 has type 's64' fs/ext4/inode.c:1824: warning: format '%lld' expects type 'long long int', but argument 2 has type 's64' Signed-off-by: Alexander Beregalov Signed-off-by: Theodore Ts'o --- fs/ext4/balloc.c | 2 +- fs/ext4/inode.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 38b3acf5683b..152c390f3c3f 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -614,7 +614,7 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) if (dirty_blocks < 0) { printk(KERN_CRIT "Dirty block accounting " "went wrong %lld\n", - dirty_blocks); + (long long)dirty_blocks); } } /* Check whether we have space after diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6702a49992a6..5b088121686a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1831,9 +1831,9 @@ static void ext4_print_free_blocks(struct inode *inode) ext4_count_free_blocks(inode->i_sb)); printk(KERN_EMERG "Free/Dirty block details\n"); printk(KERN_EMERG "free_blocks=%lld\n", - percpu_counter_sum(&sbi->s_freeblocks_counter)); + (long long)percpu_counter_sum(&sbi->s_freeblocks_counter)); printk(KERN_EMERG "dirty_blocks=%lld\n", - percpu_counter_sum(&sbi->s_dirtyblocks_counter)); + (long long)percpu_counter_sum(&sbi->s_dirtyblocks_counter)); printk(KERN_EMERG "Block reservation details\n"); printk(KERN_EMERG "i_reserved_data_blocks=%lu\n", EXT4_I(inode)->i_reserved_data_blocks); From 5e1f8c9e20a92743eefc9a82c2db835213905e26 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 28 Oct 2008 13:21:55 -0400 Subject: [PATCH 0768/2985] ext3: Add support for non-native signed/unsigned htree hash algorithms The original ext3 hash algorithms assumed that variables of type char were signed, as God and K&R intended. Unfortunately, this assumption is not true on some architectures. Userspace support for marking filesystems with non-native signed/unsigned chars was added two years ago, but the kernel-side support was never added (until now). Signed-off-by: "Theodore Ts'o" Cc: akpm@linux-foundation.org Cc: linux-kernel@vger.kernel.org --- fs/ext3/hash.c | 81 ++++++++++++++++++++++++++++++++------ fs/ext3/namei.c | 7 ++++ fs/ext3/super.c | 12 ++++++ include/linux/ext3_fs.h | 28 ++++++++++++- include/linux/ext3_fs_sb.h | 1 + 5 files changed, 116 insertions(+), 13 deletions(-) diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c index c30e149fbd2e..7d215b4d4f2e 100644 --- a/fs/ext3/hash.c +++ b/fs/ext3/hash.c @@ -35,23 +35,71 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) /* The old legacy hash */ -static __u32 dx_hack_hash (const char *name, int len) +static __u32 dx_hack_hash_unsigned(const char *name, int len) { - __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; - while (len--) { - __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); + __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; + const unsigned char *ucp = (const unsigned char *) name; - if (hash & 0x80000000) hash -= 0x7fffffff; + while (len--) { + hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373)); + + if (hash & 0x80000000) + hash -= 0x7fffffff; hash1 = hash0; hash0 = hash; } - return (hash0 << 1); + return hash0 << 1; } -static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) +static __u32 dx_hack_hash_signed(const char *name, int len) +{ + __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; + const signed char *scp = (const signed char *) name; + + while (len--) { + hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373)); + + if (hash & 0x80000000) + hash -= 0x7fffffff; + hash1 = hash0; + hash0 = hash; + } + return hash0 << 1; +} + +static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num) { __u32 pad, val; int i; + const signed char *scp = (const signed char *) msg; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > num*4) + len = num * 4; + for (i = 0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + val = ((int) scp[i]) + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num) +{ + __u32 pad, val; + int i; + const unsigned char *ucp = (const unsigned char *) msg; pad = (__u32)len | ((__u32)len << 8); pad |= pad << 16; @@ -62,7 +110,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) for (i=0; i < len; i++) { if ((i % 4) == 0) val = pad; - val = msg[i] + (val << 8); + val = ((int) ucp[i]) + (val << 8); if ((i % 4) == 3) { *buf++ = val; val = pad; @@ -95,6 +143,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) const char *p; int i; __u32 in[8], buf[4]; + void (*str2hashbuf)(const char *, int, __u32 *, int) = + str2hashbuf_signed; /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; @@ -113,13 +163,18 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) } switch (hinfo->hash_version) { - case DX_HASH_LEGACY: - hash = dx_hack_hash(name, len); + case DX_HASH_LEGACY_UNSIGNED: + hash = dx_hack_hash_unsigned(name, len); break; + case DX_HASH_LEGACY: + hash = dx_hack_hash_signed(name, len); + break; + case DX_HASH_HALF_MD4_UNSIGNED: + str2hashbuf = str2hashbuf_unsigned; case DX_HASH_HALF_MD4: p = name; while (len > 0) { - str2hashbuf(p, len, in, 8); + (*str2hashbuf)(p, len, in, 8); half_md4_transform(buf, in); len -= 32; p += 32; @@ -127,10 +182,12 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) minor_hash = buf[2]; hash = buf[1]; break; + case DX_HASH_TEA_UNSIGNED: + str2hashbuf = str2hashbuf_unsigned; case DX_HASH_TEA: p = name; while (len > 0) { - str2hashbuf(p, len, in, 4); + (*str2hashbuf)(p, len, in, 4); TEA_transform(buf, in); len -= 16; p += 16; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 1dd2abe6313e..287b304d42a7 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -368,6 +368,8 @@ dx_probe(struct qstr *entry, struct inode *dir, goto fail; } hinfo->hash_version = root->info.hash_version; + if (hinfo->hash_version <= DX_HASH_TEA) + hinfo->hash_version += EXT3_SB(dir->i_sb)->s_hash_unsigned; hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed; if (entry) ext3fs_dirhash(entry->name, entry->len, hinfo); @@ -636,6 +638,9 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, dir = dir_file->f_path.dentry->d_inode; if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) { hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; + if (hinfo.hash_version <= DX_HASH_TEA) + hinfo.hash_version += + EXT3_SB(dir->i_sb)->s_hash_unsigned; hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed; count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, start_hash, start_minor_hash); @@ -1398,6 +1403,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, /* Initialize as for dx_probe */ hinfo.hash_version = root->info.hash_version; + if (hinfo.hash_version <= DX_HASH_TEA) + hinfo.hash_version += EXT3_SB(dir->i_sb)->s_hash_unsigned; hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed; ext3fs_dirhash(name, namelen, &hinfo); frame = frames; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f6c94f232ec1..541d5e4f7f6e 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1744,6 +1744,18 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) for (i=0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; + i = le32_to_cpu(es->s_flags); + if (i & EXT2_FLAGS_UNSIGNED_HASH) + sbi->s_hash_unsigned = 3; + else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { +#ifdef __CHAR_UNSIGNED__ + es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); + sbi->s_hash_unsigned = 3; +#else + es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); +#endif + sb->s_dirt = 1; + } if (sbi->s_blocks_per_group > blocksize * 8) { printk (KERN_ERR diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index d14f02918483..9004794a35fe 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -353,6 +353,13 @@ struct ext3_inode { #define EXT3_ERROR_FS 0x0002 /* Errors detected */ #define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */ +/* + * Misc. filesystem flags + */ +#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ +#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ +#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */ + /* * Mount flags */ @@ -489,7 +496,23 @@ struct ext3_super_block { __u16 s_reserved_word_pad; __le32 s_default_mount_opts; __le32 s_first_meta_bg; /* First metablock block group */ - __u32 s_reserved[190]; /* Padding to the end of the block */ + __le32 s_mkfs_time; /* When the filesystem was created */ + __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ + /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ +/*150*/ __le32 s_blocks_count_hi; /* Blocks count */ + __le32 s_r_blocks_count_hi; /* Reserved blocks count */ + __le32 s_free_blocks_count_hi; /* Free blocks count */ + __le16 s_min_extra_isize; /* All inodes have at least # bytes */ + __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ + __le32 s_flags; /* Miscellaneous flags */ + __le16 s_raid_stride; /* RAID stride */ + __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ + __le64 s_mmp_block; /* Block for multi-mount protection */ + __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ + __u8 s_log_groups_per_flex; /* FLEX_BG group size */ + __u8 s_reserved_char_pad2; + __le16 s_reserved_pad; + __u32 s_reserved[162]; /* Padding to the end of the block */ }; #ifdef __KERNEL__ @@ -694,6 +717,9 @@ static inline __le16 ext3_rec_len_to_disk(unsigned len) #define DX_HASH_LEGACY 0 #define DX_HASH_HALF_MD4 1 #define DX_HASH_TEA 2 +#define DX_HASH_LEGACY_UNSIGNED 3 +#define DX_HASH_HALF_MD4_UNSIGNED 4 +#define DX_HASH_TEA_UNSIGNED 5 #ifdef __KERNEL__ diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h index e024e38248ff..a4e9216b3a6d 100644 --- a/include/linux/ext3_fs_sb.h +++ b/include/linux/ext3_fs_sb.h @@ -57,6 +57,7 @@ struct ext3_sb_info { u32 s_next_generation; u32 s_hash_seed[4]; int s_def_hash_version; + int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; From f99b25897a86fcfff9140396a97261ae65fed872 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 28 Oct 2008 13:21:44 -0400 Subject: [PATCH 0769/2985] ext4: Add support for non-native signed/unsigned htree hash algorithms The original ext3 hash algorithms assumed that variables of type char were signed, as God and K&R intended. Unfortunately, this assumption is not true on some architectures. Userspace support for marking filesystems with non-native signed/unsigned chars was added two years ago, but the kernel-side support was never added (until now). Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 ++ fs/ext4/ext4_sb.h | 1 + fs/ext4/hash.c | 81 ++++++++++++++++++++++++++++++++++++++++------- fs/ext4/namei.c | 7 ++++ fs/ext4/super.c | 12 +++++++ 5 files changed, 92 insertions(+), 12 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b0537c827024..8370ffd2d62f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -891,6 +891,9 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len) #define DX_HASH_LEGACY 0 #define DX_HASH_HALF_MD4 1 #define DX_HASH_TEA 2 +#define DX_HASH_LEGACY_UNSIGNED 3 +#define DX_HASH_HALF_MD4_UNSIGNED 4 +#define DX_HASH_TEA_UNSIGNED 5 #ifdef __KERNEL__ diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index b21f16713db0..ad7ea09baa72 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h @@ -57,6 +57,7 @@ struct ext4_sb_info { u32 s_next_generation; u32 s_hash_seed[4]; int s_def_hash_version; + int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 556ca8eba3db..ac8f168c8ab4 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -35,23 +35,43 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) /* The old legacy hash */ -static __u32 dx_hack_hash(const char *name, int len) +static __u32 dx_hack_hash_unsigned(const char *name, int len) { - __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; - while (len--) { - __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); + __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; + const unsigned char *ucp = (const unsigned char *) name; - if (hash & 0x80000000) hash -= 0x7fffffff; + while (len--) { + hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373)); + + if (hash & 0x80000000) + hash -= 0x7fffffff; hash1 = hash0; hash0 = hash; } - return (hash0 << 1); + return hash0 << 1; } -static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) +static __u32 dx_hack_hash_signed(const char *name, int len) +{ + __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; + const signed char *scp = (const signed char *) name; + + while (len--) { + hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373)); + + if (hash & 0x80000000) + hash -= 0x7fffffff; + hash1 = hash0; + hash0 = hash; + } + return hash0 << 1; +} + +static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num) { __u32 pad, val; int i; + const signed char *scp = (const signed char *) msg; pad = (__u32)len | ((__u32)len << 8); pad |= pad << 16; @@ -62,7 +82,35 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) for (i = 0; i < len; i++) { if ((i % 4) == 0) val = pad; - val = msg[i] + (val << 8); + val = ((int) scp[i]) + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num) +{ + __u32 pad, val; + int i; + const unsigned char *ucp = (const unsigned char *) msg; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > num*4) + len = num * 4; + for (i = 0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + val = ((int) ucp[i]) + (val << 8); if ((i % 4) == 3) { *buf++ = val; val = pad; @@ -95,6 +143,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) const char *p; int i; __u32 in[8], buf[4]; + void (*str2hashbuf)(const char *, int, __u32 *, int) = + str2hashbuf_signed; /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; @@ -113,13 +163,18 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) } switch (hinfo->hash_version) { - case DX_HASH_LEGACY: - hash = dx_hack_hash(name, len); + case DX_HASH_LEGACY_UNSIGNED: + hash = dx_hack_hash_unsigned(name, len); break; + case DX_HASH_LEGACY: + hash = dx_hack_hash_signed(name, len); + break; + case DX_HASH_HALF_MD4_UNSIGNED: + str2hashbuf = str2hashbuf_unsigned; case DX_HASH_HALF_MD4: p = name; while (len > 0) { - str2hashbuf(p, len, in, 8); + (*str2hashbuf)(p, len, in, 8); half_md4_transform(buf, in); len -= 32; p += 32; @@ -127,10 +182,12 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) minor_hash = buf[2]; hash = buf[1]; break; + case DX_HASH_TEA_UNSIGNED: + str2hashbuf = str2hashbuf_unsigned; case DX_HASH_TEA: p = name; while (len > 0) { - str2hashbuf(p, len, in, 4); + (*str2hashbuf)(p, len, in, 4); TEA_transform(buf, in); len -= 16; p += 16; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 9fd2a5e1be4d..315858db8078 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -372,6 +372,8 @@ dx_probe(const struct qstr *d_name, struct inode *dir, goto fail; } hinfo->hash_version = root->info.hash_version; + if (hinfo->hash_version <= DX_HASH_TEA) + hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; if (d_name) ext4fs_dirhash(d_name->name, d_name->len, hinfo); @@ -641,6 +643,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, dir = dir_file->f_path.dentry->d_inode; if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; + if (hinfo.hash_version <= DX_HASH_TEA) + hinfo.hash_version += + EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, start_hash, start_minor_hash); @@ -1408,6 +1413,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, /* Initialize as for dx_probe */ hinfo.hash_version = root->info.hash_version; + if (hinfo.hash_version <= DX_HASH_TEA) + hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; ext4fs_dirhash(name, namelen, &hinfo); frame = frames; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 04158ad74dbb..08fc86a358d3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2118,6 +2118,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; + i = le32_to_cpu(es->s_flags); + if (i & EXT2_FLAGS_UNSIGNED_HASH) + sbi->s_hash_unsigned = 3; + else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { +#ifdef __CHAR_UNSIGNED__ + es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); + sbi->s_hash_unsigned = 3; +#else + es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); +#endif + sb->s_dirt = 1; + } if (sbi->s_blocks_per_group > blocksize * 8) { printk(KERN_ERR From abeb21efb10cd9e980f611c9bb408f172ed44465 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 29 Oct 2008 11:06:03 +0800 Subject: [PATCH 0770/2985] Blackfin arch: remove most BUG_ON channel checks keep BUG_ON in DMA request, free and set_dma_callback. Signed-off-by: Michael Hennerich Signed-off-by: Bryan Wu --- arch/blackfin/kernel/bfin_dma_5xx.c | 70 ----------------------------- 1 file changed, 70 deletions(-) diff --git a/arch/blackfin/kernel/bfin_dma_5xx.c b/arch/blackfin/kernel/bfin_dma_5xx.c index 35d51ac3a060..a778bc80dc52 100644 --- a/arch/blackfin/kernel/bfin_dma_5xx.c +++ b/arch/blackfin/kernel/bfin_dma_5xx.c @@ -162,7 +162,6 @@ EXPORT_SYMBOL(set_dma_callback); void free_dma(unsigned int channel) { - pr_debug("freedma() : BEGIN \n"); BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE && channel < MAX_BLACKFIN_DMA_CHANNEL)); @@ -186,9 +185,6 @@ EXPORT_SYMBOL(free_dma); void dma_enable_irq(unsigned int channel) { pr_debug("dma_enable_irq() : BEGIN \n"); - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - enable_irq(dma_ch[channel].irq); } EXPORT_SYMBOL(dma_enable_irq); @@ -196,9 +192,6 @@ EXPORT_SYMBOL(dma_enable_irq); void dma_disable_irq(unsigned int channel) { pr_debug("dma_disable_irq() : BEGIN \n"); - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - disable_irq(dma_ch[channel].irq); } EXPORT_SYMBOL(dma_disable_irq); @@ -219,10 +212,6 @@ EXPORT_SYMBOL(dma_channel_active); void disable_dma(unsigned int channel) { pr_debug("stop_dma() : BEGIN \n"); - - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - dma_ch[channel].regs->cfg &= ~DMAEN; /* Clean the enable bit */ SSYNC(); dma_ch[channel].chan_status = DMA_CHANNEL_REQUESTED; @@ -235,10 +224,6 @@ EXPORT_SYMBOL(disable_dma); void enable_dma(unsigned int channel) { pr_debug("enable_dma() : BEGIN \n"); - - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - dma_ch[channel].chan_status = DMA_CHANNEL_ENABLED; dma_ch[channel].regs->curr_x_count = 0; dma_ch[channel].regs->curr_y_count = 0; @@ -258,10 +243,6 @@ EXPORT_SYMBOL(enable_dma); void set_dma_start_addr(unsigned int channel, unsigned long addr) { pr_debug("set_dma_start_addr() : BEGIN \n"); - - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - dma_ch[channel].regs->start_addr = addr; pr_debug("set_dma_start_addr() : END\n"); } @@ -270,10 +251,6 @@ EXPORT_SYMBOL(set_dma_start_addr); void set_dma_next_desc_addr(unsigned int channel, unsigned long addr) { pr_debug("set_dma_next_desc_addr() : BEGIN \n"); - - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - dma_ch[channel].regs->next_desc_ptr = addr; pr_debug("set_dma_next_desc_addr() : END\n"); } @@ -282,10 +259,6 @@ EXPORT_SYMBOL(set_dma_next_desc_addr); void set_dma_curr_desc_addr(unsigned int channel, unsigned long addr) { pr_debug("set_dma_curr_desc_addr() : BEGIN \n"); - - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - dma_ch[channel].regs->curr_desc_ptr = addr; pr_debug("set_dma_curr_desc_addr() : END\n"); } @@ -293,47 +266,31 @@ EXPORT_SYMBOL(set_dma_curr_desc_addr); void set_dma_x_count(unsigned int channel, unsigned short x_count) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - dma_ch[channel].regs->x_count = x_count; } EXPORT_SYMBOL(set_dma_x_count); void set_dma_y_count(unsigned int channel, unsigned short y_count) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - dma_ch[channel].regs->y_count = y_count; } EXPORT_SYMBOL(set_dma_y_count); void set_dma_x_modify(unsigned int channel, short x_modify) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - dma_ch[channel].regs->x_modify = x_modify; } EXPORT_SYMBOL(set_dma_x_modify); void set_dma_y_modify(unsigned int channel, short y_modify) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - dma_ch[channel].regs->y_modify = y_modify; } EXPORT_SYMBOL(set_dma_y_modify); void set_dma_config(unsigned int channel, unsigned short config) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - dma_ch[channel].regs->cfg = config; - } EXPORT_SYMBOL(set_dma_config); @@ -352,20 +309,13 @@ EXPORT_SYMBOL(set_bfin_dma_config); void set_dma_sg(unsigned int channel, struct dmasg *sg, int nr_sg) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - dma_ch[channel].regs->cfg |= ((nr_sg & 0x0F) << 8); - dma_ch[channel].regs->next_desc_ptr = (unsigned int)sg; } EXPORT_SYMBOL(set_dma_sg); void set_dma_curr_addr(unsigned int channel, unsigned long addr) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - dma_ch[channel].regs->curr_addr_ptr = addr; } EXPORT_SYMBOL(set_dma_curr_addr); @@ -375,9 +325,6 @@ EXPORT_SYMBOL(set_dma_curr_addr); *-----------------------------------------------------------------------------*/ unsigned short get_dma_curr_irqstat(unsigned int channel) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - return dma_ch[channel].regs->irq_status; } EXPORT_SYMBOL(get_dma_curr_irqstat); @@ -387,8 +334,6 @@ EXPORT_SYMBOL(get_dma_curr_irqstat); *-----------------------------------------------------------------------------*/ void clear_dma_irqstat(unsigned int channel) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); dma_ch[channel].regs->irq_status |= 3; } EXPORT_SYMBOL(clear_dma_irqstat); @@ -398,9 +343,6 @@ EXPORT_SYMBOL(clear_dma_irqstat); *-----------------------------------------------------------------------------*/ unsigned short get_dma_curr_xcount(unsigned int channel) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - return dma_ch[channel].regs->curr_x_count; } EXPORT_SYMBOL(get_dma_curr_xcount); @@ -410,36 +352,24 @@ EXPORT_SYMBOL(get_dma_curr_xcount); *-----------------------------------------------------------------------------*/ unsigned short get_dma_curr_ycount(unsigned int channel) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - return dma_ch[channel].regs->curr_y_count; } EXPORT_SYMBOL(get_dma_curr_ycount); unsigned long get_dma_next_desc_ptr(unsigned int channel) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - return dma_ch[channel].regs->next_desc_ptr; } EXPORT_SYMBOL(get_dma_next_desc_ptr); unsigned long get_dma_curr_desc_ptr(unsigned int channel) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - return dma_ch[channel].regs->curr_desc_ptr; } EXPORT_SYMBOL(get_dma_curr_desc_ptr); unsigned long get_dma_curr_addr(unsigned int channel) { - BUG_ON(!(dma_ch[channel].chan_status != DMA_CHANNEL_FREE - && channel < MAX_BLACKFIN_DMA_CHANNEL)); - return dma_ch[channel].regs->curr_addr_ptr; } EXPORT_SYMBOL(get_dma_curr_addr); From 09779678d12482024e06380cacc4c3ff2f129f23 Mon Sep 17 00:00:00 2001 From: Stelian Pop Date: Tue, 28 Oct 2008 23:20:46 -0400 Subject: [PATCH 0771/2985] Input: appletouch - driver refactoring The appletouch driver has grown up from supporting only a couple of touchpads into supporting many touchpads, which can have different number of sensors, different aspect ratios etc. This patch cleans up the current driver code and makes it easy to support the features of each different touchpad. As a side effect, this patch also modifies the 'Y' multiplication factor of the 'geyser3' and 'geyser4' touchpads (found on Core Duo and Core2 Duo MacBook and MacBook Pro laptops) in order to make the touchpad output match the aspect ratio of the touchpad (Y factor changed from 43 to 64). [dtor@mail.ru: make atp_info constant] Signed-off-by: Stelian Pop Acked-by: Johannes Berg Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/appletouch.c | 274 +++++++++++++++---------------- 1 file changed, 136 insertions(+), 138 deletions(-) diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c index 079816e6b23b..454b96112f03 100644 --- a/drivers/input/mouse/appletouch.c +++ b/drivers/input/mouse/appletouch.c @@ -3,7 +3,7 @@ * * Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com) * Copyright (C) 2005-2008 Johannes Berg (johannes@sipsolutions.net) - * Copyright (C) 2005 Stelian Pop (stelian@popies.net) + * Copyright (C) 2005-2008 Stelian Pop (stelian@popies.net) * Copyright (C) 2005 Frank Arnold (frank@scirocco-5v-turbo.de) * Copyright (C) 2005 Peter Osterlund (petero2@telia.com) * Copyright (C) 2005 Michael Hanselmann (linux-kernel@hansmi.ch) @@ -35,16 +35,74 @@ #include #include -/* Type of touchpad */ -enum atp_touchpad_type { - ATP_FOUNTAIN, - ATP_GEYSER1, - ATP_GEYSER2, - ATP_GEYSER3, - ATP_GEYSER4 +/* + * Note: We try to keep the touchpad aspect ratio while still doing only + * simple arithmetics: + * 0 <= x <= (xsensors - 1) * xfact + * 0 <= y <= (ysensors - 1) * yfact + */ +struct atp_info { + int xsensors; /* number of X sensors */ + int xsensors_17; /* 17" models have more sensors */ + int ysensors; /* number of Y sensors */ + int xfact; /* X multiplication factor */ + int yfact; /* Y multiplication factor */ + int datalen; /* size of USB transfers */ + void (*callback)(struct urb *); /* callback function */ }; -#define ATP_DEVICE(prod, type) \ +static void atp_complete_geyser_1_2(struct urb *urb); +static void atp_complete_geyser_3_4(struct urb *urb); + +static const struct atp_info fountain_info = { + .xsensors = 16, + .xsensors_17 = 26, + .ysensors = 16, + .xfact = 64, + .yfact = 43, + .datalen = 81, + .callback = atp_complete_geyser_1_2, +}; + +static const struct atp_info geyser1_info = { + .xsensors = 16, + .xsensors_17 = 26, + .ysensors = 16, + .xfact = 64, + .yfact = 43, + .datalen = 81, + .callback = atp_complete_geyser_1_2, +}; + +static const struct atp_info geyser2_info = { + .xsensors = 15, + .xsensors_17 = 20, + .ysensors = 9, + .xfact = 64, + .yfact = 43, + .datalen = 64, + .callback = atp_complete_geyser_1_2, +}; + +static const struct atp_info geyser3_info = { + .xsensors = 20, + .ysensors = 10, + .xfact = 64, + .yfact = 64, + .datalen = 64, + .callback = atp_complete_geyser_3_4, +}; + +static const struct atp_info geyser4_info = { + .xsensors = 20, + .ysensors = 10, + .xfact = 64, + .yfact = 64, + .datalen = 64, + .callback = atp_complete_geyser_3_4, +}; + +#define ATP_DEVICE(prod, info) \ { \ .match_flags = USB_DEVICE_ID_MATCH_DEVICE | \ USB_DEVICE_ID_MATCH_INT_CLASS | \ @@ -53,7 +111,7 @@ enum atp_touchpad_type { .idProduct = (prod), \ .bInterfaceClass = 0x03, \ .bInterfaceProtocol = 0x02, \ - .driver_info = ATP_ ## type, \ + .driver_info = (unsigned long) &info, \ } /* @@ -62,43 +120,39 @@ enum atp_touchpad_type { * According to Info.plist Geyser IV is the same as Geyser III.) */ -static struct usb_device_id atp_table [] = { +static struct usb_device_id atp_table[] = { /* PowerBooks Feb 2005, iBooks G4 */ - ATP_DEVICE(0x020e, FOUNTAIN), /* FOUNTAIN ANSI */ - ATP_DEVICE(0x020f, FOUNTAIN), /* FOUNTAIN ISO */ - ATP_DEVICE(0x030a, FOUNTAIN), /* FOUNTAIN TP ONLY */ - ATP_DEVICE(0x030b, GEYSER1), /* GEYSER 1 TP ONLY */ + ATP_DEVICE(0x020e, fountain_info), /* FOUNTAIN ANSI */ + ATP_DEVICE(0x020f, fountain_info), /* FOUNTAIN ISO */ + ATP_DEVICE(0x030a, fountain_info), /* FOUNTAIN TP ONLY */ + ATP_DEVICE(0x030b, geyser1_info), /* GEYSER 1 TP ONLY */ /* PowerBooks Oct 2005 */ - ATP_DEVICE(0x0214, GEYSER2), /* GEYSER 2 ANSI */ - ATP_DEVICE(0x0215, GEYSER2), /* GEYSER 2 ISO */ - ATP_DEVICE(0x0216, GEYSER2), /* GEYSER 2 JIS */ + ATP_DEVICE(0x0214, geyser2_info), /* GEYSER 2 ANSI */ + ATP_DEVICE(0x0215, geyser2_info), /* GEYSER 2 ISO */ + ATP_DEVICE(0x0216, geyser2_info), /* GEYSER 2 JIS */ /* Core Duo MacBook & MacBook Pro */ - ATP_DEVICE(0x0217, GEYSER3), /* GEYSER 3 ANSI */ - ATP_DEVICE(0x0218, GEYSER3), /* GEYSER 3 ISO */ - ATP_DEVICE(0x0219, GEYSER3), /* GEYSER 3 JIS */ + ATP_DEVICE(0x0217, geyser3_info), /* GEYSER 3 ANSI */ + ATP_DEVICE(0x0218, geyser3_info), /* GEYSER 3 ISO */ + ATP_DEVICE(0x0219, geyser3_info), /* GEYSER 3 JIS */ /* Core2 Duo MacBook & MacBook Pro */ - ATP_DEVICE(0x021a, GEYSER4), /* GEYSER 4 ANSI */ - ATP_DEVICE(0x021b, GEYSER4), /* GEYSER 4 ISO */ - ATP_DEVICE(0x021c, GEYSER4), /* GEYSER 4 JIS */ + ATP_DEVICE(0x021a, geyser4_info), /* GEYSER 4 ANSI */ + ATP_DEVICE(0x021b, geyser4_info), /* GEYSER 4 ISO */ + ATP_DEVICE(0x021c, geyser4_info), /* GEYSER 4 JIS */ /* Core2 Duo MacBook3,1 */ - ATP_DEVICE(0x0229, GEYSER4), /* GEYSER 4 HF ANSI */ - ATP_DEVICE(0x022a, GEYSER4), /* GEYSER 4 HF ISO */ - ATP_DEVICE(0x022b, GEYSER4), /* GEYSER 4 HF JIS */ + ATP_DEVICE(0x0229, geyser4_info), /* GEYSER 4 HF ANSI */ + ATP_DEVICE(0x022a, geyser4_info), /* GEYSER 4 HF ISO */ + ATP_DEVICE(0x022b, geyser4_info), /* GEYSER 4 HF JIS */ /* Terminating entry */ { } }; MODULE_DEVICE_TABLE(usb, atp_table); -/* - * number of sensors. Note that only 16 instead of 26 X (horizontal) - * sensors exist on 12" and 15" PowerBooks. All models have 16 Y - * (vertical) sensors. - */ +/* maximum number of sensors */ #define ATP_XSENSORS 26 #define ATP_YSENSORS 16 @@ -107,21 +161,6 @@ MODULE_DEVICE_TABLE(usb, atp_table); /* maximum pressure this driver will report */ #define ATP_PRESSURE 300 -/* - * multiplication factor for the X and Y coordinates. - * We try to keep the touchpad aspect ratio while still doing only simple - * arithmetics. - * The factors below give coordinates like: - * - * 0 <= x < 960 on 12" and 15" Powerbooks - * 0 <= x < 1600 on 17" Powerbooks and 17" MacBook Pro - * 0 <= x < 1216 on MacBooks and 15" MacBook Pro - * - * 0 <= y < 646 on all Powerbooks - * 0 <= y < 774 on all MacBooks - */ -#define ATP_XFACT 64 -#define ATP_YFACT 43 /* * Threshold for the touchpad sensors. Any change less than ATP_THRESHOLD is @@ -159,7 +198,7 @@ struct atp { struct urb *urb; /* usb request block */ u8 *data; /* transferred data */ struct input_dev *input; /* input dev */ - enum atp_touchpad_type type; /* type of touchpad */ + const struct atp_info *info; /* touchpad model */ bool open; bool valid; /* are the samples valid? */ bool size_detect_done; @@ -169,7 +208,6 @@ struct atp { signed char xy_cur[ATP_XSENSORS + ATP_YSENSORS]; signed char xy_old[ATP_XSENSORS + ATP_YSENSORS]; int xy_acc[ATP_XSENSORS + ATP_YSENSORS]; - int datalen; /* size of USB transfer */ int idlecount; /* number of empty packets */ struct work_struct work; }; @@ -359,7 +397,7 @@ static int atp_status_check(struct urb *urb) if (!dev->overflow_warned) { printk(KERN_WARNING "appletouch: OVERFLOW with data " "length %d, actual length is %d\n", - dev->datalen, dev->urb->actual_length); + dev->info->datalen, dev->urb->actual_length); dev->overflow_warned = true; } case -ECONNRESET: @@ -377,7 +415,7 @@ static int atp_status_check(struct urb *urb) } /* drop incomplete datasets */ - if (dev->urb->actual_length != dev->datalen) { + if (dev->urb->actual_length != dev->info->datalen) { dprintk("appletouch: incomplete data package" " (first byte: %d, length: %d).\n", dev->data[0], dev->urb->actual_length); @@ -387,6 +425,25 @@ static int atp_status_check(struct urb *urb) return ATP_URB_STATUS_SUCCESS; } +static void atp_detect_size(struct atp *dev) +{ + int i; + + /* 17" Powerbooks have extra X sensors */ + for (i = dev->info->xsensors; i < ATP_XSENSORS; i++) { + if (dev->xy_cur[i]) { + + printk(KERN_INFO "appletouch: 17\" model detected.\n"); + + input_set_abs_params(dev->input, ABS_X, 0, + (dev->info->xsensors_17 - 1) * + dev->info->xfact - 1, + ATP_FUZZ, 0); + break; + } + } +} + /* * USB interrupt callback functions */ @@ -407,7 +464,7 @@ static void atp_complete_geyser_1_2(struct urb *urb) goto exit; /* reorder the sensors values */ - if (dev->type == ATP_GEYSER2) { + if (dev->info == &geyser2_info) { memset(dev->xy_cur, 0, sizeof(dev->xy_cur)); /* @@ -437,8 +494,8 @@ static void atp_complete_geyser_1_2(struct urb *urb) dev->xy_cur[i + 24] = dev->data[5 * i + 44]; /* Y values */ - dev->xy_cur[i + 26] = dev->data[5 * i + 1]; - dev->xy_cur[i + 34] = dev->data[5 * i + 3]; + dev->xy_cur[ATP_XSENSORS + i] = dev->data[5 * i + 1]; + dev->xy_cur[ATP_XSENSORS + i + 8] = dev->data[5 * i + 3]; } } @@ -453,32 +510,8 @@ static void atp_complete_geyser_1_2(struct urb *urb) memcpy(dev->xy_old, dev->xy_cur, sizeof(dev->xy_old)); /* Perform size detection, if not done already */ - if (!dev->size_detect_done) { - - /* 17" Powerbooks have extra X sensors */ - for (i = (dev->type == ATP_GEYSER2 ? 15 : 16); - i < ATP_XSENSORS; i++) { - if (!dev->xy_cur[i]) - continue; - - printk(KERN_INFO - "appletouch: 17\" model detected.\n"); - - if (dev->type == ATP_GEYSER2) - input_set_abs_params(dev->input, ABS_X, - 0, - (20 - 1) * - ATP_XFACT - 1, - ATP_FUZZ, 0); - else - input_set_abs_params(dev->input, ABS_X, - 0, - (26 - 1) * - ATP_XFACT - 1, - ATP_FUZZ, 0); - break; - } - + if (unlikely(!dev->size_detect_done)) { + atp_detect_size(dev); dev->size_detect_done = 1; goto exit; } @@ -499,10 +532,10 @@ static void atp_complete_geyser_1_2(struct urb *urb) dbg_dump("accumulator", dev->xy_acc); x = atp_calculate_abs(dev->xy_acc, ATP_XSENSORS, - ATP_XFACT, &x_z, &x_f); + dev->info->xfact, &x_z, &x_f); y = atp_calculate_abs(dev->xy_acc + ATP_XSENSORS, ATP_YSENSORS, - ATP_YFACT, &y_z, &y_f); - key = dev->data[dev->datalen - 1] & ATP_STATUS_BUTTON; + dev->info->yfact, &y_z, &y_f); + key = dev->data[dev->info->datalen - 1] & ATP_STATUS_BUTTON; if (x && y) { if (dev->x_old != -1) { @@ -583,7 +616,7 @@ static void atp_complete_geyser_3_4(struct urb *urb) dbg_dump("sample", dev->xy_cur); /* Just update the base values (i.e. touchpad in untouched state) */ - if (dev->data[dev->datalen - 1] & ATP_STATUS_BASE_UPDATE) { + if (dev->data[dev->info->datalen - 1] & ATP_STATUS_BASE_UPDATE) { dprintk(KERN_DEBUG "appletouch: updated base values\n"); @@ -610,10 +643,10 @@ static void atp_complete_geyser_3_4(struct urb *urb) dbg_dump("accumulator", dev->xy_acc); x = atp_calculate_abs(dev->xy_acc, ATP_XSENSORS, - ATP_XFACT, &x_z, &x_f); + dev->info->xfact, &x_z, &x_f); y = atp_calculate_abs(dev->xy_acc + ATP_XSENSORS, ATP_YSENSORS, - ATP_YFACT, &y_z, &y_f); - key = dev->data[dev->datalen - 1] & ATP_STATUS_BUTTON; + dev->info->yfact, &y_z, &y_f); + key = dev->data[dev->info->datalen - 1] & ATP_STATUS_BUTTON; if (x && y) { if (dev->x_old != -1) { @@ -705,7 +738,7 @@ static int atp_handle_geyser(struct atp *dev) { struct usb_device *udev = dev->udev; - if (dev->type != ATP_FOUNTAIN) { + if (dev->info != &fountain_info) { /* switch to raw sensor mode */ if (atp_geyser_init(udev)) return -EIO; @@ -726,6 +759,7 @@ static int atp_probe(struct usb_interface *iface, struct usb_endpoint_descriptor *endpoint; int int_in_endpointAddr = 0; int i, error = -ENOMEM; + const struct atp_info *info = (const struct atp_info *)id->driver_info; /* set up the endpoint information */ /* use only the first interrupt-in endpoint */ @@ -753,35 +787,22 @@ static int atp_probe(struct usb_interface *iface, dev->udev = udev; dev->input = input_dev; - dev->type = id->driver_info; + dev->info = info; dev->overflow_warned = false; - if (dev->type == ATP_FOUNTAIN || dev->type == ATP_GEYSER1) - dev->datalen = 81; - else - dev->datalen = 64; dev->urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->urb) goto err_free_devs; - dev->data = usb_buffer_alloc(dev->udev, dev->datalen, GFP_KERNEL, + dev->data = usb_buffer_alloc(dev->udev, dev->info->datalen, GFP_KERNEL, &dev->urb->transfer_dma); if (!dev->data) goto err_free_urb; - /* Select the USB complete (callback) function */ - if (dev->type == ATP_FOUNTAIN || - dev->type == ATP_GEYSER1 || - dev->type == ATP_GEYSER2) - usb_fill_int_urb(dev->urb, udev, - usb_rcvintpipe(udev, int_in_endpointAddr), - dev->data, dev->datalen, - atp_complete_geyser_1_2, dev, 1); - else - usb_fill_int_urb(dev->urb, udev, - usb_rcvintpipe(udev, int_in_endpointAddr), - dev->data, dev->datalen, - atp_complete_geyser_3_4, dev, 1); + usb_fill_int_urb(dev->urb, udev, + usb_rcvintpipe(udev, int_in_endpointAddr), + dev->data, dev->info->datalen, + dev->info->callback, dev, 1); error = atp_handle_geyser(dev); if (error) @@ -802,35 +823,12 @@ static int atp_probe(struct usb_interface *iface, set_bit(EV_ABS, input_dev->evbit); - if (dev->type == ATP_GEYSER3 || dev->type == ATP_GEYSER4) { - /* - * MacBook have 20 X sensors, 10 Y sensors - */ - input_set_abs_params(input_dev, ABS_X, 0, - ((20 - 1) * ATP_XFACT) - 1, ATP_FUZZ, 0); - input_set_abs_params(input_dev, ABS_Y, 0, - ((10 - 1) * ATP_YFACT) - 1, ATP_FUZZ, 0); - } else if (dev->type == ATP_GEYSER2) { - /* - * Oct 2005 15" PowerBooks have 15 X sensors, 17" are detected - * later. - */ - input_set_abs_params(input_dev, ABS_X, 0, - ((15 - 1) * ATP_XFACT) - 1, ATP_FUZZ, 0); - input_set_abs_params(input_dev, ABS_Y, 0, - ((9 - 1) * ATP_YFACT) - 1, ATP_FUZZ, 0); - } else { - /* - * 12" and 15" Powerbooks only have 16 x sensors, - * 17" models are detected later. - */ - input_set_abs_params(input_dev, ABS_X, 0, - (16 - 1) * ATP_XFACT - 1, - ATP_FUZZ, 0); - input_set_abs_params(input_dev, ABS_Y, 0, - (ATP_YSENSORS - 1) * ATP_YFACT - 1, - ATP_FUZZ, 0); - } + input_set_abs_params(input_dev, ABS_X, 0, + (dev->info->xsensors - 1) * dev->info->xfact - 1, + ATP_FUZZ, 0); + input_set_abs_params(input_dev, ABS_Y, 0, + (dev->info->ysensors - 1) * dev->info->yfact - 1, + ATP_FUZZ, 0); input_set_abs_params(input_dev, ABS_PRESSURE, 0, ATP_PRESSURE, 0, 0); set_bit(EV_KEY, input_dev->evbit); @@ -852,7 +850,7 @@ static int atp_probe(struct usb_interface *iface, return 0; err_free_buffer: - usb_buffer_free(dev->udev, dev->datalen, + usb_buffer_free(dev->udev, dev->info->datalen, dev->data, dev->urb->transfer_dma); err_free_urb: usb_free_urb(dev->urb); @@ -871,7 +869,7 @@ static void atp_disconnect(struct usb_interface *iface) if (dev) { usb_kill_urb(dev->urb); input_unregister_device(dev->input); - usb_buffer_free(dev->udev, dev->datalen, + usb_buffer_free(dev->udev, dev->info->datalen, dev->data, dev->urb->transfer_dma); usb_free_urb(dev->urb); kfree(dev); From 3cf35049d8b2ccab62bbb8991ad1322ff2276840 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 28 Oct 2008 23:53:19 -0400 Subject: [PATCH 0772/2985] Input: omap-keypad - interrupt disable fix The GPIO interrupts are disabled several times after the first key press. No need to disable - again - the interrupts in the omap_kp_scan_keypad function on OMAP2. Signed-off-by: Peter Ujfalusi Signed-off-by: Tony Lindgren Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/omap-keypad.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c index 69e674ecf19a..a016c8d85a3d 100644 --- a/drivers/input/keyboard/omap-keypad.c +++ b/drivers/input/keyboard/omap-keypad.c @@ -122,10 +122,6 @@ static void omap_kp_scan_keypad(struct omap_kp *omap_kp, unsigned char *state) /* read the keypad status */ if (cpu_is_omap24xx()) { - int i; - for (i = 0; i < omap_kp->rows; i++) - disable_irq(OMAP_GPIO_IRQ(row_gpios[i])); - /* read the keypad status */ for (col = 0; col < omap_kp->cols; col++) { set_col_gpio_val(omap_kp, ~(1 << col)); From dde4ac07263161264d089c7556d0295487787b77 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 28 Oct 2008 23:53:25 -0400 Subject: [PATCH 0773/2985] Input: omap-keypad - allow more than 6 rows There is no reason to limit the GPIO rows to 6 for OMAP2. Signed-off-by: Peter Ujfalusi Signed-off-by: Tony Lindgren Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/omap-keypad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c index a016c8d85a3d..ec0ebee46069 100644 --- a/drivers/input/keyboard/omap-keypad.c +++ b/drivers/input/keyboard/omap-keypad.c @@ -125,7 +125,7 @@ static void omap_kp_scan_keypad(struct omap_kp *omap_kp, unsigned char *state) /* read the keypad status */ for (col = 0; col < omap_kp->cols; col++) { set_col_gpio_val(omap_kp, ~(1 << col)); - state[col] = ~(get_row_gpio_val(omap_kp)) & 0x3f; + state[col] = ~(get_row_gpio_val(omap_kp)) & 0xff; } set_col_gpio_val(omap_kp, 0); From 26615249daaaa9fc194b5154261b9569112ea9fd Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 16 Oct 2008 12:08:14 +0400 Subject: [PATCH 0774/2985] [MTD] sharpsl_nand: switch to driver model usage. Start cleanup of sharpsl_nand driver. Convert it to platform driver. Corresponding device is temprorary registered in sharpsl.c but will be later moved to corresponding board files. Signed-off-by: Dmitry Baryshkov --- drivers/mtd/nand/sharpsl.c | 63 +++++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/nand/sharpsl.c b/drivers/mtd/nand/sharpsl.c index 30a518e211bd..0a99188a7730 100644 --- a/drivers/mtd/nand/sharpsl.c +++ b/drivers/mtd/nand/sharpsl.c @@ -20,12 +20,13 @@ #include #include #include +#include + #include #include #include static void __iomem *sharpsl_io_base; -static int sharpsl_phys_base = 0x0C000000; /* register offset */ #define ECCLPLB sharpsl_io_base+0x00 /* line parity 7 - 0 bit */ @@ -150,10 +151,11 @@ const char *part_probes[] = { "cmdlinepart", NULL }; /* * Main initialization routine */ -static int __init sharpsl_nand_init(void) +static int __devinit sharpsl_nand_probe(struct platform_device *pdev) { struct nand_chip *this; struct mtd_partition *sharpsl_partition_info; + struct resource *r; int err = 0; /* Allocate memory for MTD device structure and private data */ @@ -163,8 +165,15 @@ static int __init sharpsl_nand_init(void) return -ENOMEM; } + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + dev_err(&pdev->dev, "no io memory resource defined!\n"); + err = -ENODEV; + goto err_get_res; + } + /* map physical address */ - sharpsl_io_base = ioremap(sharpsl_phys_base, 0x1000); + sharpsl_io_base = ioremap(r->start, resource_size(r)); if (!sharpsl_io_base) { printk("ioremap to access Sharp SL NAND chip failed\n"); kfree(sharpsl_mtd); @@ -242,14 +251,16 @@ static int __init sharpsl_nand_init(void) /* Return happy */ return 0; -} -module_init(sharpsl_nand_init); +err_get_res: + kfree(sharpsl_mtd); + return err; +} /* * Clean up routine */ -static void __exit sharpsl_nand_cleanup(void) +static int __devexit sharpsl_nand_remove(struct platform_device *pdev) { /* Release resources, unregister device */ nand_release(sharpsl_mtd); @@ -258,9 +269,47 @@ static void __exit sharpsl_nand_cleanup(void) /* Free the MTD device structure */ kfree(sharpsl_mtd); + + return 0; } -module_exit(sharpsl_nand_cleanup); +static struct platform_driver sharpsl_nand_driver = { + .driver = { + .name = "sharpsl-nand", + .owner = THIS_MODULE, + }, + .probe = sharpsl_nand_probe, + .remove = __devexit_p(sharpsl_nand_remove), +}; + +static struct resource sharpsl_nand_resources[] = { + { + .start = 0x0C000000, + .end = 0x0C000FFF, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device sharpsl_nand_device = { + .name = "sharpsl-nand", + .id = -1, + .resource = sharpsl_nand_resources, + .num_resources = ARRAY_SIZE(sharpsl_nand_resources), +}; + +static int __init sharpsl_nand_init(void) +{ + platform_device_register(&sharpsl_nand_device); + return platform_driver_register(&sharpsl_nand_driver); +} +module_init(sharpsl_nand_init); + +static void __exit sharpsl_nand_exit(void) +{ + platform_driver_unregister(&sharpsl_nand_driver); + platform_device_unregister(&sharpsl_nand_device); +} +module_exit(sharpsl_nand_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Richard Purdie "); From 2206ef1c5f6002e474b8eff8a56b6b7fd2efbe8f Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 16 Oct 2008 17:49:06 +0400 Subject: [PATCH 0775/2985] [MTD] sharpsl_nand: make drvdata non-static Merge mtd_info and nand_chip info special struct and make it drvdata instead of plain static variable. Signed-off-by: Dmitry Baryshkov --- drivers/mtd/nand/sharpsl.c | 53 +++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/drivers/mtd/nand/sharpsl.c b/drivers/mtd/nand/sharpsl.c index 0a99188a7730..6851806b7015 100644 --- a/drivers/mtd/nand/sharpsl.c +++ b/drivers/mtd/nand/sharpsl.c @@ -26,6 +26,11 @@ #include #include +struct sharpsl_nand { + struct mtd_info mtd; + struct nand_chip chip; +}; + static void __iomem *sharpsl_io_base; /* register offset */ @@ -45,11 +50,6 @@ static void __iomem *sharpsl_io_base; #define FLCLE (1 << 1) #define FLCE0 (1 << 0) -/* - * MTD structure for SharpSL - */ -static struct mtd_info *sharpsl_mtd = NULL; - /* * Define partitions for flash device */ @@ -157,10 +157,11 @@ static int __devinit sharpsl_nand_probe(struct platform_device *pdev) struct mtd_partition *sharpsl_partition_info; struct resource *r; int err = 0; + struct sharpsl_nand *sharpsl; /* Allocate memory for MTD device structure and private data */ - sharpsl_mtd = kmalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL); - if (!sharpsl_mtd) { + sharpsl = kzalloc(sizeof(struct sharpsl_nand), GFP_KERNEL); + if (!sharpsl) { printk("Unable to allocate SharpSL NAND MTD device structure.\n"); return -ENOMEM; } @@ -176,20 +177,18 @@ static int __devinit sharpsl_nand_probe(struct platform_device *pdev) sharpsl_io_base = ioremap(r->start, resource_size(r)); if (!sharpsl_io_base) { printk("ioremap to access Sharp SL NAND chip failed\n"); - kfree(sharpsl_mtd); - return -EIO; + err = -EIO; + goto err_ioremap; } /* Get pointer to private data */ - this = (struct nand_chip *)(&sharpsl_mtd[1]); - - /* Initialize structures */ - memset(sharpsl_mtd, 0, sizeof(struct mtd_info)); - memset(this, 0, sizeof(struct nand_chip)); + this = (struct nand_chip *)(&sharpsl->chip); /* Link the private data with the MTD structure */ - sharpsl_mtd->priv = this; - sharpsl_mtd->owner = THIS_MODULE; + sharpsl->mtd.priv = this; + sharpsl->mtd.owner = THIS_MODULE; + + platform_set_drvdata(pdev, sharpsl); /* * PXA initialize @@ -218,16 +217,17 @@ static int __devinit sharpsl_nand_probe(struct platform_device *pdev) this->ecc.correct = nand_correct_data; /* Scan to find existence of the device */ - err = nand_scan(sharpsl_mtd, 1); + err = nand_scan(&sharpsl->mtd, 1); if (err) { + platform_set_drvdata(pdev, NULL); iounmap(sharpsl_io_base); - kfree(sharpsl_mtd); + kfree(sharpsl); return err; } /* Register the partitions */ - sharpsl_mtd->name = "sharpsl-nand"; - nr_partitions = parse_mtd_partitions(sharpsl_mtd, part_probes, &sharpsl_partition_info, 0); + sharpsl->mtd.name = "sharpsl-nand"; + nr_partitions = parse_mtd_partitions(&sharpsl->mtd, part_probes, &sharpsl_partition_info, 0); if (nr_partitions <= 0) { nr_partitions = DEFAULT_NUM_PARTITIONS; @@ -247,13 +247,14 @@ static int __devinit sharpsl_nand_probe(struct platform_device *pdev) } } - add_mtd_partitions(sharpsl_mtd, sharpsl_partition_info, nr_partitions); + add_mtd_partitions(&sharpsl->mtd, sharpsl_partition_info, nr_partitions); /* Return happy */ return 0; +err_ioremap: err_get_res: - kfree(sharpsl_mtd); + kfree(sharpsl); return err; } @@ -262,13 +263,17 @@ err_get_res: */ static int __devexit sharpsl_nand_remove(struct platform_device *pdev) { + struct sharpsl_nand *sharpsl = platform_get_drvdata(pdev); + /* Release resources, unregister device */ - nand_release(sharpsl_mtd); + nand_release(&sharpsl->mtd); + + platform_set_drvdata(pdev, NULL); iounmap(sharpsl_io_base); /* Free the MTD device structure */ - kfree(sharpsl_mtd); + kfree(sharpsl); return 0; } From a4e4f29cbeec200c12c6f3ebedc053a581f90b48 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 16 Oct 2008 17:58:18 +0400 Subject: [PATCH 0776/2985] [MTD] sharpsl_nand: move io addr to struct sharpsl_nand Signed-off-by: Dmitry Baryshkov --- drivers/mtd/nand/sharpsl.c | 59 +++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/drivers/mtd/nand/sharpsl.c b/drivers/mtd/nand/sharpsl.c index 6851806b7015..328e02ee3d8e 100644 --- a/drivers/mtd/nand/sharpsl.c +++ b/drivers/mtd/nand/sharpsl.c @@ -29,18 +29,20 @@ struct sharpsl_nand { struct mtd_info mtd; struct nand_chip chip; + + void __iomem *io; }; -static void __iomem *sharpsl_io_base; +#define mtd_to_sharpsl(_mtd) container_of(_mtd, struct sharpsl_nand, mtd) /* register offset */ -#define ECCLPLB sharpsl_io_base+0x00 /* line parity 7 - 0 bit */ -#define ECCLPUB sharpsl_io_base+0x04 /* line parity 15 - 8 bit */ -#define ECCCP sharpsl_io_base+0x08 /* column parity 5 - 0 bit */ -#define ECCCNTR sharpsl_io_base+0x0C /* ECC byte counter */ -#define ECCCLRR sharpsl_io_base+0x10 /* cleare ECC */ -#define FLASHIO sharpsl_io_base+0x14 /* Flash I/O */ -#define FLASHCTL sharpsl_io_base+0x18 /* Flash Control */ +#define ECCLPLB 0x00 /* line parity 7 - 0 bit */ +#define ECCLPUB 0x04 /* line parity 15 - 8 bit */ +#define ECCCP 0x08 /* column parity 5 - 0 bit */ +#define ECCCNTR 0x0C /* ECC byte counter */ +#define ECCCLRR 0x10 /* cleare ECC */ +#define FLASHIO 0x14 /* Flash I/O */ +#define FLASHCTL 0x18 /* Flash Control */ /* Flash control bit */ #define FLRYBY (1 << 5) @@ -85,6 +87,7 @@ static struct mtd_partition sharpsl_nand_default_partition_info[] = { static void sharpsl_nand_hwcontrol(struct mtd_info *mtd, int cmd, unsigned int ctrl) { + struct sharpsl_nand *sharpsl = mtd_to_sharpsl(mtd); struct nand_chip *chip = mtd->priv; if (ctrl & NAND_CTRL_CHANGE) { @@ -94,7 +97,7 @@ static void sharpsl_nand_hwcontrol(struct mtd_info *mtd, int cmd, bits ^= 0x11; - writeb((readb(FLASHCTL) & ~0x17) | bits, FLASHCTL); + writeb((readb(sharpsl->io + FLASHCTL) & ~0x17) | bits, sharpsl->io + FLASHCTL); } if (cmd != NAND_CMD_NONE) @@ -128,20 +131,23 @@ static struct nand_ecclayout akita_oobinfo = { static int sharpsl_nand_dev_ready(struct mtd_info *mtd) { - return !((readb(FLASHCTL) & FLRYBY) == 0); + struct sharpsl_nand *sharpsl = mtd_to_sharpsl(mtd); + return !((readb(sharpsl->io + FLASHCTL) & FLRYBY) == 0); } static void sharpsl_nand_enable_hwecc(struct mtd_info *mtd, int mode) { - writeb(0, ECCCLRR); + struct sharpsl_nand *sharpsl = mtd_to_sharpsl(mtd); + writeb(0, sharpsl->io + ECCCLRR); } static int sharpsl_nand_calculate_ecc(struct mtd_info *mtd, const u_char * dat, u_char * ecc_code) { - ecc_code[0] = ~readb(ECCLPUB); - ecc_code[1] = ~readb(ECCLPLB); - ecc_code[2] = (~readb(ECCCP) << 2) | 0x03; - return readb(ECCCNTR) != 0; + struct sharpsl_nand *sharpsl = mtd_to_sharpsl(mtd); + ecc_code[0] = ~readb(sharpsl->io + ECCLPUB); + ecc_code[1] = ~readb(sharpsl->io + ECCLPLB); + ecc_code[2] = (~readb(sharpsl->io + ECCCP) << 2) | 0x03; + return readb(sharpsl->io + ECCCNTR) != 0; } #ifdef CONFIG_MTD_PARTITIONS @@ -174,8 +180,8 @@ static int __devinit sharpsl_nand_probe(struct platform_device *pdev) } /* map physical address */ - sharpsl_io_base = ioremap(r->start, resource_size(r)); - if (!sharpsl_io_base) { + sharpsl->io = ioremap(r->start, resource_size(r)); + if (!sharpsl->io) { printk("ioremap to access Sharp SL NAND chip failed\n"); err = -EIO; goto err_ioremap; @@ -193,11 +199,11 @@ static int __devinit sharpsl_nand_probe(struct platform_device *pdev) /* * PXA initialize */ - writeb(readb(FLASHCTL) | FLWP, FLASHCTL); + writeb(readb(sharpsl->io + FLASHCTL) | FLWP, sharpsl->io + FLASHCTL); /* Set address of NAND IO lines */ - this->IO_ADDR_R = FLASHIO; - this->IO_ADDR_W = FLASHIO; + this->IO_ADDR_R = sharpsl->io + FLASHIO; + this->IO_ADDR_W = sharpsl->io + FLASHIO; /* Set address of hardware control function */ this->cmd_ctrl = sharpsl_nand_hwcontrol; this->dev_ready = sharpsl_nand_dev_ready; @@ -218,12 +224,8 @@ static int __devinit sharpsl_nand_probe(struct platform_device *pdev) /* Scan to find existence of the device */ err = nand_scan(&sharpsl->mtd, 1); - if (err) { - platform_set_drvdata(pdev, NULL); - iounmap(sharpsl_io_base); - kfree(sharpsl); - return err; - } + if (err) + goto err_scan; /* Register the partitions */ sharpsl->mtd.name = "sharpsl-nand"; @@ -252,6 +254,9 @@ static int __devinit sharpsl_nand_probe(struct platform_device *pdev) /* Return happy */ return 0; +err_scan: + platform_set_drvdata(pdev, NULL); + iounmap(sharpsl->io); err_ioremap: err_get_res: kfree(sharpsl); @@ -270,7 +275,7 @@ static int __devexit sharpsl_nand_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); - iounmap(sharpsl_io_base); + iounmap(sharpsl->io); /* Free the MTD device structure */ kfree(sharpsl); From c176d0ca8b775a784e38d8afc7c7b42e8906282d Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 16 Oct 2008 18:22:28 +0400 Subject: [PATCH 0777/2985] [MTD] sharpsl-nand: cleanup partitions support Signed-off-by: Dmitry Baryshkov --- drivers/mtd/nand/sharpsl.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/sharpsl.c b/drivers/mtd/nand/sharpsl.c index 328e02ee3d8e..17625c0a8f61 100644 --- a/drivers/mtd/nand/sharpsl.c +++ b/drivers/mtd/nand/sharpsl.c @@ -2,6 +2,7 @@ * drivers/mtd/nand/sharpsl.c * * Copyright (C) 2004 Richard Purdie + * Copyright (C) 2008 Dmitry Baryshkov * * Based on Sharp's NAND driver sharp_sl.c * @@ -52,12 +53,12 @@ struct sharpsl_nand { #define FLCLE (1 << 1) #define FLCE0 (1 << 0) +#ifdef CONFIG_MTD_PARTITIONS /* * Define partitions for flash device */ #define DEFAULT_NUM_PARTITIONS 3 -static int nr_partitions; static struct mtd_partition sharpsl_nand_default_partition_info[] = { { .name = "System Area", @@ -75,6 +76,7 @@ static struct mtd_partition sharpsl_nand_default_partition_info[] = { .size = MTDPART_SIZ_FULL, }, }; +#endif /* * hardware specific access to control-lines @@ -151,7 +153,7 @@ static int sharpsl_nand_calculate_ecc(struct mtd_info *mtd, const u_char * dat, } #ifdef CONFIG_MTD_PARTITIONS -const char *part_probes[] = { "cmdlinepart", NULL }; +static const char *part_probes[] = { "cmdlinepart", NULL }; #endif /* @@ -160,7 +162,10 @@ const char *part_probes[] = { "cmdlinepart", NULL }; static int __devinit sharpsl_nand_probe(struct platform_device *pdev) { struct nand_chip *this; +#ifdef CONFIG_MTD_PARTITIONS struct mtd_partition *sharpsl_partition_info; + int nr_partitions; +#endif struct resource *r; int err = 0; struct sharpsl_nand *sharpsl; @@ -229,10 +234,11 @@ static int __devinit sharpsl_nand_probe(struct platform_device *pdev) /* Register the partitions */ sharpsl->mtd.name = "sharpsl-nand"; +#ifdef CONFIG_MTD_PARTITIONS nr_partitions = parse_mtd_partitions(&sharpsl->mtd, part_probes, &sharpsl_partition_info, 0); if (nr_partitions <= 0) { - nr_partitions = DEFAULT_NUM_PARTITIONS; + nr_partitions = ARRAY_SIZE(sharpsl_nand_default_partition_info); sharpsl_partition_info = sharpsl_nand_default_partition_info; if (machine_is_poodle()) { sharpsl_partition_info[1].size = 22 * 1024 * 1024; @@ -249,11 +255,19 @@ static int __devinit sharpsl_nand_probe(struct platform_device *pdev) } } - add_mtd_partitions(&sharpsl->mtd, sharpsl_partition_info, nr_partitions); + err = add_mtd_partitions(&sharpsl->mtd, sharpsl_partition_info, nr_partitions); +#else + err = add_mtd_device(&sharpsl->mtd); +#endif + if (err) + goto err_add; /* Return happy */ return 0; +err_add: + nand_release(&sharpsl->mtd); + err_scan: platform_set_drvdata(pdev, NULL); iounmap(sharpsl->io); From a20c7ab570ffdce1d6f67c7acf8c1c502a3b3839 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 16 Oct 2008 18:43:48 +0400 Subject: [PATCH 0778/2985] [MTD] sharpsl-nand: use platform_data for model-specific values Add platform_data which holds all model-specific values, like badblocks pattern, oobinfo, partitions. Signed-off-by: Dmitry Baryshkov --- drivers/mtd/nand/sharpsl.c | 158 +++++++++++++++++++----------------- include/linux/mtd/sharpsl.h | 20 +++++ 2 files changed, 104 insertions(+), 74 deletions(-) create mode 100644 include/linux/mtd/sharpsl.h diff --git a/drivers/mtd/nand/sharpsl.c b/drivers/mtd/nand/sharpsl.c index 17625c0a8f61..698378ca8e0a 100644 --- a/drivers/mtd/nand/sharpsl.c +++ b/drivers/mtd/nand/sharpsl.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -53,31 +54,6 @@ struct sharpsl_nand { #define FLCLE (1 << 1) #define FLCE0 (1 << 0) -#ifdef CONFIG_MTD_PARTITIONS -/* - * Define partitions for flash device - */ -#define DEFAULT_NUM_PARTITIONS 3 - -static struct mtd_partition sharpsl_nand_default_partition_info[] = { - { - .name = "System Area", - .offset = 0, - .size = 7 * 1024 * 1024, - }, - { - .name = "Root Filesystem", - .offset = 7 * 1024 * 1024, - .size = 30 * 1024 * 1024, - }, - { - .name = "Home Filesystem", - .offset = MTDPART_OFS_APPEND, - .size = MTDPART_SIZ_FULL, - }, -}; -#endif - /* * hardware specific access to control-lines * ctrl: @@ -106,31 +82,6 @@ static void sharpsl_nand_hwcontrol(struct mtd_info *mtd, int cmd, writeb(cmd, chip->IO_ADDR_W); } -static uint8_t scan_ff_pattern[] = { 0xff, 0xff }; - -static struct nand_bbt_descr sharpsl_bbt = { - .options = 0, - .offs = 4, - .len = 2, - .pattern = scan_ff_pattern -}; - -static struct nand_bbt_descr sharpsl_akita_bbt = { - .options = 0, - .offs = 4, - .len = 1, - .pattern = scan_ff_pattern -}; - -static struct nand_ecclayout akita_oobinfo = { - .eccbytes = 24, - .eccpos = { - 0x5, 0x1, 0x2, 0x3, 0x6, 0x7, 0x15, 0x11, - 0x12, 0x13, 0x16, 0x17, 0x25, 0x21, 0x22, 0x23, - 0x26, 0x27, 0x35, 0x31, 0x32, 0x33, 0x36, 0x37}, - .oobfree = {{0x08, 0x09}} -}; - static int sharpsl_nand_dev_ready(struct mtd_info *mtd) { struct sharpsl_nand *sharpsl = mtd_to_sharpsl(mtd); @@ -169,6 +120,12 @@ static int __devinit sharpsl_nand_probe(struct platform_device *pdev) struct resource *r; int err = 0; struct sharpsl_nand *sharpsl; + struct sharpsl_nand_platform_data *data = pdev->dev.platform_data; + + if (!data) { + dev_err(&pdev->dev, "no platform data!\n"); + return -EINVAL; + } /* Allocate memory for MTD device structure and private data */ sharpsl = kzalloc(sizeof(struct sharpsl_nand), GFP_KERNEL); @@ -218,11 +175,8 @@ static int __devinit sharpsl_nand_probe(struct platform_device *pdev) this->ecc.mode = NAND_ECC_HW; this->ecc.size = 256; this->ecc.bytes = 3; - this->badblock_pattern = &sharpsl_bbt; - if (machine_is_akita() || machine_is_borzoi()) { - this->badblock_pattern = &sharpsl_akita_bbt; - this->ecc.layout = &akita_oobinfo; - } + this->badblock_pattern = data->badblock_pattern; + this->ecc.layout = data->ecc_layout; this->ecc.hwctl = sharpsl_nand_enable_hwecc; this->ecc.calculate = sharpsl_nand_calculate_ecc; this->ecc.correct = nand_correct_data; @@ -236,29 +190,16 @@ static int __devinit sharpsl_nand_probe(struct platform_device *pdev) sharpsl->mtd.name = "sharpsl-nand"; #ifdef CONFIG_MTD_PARTITIONS nr_partitions = parse_mtd_partitions(&sharpsl->mtd, part_probes, &sharpsl_partition_info, 0); - if (nr_partitions <= 0) { - nr_partitions = ARRAY_SIZE(sharpsl_nand_default_partition_info); - sharpsl_partition_info = sharpsl_nand_default_partition_info; - if (machine_is_poodle()) { - sharpsl_partition_info[1].size = 22 * 1024 * 1024; - } else if (machine_is_corgi() || machine_is_shepherd()) { - sharpsl_partition_info[1].size = 25 * 1024 * 1024; - } else if (machine_is_husky()) { - sharpsl_partition_info[1].size = 53 * 1024 * 1024; - } else if (machine_is_spitz()) { - sharpsl_partition_info[1].size = 5 * 1024 * 1024; - } else if (machine_is_akita()) { - sharpsl_partition_info[1].size = 58 * 1024 * 1024; - } else if (machine_is_borzoi()) { - sharpsl_partition_info[1].size = 32 * 1024 * 1024; - } + nr_partitions = data->nr_partitions; + sharpsl_partition_info = data->partitions; } - err = add_mtd_partitions(&sharpsl->mtd, sharpsl_partition_info, nr_partitions); -#else - err = add_mtd_device(&sharpsl->mtd); + if (nr_partitions > 0) + err = add_mtd_partitions(&sharpsl->mtd, sharpsl_partition_info, nr_partitions); + else #endif + err = add_mtd_device(&sharpsl->mtd); if (err) goto err_add; @@ -306,6 +247,58 @@ static struct platform_driver sharpsl_nand_driver = { .remove = __devexit_p(sharpsl_nand_remove), }; +/* + * Define partitions for flash device + */ +static struct mtd_partition sharpsl_nand_partitions[] = { + { + .name = "System Area", + .offset = 0, + .size = 7 * 1024 * 1024, + }, + { + .name = "Root Filesystem", + .offset = 7 * 1024 * 1024, + .size = 30 * 1024 * 1024, + }, + { + .name = "Home Filesystem", + .offset = MTDPART_OFS_APPEND, + .size = MTDPART_SIZ_FULL, + }, +}; + +static uint8_t scan_ff_pattern[] = { 0xff, 0xff }; + +static struct nand_bbt_descr sharpsl_bbt = { + .options = 0, + .offs = 4, + .len = 2, + .pattern = scan_ff_pattern +}; + +static struct nand_bbt_descr sharpsl_akita_bbt = { + .options = 0, + .offs = 4, + .len = 1, + .pattern = scan_ff_pattern +}; + +static struct nand_ecclayout akita_oobinfo = { + .eccbytes = 24, + .eccpos = { + 0x5, 0x1, 0x2, 0x3, 0x6, 0x7, 0x15, 0x11, + 0x12, 0x13, 0x16, 0x17, 0x25, 0x21, 0x22, 0x23, + 0x26, 0x27, 0x35, 0x31, 0x32, 0x33, 0x36, 0x37}, + .oobfree = {{0x08, 0x09}} +}; + +static struct sharpsl_nand_platform_data sharpsl_nand_platform_data = { + .badblock_pattern = &sharpsl_bbt, + .partitions = sharpsl_nand_partitions, + .nr_partitions = ARRAY_SIZE(sharpsl_nand_partitions), +}; + static struct resource sharpsl_nand_resources[] = { { .start = 0x0C000000, @@ -319,10 +312,27 @@ static struct platform_device sharpsl_nand_device = { .id = -1, .resource = sharpsl_nand_resources, .num_resources = ARRAY_SIZE(sharpsl_nand_resources), + .dev.platform_data = &sharpsl_nand_platform_data, }; static int __init sharpsl_nand_init(void) { + if (machine_is_poodle()) { + sharpsl_nand_partitions[1].size = 22 * 1024 * 1024; + } else if (machine_is_corgi() || machine_is_shepherd()) { + sharpsl_nand_partitions[1].size = 25 * 1024 * 1024; + } else if (machine_is_husky()) { + sharpsl_nand_partitions[1].size = 53 * 1024 * 1024; + } else if (machine_is_spitz()) { + sharpsl_nand_partitions[1].size = 5 * 1024 * 1024; + } else if (machine_is_akita()) { + sharpsl_nand_partitions[1].size = 58 * 1024 * 1024; + sharpsl_nand_platform_data.badblock_pattern = &sharpsl_akita_bbt; + sharpsl_nand_platform_data.ecc_layout = &akita_oobinfo; + } else if (machine_is_borzoi()) { + sharpsl_nand_partitions[1].size = 32 * 1024 * 1024; + } + platform_device_register(&sharpsl_nand_device); return platform_driver_register(&sharpsl_nand_driver); } diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h new file mode 100644 index 000000000000..25f4d2a845c1 --- /dev/null +++ b/include/linux/mtd/sharpsl.h @@ -0,0 +1,20 @@ +/* + * SharpSL NAND support + * + * Copyright (C) 2008 Dmitry Baryshkov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +struct sharpsl_nand_platform_data { + struct nand_bbt_descr *badblock_pattern; + struct nand_ecclayout *ecc_layout; + struct mtd_partition *partitions; + unsigned int nr_partitions; +}; From 6af7a8eb1eb2b5a0967fccf61e750b085d60ad48 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 16 Oct 2008 19:17:05 +0400 Subject: [PATCH 0779/2985] [MTD] sharpsl-nand: move registration to board code Finally move registration of sharpsl-nand device to board-specific code. sharpsl nand driver is now clean and simple. Signed-off-by: Dmitry Baryshkov --- arch/arm/mach-pxa/corgi.c | 54 ++++++++++++++++++++++++ arch/arm/mach-pxa/poodle.c | 51 ++++++++++++++++++++++ arch/arm/mach-pxa/spitz.c | 77 ++++++++++++++++++++++++++++++++++ drivers/mtd/nand/sharpsl.c | 86 -------------------------------------- 4 files changed, 182 insertions(+), 86 deletions(-) diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index 65558d6aa220..b65be8e7792a 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include