Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "This pull is mostly cleanups and fixes:

   - The raid5/6 cleanups from Zhao Lei fixup some long standing warts
     in the code and add improvements on top of the scrubbing support
     from 3.19.

   - Josef has round one of our ENOSPC fixes coming from large btrfs
     clusters here at FB.

   - Dave Sterba continues a long series of cleanups (thanks Dave), and
     Filipe continues hammering on corner cases in fsync and others

  This all was held up a little trying to track down a use-after-free in
  btrfs raid5/6.  It's not clear yet if this is just made easier to
  trigger with this pull or if its a new bug from the raid5/6 cleanups.
  Dave Sterba is the only one to trigger it so far, but he has a
  consistent way to reproduce, so we'll get it nailed shortly"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (68 commits)
  Btrfs: don't remove extents and xattrs when logging new names
  Btrfs: fix fsync data loss after adding hard link to inode
  Btrfs: fix BUG_ON in btrfs_orphan_add() when delete unused block group
  Btrfs: account for large extents with enospc
  Btrfs: don't set and clear delalloc for O_DIRECT writes
  Btrfs: only adjust outstanding_extents when we do a short write
  btrfs: Fix out-of-space bug
  Btrfs: scrub, fix sleep in atomic context
  Btrfs: fix scheduler warning when syncing log
  Btrfs: Remove unnecessary placeholder in btrfs_err_code
  btrfs: cleanup init for list in free-space-cache
  btrfs: delete chunk allocation attemp when setting block group ro
  btrfs: clear bio reference after submit_one_bio()
  Btrfs: fix scrub race leading to use-after-free
  Btrfs: add missing cleanup on sysfs init failure
  Btrfs: fix race between transaction commit and empty block group removal
  btrfs: add more checks to btrfs_read_sys_array
  btrfs: cleanup, rename a few variables in btrfs_read_sys_array
  btrfs: add checks for sys_chunk_array sizes
  btrfs: more superblock checks, lower bounds on devices and sectorsize/nodesize
  ...
This commit is contained in:
Linus Torvalds 2015-02-19 14:36:00 -08:00
commit 2b9fb532d4
34 changed files with 1065 additions and 863 deletions

View File

@ -1246,25 +1246,6 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
return ret;
}
/*
* this makes the path point to (inum INODE_ITEM ioff)
*/
int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
struct btrfs_path *path)
{
struct btrfs_key key;
return btrfs_find_item(fs_root, path, inum, ioff,
BTRFS_INODE_ITEM_KEY, &key);
}
static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
struct btrfs_path *path,
struct btrfs_key *found_key)
{
return btrfs_find_item(fs_root, path, inum, ioff,
BTRFS_INODE_REF_KEY, found_key);
}
int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
u64 start_off, struct btrfs_path *path,
struct btrfs_inode_extref **ret_extref,
@ -1374,7 +1355,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
}
ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
ret = btrfs_find_item(fs_root, path, parent, 0,
BTRFS_INODE_REF_KEY, &found_key);
if (ret > 0)
ret = -ENOENT;
if (ret)
@ -1727,8 +1709,10 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_key found_key;
while (!ret) {
ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
&found_key);
ret = btrfs_find_item(fs_root, path, inum,
parent ? parent + 1 : 0, BTRFS_INODE_REF_KEY,
&found_key);
if (ret < 0)
break;
if (ret) {

View File

@ -32,9 +32,6 @@ struct inode_fs_paths {
typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
void *ctx);
int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
struct btrfs_path *path);
int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
struct btrfs_path *path, struct btrfs_key *found_key,
u64 *flags);

View File

@ -185,6 +185,9 @@ struct btrfs_inode {
struct btrfs_delayed_node *delayed_node;
/* File creation time. */
struct timespec i_otime;
struct inode vfs_inode;
};

View File

@ -213,11 +213,19 @@ static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
*/
static void add_root_to_dirty_list(struct btrfs_root *root)
{
if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
!test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
return;
spin_lock(&root->fs_info->trans_lock);
if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) &&
list_empty(&root->dirty_list)) {
list_add(&root->dirty_list,
&root->fs_info->dirty_cowonly_roots);
if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
/* Want the extent tree to be the last on the list */
if (root->objectid == BTRFS_EXTENT_TREE_OBJECTID)
list_move_tail(&root->dirty_list,
&root->fs_info->dirty_cowonly_roots);
else
list_move(&root->dirty_list,
&root->fs_info->dirty_cowonly_roots);
}
spin_unlock(&root->fs_info->trans_lock);
}
@ -1363,8 +1371,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
BUG_ON(tm->slot != 0);
eb_rewin = alloc_dummy_extent_buffer(eb->start,
fs_info->tree_root->nodesize);
eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
if (!eb_rewin) {
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
@ -1444,7 +1451,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
} else if (old_root) {
btrfs_tree_read_unlock(eb_root);
free_extent_buffer(eb_root);
eb = alloc_dummy_extent_buffer(logical, root->nodesize);
eb = alloc_dummy_extent_buffer(root->fs_info, logical);
} else {
btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
eb = btrfs_clone_extent_buffer(eb_root);
@ -2282,7 +2289,7 @@ static void reada_for_search(struct btrfs_root *root,
if ((search <= target && target - search <= 65536) ||
(search > target && search - target <= 65536)) {
gen = btrfs_node_ptr_generation(node, nr);
readahead_tree_block(root, search, blocksize);
readahead_tree_block(root, search);
nread += blocksize;
}
nscan++;
@ -2301,7 +2308,6 @@ static noinline void reada_for_balance(struct btrfs_root *root,
u64 gen;
u64 block1 = 0;
u64 block2 = 0;
int blocksize;
parent = path->nodes[level + 1];
if (!parent)
@ -2309,7 +2315,6 @@ static noinline void reada_for_balance(struct btrfs_root *root,
nritems = btrfs_header_nritems(parent);
slot = path->slots[level + 1];
blocksize = root->nodesize;
if (slot > 0) {
block1 = btrfs_node_blockptr(parent, slot - 1);
@ -2334,9 +2339,9 @@ static noinline void reada_for_balance(struct btrfs_root *root,
}
if (block1)
readahead_tree_block(root, block1, blocksize);
readahead_tree_block(root, block1);
if (block2)
readahead_tree_block(root, block2, blocksize);
readahead_tree_block(root, block2);
}
@ -2609,32 +2614,24 @@ static int key_search(struct extent_buffer *b, struct btrfs_key *key,
return 0;
}
int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
u64 iobjectid, u64 ioff, u8 key_type,
struct btrfs_key *found_key)
{
int ret;
struct btrfs_key key;
struct extent_buffer *eb;
struct btrfs_path *path;
ASSERT(path);
ASSERT(found_key);
key.type = key_type;
key.objectid = iobjectid;
key.offset = ioff;
if (found_path == NULL) {
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
} else
path = found_path;
ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
if ((ret < 0) || (found_key == NULL)) {
if (path != found_path)
btrfs_free_path(path);
if (ret < 0)
return ret;
}
eb = path->nodes[0];
if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
@ -3383,7 +3380,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
add_root_to_dirty_list(root);
extent_buffer_get(c);
path->nodes[level] = c;
path->locks[level] = BTRFS_WRITE_LOCK;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
path->slots[level] = 0;
return 0;
}
@ -4356,13 +4353,15 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
path->search_for_split = 1;
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
path->search_for_split = 0;
if (ret > 0)
ret = -EAGAIN;
if (ret < 0)
goto err;
ret = -EAGAIN;
leaf = path->nodes[0];
/* if our item isn't there or got smaller, return now */
if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
/* if our item isn't there, return now */
if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
goto err;
/* the leaf has changed, it now has room. return now */

View File

@ -198,6 +198,8 @@ static int btrfs_csum_sizes[] = { 4, 0 };
#define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024)
#define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024)
/*
* The key defines the order in the tree, and so it also defines (optimal)
* block layout.
@ -1020,6 +1022,9 @@ enum btrfs_raid_types {
BTRFS_BLOCK_GROUP_RAID6 | \
BTRFS_BLOCK_GROUP_DUP | \
BTRFS_BLOCK_GROUP_RAID10)
#define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \
BTRFS_BLOCK_GROUP_RAID6)
/*
* We need a bit for restriper to be able to tell when chunks of type
* SINGLE are available. This "extended" profile format is used in
@ -1239,7 +1244,6 @@ enum btrfs_disk_cache_state {
BTRFS_DC_ERROR = 1,
BTRFS_DC_CLEAR = 2,
BTRFS_DC_SETUP = 3,
BTRFS_DC_NEED_WRITE = 4,
};
struct btrfs_caching_control {
@ -1277,7 +1281,6 @@ struct btrfs_block_group_cache {
unsigned long full_stripe_len;
unsigned int ro:1;
unsigned int dirty:1;
unsigned int iref:1;
unsigned int has_caching_ctl:1;
unsigned int removed:1;
@ -1315,6 +1318,9 @@ struct btrfs_block_group_cache {
struct list_head ro_list;
atomic_t trimming;
/* For dirty block groups */
struct list_head dirty_list;
};
/* delayed seq elem */
@ -1741,6 +1747,7 @@ struct btrfs_fs_info {
spinlock_t unused_bgs_lock;
struct list_head unused_bgs;
struct mutex unused_bg_unpin_mutex;
/* For btrfs to record security options */
struct security_mnt_opts security_opts;
@ -1776,6 +1783,7 @@ struct btrfs_subvolume_writers {
#define BTRFS_ROOT_DEFRAG_RUNNING 6
#define BTRFS_ROOT_FORCE_COW 7
#define BTRFS_ROOT_MULTI_LOG_TASKS 8
#define BTRFS_ROOT_DIRTY 9
/*
* in ram representation of the tree. extent_root is used for all allocations
@ -1794,8 +1802,6 @@ struct btrfs_root {
struct btrfs_fs_info *fs_info;
struct extent_io_tree dirty_log_pages;
struct kobject root_kobj;
struct completion kobj_unregister;
struct mutex objectid_mutex;
spinlock_t accounting_lock;
@ -2465,31 +2471,6 @@ BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
static inline struct btrfs_timespec *
btrfs_inode_atime(struct btrfs_inode_item *inode_item)
{
unsigned long ptr = (unsigned long)inode_item;
ptr += offsetof(struct btrfs_inode_item, atime);
return (struct btrfs_timespec *)ptr;
}
static inline struct btrfs_timespec *
btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
{
unsigned long ptr = (unsigned long)inode_item;
ptr += offsetof(struct btrfs_inode_item, mtime);
return (struct btrfs_timespec *)ptr;
}
static inline struct btrfs_timespec *
btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
{
unsigned long ptr = (unsigned long)inode_item;
ptr += offsetof(struct btrfs_inode_item, ctime);
return (struct btrfs_timespec *)ptr;
}
BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);

View File

@ -1755,27 +1755,31 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
btrfs_set_stack_inode_block_group(inode_item, 0);
btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
btrfs_set_stack_timespec_sec(&inode_item->atime,
inode->i_atime.tv_sec);
btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
btrfs_set_stack_timespec_nsec(&inode_item->atime,
inode->i_atime.tv_nsec);
btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
btrfs_set_stack_timespec_sec(&inode_item->mtime,
inode->i_mtime.tv_sec);
btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
btrfs_set_stack_timespec_nsec(&inode_item->mtime,
inode->i_mtime.tv_nsec);
btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
btrfs_set_stack_timespec_sec(&inode_item->ctime,
inode->i_ctime.tv_sec);
btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
btrfs_set_stack_timespec_nsec(&inode_item->ctime,
inode->i_ctime.tv_nsec);
btrfs_set_stack_timespec_sec(&inode_item->otime,
BTRFS_I(inode)->i_otime.tv_sec);
btrfs_set_stack_timespec_nsec(&inode_item->otime,
BTRFS_I(inode)->i_otime.tv_nsec);
}
int btrfs_fill_inode(struct inode *inode, u32 *rdev)
{
struct btrfs_delayed_node *delayed_node;
struct btrfs_inode_item *inode_item;
struct btrfs_timespec *tspec;
delayed_node = btrfs_get_delayed_node(inode);
if (!delayed_node)
@ -1802,17 +1806,19 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
*rdev = btrfs_stack_inode_rdev(inode_item);
BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
tspec = btrfs_inode_atime(inode_item);
inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
tspec = btrfs_inode_mtime(inode_item);
inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
tspec = btrfs_inode_ctime(inode_item);
inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
BTRFS_I(inode)->i_otime.tv_sec =
btrfs_stack_timespec_sec(&inode_item->otime);
BTRFS_I(inode)->i_otime.tv_nsec =
btrfs_stack_timespec_nsec(&inode_item->otime);
inode->i_generation = BTRFS_I(inode)->generation;
BTRFS_I(inode)->index_cnt = (u64)-1;

View File

@ -440,18 +440,9 @@ leave:
*/
static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info)
{
s64 writers;
DEFINE_WAIT(wait);
set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
do {
prepare_to_wait(&fs_info->replace_wait, &wait,
TASK_UNINTERRUPTIBLE);
writers = percpu_counter_sum(&fs_info->bio_counter);
if (writers)
schedule();
finish_wait(&fs_info->replace_wait, &wait);
} while (writers);
wait_event(fs_info->replace_wait, !percpu_counter_sum(
&fs_info->bio_counter));
}
/*
@ -932,15 +923,15 @@ void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info)
{
DEFINE_WAIT(wait);
again:
percpu_counter_inc(&fs_info->bio_counter);
if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) {
while (1) {
percpu_counter_inc(&fs_info->bio_counter);
if (likely(!test_bit(BTRFS_FS_STATE_DEV_REPLACING,
&fs_info->fs_state)))
break;
btrfs_bio_counter_dec(fs_info);
wait_event(fs_info->replace_wait,
!test_bit(BTRFS_FS_STATE_DEV_REPLACING,
&fs_info->fs_state));
goto again;
}
}

View File

@ -318,7 +318,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
memcpy(&found, result, csum_size);
read_extent_buffer(buf, &val, 0, csum_size);
printk_ratelimited(KERN_INFO
printk_ratelimited(KERN_WARNING
"BTRFS: %s checksum verify failed on %llu wanted %X found %X "
"level %d\n",
root->fs_info->sb->s_id, buf->start,
@ -367,7 +367,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
ret = 0;
goto out;
}
printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
printk_ratelimited(KERN_ERR
"BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
eb->fs_info->sb->s_id, eb->start,
parent_transid, btrfs_header_generation(eb));
ret = 1;
@ -633,21 +634,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start "
printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block start "
"%llu %llu\n",
eb->fs_info->sb->s_id, found_start, eb->start);
ret = -EIO;
goto err;
}
if (check_tree_block_fsid(root, eb)) {
printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n",
printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
eb->fs_info->sb->s_id, eb->start);
ret = -EIO;
goto err;
}
found_level = btrfs_header_level(eb);
if (found_level >= BTRFS_MAX_LEVEL) {
btrfs_info(root->fs_info, "bad tree block level %d",
btrfs_err(root->fs_info, "bad tree block level %d",
(int)btrfs_header_level(eb));
ret = -EIO;
goto err;
@ -1073,12 +1074,12 @@ static const struct address_space_operations btree_aops = {
.set_page_dirty = btree_set_page_dirty,
};
void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
void readahead_tree_block(struct btrfs_root *root, u64 bytenr)
{
struct extent_buffer *buf = NULL;
struct inode *btree_inode = root->fs_info->btree_inode;
buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
return;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
@ -1086,7 +1087,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
free_extent_buffer(buf);
}
int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
int mirror_num, struct extent_buffer **eb)
{
struct extent_buffer *buf = NULL;
@ -1094,7 +1095,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
int ret;
buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
return 0;
@ -1125,12 +1126,11 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
}
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize)
u64 bytenr)
{
if (btrfs_test_is_dummy_root(root))
return alloc_test_extent_buffer(root->fs_info, bytenr,
blocksize);
return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
return alloc_test_extent_buffer(root->fs_info, bytenr);
return alloc_extent_buffer(root->fs_info, bytenr);
}
@ -1152,7 +1152,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
struct extent_buffer *buf = NULL;
int ret;
buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize);
buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
return NULL;
@ -1275,12 +1275,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
memset(&root->root_kobj, 0, sizeof(root->root_kobj));
if (fs_info)
root->defrag_trans_start = fs_info->generation;
else
root->defrag_trans_start = 0;
init_completion(&root->kobj_unregister);
root->root_key.objectid = objectid;
root->anon_dev = 0;
@ -1630,6 +1628,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
bool check_ref)
{
struct btrfs_root *root;
struct btrfs_path *path;
struct btrfs_key key;
int ret;
if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
@ -1669,8 +1669,17 @@ again:
if (ret)
goto fail;
ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID,
location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL);
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto fail;
}
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = location->objectid;
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
btrfs_free_path(path);
if (ret < 0)
goto fail;
if (ret == 0)
@ -2232,6 +2241,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
mutex_init(&fs_info->unused_bg_unpin_mutex);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
@ -2496,7 +2506,7 @@ int open_ctree(struct super_block *sb,
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
printk(KERN_ERR "BTRFS: has skinny extents\n");
printk(KERN_INFO "BTRFS: has skinny extents\n");
/*
* flag our filesystem as having big metadata blocks if
@ -2520,7 +2530,7 @@ int open_ctree(struct super_block *sb,
*/
if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
(sectorsize != nodesize)) {
printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
printk(KERN_ERR "BTRFS: unequal leaf/node/sector sizes "
"are not allowed for mixed block groups on %s\n",
sb->s_id);
goto fail_alloc;
@ -2628,12 +2638,12 @@ int open_ctree(struct super_block *sb,
sb->s_blocksize_bits = blksize_bits(sectorsize);
if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id);
printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
goto fail_sb_buffer;
}
if (sectorsize != PAGE_SIZE) {
printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) "
printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
"found on %s\n", (unsigned long)sectorsize, sb->s_id);
goto fail_sb_buffer;
}
@ -2642,7 +2652,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_read_sys_array(tree_root);
mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
printk(KERN_WARNING "BTRFS: failed to read the system "
printk(KERN_ERR "BTRFS: failed to read the system "
"array on %s\n", sb->s_id);
goto fail_sb_buffer;
}
@ -2657,7 +2667,7 @@ int open_ctree(struct super_block *sb,
generation);
if (!chunk_root->node ||
!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
sb->s_id);
goto fail_tree_roots;
}
@ -2669,7 +2679,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_read_chunk_tree(chunk_root);
if (ret) {
printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n",
printk(KERN_ERR "BTRFS: failed to read chunk tree on %s\n",
sb->s_id);
goto fail_tree_roots;
}
@ -2681,7 +2691,7 @@ int open_ctree(struct super_block *sb,
btrfs_close_extra_devices(fs_info, fs_devices, 0);
if (!fs_devices->latest_bdev) {
printk(KERN_CRIT "BTRFS: failed to read devices on %s\n",
printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
sb->s_id);
goto fail_tree_roots;
}
@ -2765,7 +2775,7 @@ retry_root_backup:
ret = btrfs_recover_balance(fs_info);
if (ret) {
printk(KERN_WARNING "BTRFS: failed to recover balance\n");
printk(KERN_ERR "BTRFS: failed to recover balance\n");
goto fail_block_groups;
}
@ -3860,6 +3870,21 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
btrfs_super_log_root(sb));
/*
* Check the lower bound, the alignment and other constraints are
* checked later.
*/
if (btrfs_super_nodesize(sb) < 4096) {
printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
btrfs_super_nodesize(sb));
ret = -EINVAL;
}
if (btrfs_super_sectorsize(sb) < 4096) {
printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
btrfs_super_sectorsize(sb));
ret = -EINVAL;
}
if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n",
fs_info->fsid, sb->dev_item.fsid);
@ -3873,6 +3898,10 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
if (btrfs_super_num_devices(sb) > (1UL << 31))
printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
btrfs_super_num_devices(sb));
if (btrfs_super_num_devices(sb) == 0) {
printk(KERN_ERR "BTRFS: number of devices is 0\n");
ret = -EINVAL;
}
if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n",
@ -3880,6 +3909,25 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
/*
* Obvious sys_chunk_array corruptions, it must hold at least one key
* and one chunk
*/
if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
printk(KERN_ERR "BTRFS: system chunk array too big %u > %u\n",
btrfs_super_sys_array_size(sb),
BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
ret = -EINVAL;
}
if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
+ sizeof(struct btrfs_chunk)) {
printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n",
btrfs_super_sys_array_size(sb),
sizeof(struct btrfs_disk_key)
+ sizeof(struct btrfs_chunk));
ret = -EINVAL;
}
/*
* The generation is a global counter, we'll trust it more than the others
* but it's still possible that it's the one that's wrong.

View File

@ -46,11 +46,11 @@ struct btrfs_fs_devices;
struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
u64 parent_transid);
void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize);
int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
void readahead_tree_block(struct btrfs_root *root, u64 bytenr);
int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
int mirror_num, struct extent_buffer **eb);
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
u64 bytenr);
void clean_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf);
int open_ctree(struct super_block *sb,

View File

@ -74,8 +74,9 @@ enum {
RESERVE_ALLOC_NO_ACCOUNT = 2,
};
static int update_block_group(struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc);
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, int alloc);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
@ -1925,7 +1926,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
*/
ret = 0;
}
kfree(bbio);
btrfs_put_bbio(bbio);
}
if (actual_bytes)
@ -2768,7 +2769,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head;
int ret;
int run_all = count == (unsigned long)-1;
int run_most = 0;
/* We'll clean this up in btrfs_cleanup_transaction */
if (trans->aborted)
@ -2778,10 +2778,8 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
root = root->fs_info->tree_root;
delayed_refs = &trans->transaction->delayed_refs;
if (count == 0) {
if (count == 0)
count = atomic_read(&delayed_refs->num_entries) * 2;
run_most = 1;
}
again:
#ifdef SCRAMBLE_DELAYED_REFS
@ -3315,120 +3313,42 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_block_group_cache *cache;
int err = 0;
struct btrfs_transaction *cur_trans = trans->transaction;
int ret = 0;
struct btrfs_path *path;
u64 last = 0;
if (list_empty(&cur_trans->dirty_bgs))
return 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
again:
while (1) {
cache = btrfs_lookup_first_block_group(root->fs_info, last);
while (cache) {
if (cache->disk_cache_state == BTRFS_DC_CLEAR)
break;
cache = next_block_group(root, cache);
}
if (!cache) {
if (last == 0)
break;
last = 0;
continue;
}
err = cache_save_setup(cache, trans, path);
last = cache->key.objectid + cache->key.offset;
/*
* We don't need the lock here since we are protected by the transaction
* commit. We want to do the cache_save_setup first and then run the
* delayed refs to make sure we have the best chance at doing this all
* in one shot.
*/
while (!list_empty(&cur_trans->dirty_bgs)) {
cache = list_first_entry(&cur_trans->dirty_bgs,
struct btrfs_block_group_cache,
dirty_list);
list_del_init(&cache->dirty_list);
if (cache->disk_cache_state == BTRFS_DC_CLEAR)
cache_save_setup(cache, trans, path);
if (!ret)
ret = btrfs_run_delayed_refs(trans, root,
(unsigned long) -1);
if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP)
btrfs_write_out_cache(root, trans, cache, path);
if (!ret)
ret = write_one_cache_group(trans, root, path, cache);
btrfs_put_block_group(cache);
}
while (1) {
if (last == 0) {
err = btrfs_run_delayed_refs(trans, root,
(unsigned long)-1);
if (err) /* File system offline */
goto out;
}
cache = btrfs_lookup_first_block_group(root->fs_info, last);
while (cache) {
if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
btrfs_put_block_group(cache);
goto again;
}
if (cache->dirty)
break;
cache = next_block_group(root, cache);
}
if (!cache) {
if (last == 0)
break;
last = 0;
continue;
}
if (cache->disk_cache_state == BTRFS_DC_SETUP)
cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
cache->dirty = 0;
last = cache->key.objectid + cache->key.offset;
err = write_one_cache_group(trans, root, path, cache);
btrfs_put_block_group(cache);
if (err) /* File system offline */
goto out;
}
while (1) {
/*
* I don't think this is needed since we're just marking our
* preallocated extent as written, but just in case it can't
* hurt.
*/
if (last == 0) {
err = btrfs_run_delayed_refs(trans, root,
(unsigned long)-1);
if (err) /* File system offline */
goto out;
}
cache = btrfs_lookup_first_block_group(root->fs_info, last);
while (cache) {
/*
* Really this shouldn't happen, but it could if we
* couldn't write the entire preallocated extent and
* splitting the extent resulted in a new block.
*/
if (cache->dirty) {
btrfs_put_block_group(cache);
goto again;
}
if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
break;
cache = next_block_group(root, cache);
}
if (!cache) {
if (last == 0)
break;
last = 0;
continue;
}
err = btrfs_write_out_cache(root, trans, cache, path);
/*
* If we didn't have an error then the cache state is still
* NEED_WRITE, so we can set it to WRITTEN.
*/
if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
cache->disk_cache_state = BTRFS_DC_WRITTEN;
last = cache->key.objectid + cache->key.offset;
btrfs_put_block_group(cache);
}
out:
btrfs_free_path(path);
return err;
return ret;
}
int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
@ -5043,19 +4963,25 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
/**
* drop_outstanding_extent - drop an outstanding extent
* @inode: the inode we're dropping the extent for
* @num_bytes: the number of bytes we're relaseing.
*
* This is called when we are freeing up an outstanding extent, either called
* after an error or after an extent is written. This will return the number of
* reserved extents that need to be freed. This must be called with
* BTRFS_I(inode)->lock held.
*/
static unsigned drop_outstanding_extent(struct inode *inode)
static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
{
unsigned drop_inode_space = 0;
unsigned dropped_extents = 0;
unsigned num_extents = 0;
BUG_ON(!BTRFS_I(inode)->outstanding_extents);
BTRFS_I(inode)->outstanding_extents--;
num_extents = (unsigned)div64_u64(num_bytes +
BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
ASSERT(num_extents);
ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
BTRFS_I(inode)->outstanding_extents -= num_extents;
if (BTRFS_I(inode)->outstanding_extents == 0 &&
test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
@ -5226,7 +5152,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
out_fail:
spin_lock(&BTRFS_I(inode)->lock);
dropped = drop_outstanding_extent(inode);
dropped = drop_outstanding_extent(inode, num_bytes);
/*
* If the inodes csum_bytes is the same as the original
* csum_bytes then we know we haven't raced with any free()ers
@ -5305,7 +5231,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
num_bytes = ALIGN(num_bytes, root->sectorsize);
spin_lock(&BTRFS_I(inode)->lock);
dropped = drop_outstanding_extent(inode);
dropped = drop_outstanding_extent(inode, num_bytes);
if (num_bytes)
to_free = calc_csum_metadata_size(inode, num_bytes, 0);
@ -5375,8 +5301,9 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
btrfs_free_reserved_data_space(inode, num_bytes);
}
static int update_block_group(struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc)
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, int alloc)
{
struct btrfs_block_group_cache *cache = NULL;
struct btrfs_fs_info *info = root->fs_info;
@ -5414,6 +5341,14 @@ static int update_block_group(struct btrfs_root *root,
if (!alloc && cache->cached == BTRFS_CACHE_NO)
cache_block_group(cache, 1);
spin_lock(&trans->transaction->dirty_bgs_lock);
if (list_empty(&cache->dirty_list)) {
list_add_tail(&cache->dirty_list,
&trans->transaction->dirty_bgs);
btrfs_get_block_group(cache);
}
spin_unlock(&trans->transaction->dirty_bgs_lock);
byte_in_group = bytenr - cache->key.objectid;
WARN_ON(byte_in_group > cache->key.offset);
@ -5424,7 +5359,6 @@ static int update_block_group(struct btrfs_root *root,
cache->disk_cache_state < BTRFS_DC_CLEAR)
cache->disk_cache_state = BTRFS_DC_CLEAR;
cache->dirty = 1;
old_val = btrfs_block_group_used(&cache->item);
num_bytes = min(total, cache->key.offset - byte_in_group);
if (alloc) {
@ -5807,10 +5741,13 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
unpin = &fs_info->freed_extents[0];
while (1) {
mutex_lock(&fs_info->unused_bg_unpin_mutex);
ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY, NULL);
if (ret)
if (ret) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
break;
}
if (btrfs_test_opt(root, DISCARD))
ret = btrfs_discard_extent(root, start,
@ -5818,6 +5755,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
clear_extent_dirty(unpin, start, end, GFP_NOFS);
unpin_extent_range(root, start, end, true);
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
cond_resched();
}
@ -6103,7 +6041,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
}
ret = update_block_group(root, bytenr, num_bytes, 0);
ret = update_block_group(trans, root, bytenr, num_bytes, 0);
if (ret) {
btrfs_abort_transaction(trans, extent_root, ret);
goto out;
@ -6205,7 +6143,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct extent_buffer *buf,
u64 parent, int last_ref)
{
struct btrfs_block_group_cache *cache = NULL;
int pin = 1;
int ret;
@ -6221,17 +6158,20 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (!last_ref)
return;
cache = btrfs_lookup_block_group(root->fs_info, buf->start);
if (btrfs_header_generation(buf) == trans->transid) {
struct btrfs_block_group_cache *cache;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ret = check_ref_cleanup(trans, root, buf->start);
if (!ret)
goto out;
}
cache = btrfs_lookup_block_group(root->fs_info, buf->start);
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
pin_down_extent(root, cache, buf->start, buf->len, 1);
btrfs_put_block_group(cache);
goto out;
}
@ -6239,6 +6179,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
btrfs_add_free_space(cache, buf->start, buf->len);
btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
pin = 0;
}
@ -6253,7 +6194,6 @@ out:
* anymore.
*/
clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
btrfs_put_block_group(cache);
}
/* Can return -ENOMEM */
@ -7063,7 +7003,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
if (ret)
return ret;
ret = update_block_group(root, ins->objectid, ins->offset, 1);
ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
ins->objectid, ins->offset);
@ -7152,7 +7092,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
return ret;
}
ret = update_block_group(root, ins->objectid, root->nodesize, 1);
ret = update_block_group(trans, root, ins->objectid, root->nodesize,
1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
ins->objectid, ins->offset);
@ -7217,11 +7158,11 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 bytenr, u32 blocksize, int level)
u64 bytenr, int level)
{
struct extent_buffer *buf;
buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
return ERR_PTR(-ENOMEM);
btrfs_set_header_generation(buf, trans->transid);
@ -7340,7 +7281,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
if (btrfs_test_is_dummy_root(root)) {
buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
blocksize, level);
level);
if (!IS_ERR(buf))
root->alloc_bytenr += blocksize;
return buf;
@ -7357,8 +7298,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
return ERR_PTR(ret);
}
buf = btrfs_init_new_buffer(trans, root, ins.objectid,
blocksize, level);
buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
BUG_ON(IS_ERR(buf)); /* -ENOMEM */
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
@ -7487,7 +7427,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
continue;
}
reada:
readahead_tree_block(root, bytenr, blocksize);
readahead_tree_block(root, bytenr);
nread++;
}
wc->reada_slot = slot;
@ -7828,7 +7768,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
next = btrfs_find_tree_block(root, bytenr);
if (!next) {
next = btrfs_find_create_tree_block(root, bytenr, blocksize);
next = btrfs_find_create_tree_block(root, bytenr);
if (!next)
return -ENOMEM;
btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
@ -8548,14 +8488,6 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
if (IS_ERR(trans))
return PTR_ERR(trans);
alloc_flags = update_block_group_flags(root, cache->flags);
if (alloc_flags != cache->flags) {
ret = do_chunk_alloc(trans, root, alloc_flags,
CHUNK_ALLOC_FORCE);
if (ret < 0)
goto out;
}
ret = set_block_group_ro(cache, 0);
if (!ret)
goto out;
@ -8566,6 +8498,11 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
goto out;
ret = set_block_group_ro(cache, 0);
out:
if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
alloc_flags = update_block_group_flags(root, cache->flags);
check_system_chunk(trans, root, alloc_flags);
}
btrfs_end_transaction(trans, root);
return ret;
}
@ -9005,6 +8942,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
INIT_LIST_HEAD(&cache->cluster_list);
INIT_LIST_HEAD(&cache->bg_list);
INIT_LIST_HEAD(&cache->ro_list);
INIT_LIST_HEAD(&cache->dirty_list);
btrfs_init_free_space_ctl(cache);
atomic_set(&cache->trimming, 0);
@ -9068,9 +9006,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
* b) Setting 'dirty flag' makes sure that we flush
* the new space cache info onto disk.
*/
cache->disk_cache_state = BTRFS_DC_CLEAR;
if (btrfs_test_opt(root, SPACE_CACHE))
cache->dirty = 1;
cache->disk_cache_state = BTRFS_DC_CLEAR;
}
read_extent_buffer(leaf, &cache->item,
@ -9460,6 +9397,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
}
}
spin_lock(&trans->transaction->dirty_bgs_lock);
if (!list_empty(&block_group->dirty_list)) {
list_del_init(&block_group->dirty_list);
btrfs_put_block_group(block_group);
}
spin_unlock(&trans->transaction->dirty_bgs_lock);
btrfs_remove_free_space_cache(block_group);
spin_lock(&block_group->space_info->lock);
@ -9611,7 +9555,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* Want to do this before we do anything else so we can recover
* properly if we fail to join the transaction.
*/
trans = btrfs_join_transaction(root);
/* 1 for btrfs_orphan_reserve_metadata() */
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
btrfs_set_block_group_rw(root, block_group);
ret = PTR_ERR(trans);
@ -9624,18 +9569,33 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
*/
start = block_group->key.objectid;
end = start + block_group->key.offset - 1;
/*
* Hold the unused_bg_unpin_mutex lock to avoid racing with
* btrfs_finish_extent_commit(). If we are at transaction N,
* another task might be running finish_extent_commit() for the
* previous transaction N - 1, and have seen a range belonging
* to the block group in freed_extents[] before we were able to
* clear the whole block group range from freed_extents[]. This
* means that task can lookup for the block group after we
* unpinned it from freed_extents[] and removed it, leading to
* a BUG_ON() at btrfs_unpin_extent_range().
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
EXTENT_DIRTY, GFP_NOFS);
if (ret) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
btrfs_set_block_group_rw(root, block_group);
goto end_trans;
}
ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
EXTENT_DIRTY, GFP_NOFS);
if (ret) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
btrfs_set_block_group_rw(root, block_group);
goto end_trans;
}
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
/* Reset pinned so btrfs_put_block_group doesn't complain */
block_group->pinned = 0;

View File

@ -64,7 +64,7 @@ void btrfs_leak_debug_check(void)
while (!list_empty(&states)) {
state = list_entry(states.next, struct extent_state, leak_list);
pr_err("BTRFS: state leak: start %llu end %llu state %lu in tree %d refs %d\n",
pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
state->start, state->end, state->state,
extent_state_in_tree(state),
atomic_read(&state->refs));
@ -396,21 +396,21 @@ static void merge_state(struct extent_io_tree *tree,
}
static void set_state_cb(struct extent_io_tree *tree,
struct extent_state *state, unsigned long *bits)
struct extent_state *state, unsigned *bits)
{
if (tree->ops && tree->ops->set_bit_hook)
tree->ops->set_bit_hook(tree->mapping->host, state, bits);
}
static void clear_state_cb(struct extent_io_tree *tree,
struct extent_state *state, unsigned long *bits)
struct extent_state *state, unsigned *bits)
{
if (tree->ops && tree->ops->clear_bit_hook)
tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
}
static void set_state_bits(struct extent_io_tree *tree,
struct extent_state *state, unsigned long *bits);
struct extent_state *state, unsigned *bits);
/*
* insert an extent_state struct into the tree. 'bits' are set on the
@ -426,7 +426,7 @@ static int insert_state(struct extent_io_tree *tree,
struct extent_state *state, u64 start, u64 end,
struct rb_node ***p,
struct rb_node **parent,
unsigned long *bits)
unsigned *bits)
{
struct rb_node *node;
@ -511,10 +511,10 @@ static struct extent_state *next_state(struct extent_state *state)
*/
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
struct extent_state *state,
unsigned long *bits, int wake)
unsigned *bits, int wake)
{
struct extent_state *next;
unsigned long bits_to_clear = *bits & ~EXTENT_CTLBITS;
unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
@ -570,7 +570,7 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
* This takes the tree lock, and returns 0 on success and < 0 on error.
*/
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, int wake, int delete,
unsigned bits, int wake, int delete,
struct extent_state **cached_state,
gfp_t mask)
{
@ -789,9 +789,9 @@ out:
static void set_state_bits(struct extent_io_tree *tree,
struct extent_state *state,
unsigned long *bits)
unsigned *bits)
{
unsigned long bits_to_set = *bits & ~EXTENT_CTLBITS;
unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
set_state_cb(tree, state, bits);
if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
@ -803,7 +803,7 @@ static void set_state_bits(struct extent_io_tree *tree,
static void cache_state_if_flags(struct extent_state *state,
struct extent_state **cached_ptr,
const u64 flags)
unsigned flags)
{
if (cached_ptr && !(*cached_ptr)) {
if (!flags || (state->state & flags)) {
@ -833,7 +833,7 @@ static void cache_state(struct extent_state *state,
static int __must_check
__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, unsigned long exclusive_bits,
unsigned bits, unsigned exclusive_bits,
u64 *failed_start, struct extent_state **cached_state,
gfp_t mask)
{
@ -1034,7 +1034,7 @@ search_again:
}
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, u64 * failed_start,
unsigned bits, u64 * failed_start,
struct extent_state **cached_state, gfp_t mask)
{
return __set_extent_bit(tree, start, end, bits, 0, failed_start,
@ -1060,7 +1060,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
* boundary bits like LOCK.
*/
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, unsigned long clear_bits,
unsigned bits, unsigned clear_bits,
struct extent_state **cached_state, gfp_t mask)
{
struct extent_state *state;
@ -1268,14 +1268,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
}
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, gfp_t mask)
unsigned bits, gfp_t mask)
{
return set_extent_bit(tree, start, end, bits, NULL,
NULL, mask);
}
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, gfp_t mask)
unsigned bits, gfp_t mask)
{
return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
}
@ -1330,10 +1330,11 @@ int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
* us if waiting is desired.
*/
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, struct extent_state **cached_state)
unsigned bits, struct extent_state **cached_state)
{
int err;
u64 failed_start;
while (1) {
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
EXTENT_LOCKED, &failed_start,
@ -1440,7 +1441,7 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
*/
static struct extent_state *
find_first_extent_bit_state(struct extent_io_tree *tree,
u64 start, unsigned long bits)
u64 start, unsigned bits)
{
struct rb_node *node;
struct extent_state *state;
@ -1474,7 +1475,7 @@ out:
* If nothing was found, 1 is returned. If found something, return 0.
*/
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned long bits,
u64 *start_ret, u64 *end_ret, unsigned bits,
struct extent_state **cached_state)
{
struct extent_state *state;
@ -1753,7 +1754,7 @@ out_failed:
int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
struct page *locked_page,
unsigned long clear_bits,
unsigned clear_bits,
unsigned long page_ops)
{
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
@ -1810,7 +1811,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
*/
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end, u64 max_bytes,
unsigned long bits, int contig)
unsigned bits, int contig)
{
struct rb_node *node;
struct extent_state *state;
@ -1928,7 +1929,7 @@ out:
* range is found set.
*/
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, int filled, struct extent_state *cached)
unsigned bits, int filled, struct extent_state *cached)
{
struct extent_state *state = NULL;
struct rb_node *node;
@ -2057,7 +2058,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
sector = bbio->stripes[mirror_num-1].physical >> 9;
bio->bi_iter.bi_sector = sector;
dev = bbio->stripes[mirror_num-1].dev;
kfree(bbio);
btrfs_put_bbio(bbio);
if (!dev || !dev->bdev || !dev->writeable) {
bio_put(bio);
return -EIO;
@ -2816,8 +2817,10 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
bio_add_page(bio, page, page_size, offset) < page_size) {
ret = submit_one_bio(rw, bio, mirror_num,
prev_bio_flags);
if (ret < 0)
if (ret < 0) {
*bio_ret = NULL;
return ret;
}
bio = NULL;
} else {
return 0;
@ -3239,7 +3242,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
page,
&delalloc_start,
&delalloc_end,
128 * 1024 * 1024);
BTRFS_MAX_EXTENT_SIZE);
if (nr_delalloc == 0) {
delalloc_start = delalloc_end + 1;
continue;
@ -4598,11 +4601,11 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
static struct extent_buffer *
__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
unsigned long len, gfp_t mask)
unsigned long len)
{
struct extent_buffer *eb = NULL;
eb = kmem_cache_zalloc(extent_buffer_cache, mask);
eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS);
if (eb == NULL)
return NULL;
eb->start = start;
@ -4643,7 +4646,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
struct extent_buffer *new;
unsigned long num_pages = num_extent_pages(src->start, src->len);
new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS);
new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
if (new == NULL)
return NULL;
@ -4666,13 +4669,26 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
return new;
}
struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start)
{
struct extent_buffer *eb;
unsigned long num_pages = num_extent_pages(0, len);
unsigned long len;
unsigned long num_pages;
unsigned long i;
eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS);
if (!fs_info) {
/*
* Called only from tests that don't always have a fs_info
* available, but we know that nodesize is 4096
*/
len = 4096;
} else {
len = fs_info->tree_root->nodesize;
}
num_pages = num_extent_pages(0, len);
eb = __alloc_extent_buffer(fs_info, start, len);
if (!eb)
return NULL;
@ -4762,7 +4778,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len)
u64 start)
{
struct extent_buffer *eb, *exists = NULL;
int ret;
@ -4770,7 +4786,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
eb = find_extent_buffer(fs_info, start);
if (eb)
return eb;
eb = alloc_dummy_extent_buffer(start, len);
eb = alloc_dummy_extent_buffer(fs_info, start);
if (!eb)
return NULL;
eb->fs_info = fs_info;
@ -4808,8 +4824,9 @@ free_eb:
#endif
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len)
u64 start)
{
unsigned long len = fs_info->tree_root->nodesize;
unsigned long num_pages = num_extent_pages(start, len);
unsigned long i;
unsigned long index = start >> PAGE_CACHE_SHIFT;
@ -4824,7 +4841,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
if (eb)
return eb;
eb = __alloc_extent_buffer(fs_info, start, len, GFP_NOFS);
eb = __alloc_extent_buffer(fs_info, start, len);
if (!eb)
return NULL;

View File

@ -4,22 +4,22 @@
#include <linux/rbtree.h>
/* bits for the extent state */
#define EXTENT_DIRTY 1
#define EXTENT_WRITEBACK (1 << 1)
#define EXTENT_UPTODATE (1 << 2)
#define EXTENT_LOCKED (1 << 3)
#define EXTENT_NEW (1 << 4)
#define EXTENT_DELALLOC (1 << 5)
#define EXTENT_DEFRAG (1 << 6)
#define EXTENT_BOUNDARY (1 << 9)
#define EXTENT_NODATASUM (1 << 10)
#define EXTENT_DO_ACCOUNTING (1 << 11)
#define EXTENT_FIRST_DELALLOC (1 << 12)
#define EXTENT_NEED_WAIT (1 << 13)
#define EXTENT_DAMAGED (1 << 14)
#define EXTENT_NORESERVE (1 << 15)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
#define EXTENT_DIRTY (1U << 0)
#define EXTENT_WRITEBACK (1U << 1)
#define EXTENT_UPTODATE (1U << 2)
#define EXTENT_LOCKED (1U << 3)
#define EXTENT_NEW (1U << 4)
#define EXTENT_DELALLOC (1U << 5)
#define EXTENT_DEFRAG (1U << 6)
#define EXTENT_BOUNDARY (1U << 9)
#define EXTENT_NODATASUM (1U << 10)
#define EXTENT_DO_ACCOUNTING (1U << 11)
#define EXTENT_FIRST_DELALLOC (1U << 12)
#define EXTENT_NEED_WAIT (1U << 13)
#define EXTENT_DAMAGED (1U << 14)
#define EXTENT_NORESERVE (1U << 15)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
/*
* flags for bio submission. The high bits indicate the compression
@ -81,9 +81,9 @@ struct extent_io_ops {
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
unsigned long *bits);
unsigned *bits);
void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
unsigned long *bits);
unsigned *bits);
void (*merge_extent_hook)(struct inode *inode,
struct extent_state *new,
struct extent_state *other);
@ -108,7 +108,7 @@ struct extent_state {
/* ADD NEW ELEMENTS AFTER THIS */
wait_queue_head_t wq;
atomic_t refs;
unsigned long state;
unsigned state;
/* for use by the FS */
u64 private;
@ -188,7 +188,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
int try_release_extent_buffer(struct page *page);
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, struct extent_state **cached);
unsigned bits, struct extent_state **cached);
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached, gfp_t mask);
@ -202,21 +202,21 @@ void extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
u64 max_bytes, unsigned long bits, int contig);
u64 max_bytes, unsigned bits, int contig);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, int filled,
unsigned bits, int filled,
struct extent_state *cached_state);
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, gfp_t mask);
unsigned bits, gfp_t mask);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, int wake, int delete,
unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask);
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, gfp_t mask);
unsigned bits, gfp_t mask);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, u64 *failed_start,
unsigned bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
@ -229,14 +229,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned long bits, unsigned long clear_bits,
unsigned bits, unsigned clear_bits,
struct extent_state **cached_state, gfp_t mask);
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned long bits,
u64 *start_ret, u64 *end_ret, unsigned bits,
struct extent_state **cached_state);
int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset);
@ -262,8 +262,9 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len);
u64 start);
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
@ -322,7 +323,7 @@ int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
struct page *locked_page,
unsigned long bits_to_clear,
unsigned bits_to_clear,
unsigned long page_ops);
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
@ -377,5 +378,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode,
u64 *end, u64 max_bytes);
#endif
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len);
u64 start);
#endif

View File

@ -651,15 +651,13 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
struct io_ctl io_ctl;
struct btrfs_key key;
struct btrfs_free_space *e, *n;
struct list_head bitmaps;
LIST_HEAD(bitmaps);
u64 num_entries;
u64 num_bitmaps;
u64 generation;
u8 type;
int ret = 0;
INIT_LIST_HEAD(&bitmaps);
/* Nothing in the space cache, goodbye */
if (!i_size_read(inode))
return 0;
@ -1243,6 +1241,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct inode *inode;
int ret = 0;
enum btrfs_disk_cache_state dcs = BTRFS_DC_WRITTEN;
root = root->fs_info->tree_root;
@ -1266,9 +1265,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
path, block_group->key.objectid);
if (ret) {
spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_ERROR;
spin_unlock(&block_group->lock);
dcs = BTRFS_DC_ERROR;
ret = 0;
#ifdef DEBUG
btrfs_err(root->fs_info,
@ -1277,6 +1274,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
#endif
}
spin_lock(&block_group->lock);
block_group->disk_cache_state = dcs;
spin_unlock(&block_group->lock);
iput(inode);
return ret;
}
@ -2903,7 +2903,6 @@ int btrfs_find_space_cluster(struct btrfs_root *root,
trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
min_bytes);
INIT_LIST_HEAD(&bitmaps);
ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
bytes + empty_size,
cont1_bytes, min_bytes);

View File

@ -344,6 +344,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
return -ENOMEM;
path->leave_spinning = 1;
path->skip_release_on_error = 1;
ret = btrfs_insert_empty_item(trans, root, path, &key,
ins_len);
if (ret == -EEXIST) {
@ -362,8 +363,12 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
ptr = (unsigned long)(ref + 1);
ret = 0;
} else if (ret < 0) {
if (ret == -EOVERFLOW)
ret = -EMLINK;
if (ret == -EOVERFLOW) {
if (find_name_in_backref(path, name, name_len, &ref))
ret = -EEXIST;
else
ret = -EMLINK;
}
goto out;
} else {
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],

View File

@ -1530,10 +1530,45 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
static void btrfs_split_extent_hook(struct inode *inode,
struct extent_state *orig, u64 split)
{
u64 size;
/* not delalloc, ignore it */
if (!(orig->state & EXTENT_DELALLOC))
return;
size = orig->end - orig->start + 1;
if (size > BTRFS_MAX_EXTENT_SIZE) {
u64 num_extents;
u64 new_size;
/*
* We need the largest size of the remaining extent to see if we
* need to add a new outstanding extent. Think of the following
* case
*
* [MEAX_EXTENT_SIZEx2 - 4k][4k]
*
* The new_size would just be 4k and we'd think we had enough
* outstanding extents for this if we only took one side of the
* split, same goes for the other direction. We need to see if
* the larger size still is the same amount of extents as the
* original size, because if it is we need to add a new
* outstanding extent. But if we split up and the larger size
* is less than the original then we are good to go since we've
* already accounted for the extra extent in our original
* accounting.
*/
new_size = orig->end - split + 1;
if ((split - orig->start) > new_size)
new_size = split - orig->start;
num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE) < num_extents)
return;
}
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
@ -1549,10 +1584,34 @@ static void btrfs_merge_extent_hook(struct inode *inode,
struct extent_state *new,
struct extent_state *other)
{
u64 new_size, old_size;
u64 num_extents;
/* not delalloc, ignore it */
if (!(other->state & EXTENT_DELALLOC))
return;
old_size = other->end - other->start + 1;
new_size = old_size + (new->end - new->start + 1);
/* we're not bigger than the max, unreserve the space and go */
if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->lock);
return;
}
/*
* If we grew by another max_extent, just return, we want to keep that
* reserved amount.
*/
num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE) > num_extents)
return;
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->lock);
@ -1604,7 +1663,7 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
* have pending delalloc work to be done.
*/
static void btrfs_set_bit_hook(struct inode *inode,
struct extent_state *state, unsigned long *bits)
struct extent_state *state, unsigned *bits)
{
if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
@ -1645,9 +1704,11 @@ static void btrfs_set_bit_hook(struct inode *inode,
*/
static void btrfs_clear_bit_hook(struct inode *inode,
struct extent_state *state,
unsigned long *bits)
unsigned *bits)
{
u64 len = state->end + 1 - state->start;
u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1,
BTRFS_MAX_EXTENT_SIZE);
spin_lock(&BTRFS_I(inode)->lock);
if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
@ -1667,7 +1728,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
*bits &= ~EXTENT_FIRST_DELALLOC;
} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--;
BTRFS_I(inode)->outstanding_extents -= num_extents;
spin_unlock(&BTRFS_I(inode)->lock);
}
@ -2945,7 +3006,7 @@ static int __readpage_endio_check(struct inode *inode,
return 0;
zeroit:
if (__ratelimit(&_rs))
btrfs_info(BTRFS_I(inode)->root->fs_info,
btrfs_warn(BTRFS_I(inode)->root->fs_info,
"csum failed ino %llu off %llu csum %u expected csum %u",
btrfs_ino(inode), start, csum, csum_expected);
memset(kaddr + pgoff, 1, len);
@ -3407,7 +3468,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
out:
if (ret)
btrfs_crit(root->fs_info,
btrfs_err(root->fs_info,
"could not do orphan cleanup %d", ret);
btrfs_free_path(path);
return ret;
@ -3490,7 +3551,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_inode_item *inode_item;
struct btrfs_timespec *tspec;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key location;
unsigned long ptr;
@ -3527,17 +3587,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
tspec = btrfs_inode_atime(inode_item);
inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
tspec = btrfs_inode_mtime(inode_item);
inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
tspec = btrfs_inode_ctime(inode_item);
inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
BTRFS_I(inode)->i_otime.tv_sec =
btrfs_timespec_sec(leaf, &inode_item->otime);
BTRFS_I(inode)->i_otime.tv_nsec =
btrfs_timespec_nsec(leaf, &inode_item->otime);
inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
@ -3656,21 +3718,26 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
btrfs_set_token_timespec_sec(leaf, &item->atime,
inode->i_atime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
btrfs_set_token_timespec_nsec(leaf, &item->atime,
inode->i_atime.tv_nsec, &token);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
btrfs_set_token_timespec_sec(leaf, &item->mtime,
inode->i_mtime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
btrfs_set_token_timespec_nsec(leaf, &item->mtime,
inode->i_mtime.tv_nsec, &token);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
btrfs_set_token_timespec_sec(leaf, &item->ctime,
inode->i_ctime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
btrfs_set_token_timespec_nsec(leaf, &item->ctime,
inode->i_ctime.tv_nsec, &token);
btrfs_set_token_timespec_sec(leaf, &item->otime,
BTRFS_I(inode)->i_otime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, &item->otime,
BTRFS_I(inode)->i_otime.tv_nsec, &token);
btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
&token);
btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
@ -5007,6 +5074,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
struct btrfs_root *new_root;
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
struct btrfs_key key;
int ret;
int err = 0;
@ -5017,9 +5085,12 @@ static int fixup_tree_root_location(struct btrfs_root *root,
}
err = -ENOENT;
ret = btrfs_find_item(root->fs_info->tree_root, path,
BTRFS_I(dir)->root->root_key.objectid,
location->objectid, BTRFS_ROOT_REF_KEY, NULL);
key.objectid = BTRFS_I(dir)->root->root_key.objectid;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = location->objectid;
ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, path,
0, 0);
if (ret) {
if (ret < 0)
err = ret;
@ -5258,7 +5329,10 @@ static struct inode *new_simple_dir(struct super_block *s,
inode->i_op = &btrfs_dir_ro_inode_operations;
inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_mtime = CURRENT_TIME;
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
return inode;
}
@ -5826,7 +5900,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode_init_owner(inode, dir, mode);
inode_set_bytes(inode, 0);
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_mtime = CURRENT_TIME;
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
@ -7134,11 +7213,12 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
u64 start = iblock << inode->i_blkbits;
u64 lockstart, lockend;
u64 len = bh_result->b_size;
u64 orig_len = len;
int unlock_bits = EXTENT_LOCKED;
int ret = 0;
if (create)
unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
unlock_bits |= EXTENT_DIRTY;
else
len = min_t(u64, len, root->sectorsize);
@ -7269,14 +7349,12 @@ unlock:
if (start + len > i_size_read(inode))
i_size_write(inode, start + len);
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockstart + len - 1, EXTENT_DELALLOC, NULL,
&cached_state, GFP_NOFS);
BUG_ON(ret);
if (len < orig_len) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
btrfs_free_reserved_data_space(inode, len);
}
/*
@ -7805,8 +7883,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
}
/* async crcs make it difficult to collect full stripe writes. */
if (btrfs_get_alloc_profile(root, 1) &
(BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))
if (btrfs_get_alloc_profile(root, 1) & BTRFS_BLOCK_GROUP_RAID56_MASK)
async_submit = 0;
else
async_submit = 1;
@ -8053,8 +8130,6 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode,
count - (size_t)ret);
else
btrfs_delalloc_release_metadata(inode, 0);
}
out:
if (wakeup)
@ -8575,6 +8650,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->delayed_node = NULL;
ei->i_otime.tv_sec = 0;
ei->i_otime.tv_nsec = 0;
inode = &ei->vfs_inode;
extent_map_tree_init(&ei->extent_tree);
extent_io_tree_init(&ei->io_tree, &inode->i_data);

View File

@ -1431,9 +1431,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
qgroup = u64_to_ptr(unode->aux);
qgroup->rfer += sign * oper->num_bytes;
qgroup->rfer_cmpr += sign * oper->num_bytes;
WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
qgroup->excl += sign * oper->num_bytes;
if (sign < 0)
WARN_ON(qgroup->excl < oper->num_bytes);
qgroup->excl_cmpr += sign * oper->num_bytes;
qgroup_dirty(fs_info, qgroup);

View File

@ -58,15 +58,6 @@
*/
#define RBIO_CACHE_READY_BIT 3
/*
* bbio and raid_map is managed by the caller, so we shouldn't free
* them here. And besides that, all rbios with this flag should not
* be cached, because we need raid_map to check the rbios' stripe
* is the same or not, but it is very likely that the caller has
* free raid_map, so don't cache those rbios.
*/
#define RBIO_HOLD_BBIO_MAP_BIT 4
#define RBIO_CACHE_SIZE 1024
enum btrfs_rbio_ops {
@ -79,13 +70,6 @@ struct btrfs_raid_bio {
struct btrfs_fs_info *fs_info;
struct btrfs_bio *bbio;
/*
* logical block numbers for the start of each stripe
* The last one or two are p/q. These are sorted,
* so raid_map[0] is the start of our full stripe
*/
u64 *raid_map;
/* while we're doing rmw on a stripe
* we put it into a hash table so we can
* lock the stripe and merge more rbios
@ -303,7 +287,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
*/
static int rbio_bucket(struct btrfs_raid_bio *rbio)
{
u64 num = rbio->raid_map[0];
u64 num = rbio->bbio->raid_map[0];
/*
* we shift down quite a bit. We're using byte
@ -606,8 +590,8 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
test_bit(RBIO_CACHE_BIT, &cur->flags))
return 0;
if (last->raid_map[0] !=
cur->raid_map[0])
if (last->bbio->raid_map[0] !=
cur->bbio->raid_map[0])
return 0;
/* we can't merge with different operations */
@ -689,7 +673,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
spin_lock_irqsave(&h->lock, flags);
list_for_each_entry(cur, &h->hash_list, hash_list) {
walk++;
if (cur->raid_map[0] == rbio->raid_map[0]) {
if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) {
spin_lock(&cur->bio_list_lock);
/* can we steal this cached rbio's pages? */
@ -841,21 +825,6 @@ done_nolock:
remove_rbio_from_cache(rbio);
}
static inline void
__free_bbio_and_raid_map(struct btrfs_bio *bbio, u64 *raid_map, int need)
{
if (need) {
kfree(raid_map);
kfree(bbio);
}
}
static inline void free_bbio_and_raid_map(struct btrfs_raid_bio *rbio)
{
__free_bbio_and_raid_map(rbio->bbio, rbio->raid_map,
!test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags));
}
static void __free_raid_bio(struct btrfs_raid_bio *rbio)
{
int i;
@ -875,8 +844,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
}
}
free_bbio_and_raid_map(rbio);
btrfs_put_bbio(rbio->bbio);
kfree(rbio);
}
@ -985,8 +953,7 @@ static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
* this does not allocate any pages for rbio->pages.
*/
static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
struct btrfs_bio *bbio, u64 *raid_map,
u64 stripe_len)
struct btrfs_bio *bbio, u64 stripe_len)
{
struct btrfs_raid_bio *rbio;
int nr_data = 0;
@ -1007,7 +974,6 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
INIT_LIST_HEAD(&rbio->stripe_cache);
INIT_LIST_HEAD(&rbio->hash_list);
rbio->bbio = bbio;
rbio->raid_map = raid_map;
rbio->fs_info = root->fs_info;
rbio->stripe_len = stripe_len;
rbio->nr_pages = num_pages;
@ -1028,10 +994,12 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
rbio->bio_pages = p + sizeof(struct page *) * num_pages;
rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2;
if (raid_map[real_stripes - 1] == RAID6_Q_STRIPE)
if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
nr_data = real_stripes - 1;
else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
nr_data = real_stripes - 2;
else
nr_data = real_stripes - 1;
BUG();
rbio->nr_data = nr_data;
return rbio;
@ -1182,7 +1150,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
spin_lock_irq(&rbio->bio_list_lock);
bio_list_for_each(bio, &rbio->bio_list) {
start = (u64)bio->bi_iter.bi_sector << 9;
stripe_offset = start - rbio->raid_map[0];
stripe_offset = start - rbio->bbio->raid_map[0];
page_index = stripe_offset >> PAGE_CACHE_SHIFT;
for (i = 0; i < bio->bi_vcnt; i++) {
@ -1402,7 +1370,7 @@ static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
logical <<= 9;
for (i = 0; i < rbio->nr_data; i++) {
stripe_start = rbio->raid_map[i];
stripe_start = rbio->bbio->raid_map[i];
if (logical >= stripe_start &&
logical < stripe_start + rbio->stripe_len) {
return i;
@ -1776,17 +1744,16 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
* our main entry point for writes from the rest of the FS.
*/
int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map,
u64 stripe_len)
struct btrfs_bio *bbio, u64 stripe_len)
{
struct btrfs_raid_bio *rbio;
struct btrfs_plug_cb *plug = NULL;
struct blk_plug_cb *cb;
int ret;
rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
rbio = alloc_rbio(root, bbio, stripe_len);
if (IS_ERR(rbio)) {
__free_bbio_and_raid_map(bbio, raid_map, 1);
btrfs_put_bbio(bbio);
return PTR_ERR(rbio);
}
bio_list_add(&rbio->bio_list, bio);
@ -1885,9 +1852,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
}
/* all raid6 handling here */
if (rbio->raid_map[rbio->real_stripes - 1] ==
RAID6_Q_STRIPE) {
if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) {
/*
* single failure, rebuild from parity raid5
* style
@ -1922,8 +1887,9 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
* here due to a crc mismatch and we can't give them the
* data they want
*/
if (rbio->raid_map[failb] == RAID6_Q_STRIPE) {
if (rbio->raid_map[faila] == RAID5_P_STRIPE) {
if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
if (rbio->bbio->raid_map[faila] ==
RAID5_P_STRIPE) {
err = -EIO;
goto cleanup;
}
@ -1934,7 +1900,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
goto pstripe;
}
if (rbio->raid_map[failb] == RAID5_P_STRIPE) {
if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) {
raid6_datap_recov(rbio->real_stripes,
PAGE_SIZE, faila, pointers);
} else {
@ -2001,8 +1967,7 @@ cleanup:
cleanup_io:
if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
if (err == 0 &&
!test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags))
if (err == 0)
cache_rbio_pages(rbio);
else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@ -2156,15 +2121,16 @@ cleanup:
* of the drive.
*/
int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map,
u64 stripe_len, int mirror_num, int generic_io)
struct btrfs_bio *bbio, u64 stripe_len,
int mirror_num, int generic_io)
{
struct btrfs_raid_bio *rbio;
int ret;
rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
rbio = alloc_rbio(root, bbio, stripe_len);
if (IS_ERR(rbio)) {
__free_bbio_and_raid_map(bbio, raid_map, generic_io);
if (generic_io)
btrfs_put_bbio(bbio);
return PTR_ERR(rbio);
}
@ -2175,7 +2141,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
rbio->faila = find_logical_bio_stripe(rbio, bio);
if (rbio->faila == -1) {
BUG();
__free_bbio_and_raid_map(bbio, raid_map, generic_io);
if (generic_io)
btrfs_put_bbio(bbio);
kfree(rbio);
return -EIO;
}
@ -2184,7 +2151,7 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
btrfs_bio_counter_inc_noblocked(root->fs_info);
rbio->generic_bio_cnt = 1;
} else {
set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags);
btrfs_get_bbio(bbio);
}
/*
@ -2240,14 +2207,14 @@ static void read_rebuild_work(struct btrfs_work *work)
struct btrfs_raid_bio *
raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map,
u64 stripe_len, struct btrfs_device *scrub_dev,
struct btrfs_bio *bbio, u64 stripe_len,
struct btrfs_device *scrub_dev,
unsigned long *dbitmap, int stripe_nsectors)
{
struct btrfs_raid_bio *rbio;
int i;
rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
rbio = alloc_rbio(root, bbio, stripe_len);
if (IS_ERR(rbio))
return NULL;
bio_list_add(&rbio->bio_list, bio);
@ -2279,10 +2246,10 @@ void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
int stripe_offset;
int index;
ASSERT(logical >= rbio->raid_map[0]);
ASSERT(logical + PAGE_SIZE <= rbio->raid_map[0] +
ASSERT(logical >= rbio->bbio->raid_map[0]);
ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
rbio->stripe_len * rbio->nr_data);
stripe_offset = (int)(logical - rbio->raid_map[0]);
stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
index = stripe_offset >> PAGE_CACHE_SHIFT;
rbio->bio_pages[index] = page;
}

View File

@ -43,16 +43,15 @@ struct btrfs_raid_bio;
struct btrfs_device;
int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map,
u64 stripe_len, int mirror_num, int generic_io);
struct btrfs_bio *bbio, u64 stripe_len,
int mirror_num, int generic_io);
int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map,
u64 stripe_len);
struct btrfs_bio *bbio, u64 stripe_len);
struct btrfs_raid_bio *
raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
struct btrfs_bio *bbio, u64 *raid_map,
u64 stripe_len, struct btrfs_device *scrub_dev,
struct btrfs_bio *bbio, u64 stripe_len,
struct btrfs_device *scrub_dev,
unsigned long *dbitmap, int stripe_nsectors);
void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
struct page *page, u64 logical);

View File

@ -66,7 +66,6 @@ struct reada_extctl {
struct reada_extent {
u64 logical;
struct btrfs_key top;
u32 blocksize;
int err;
struct list_head extctl;
int refcnt;
@ -349,7 +348,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
blocksize = root->nodesize;
re->logical = logical;
re->blocksize = blocksize;
re->top = *top;
INIT_LIST_HEAD(&re->extctl);
spin_lock_init(&re->lock);
@ -463,7 +461,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
kfree(bbio);
btrfs_put_bbio(bbio);
return re;
error:
@ -488,7 +486,7 @@ error:
kref_put(&zone->refcnt, reada_zone_release);
spin_unlock(&fs_info->reada_lock);
}
kfree(bbio);
btrfs_put_bbio(bbio);
kfree(re);
return re_exist;
}
@ -660,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
int mirror_num = 0;
struct extent_buffer *eb = NULL;
u64 logical;
u32 blocksize;
int ret;
int i;
int need_kick = 0;
@ -694,7 +691,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
spin_unlock(&fs_info->reada_lock);
return 0;
}
dev->reada_next = re->logical + re->blocksize;
dev->reada_next = re->logical + fs_info->tree_root->nodesize;
re->refcnt++;
spin_unlock(&fs_info->reada_lock);
@ -709,7 +706,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
}
}
logical = re->logical;
blocksize = re->blocksize;
spin_lock(&re->lock);
if (re->scheduled_for == NULL) {
@ -724,8 +720,8 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
return 0;
atomic_inc(&dev->reada_in_flight);
ret = reada_tree_block_flagged(fs_info->extent_root, logical, blocksize,
mirror_num, &eb);
ret = reada_tree_block_flagged(fs_info->extent_root, logical,
mirror_num, &eb);
if (ret)
__readahead_hook(fs_info->extent_root, NULL, logical, ret);
else if (eb)
@ -851,7 +847,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
break;
printk(KERN_DEBUG
" re: logical %llu size %u empty %d for %lld",
re->logical, re->blocksize,
re->logical, fs_info->tree_root->nodesize,
list_empty(&re->extctl), re->scheduled_for ?
re->scheduled_for->devid : -1);
@ -886,7 +882,8 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
}
printk(KERN_DEBUG
"re: logical %llu size %u list empty %d for %lld",
re->logical, re->blocksize, list_empty(&re->extctl),
re->logical, fs_info->tree_root->nodesize,
list_empty(&re->extctl),
re->scheduled_for ? re->scheduled_for->devid : -1);
for (i = 0; i < re->nzones; ++i) {
printk(KERN_CONT " zone %llu-%llu devs",

View File

@ -2855,9 +2855,10 @@ static void update_processed_blocks(struct reloc_control *rc,
}
}
static int tree_block_processed(u64 bytenr, u32 blocksize,
struct reloc_control *rc)
static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
{
u32 blocksize = rc->extent_root->nodesize;
if (test_range_bit(&rc->processed_blocks, bytenr,
bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
return 1;
@ -2965,8 +2966,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
while (rb_node) {
block = rb_entry(rb_node, struct tree_block, rb_node);
if (!block->key_ready)
readahead_tree_block(rc->extent_root, block->bytenr,
block->key.objectid);
readahead_tree_block(rc->extent_root, block->bytenr);
rb_node = rb_next(rb_node);
}
@ -3353,7 +3353,7 @@ static int __add_tree_block(struct reloc_control *rc,
bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info,
SKINNY_METADATA);
if (tree_block_processed(bytenr, blocksize, rc))
if (tree_block_processed(bytenr, rc))
return 0;
if (tree_search(blocks, bytenr))
@ -3611,7 +3611,7 @@ static int find_data_references(struct reloc_control *rc,
if (added)
goto next;
if (!tree_block_processed(leaf->start, leaf->len, rc)) {
if (!tree_block_processed(leaf->start, rc)) {
block = kmalloc(sizeof(*block), GFP_NOFS);
if (!block) {
err = -ENOMEM;

View File

@ -66,7 +66,6 @@ struct scrub_ctx;
struct scrub_recover {
atomic_t refs;
struct btrfs_bio *bbio;
u64 *raid_map;
u64 map_length;
};
@ -80,7 +79,7 @@ struct scrub_page {
u64 logical;
u64 physical;
u64 physical_for_dev_replace;
atomic_t ref_count;
atomic_t refs;
struct {
unsigned int mirror_num:8;
unsigned int have_csum:1;
@ -113,7 +112,7 @@ struct scrub_block {
struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
int page_count;
atomic_t outstanding_pages;
atomic_t ref_count; /* free mem on transition to zero */
atomic_t refs; /* free mem on transition to zero */
struct scrub_ctx *sctx;
struct scrub_parity *sparity;
struct {
@ -142,7 +141,7 @@ struct scrub_parity {
int stripe_len;
atomic_t ref_count;
atomic_t refs;
struct list_head spages;
@ -194,6 +193,15 @@ struct scrub_ctx {
*/
struct btrfs_scrub_progress stat;
spinlock_t stat_lock;
/*
* Use a ref counter to avoid use-after-free issues. Scrub workers
* decrement bios_in_flight and workers_pending and then do a wakeup
* on the list_wait wait queue. We must ensure the main scrub task
* doesn't free the scrub context before or while the workers are
* doing the wakeup() call.
*/
atomic_t refs;
};
struct scrub_fixup_nodatasum {
@ -236,10 +244,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
struct btrfs_fs_info *fs_info,
struct scrub_block *original_sblock,
u64 length, u64 logical,
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
struct scrub_block *sblock, int is_metadata,
@ -251,8 +256,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
const u8 *csum, u64 generation,
u16 csum_size);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
struct scrub_block *sblock_good,
int force_write);
struct scrub_block *sblock_good);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
struct scrub_block *sblock_good,
int page_num, int force_write);
@ -302,10 +306,12 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
static void copy_nocow_pages_worker(struct btrfs_work *work);
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
atomic_inc(&sctx->refs);
atomic_inc(&sctx->bios_in_flight);
}
@ -313,6 +319,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
atomic_dec(&sctx->bios_in_flight);
wake_up(&sctx->list_wait);
scrub_put_ctx(sctx);
}
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
@ -346,6 +353,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
{
struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
atomic_inc(&sctx->refs);
/*
* increment scrubs_running to prevent cancel requests from
* completing as long as a worker is running. we must also
@ -388,6 +396,7 @@ static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
atomic_dec(&sctx->workers_pending);
wake_up(&fs_info->scrub_pause_wait);
wake_up(&sctx->list_wait);
scrub_put_ctx(sctx);
}
static void scrub_free_csums(struct scrub_ctx *sctx)
@ -433,6 +442,12 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
kfree(sctx);
}
static void scrub_put_ctx(struct scrub_ctx *sctx)
{
if (atomic_dec_and_test(&sctx->refs))
scrub_free_ctx(sctx);
}
static noinline_for_stack
struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
{
@ -457,6 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
if (!sctx)
goto nomem;
atomic_set(&sctx->refs, 1);
sctx->is_dev_replace = is_dev_replace;
sctx->pages_per_rd_bio = pages_per_rd_bio;
sctx->curr = -1;
@ -520,6 +536,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
struct inode_fs_paths *ipath = NULL;
struct btrfs_root *local_root;
struct btrfs_key root_key;
struct btrfs_key key;
root_key.objectid = root;
root_key.type = BTRFS_ROOT_ITEM_KEY;
@ -530,7 +547,14 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
goto err;
}
ret = inode_item_info(inum, 0, local_root, swarn->path);
/*
* this makes the path point to (inum INODE_ITEM ioff)
*/
key.objectid = inum;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
if (ret) {
btrfs_release_path(swarn->path);
goto err;
@ -848,8 +872,7 @@ static inline void scrub_get_recover(struct scrub_recover *recover)
static inline void scrub_put_recover(struct scrub_recover *recover)
{
if (atomic_dec_and_test(&recover->refs)) {
kfree(recover->bbio);
kfree(recover->raid_map);
btrfs_put_bbio(recover->bbio);
kfree(recover);
}
}
@ -955,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
}
/* setup the context, map the logical blocks and alloc the pages */
ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length,
logical, sblocks_for_recheck);
ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
if (ret) {
spin_lock(&sctx->stat_lock);
sctx->stat.read_errors++;
@ -1030,9 +1052,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
if (!is_metadata && !have_csum) {
struct scrub_fixup_nodatasum *fixup_nodatasum;
nodatasum_case:
WARN_ON(sctx->is_dev_replace);
nodatasum_case:
/*
* !is_metadata and !have_csum, this means that the data
* might not be COW'ed, that it might be modified
@ -1091,76 +1114,20 @@ nodatasum_case:
sblock_other->no_io_error_seen) {
if (sctx->is_dev_replace) {
scrub_write_block_to_dev_replace(sblock_other);
} else {
int force_write = is_metadata || have_csum;
ret = scrub_repair_block_from_good_copy(
sblock_bad, sblock_other,
force_write);
}
if (0 == ret)
goto corrected_error;
} else {
ret = scrub_repair_block_from_good_copy(
sblock_bad, sblock_other);
if (!ret)
goto corrected_error;
}
}
}
/*
* for dev_replace, pick good pages and write to the target device.
*/
if (sctx->is_dev_replace) {
success = 1;
for (page_num = 0; page_num < sblock_bad->page_count;
page_num++) {
int sub_success;
sub_success = 0;
for (mirror_index = 0;
mirror_index < BTRFS_MAX_MIRRORS &&
sblocks_for_recheck[mirror_index].page_count > 0;
mirror_index++) {
struct scrub_block *sblock_other =
sblocks_for_recheck + mirror_index;
struct scrub_page *page_other =
sblock_other->pagev[page_num];
if (!page_other->io_error) {
ret = scrub_write_page_to_dev_replace(
sblock_other, page_num);
if (ret == 0) {
/* succeeded for this page */
sub_success = 1;
break;
} else {
btrfs_dev_replace_stats_inc(
&sctx->dev_root->
fs_info->dev_replace.
num_write_errors);
}
}
}
if (!sub_success) {
/*
* did not find a mirror to fetch the page
* from. scrub_write_page_to_dev_replace()
* handles this case (page->io_error), by
* filling the block with zeros before
* submitting the write request
*/
success = 0;
ret = scrub_write_page_to_dev_replace(
sblock_bad, page_num);
if (ret)
btrfs_dev_replace_stats_inc(
&sctx->dev_root->fs_info->
dev_replace.num_write_errors);
}
}
goto out;
}
if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
goto did_not_correct_error;
/*
* for regular scrub, repair those pages that are errored.
* In case of I/O errors in the area that is supposed to be
* repaired, continue by picking good copies of those pages.
* Select the good pages from mirrors to rewrite bad pages from
@ -1184,44 +1151,64 @@ nodatasum_case:
* mirror, even if other 512 byte sectors in the same PAGE_SIZE
* area are unreadable.
*/
/* can only fix I/O errors from here on */
if (sblock_bad->no_io_error_seen)
goto did_not_correct_error;
success = 1;
for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
for (page_num = 0; page_num < sblock_bad->page_count;
page_num++) {
struct scrub_page *page_bad = sblock_bad->pagev[page_num];
struct scrub_block *sblock_other = NULL;
if (!page_bad->io_error)
/* skip no-io-error page in scrub */
if (!page_bad->io_error && !sctx->is_dev_replace)
continue;
for (mirror_index = 0;
mirror_index < BTRFS_MAX_MIRRORS &&
sblocks_for_recheck[mirror_index].page_count > 0;
mirror_index++) {
struct scrub_block *sblock_other = sblocks_for_recheck +
mirror_index;
struct scrub_page *page_other = sblock_other->pagev[
page_num];
if (!page_other->io_error) {
ret = scrub_repair_page_from_good_copy(
sblock_bad, sblock_other, page_num, 0);
if (0 == ret) {
page_bad->io_error = 0;
break; /* succeeded for this page */
/* try to find no-io-error page in mirrors */
if (page_bad->io_error) {
for (mirror_index = 0;
mirror_index < BTRFS_MAX_MIRRORS &&
sblocks_for_recheck[mirror_index].page_count > 0;
mirror_index++) {
if (!sblocks_for_recheck[mirror_index].
pagev[page_num]->io_error) {
sblock_other = sblocks_for_recheck +
mirror_index;
break;
}
}
if (!sblock_other)
success = 0;
}
if (page_bad->io_error) {
/* did not find a mirror to copy the page from */
success = 0;
if (sctx->is_dev_replace) {
/*
* did not find a mirror to fetch the page
* from. scrub_write_page_to_dev_replace()
* handles this case (page->io_error), by
* filling the block with zeros before
* submitting the write request
*/
if (!sblock_other)
sblock_other = sblock_bad;
if (scrub_write_page_to_dev_replace(sblock_other,
page_num) != 0) {
btrfs_dev_replace_stats_inc(
&sctx->dev_root->
fs_info->dev_replace.
num_write_errors);
success = 0;
}
} else if (sblock_other) {
ret = scrub_repair_page_from_good_copy(sblock_bad,
sblock_other,
page_num, 0);
if (0 == ret)
page_bad->io_error = 0;
else
success = 0;
}
}
if (success) {
if (success && !sctx->is_dev_replace) {
if (is_metadata || have_csum) {
/*
* need to verify the checksum now that all
@ -1288,19 +1275,18 @@ out:
return 0;
}
static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map)
static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
{
if (raid_map) {
if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
return 3;
else
return 2;
} else {
if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
return 2;
else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
return 3;
else
return (int)bbio->num_stripes;
}
}
static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
u64 *raid_map,
u64 mapped_length,
int nstripes, int mirror,
int *stripe_index,
@ -1308,7 +1294,7 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
{
int i;
if (raid_map) {
if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
/* RAID5/6 */
for (i = 0; i < nstripes; i++) {
if (raid_map[i] == RAID6_Q_STRIPE ||
@ -1329,72 +1315,65 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
}
}
static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
struct btrfs_fs_info *fs_info,
struct scrub_block *original_sblock,
u64 length, u64 logical,
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
struct scrub_block *sblocks_for_recheck)
{
struct scrub_ctx *sctx = original_sblock->sctx;
struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
u64 length = original_sblock->page_count * PAGE_SIZE;
u64 logical = original_sblock->pagev[0]->logical;
struct scrub_recover *recover;
struct btrfs_bio *bbio;
u64 *raid_map;
u64 sublen;
u64 mapped_length;
u64 stripe_offset;
int stripe_index;
int page_index;
int page_index = 0;
int mirror_index;
int nmirrors;
int ret;
/*
* note: the two members ref_count and outstanding_pages
* note: the two members refs and outstanding_pages
* are not used (and not set) in the blocks that are used for
* the recheck procedure
*/
page_index = 0;
while (length > 0) {
sublen = min_t(u64, length, PAGE_SIZE);
mapped_length = sublen;
bbio = NULL;
raid_map = NULL;
/*
* with a length of PAGE_SIZE, each returned stripe
* represents one mirror
*/
ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
&mapped_length, &bbio, 0, &raid_map);
&mapped_length, &bbio, 0, 1);
if (ret || !bbio || mapped_length < sublen) {
kfree(bbio);
kfree(raid_map);
btrfs_put_bbio(bbio);
return -EIO;
}
recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
if (!recover) {
kfree(bbio);
kfree(raid_map);
btrfs_put_bbio(bbio);
return -ENOMEM;
}
atomic_set(&recover->refs, 1);
recover->bbio = bbio;
recover->raid_map = raid_map;
recover->map_length = mapped_length;
BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
nmirrors = scrub_nr_raid_mirrors(bbio, raid_map);
nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
for (mirror_index = 0; mirror_index < nmirrors;
mirror_index++) {
struct scrub_block *sblock;
struct scrub_page *page;
if (mirror_index >= BTRFS_MAX_MIRRORS)
continue;
sblock = sblocks_for_recheck + mirror_index;
sblock->sctx = sctx;
page = kzalloc(sizeof(*page), GFP_NOFS);
@ -1410,9 +1389,12 @@ leave_nomem:
sblock->pagev[page_index] = page;
page->logical = logical;
scrub_stripe_index_and_offset(logical, raid_map,
scrub_stripe_index_and_offset(logical,
bbio->map_type,
bbio->raid_map,
mapped_length,
bbio->num_stripes,
bbio->num_stripes -
bbio->num_tgtdevs,
mirror_index,
&stripe_index,
&stripe_offset);
@ -1458,7 +1440,8 @@ static void scrub_bio_wait_endio(struct bio *bio, int error)
static inline int scrub_is_page_on_raid56(struct scrub_page *page)
{
return page->recover && page->recover->raid_map;
return page->recover &&
(page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}
static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
@ -1475,7 +1458,6 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
bio->bi_end_io = scrub_bio_wait_endio;
ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
page->recover->raid_map,
page->recover->map_length,
page->mirror_num, 0);
if (ret)
@ -1615,8 +1597,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
}
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
struct scrub_block *sblock_good,
int force_write)
struct scrub_block *sblock_good)
{
int page_num;
int ret = 0;
@ -1626,8 +1607,7 @@ static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
sblock_good,
page_num,
force_write);
page_num, 1);
if (ret_sub)
ret = ret_sub;
}
@ -2067,12 +2047,12 @@ static int scrub_checksum_super(struct scrub_block *sblock)
static void scrub_block_get(struct scrub_block *sblock)
{
atomic_inc(&sblock->ref_count);
atomic_inc(&sblock->refs);
}
static void scrub_block_put(struct scrub_block *sblock)
{
if (atomic_dec_and_test(&sblock->ref_count)) {
if (atomic_dec_and_test(&sblock->refs)) {
int i;
if (sblock->sparity)
@ -2086,12 +2066,12 @@ static void scrub_block_put(struct scrub_block *sblock)
static void scrub_page_get(struct scrub_page *spage)
{
atomic_inc(&spage->ref_count);
atomic_inc(&spage->refs);
}
static void scrub_page_put(struct scrub_page *spage)
{
if (atomic_dec_and_test(&spage->ref_count)) {
if (atomic_dec_and_test(&spage->refs)) {
if (spage->page)
__free_page(spage->page);
kfree(spage);
@ -2217,7 +2197,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
/* one ref inside this function, plus one for each page added to
* a bio later on */
atomic_set(&sblock->ref_count, 1);
atomic_set(&sblock->refs, 1);
sblock->sctx = sctx;
sblock->no_io_error_seen = 1;
@ -2510,7 +2490,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
/* one ref inside this function, plus one for each page added to
* a bio later on */
atomic_set(&sblock->ref_count, 1);
atomic_set(&sblock->refs, 1);
sblock->sctx = sctx;
sblock->no_io_error_seen = 1;
sblock->sparity = sparity;
@ -2705,7 +2685,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
struct btrfs_raid_bio *rbio;
struct scrub_page *spage;
struct btrfs_bio *bbio = NULL;
u64 *raid_map = NULL;
u64 length;
int ret;
@ -2716,8 +2695,8 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
length = sparity->logic_end - sparity->logic_start + 1;
ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
sparity->logic_start,
&length, &bbio, 0, &raid_map);
if (ret || !bbio || !raid_map)
&length, &bbio, 0, 1);
if (ret || !bbio || !bbio->raid_map)
goto bbio_out;
bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
@ -2729,8 +2708,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
bio->bi_end_io = scrub_parity_bio_endio;
rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
raid_map, length,
sparity->scrub_dev,
length, sparity->scrub_dev,
sparity->dbitmap,
sparity->nsectors);
if (!rbio)
@ -2747,8 +2725,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
rbio_out:
bio_put(bio);
bbio_out:
kfree(bbio);
kfree(raid_map);
btrfs_put_bbio(bbio);
bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
sparity->nsectors);
spin_lock(&sctx->stat_lock);
@ -2765,12 +2742,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors)
static void scrub_parity_get(struct scrub_parity *sparity)
{
atomic_inc(&sparity->ref_count);
atomic_inc(&sparity->refs);
}
static void scrub_parity_put(struct scrub_parity *sparity)
{
if (!atomic_dec_and_test(&sparity->ref_count))
if (!atomic_dec_and_test(&sparity->refs))
return;
scrub_parity_check_and_repair(sparity);
@ -2820,7 +2797,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
sparity->scrub_dev = sdev;
sparity->logic_start = logic_start;
sparity->logic_end = logic_end;
atomic_set(&sparity->ref_count, 1);
atomic_set(&sparity->refs, 1);
INIT_LIST_HEAD(&sparity->spages);
sparity->dbitmap = sparity->bitmap;
sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
@ -3037,8 +3014,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
increment = map->stripe_len;
mirror_num = num % map->num_stripes + 1;
} else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6)) {
} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
get_raid56_logic_offset(physical, num, map, &offset, NULL);
increment = map->stripe_len * nr_data_stripes(map);
mirror_num = 1;
@ -3074,8 +3050,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
*/
logical = base + offset;
physical_end = physical + nstripes * map->stripe_len;
if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6)) {
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
get_raid56_logic_offset(physical_end, num,
map, &logic_end, NULL);
logic_end += base;
@ -3121,8 +3096,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
ret = 0;
while (physical < physical_end) {
/* for raid56, we skip parity stripe */
if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6)) {
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
ret = get_raid56_logic_offset(physical, num,
map, &logical, &stripe_logical);
logical += base;
@ -3280,8 +3254,7 @@ again:
scrub_free_csums(sctx);
if (extent_logical + extent_len <
key.objectid + bytes) {
if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6)) {
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
/*
* loop until we find next data stripe
* or we have finished all stripes.
@ -3775,7 +3748,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
scrub_workers_put(fs_info);
mutex_unlock(&fs_info->scrub_lock);
scrub_free_ctx(sctx);
scrub_put_ctx(sctx);
return ret;
}
@ -3881,14 +3854,14 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
&mapped_length, &bbio, 0);
if (ret || !bbio || mapped_length < extent_len ||
!bbio->stripes[0].dev->bdev) {
kfree(bbio);
btrfs_put_bbio(bbio);
return;
}
*extent_physical = bbio->stripes[0].physical;
*extent_mirror_num = bbio->mirror_num;
*extent_dev = bbio->stripes[0].dev;
kfree(bbio);
btrfs_put_bbio(bbio);
}
static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,

View File

@ -2471,12 +2471,9 @@ verbose_printk("btrfs: send_utimes %llu\n", ino);
if (ret < 0)
goto out;
TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb,
btrfs_inode_atime(ii));
TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb,
btrfs_inode_mtime(ii));
TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb,
btrfs_inode_ctime(ii));
TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
/* TODO Add otime support when the otime patches get into upstream */
ret = send_cmd(sctx);

View File

@ -1958,11 +1958,6 @@ static int btrfs_freeze(struct super_block *sb)
return btrfs_commit_transaction(trans, root);
}
static int btrfs_unfreeze(struct super_block *sb)
{
return 0;
}
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
{
struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@ -2011,7 +2006,6 @@ static const struct super_operations btrfs_super_ops = {
.statfs = btrfs_statfs,
.remount_fs = btrfs_remount,
.freeze_fs = btrfs_freeze,
.unfreeze_fs = btrfs_unfreeze,
};
static const struct file_operations btrfs_ctl_fops = {

View File

@ -733,10 +733,18 @@ int btrfs_init_sysfs(void)
ret = btrfs_init_debugfs();
if (ret)
return ret;
goto out1;
init_feature_attrs();
ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
if (ret)
goto out2;
return 0;
out2:
debugfs_remove_recursive(btrfs_debugfs_root_dentry);
out1:
kset_unregister(btrfs_kset);
return ret;
}

View File

@ -53,7 +53,7 @@ static int test_btrfs_split_item(void)
return -ENOMEM;
}
path->nodes[0] = eb = alloc_dummy_extent_buffer(0, 4096);
path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, 4096);
if (!eb) {
test_msg("Could not allocate dummy buffer\n");
ret = -ENOMEM;

View File

@ -258,8 +258,7 @@ static int test_find_delalloc(void)
}
ret = 0;
out_bits:
clear_extent_bits(&tmp, 0, total_dirty - 1,
(unsigned long)-1, GFP_NOFS);
clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
out:
if (locked_page)
page_cache_release(locked_page);

View File

@ -255,7 +255,7 @@ static noinline int test_btrfs_get_extent(void)
goto out;
}
root->node = alloc_dummy_extent_buffer(0, 4096);
root->node = alloc_dummy_extent_buffer(NULL, 4096);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
goto out;
@ -843,7 +843,7 @@ static int test_hole_first(void)
goto out;
}
root->node = alloc_dummy_extent_buffer(0, 4096);
root->node = alloc_dummy_extent_buffer(NULL, 4096);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
goto out;

View File

@ -404,12 +404,22 @@ int btrfs_test_qgroups(void)
ret = -ENOMEM;
goto out;
}
/* We are using this root as our extent root */
root->fs_info->extent_root = root;
/*
* Some of the paths we test assume we have a filled out fs_info, so we
* just need to add the root in there so we don't panic.
*/
root->fs_info->tree_root = root;
root->fs_info->quota_root = root;
root->fs_info->quota_enabled = 1;
/*
* Can't use bytenr 0, some things freak out
* *cough*backref walking code*cough*
*/
root->node = alloc_test_extent_buffer(root->fs_info, 4096, 4096);
root->node = alloc_test_extent_buffer(root->fs_info, 4096);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
ret = -ENOMEM;
@ -448,17 +458,6 @@ int btrfs_test_qgroups(void)
goto out;
}
/* We are using this root as our extent root */
root->fs_info->extent_root = root;
/*
* Some of the paths we test assume we have a filled out fs_info, so we
* just need to addt he root in there so we don't panic.
*/
root->fs_info->tree_root = root;
root->fs_info->quota_root = root;
root->fs_info->quota_enabled = 1;
test_msg("Running qgroup tests\n");
ret = test_no_shared_qgroup(root);
if (ret)

View File

@ -220,6 +220,7 @@ loop:
* commit the transaction.
*/
atomic_set(&cur_trans->use_count, 2);
cur_trans->have_free_bgs = 0;
cur_trans->start_time = get_seconds();
cur_trans->delayed_refs.href_root = RB_ROOT;
@ -248,6 +249,8 @@ loop:
INIT_LIST_HEAD(&cur_trans->pending_chunks);
INIT_LIST_HEAD(&cur_trans->switch_commits);
INIT_LIST_HEAD(&cur_trans->pending_ordered);
INIT_LIST_HEAD(&cur_trans->dirty_bgs);
spin_lock_init(&cur_trans->dirty_bgs_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages,
fs_info->btree_inode->i_mapping);
@ -1020,6 +1023,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
u64 old_root_bytenr;
u64 old_root_used;
struct btrfs_root *tree_root = root->fs_info->tree_root;
bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID);
old_root_used = btrfs_root_used(&root->root_item);
btrfs_write_dirty_block_groups(trans, root);
@ -1027,7 +1031,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
while (1) {
old_root_bytenr = btrfs_root_bytenr(&root->root_item);
if (old_root_bytenr == root->node->start &&
old_root_used == btrfs_root_used(&root->root_item))
old_root_used == btrfs_root_used(&root->root_item) &&
(!extent_root ||
list_empty(&trans->transaction->dirty_bgs)))
break;
btrfs_set_root_node(&root->root_item, root->node);
@ -1038,7 +1044,15 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
return ret;
old_root_used = btrfs_root_used(&root->root_item);
ret = btrfs_write_dirty_block_groups(trans, root);
if (extent_root) {
ret = btrfs_write_dirty_block_groups(trans, root);
if (ret)
return ret;
}
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
if (ret)
return ret;
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
if (ret)
return ret;
}
@ -1061,10 +1075,6 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
struct extent_buffer *eb;
int ret;
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
if (ret)
return ret;
eb = btrfs_lock_root_node(fs_info->tree_root);
ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
0, &eb);
@ -1097,6 +1107,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
next = fs_info->dirty_cowonly_roots.next;
list_del_init(next);
root = list_entry(next, struct btrfs_root, dirty_list);
clear_bit(BTRFS_ROOT_DIRTY, &root->state);
if (root != fs_info->extent_root)
list_add_tail(&root->dirty_list,
@ -1983,6 +1994,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
switch_commit_roots(cur_trans, root->fs_info);
assert_qgroups_uptodate(trans);
ASSERT(list_empty(&cur_trans->dirty_bgs));
update_super_roots(root);
btrfs_set_super_log_root(root->fs_info->super_copy, 0);
@ -2026,6 +2038,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_finish_extent_commit(trans, root);
if (cur_trans->have_free_bgs)
btrfs_clear_space_info_full(root->fs_info);
root->fs_info->last_trans_committed = cur_trans->transid;
/*
* We needn't acquire the lock here because there is no other task

View File

@ -47,6 +47,11 @@ struct btrfs_transaction {
atomic_t num_writers;
atomic_t use_count;
/*
* true if there is free bgs operations in this transaction
*/
int have_free_bgs;
/* Be protected by fs_info->trans_lock when we want to change it. */
enum btrfs_trans_state state;
struct list_head list;
@ -58,6 +63,8 @@ struct btrfs_transaction {
struct list_head pending_chunks;
struct list_head pending_ordered;
struct list_head switch_commits;
struct list_head dirty_bgs;
spinlock_t dirty_bgs_lock;
struct btrfs_delayed_ref_root delayed_refs;
int aborted;
};

View File

@ -453,11 +453,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
insert:
btrfs_release_path(path);
/* try to insert the key into the destination tree */
path->skip_release_on_error = 1;
ret = btrfs_insert_empty_item(trans, root, path,
key, item_size);
path->skip_release_on_error = 0;
/* make sure any existing item is the correct size */
if (ret == -EEXIST) {
if (ret == -EEXIST || ret == -EOVERFLOW) {
u32 found_size;
found_size = btrfs_item_size_nr(path->nodes[0],
path->slots[0]);
@ -488,8 +490,20 @@ insert:
src_item = (struct btrfs_inode_item *)src_ptr;
dst_item = (struct btrfs_inode_item *)dst_ptr;
if (btrfs_inode_generation(eb, src_item) == 0)
if (btrfs_inode_generation(eb, src_item) == 0) {
struct extent_buffer *dst_eb = path->nodes[0];
if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) {
struct btrfs_map_token token;
u64 ino_size = btrfs_inode_size(eb, src_item);
btrfs_init_map_token(&token);
btrfs_set_token_inode_size(dst_eb, dst_item,
ino_size, &token);
}
goto no_copy;
}
if (overwrite_root &&
S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
@ -844,7 +858,7 @@ out:
static noinline int backref_in_log(struct btrfs_root *log,
struct btrfs_key *key,
u64 ref_objectid,
char *name, int namelen)
const char *name, int namelen)
{
struct btrfs_path *path;
struct btrfs_inode_ref *ref;
@ -1254,13 +1268,14 @@ out:
}
static int insert_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 offset)
struct btrfs_root *root, u64 ino)
{
int ret;
ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID,
offset, BTRFS_ORPHAN_ITEM_KEY, NULL);
if (ret > 0)
ret = btrfs_insert_orphan_item(trans, root, offset);
ret = btrfs_insert_orphan_item(trans, root, ino);
if (ret == -EEXIST)
ret = 0;
return ret;
}
@ -1287,6 +1302,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
cur_offset = 0;
while (cur_offset < item_size) {
extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
@ -1302,7 +1318,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
}
btrfs_release_path(path);
if (ret < 0)
if (ret < 0 && ret != -ENOENT)
return ret;
return nlink;
}
@ -1394,9 +1410,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
nlink = ret;
ret = count_inode_extrefs(root, inode, path);
if (ret == -ENOENT)
ret = 0;
if (ret < 0)
goto out;
@ -1556,6 +1569,30 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
return ret;
}
/*
* Return true if an inode reference exists in the log for the given name,
* inode and parent inode.
*/
static bool name_in_log_ref(struct btrfs_root *log_root,
const char *name, const int name_len,
const u64 dirid, const u64 ino)
{
struct btrfs_key search_key;
search_key.objectid = ino;
search_key.type = BTRFS_INODE_REF_KEY;
search_key.offset = dirid;
if (backref_in_log(log_root, &search_key, dirid, name, name_len))
return true;
search_key.type = BTRFS_INODE_EXTREF_KEY;
search_key.offset = btrfs_extref_hash(dirid, name, name_len);
if (backref_in_log(log_root, &search_key, dirid, name, name_len))
return true;
return false;
}
/*
* take a single entry in a log directory item and replay it into
* the subvolume.
@ -1666,10 +1703,17 @@ out:
return ret;
insert:
if (name_in_log_ref(root->log_root, name, name_len,
key->objectid, log_key.objectid)) {
/* The dentry will be added later. */
ret = 0;
update_size = false;
goto out;
}
btrfs_release_path(path);
ret = insert_one_name(trans, root, path, key->objectid, key->offset,
name, name_len, log_type, &log_key);
if (ret && ret != -ENOENT)
if (ret && ret != -ENOENT && ret != -EEXIST)
goto out;
update_size = false;
ret = 0;
@ -2164,7 +2208,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
parent = path->nodes[*level];
root_owner = btrfs_header_owner(parent);
next = btrfs_find_create_tree_block(root, bytenr, blocksize);
next = btrfs_find_create_tree_block(root, bytenr);
if (!next)
return -ENOMEM;
@ -2416,8 +2460,8 @@ static void wait_for_writer(struct btrfs_trans_handle *trans,
mutex_unlock(&root->log_mutex);
if (atomic_read(&root->log_writers))
schedule();
mutex_lock(&root->log_mutex);
finish_wait(&root->log_writer_wait, &wait);
mutex_lock(&root->log_mutex);
}
}
@ -3219,7 +3263,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
static void fill_inode_item(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf,
struct btrfs_inode_item *item,
struct inode *inode, int log_inode_only)
struct inode *inode, int log_inode_only,
u64 logged_isize)
{
struct btrfs_map_token token;
@ -3232,7 +3277,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
* to say 'update this inode with these values'
*/
btrfs_set_token_inode_generation(leaf, item, 0, &token);
btrfs_set_token_inode_size(leaf, item, 0, &token);
btrfs_set_token_inode_size(leaf, item, logged_isize, &token);
} else {
btrfs_set_token_inode_generation(leaf, item,
BTRFS_I(inode)->generation,
@ -3245,19 +3290,19 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
btrfs_set_token_timespec_sec(leaf, &item->atime,
inode->i_atime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
btrfs_set_token_timespec_nsec(leaf, &item->atime,
inode->i_atime.tv_nsec, &token);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
btrfs_set_token_timespec_sec(leaf, &item->mtime,
inode->i_mtime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
btrfs_set_token_timespec_nsec(leaf, &item->mtime,
inode->i_mtime.tv_nsec, &token);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
btrfs_set_token_timespec_sec(leaf, &item->ctime,
inode->i_ctime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
btrfs_set_token_timespec_nsec(leaf, &item->ctime,
inode->i_ctime.tv_nsec, &token);
btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
@ -3284,7 +3329,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
return ret;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
fill_inode_item(trans, path->nodes[0], inode_item, inode, 0);
fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0);
btrfs_release_path(path);
return 0;
}
@ -3293,7 +3338,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
struct inode *inode,
struct btrfs_path *dst_path,
struct btrfs_path *src_path, u64 *last_extent,
int start_slot, int nr, int inode_only)
int start_slot, int nr, int inode_only,
u64 logged_isize)
{
unsigned long src_offset;
unsigned long dst_offset;
@ -3350,7 +3396,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
dst_path->slots[0],
struct btrfs_inode_item);
fill_inode_item(trans, dst_path->nodes[0], inode_item,
inode, inode_only == LOG_INODE_EXISTS);
inode, inode_only == LOG_INODE_EXISTS,
logged_isize);
} else {
copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
src_offset, ins_sizes[i]);
@ -3902,6 +3949,33 @@ process:
return ret;
}
static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
struct btrfs_path *path, u64 *size_ret)
{
struct btrfs_key key;
int ret;
key.objectid = btrfs_ino(inode);
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, log, &key, path, 0, 0);
if (ret < 0) {
return ret;
} else if (ret > 0) {
*size_ret = i_size_read(inode);
} else {
struct btrfs_inode_item *item;
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
*size_ret = btrfs_inode_size(path->nodes[0], item);
}
btrfs_release_path(path);
return 0;
}
/* log a single inode in the tree log.
* At least one parent directory for this inode must exist in the tree
* or be logged already.
@ -3939,6 +4013,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
bool fast_search = false;
u64 ino = btrfs_ino(inode);
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
u64 logged_isize = 0;
path = btrfs_alloc_path();
if (!path)
@ -3966,15 +4041,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
max_key.type = (u8)-1;
max_key.offset = (u64)-1;
/* Only run delayed items if we are a dir or a new file */
/*
* Only run delayed items if we are a dir or a new file.
* Otherwise commit the delayed inode only, which is needed in
* order for the log replay code to mark inodes for link count
* fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
*/
if (S_ISDIR(inode->i_mode) ||
BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) {
BTRFS_I(inode)->generation > root->fs_info->last_trans_committed)
ret = btrfs_commit_inode_delayed_items(trans, inode);
if (ret) {
btrfs_free_path(path);
btrfs_free_path(dst_path);
return ret;
}
else
ret = btrfs_commit_inode_delayed_inode(inode);
if (ret) {
btrfs_free_path(path);
btrfs_free_path(dst_path);
return ret;
}
mutex_lock(&BTRFS_I(inode)->log_mutex);
@ -3988,22 +4070,56 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
if (S_ISDIR(inode->i_mode)) {
int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
if (inode_only == LOG_INODE_EXISTS)
max_key_type = BTRFS_XATTR_ITEM_KEY;
if (inode_only == LOG_INODE_EXISTS) {
max_key_type = BTRFS_INODE_EXTREF_KEY;
max_key.type = max_key_type;
}
ret = drop_objectid_items(trans, log, path, ino, max_key_type);
} else {
if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags)) {
clear_bit(BTRFS_INODE_COPY_EVERYTHING,
&BTRFS_I(inode)->runtime_flags);
ret = btrfs_truncate_inode_items(trans, log,
inode, 0, 0);
} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
&BTRFS_I(inode)->runtime_flags) ||
if (inode_only == LOG_INODE_EXISTS) {
/*
* Make sure the new inode item we write to the log has
* the same isize as the current one (if it exists).
* This is necessary to prevent data loss after log
* replay, and also to prevent doing a wrong expanding
* truncate - for e.g. create file, write 4K into offset
* 0, fsync, write 4K into offset 4096, add hard link,
* fsync some other file (to sync log), power fail - if
* we use the inode's current i_size, after log replay
* we get a 8Kb file, with the last 4Kb extent as a hole
* (zeroes), as if an expanding truncate happened,
* instead of getting a file of 4Kb only.
*/
err = logged_inode_size(log, inode, path,
&logged_isize);
if (err)
goto out_unlock;
}
if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags)) {
if (inode_only == LOG_INODE_EXISTS) {
max_key.type = BTRFS_INODE_EXTREF_KEY;
ret = drop_objectid_items(trans, log, path, ino,
max_key.type);
} else {
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
clear_bit(BTRFS_INODE_COPY_EVERYTHING,
&BTRFS_I(inode)->runtime_flags);
ret = btrfs_truncate_inode_items(trans, log,
inode, 0, 0);
}
} else if (test_bit(BTRFS_INODE_COPY_EVERYTHING,
&BTRFS_I(inode)->runtime_flags) ||
inode_only == LOG_INODE_EXISTS) {
if (inode_only == LOG_INODE_ALL)
if (inode_only == LOG_INODE_ALL) {
clear_bit(BTRFS_INODE_COPY_EVERYTHING,
&BTRFS_I(inode)->runtime_flags);
fast_search = true;
max_key.type = BTRFS_XATTR_ITEM_KEY;
max_key.type = BTRFS_XATTR_ITEM_KEY;
} else {
max_key.type = BTRFS_INODE_EXTREF_KEY;
}
ret = drop_objectid_items(trans, log, path, ino,
max_key.type);
} else {
@ -4047,7 +4163,8 @@ again:
}
ret = copy_items(trans, inode, dst_path, path, &last_extent,
ins_start_slot, ins_nr, inode_only);
ins_start_slot, ins_nr, inode_only,
logged_isize);
if (ret < 0) {
err = ret;
goto out_unlock;
@ -4071,7 +4188,7 @@ next_slot:
if (ins_nr) {
ret = copy_items(trans, inode, dst_path, path,
&last_extent, ins_start_slot,
ins_nr, inode_only);
ins_nr, inode_only, logged_isize);
if (ret < 0) {
err = ret;
goto out_unlock;
@ -4092,7 +4209,8 @@ next_slot:
}
if (ins_nr) {
ret = copy_items(trans, inode, dst_path, path, &last_extent,
ins_start_slot, ins_nr, inode_only);
ins_start_slot, ins_nr, inode_only,
logged_isize);
if (ret < 0) {
err = ret;
goto out_unlock;
@ -4273,6 +4391,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct dentry *old_parent = NULL;
int ret = 0;
u64 last_committed = root->fs_info->last_trans_committed;
const struct dentry * const first_parent = parent;
const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
last_committed);
sb = inode->i_sb;
@ -4328,7 +4449,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
goto end_trans;
}
inode_only = LOG_INODE_EXISTS;
while (1) {
if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
break;
@ -4337,8 +4457,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (root != BTRFS_I(inode)->root)
break;
/*
* On unlink we must make sure our immediate parent directory
* inode is fully logged. This is to prevent leaving dangling
* directory index entries and a wrong directory inode's i_size.
* Not doing so can result in a directory being impossible to
* delete after log replay (rmdir will always fail with error
* -ENOTEMPTY).
*/
if (did_unlink && parent == first_parent)
inode_only = LOG_INODE_ALL;
else
inode_only = LOG_INODE_EXISTS;
if (BTRFS_I(inode)->generation >
root->fs_info->last_trans_committed) {
root->fs_info->last_trans_committed ||
inode_only == LOG_INODE_ALL) {
ret = btrfs_log_inode(trans, root, inode, inode_only,
0, LLONG_MAX, ctx);
if (ret)

View File

@ -1310,6 +1310,8 @@ again:
if (ret) {
btrfs_error(root->fs_info, ret,
"Failed to remove dev extent item");
} else {
trans->transaction->have_free_bgs = 1;
}
out:
btrfs_free_path(path);
@ -4196,7 +4198,7 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
{
if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)))
if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
return;
btrfs_set_fs_incompat(info, RAID56);
@ -4803,10 +4805,8 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
BUG_ON(em->start > logical || em->start + em->len < logical);
map = (struct map_lookup *)em->bdev;
if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6)) {
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
len = map->stripe_len * nr_data_stripes(map);
}
free_extent_map(em);
return len;
}
@ -4826,8 +4826,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
BUG_ON(em->start > logical || em->start + em->len < logical);
map = (struct map_lookup *)em->bdev;
if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6))
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
ret = 1;
free_extent_map(em);
return ret;
@ -4876,32 +4875,24 @@ static inline int parity_smaller(u64 a, u64 b)
}
/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map)
static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
{
struct btrfs_bio_stripe s;
int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
int i;
u64 l;
int again = 1;
int m;
while (again) {
again = 0;
for (i = 0; i < real_stripes - 1; i++) {
if (parity_smaller(raid_map[i], raid_map[i+1])) {
for (i = 0; i < num_stripes - 1; i++) {
if (parity_smaller(bbio->raid_map[i],
bbio->raid_map[i+1])) {
s = bbio->stripes[i];
l = raid_map[i];
l = bbio->raid_map[i];
bbio->stripes[i] = bbio->stripes[i+1];
raid_map[i] = raid_map[i+1];
bbio->raid_map[i] = bbio->raid_map[i+1];
bbio->stripes[i+1] = s;
raid_map[i+1] = l;
if (bbio->tgtdev_map) {
m = bbio->tgtdev_map[i];
bbio->tgtdev_map[i] =
bbio->tgtdev_map[i + 1];
bbio->tgtdev_map[i + 1] = m;
}
bbio->raid_map[i+1] = l;
again = 1;
}
@ -4909,10 +4900,41 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map)
}
}
static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
{
struct btrfs_bio *bbio = kzalloc(
sizeof(struct btrfs_bio) +
sizeof(struct btrfs_bio_stripe) * (total_stripes) +
sizeof(int) * (real_stripes) +
sizeof(u64) * (real_stripes),
GFP_NOFS);
if (!bbio)
return NULL;
atomic_set(&bbio->error, 0);
atomic_set(&bbio->refs, 1);
return bbio;
}
void btrfs_get_bbio(struct btrfs_bio *bbio)
{
WARN_ON(!atomic_read(&bbio->refs));
atomic_inc(&bbio->refs);
}
void btrfs_put_bbio(struct btrfs_bio *bbio)
{
if (!bbio)
return;
if (atomic_dec_and_test(&bbio->refs))
kfree(bbio);
}
static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret,
int mirror_num, u64 **raid_map_ret)
int mirror_num, int need_raid_map)
{
struct extent_map *em;
struct map_lookup *map;
@ -4925,7 +4947,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
u64 stripe_nr_orig;
u64 stripe_nr_end;
u64 stripe_len;
u64 *raid_map = NULL;
int stripe_index;
int i;
int ret = 0;
@ -4976,7 +4997,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
stripe_offset = offset - stripe_offset;
/* if we're here for raid56, we need to know the stripe aligned start */
if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) {
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
unsigned long full_stripe_len = stripe_len * nr_data_stripes(map);
raid56_full_stripe_start = offset;
@ -4989,8 +5010,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
if (rw & REQ_DISCARD) {
/* we don't discard raid56 yet */
if (map->type &
(BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) {
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
ret = -EOPNOTSUPP;
goto out;
}
@ -5000,7 +5020,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
/* For writes to RAID[56], allow a full stripeset across all disks.
For other RAID types and for RAID[56] reads, just allow a single
stripe (on a single disk). */
if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6) &&
if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
(rw & REQ_WRITE)) {
max_len = stripe_len * nr_data_stripes(map) -
(offset - raid56_full_stripe_start);
@ -5047,7 +5067,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
u64 physical_of_found = 0;
ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS,
logical, &tmp_length, &tmp_bbio, 0, NULL);
logical, &tmp_length, &tmp_bbio, 0, 0);
if (ret) {
WARN_ON(tmp_bbio != NULL);
goto out;
@ -5061,7 +5081,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
* is not left of the left cursor
*/
ret = -EIO;
kfree(tmp_bbio);
btrfs_put_bbio(tmp_bbio);
goto out;
}
@ -5096,11 +5116,11 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
} else {
WARN_ON(1);
ret = -EIO;
kfree(tmp_bbio);
btrfs_put_bbio(tmp_bbio);
goto out;
}
kfree(tmp_bbio);
btrfs_put_bbio(tmp_bbio);
} else if (mirror_num > map->num_stripes) {
mirror_num = 0;
}
@ -5166,15 +5186,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
mirror_num = stripe_index - old_stripe_index + 1;
}
} else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6)) {
u64 tmp;
if (raid_map_ret &&
} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
if (need_raid_map &&
((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
mirror_num > 1)) {
int i, rot;
/* push stripe_nr back to the start of the full stripe */
stripe_nr = raid56_full_stripe_start;
do_div(stripe_nr, stripe_len * nr_data_stripes(map));
@ -5183,32 +5198,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
num_stripes = map->num_stripes;
max_errors = nr_parity_stripes(map);
raid_map = kmalloc_array(num_stripes, sizeof(u64),
GFP_NOFS);
if (!raid_map) {
ret = -ENOMEM;
goto out;
}
/* Work out the disk rotation on this stripe-set */
tmp = stripe_nr;
rot = do_div(tmp, num_stripes);
/* Fill in the logical address of each stripe */
tmp = stripe_nr * nr_data_stripes(map);
for (i = 0; i < nr_data_stripes(map); i++)
raid_map[(i+rot) % num_stripes] =
em->start + (tmp + i) * map->stripe_len;
raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
if (map->type & BTRFS_BLOCK_GROUP_RAID6)
raid_map[(i+rot+1) % num_stripes] =
RAID6_Q_STRIPE;
*length = map->stripe_len;
stripe_index = 0;
stripe_offset = 0;
} else {
u64 tmp;
/*
* Mirror #0 or #1 means the original data block.
* Mirror #2 is RAID5 parity block.
@ -5246,17 +5241,42 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
tgtdev_indexes = num_stripes;
}
bbio = kzalloc(btrfs_bio_size(num_alloc_stripes, tgtdev_indexes),
GFP_NOFS);
bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
if (!bbio) {
kfree(raid_map);
ret = -ENOMEM;
goto out;
}
atomic_set(&bbio->error, 0);
if (dev_replace_is_ongoing)
bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
/* build raid_map */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
mirror_num > 1)) {
u64 tmp;
int i, rot;
bbio->raid_map = (u64 *)((void *)bbio->stripes +
sizeof(struct btrfs_bio_stripe) *
num_alloc_stripes +
sizeof(int) * tgtdev_indexes);
/* Work out the disk rotation on this stripe-set */
tmp = stripe_nr;
rot = do_div(tmp, num_stripes);
/* Fill in the logical address of each stripe */
tmp = stripe_nr * nr_data_stripes(map);
for (i = 0; i < nr_data_stripes(map); i++)
bbio->raid_map[(i+rot) % num_stripes] =
em->start + (tmp + i) * map->stripe_len;
bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
if (map->type & BTRFS_BLOCK_GROUP_RAID6)
bbio->raid_map[(i+rot+1) % num_stripes] =
RAID6_Q_STRIPE;
}
if (rw & REQ_DISCARD) {
int factor = 0;
int sub_stripes = 0;
@ -5340,6 +5360,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
max_errors = btrfs_chunk_max_errors(map);
if (bbio->raid_map)
sort_parity_stripes(bbio, num_stripes);
tgtdev_indexes = 0;
if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) &&
dev_replace->tgtdev != NULL) {
@ -5427,6 +5450,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
*bbio_ret = bbio;
bbio->map_type = map->type;
bbio->num_stripes = num_stripes;
bbio->max_errors = max_errors;
bbio->mirror_num = mirror_num;
@ -5443,10 +5467,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
bbio->mirror_num = map->num_stripes + 1;
}
if (raid_map) {
sort_parity_stripes(bbio, raid_map);
*raid_map_ret = raid_map;
}
out:
if (dev_replace_is_ongoing)
btrfs_dev_replace_unlock(dev_replace);
@ -5459,17 +5479,17 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
struct btrfs_bio **bbio_ret, int mirror_num)
{
return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
mirror_num, NULL);
mirror_num, 0);
}
/* For Scrub/replace */
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num,
u64 **raid_map_ret)
int need_raid_map)
{
return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
mirror_num, raid_map_ret);
mirror_num, need_raid_map);
}
int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@ -5511,8 +5531,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
do_div(length, map->num_stripes / map->sub_stripes);
else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
do_div(length, map->num_stripes);
else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6)) {
else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
do_div(length, nr_data_stripes(map));
rmap_len = map->stripe_len * nr_data_stripes(map);
}
@ -5565,7 +5584,7 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e
bio_endio_nodec(bio, err);
else
bio_endio(bio, err);
kfree(bbio);
btrfs_put_bbio(bbio);
}
static void btrfs_end_bio(struct bio *bio, int err)
@ -5808,7 +5827,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
u64 logical = (u64)bio->bi_iter.bi_sector << 9;
u64 length = 0;
u64 map_length;
u64 *raid_map = NULL;
int ret;
int dev_nr = 0;
int total_devs = 1;
@ -5819,7 +5837,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
btrfs_bio_counter_inc_blocked(root->fs_info);
ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
mirror_num, &raid_map);
mirror_num, 1);
if (ret) {
btrfs_bio_counter_dec(root->fs_info);
return ret;
@ -5832,15 +5850,13 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
bbio->fs_info = root->fs_info;
atomic_set(&bbio->stripes_pending, bbio->num_stripes);
if (raid_map) {
if (bbio->raid_map) {
/* In this case, map_length has been set to the length of
a single stripe; not the whole write */
if (rw & WRITE) {
ret = raid56_parity_write(root, bio, bbio,
raid_map, map_length);
ret = raid56_parity_write(root, bio, bbio, map_length);
} else {
ret = raid56_parity_recover(root, bio, bbio,
raid_map, map_length,
ret = raid56_parity_recover(root, bio, bbio, map_length,
mirror_num, 1);
}
@ -6238,17 +6254,22 @@ int btrfs_read_sys_array(struct btrfs_root *root)
struct extent_buffer *sb;
struct btrfs_disk_key *disk_key;
struct btrfs_chunk *chunk;
u8 *ptr;
unsigned long sb_ptr;
u8 *array_ptr;
unsigned long sb_array_offset;
int ret = 0;
u32 num_stripes;
u32 array_size;
u32 len = 0;
u32 cur;
u32 cur_offset;
struct btrfs_key key;
sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
BTRFS_SUPER_INFO_SIZE);
ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize);
/*
* This will create extent buffer of nodesize, superblock size is
* fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will
* overallocate but we can keep it as-is, only the first page is used.
*/
sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
if (!sb)
return -ENOMEM;
btrfs_set_buffer_uptodate(sb);
@ -6271,35 +6292,56 @@ int btrfs_read_sys_array(struct btrfs_root *root)
write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
array_size = btrfs_super_sys_array_size(super_copy);
ptr = super_copy->sys_chunk_array;
sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
cur = 0;
array_ptr = super_copy->sys_chunk_array;
sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
cur_offset = 0;
while (cur_offset < array_size) {
disk_key = (struct btrfs_disk_key *)array_ptr;
len = sizeof(*disk_key);
if (cur_offset + len > array_size)
goto out_short_read;
while (cur < array_size) {
disk_key = (struct btrfs_disk_key *)ptr;
btrfs_disk_key_to_cpu(&key, disk_key);
len = sizeof(*disk_key); ptr += len;
sb_ptr += len;
cur += len;
array_ptr += len;
sb_array_offset += len;
cur_offset += len;
if (key.type == BTRFS_CHUNK_ITEM_KEY) {
chunk = (struct btrfs_chunk *)sb_ptr;
chunk = (struct btrfs_chunk *)sb_array_offset;
/*
* At least one btrfs_chunk with one stripe must be
* present, exact stripe count check comes afterwards
*/
len = btrfs_chunk_item_size(1);
if (cur_offset + len > array_size)
goto out_short_read;
num_stripes = btrfs_chunk_num_stripes(sb, chunk);
len = btrfs_chunk_item_size(num_stripes);
if (cur_offset + len > array_size)
goto out_short_read;
ret = read_one_chunk(root, &key, sb, chunk);
if (ret)
break;
num_stripes = btrfs_chunk_num_stripes(sb, chunk);
len = btrfs_chunk_item_size(num_stripes);
} else {
ret = -EIO;
break;
}
ptr += len;
sb_ptr += len;
cur += len;
array_ptr += len;
sb_array_offset += len;
cur_offset += len;
}
free_extent_buffer(sb);
return ret;
out_short_read:
printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n",
len, cur_offset);
free_extent_buffer(sb);
return -EIO;
}
int btrfs_read_chunk_tree(struct btrfs_root *root)

View File

@ -295,8 +295,10 @@ typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
#define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0)
struct btrfs_bio {
atomic_t refs;
atomic_t stripes_pending;
struct btrfs_fs_info *fs_info;
u64 map_type; /* get from map_lookup->type */
bio_end_io_t *end_io;
struct bio *orig_bio;
unsigned long flags;
@ -307,6 +309,12 @@ struct btrfs_bio {
int mirror_num;
int num_tgtdevs;
int *tgtdev_map;
/*
* logical block numbers for the start of each stripe
* The last one or two are p/q. These are sorted,
* so raid_map[0] is the start of our full stripe
*/
u64 *raid_map;
struct btrfs_bio_stripe stripes[];
};
@ -388,19 +396,15 @@ struct btrfs_balance_control {
int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
u64 end, u64 *length);
#define btrfs_bio_size(total_stripes, real_stripes) \
(sizeof(struct btrfs_bio) + \
(sizeof(struct btrfs_bio_stripe) * (total_stripes)) + \
(sizeof(int) * (real_stripes)))
void btrfs_get_bbio(struct btrfs_bio *bbio);
void btrfs_put_bbio(struct btrfs_bio *bbio);
int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num);
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num,
u64 **raid_map_ret);
int need_raid_map);
int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
u64 chunk_start, u64 physical, u64 devid,
u64 **logical, int *naddrs, int *stripe_len);

View File

@ -495,8 +495,7 @@ struct btrfs_ioctl_send_args {
/* Error codes as returned by the kernel */
enum btrfs_err_code {
notused,
BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,