Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (25 commits) Btrfs: forced readonly mounts on errors btrfs: Require CAP_SYS_ADMIN for filesystem rebalance Btrfs: don't warn if we get ENOSPC in btrfs_block_rsv_check btrfs: Fix memory leak in btrfs_read_fs_root_no_radix() btrfs: check NULL or not btrfs: Don't pass NULL ptr to func that may deref it. btrfs: mount failure return value fix btrfs: Mem leak in btrfs_get_acl() btrfs: fix wrong free space information of btrfs btrfs: make the chunk allocator utilize the devices better btrfs: restructure find_free_dev_extent() btrfs: fix wrong calculation of stripe size btrfs: try to reclaim some space when chunk allocation fails btrfs: fix wrong data space statistics fs/btrfs: Fix build of ctree Btrfs: fix off by one while setting block groups readonly Btrfs: Add BTRFS_IOC_SUBVOL_GETFLAGS/SETFLAGS ioctls Btrfs: Add readonly snapshots support Btrfs: Refactor btrfs_ioctl_snap_create() btrfs: Extract duplicate decompress code ...
This commit is contained in:
commit
eee2a817df
|
@ -4,6 +4,8 @@ config BTRFS_FS
|
|||
select LIBCRC32C
|
||||
select ZLIB_INFLATE
|
||||
select ZLIB_DEFLATE
|
||||
select LZO_COMPRESS
|
||||
select LZO_DECOMPRESS
|
||||
help
|
||||
Btrfs is a new filesystem with extents, writable snapshotting,
|
||||
support for multiple devices and many more features.
|
||||
|
|
|
@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
|
|||
transaction.o inode.o file.o tree-defrag.o \
|
||||
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
|
||||
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
|
||||
export.o tree-log.o acl.o free-space-cache.o zlib.o \
|
||||
export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
|
||||
compression.o delayed-ref.o relocation.o
|
||||
|
|
|
@ -60,8 +60,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
|
|||
size = __btrfs_getxattr(inode, name, value, size);
|
||||
if (size > 0) {
|
||||
acl = posix_acl_from_xattr(value, size);
|
||||
if (IS_ERR(acl))
|
||||
if (IS_ERR(acl)) {
|
||||
kfree(value);
|
||||
return acl;
|
||||
}
|
||||
set_cached_acl(inode, type, acl);
|
||||
}
|
||||
kfree(value);
|
||||
|
|
|
@ -157,7 +157,7 @@ struct btrfs_inode {
|
|||
/*
|
||||
* always compress this one file
|
||||
*/
|
||||
unsigned force_compress:1;
|
||||
unsigned force_compress:4;
|
||||
|
||||
struct inode vfs_inode;
|
||||
};
|
||||
|
|
|
@ -62,6 +62,9 @@ struct compressed_bio {
|
|||
/* number of bytes on disk */
|
||||
unsigned long compressed_len;
|
||||
|
||||
/* the compression algorithm for this bio */
|
||||
int compress_type;
|
||||
|
||||
/* number of compressed pages in the array */
|
||||
unsigned long nr_pages;
|
||||
|
||||
|
@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err)
|
|||
/* ok, we're the last bio for this extent, lets start
|
||||
* the decompression.
|
||||
*/
|
||||
ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
|
||||
cb->start,
|
||||
cb->orig_bio->bi_io_vec,
|
||||
cb->orig_bio->bi_vcnt,
|
||||
cb->compressed_len);
|
||||
ret = btrfs_decompress_biovec(cb->compress_type,
|
||||
cb->compressed_pages,
|
||||
cb->start,
|
||||
cb->orig_bio->bi_io_vec,
|
||||
cb->orig_bio->bi_vcnt,
|
||||
cb->compressed_len);
|
||||
csum_failed:
|
||||
if (ret)
|
||||
cb->errors = 1;
|
||||
|
@ -588,6 +592,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
|||
|
||||
cb->len = uncompressed_len;
|
||||
cb->compressed_len = compressed_len;
|
||||
cb->compress_type = extent_compress_type(bio_flags);
|
||||
cb->orig_bio = bio;
|
||||
|
||||
nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
|
||||
|
@ -677,3 +682,317 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
|||
bio_put(comp_bio);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
|
||||
static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
|
||||
static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
|
||||
static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
|
||||
static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
|
||||
|
||||
struct btrfs_compress_op *btrfs_compress_op[] = {
|
||||
&btrfs_zlib_compress,
|
||||
&btrfs_lzo_compress,
|
||||
};
|
||||
|
||||
/*
 * Initialise the idle list, lock, allocation counter and wait queue of
 * every compression type.  Always returns 0.
 */
int __init btrfs_init_compress(void)
{
	int slot = 0;

	while (slot < BTRFS_COMPRESS_TYPES) {
		INIT_LIST_HEAD(&comp_idle_workspace[slot]);
		spin_lock_init(&comp_workspace_lock[slot]);
		atomic_set(&comp_alloc_workspace[slot], 0);
		init_waitqueue_head(&comp_workspace_wait[slot]);
		slot++;
	}

	return 0;
}
|
||||
|
||||
/*
|
||||
* this finds an available workspace or allocates a new one
|
||||
* ERR_PTR is returned if things go bad.
|
||||
*/
|
||||
static struct list_head *find_workspace(int type)
|
||||
{
|
||||
struct list_head *workspace;
|
||||
int cpus = num_online_cpus();
|
||||
int idx = type - 1;
|
||||
|
||||
struct list_head *idle_workspace = &comp_idle_workspace[idx];
|
||||
spinlock_t *workspace_lock = &comp_workspace_lock[idx];
|
||||
atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
|
||||
wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
|
||||
int *num_workspace = &comp_num_workspace[idx];
|
||||
again:
|
||||
spin_lock(workspace_lock);
|
||||
if (!list_empty(idle_workspace)) {
|
||||
workspace = idle_workspace->next;
|
||||
list_del(workspace);
|
||||
(*num_workspace)--;
|
||||
spin_unlock(workspace_lock);
|
||||
return workspace;
|
||||
|
||||
}
|
||||
if (atomic_read(alloc_workspace) > cpus) {
|
||||
DEFINE_WAIT(wait);
|
||||
|
||||
spin_unlock(workspace_lock);
|
||||
prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
|
||||
if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
|
||||
schedule();
|
||||
finish_wait(workspace_wait, &wait);
|
||||
goto again;
|
||||
}
|
||||
atomic_inc(alloc_workspace);
|
||||
spin_unlock(workspace_lock);
|
||||
|
||||
workspace = btrfs_compress_op[idx]->alloc_workspace();
|
||||
if (IS_ERR(workspace)) {
|
||||
atomic_dec(alloc_workspace);
|
||||
wake_up(workspace_wait);
|
||||
}
|
||||
return workspace;
|
||||
}
|
||||
|
||||
/*
|
||||
* put a workspace struct back on the list or free it if we have enough
|
||||
* idle ones sitting around
|
||||
*/
|
||||
static void free_workspace(int type, struct list_head *workspace)
|
||||
{
|
||||
int idx = type - 1;
|
||||
struct list_head *idle_workspace = &comp_idle_workspace[idx];
|
||||
spinlock_t *workspace_lock = &comp_workspace_lock[idx];
|
||||
atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
|
||||
wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
|
||||
int *num_workspace = &comp_num_workspace[idx];
|
||||
|
||||
spin_lock(workspace_lock);
|
||||
if (*num_workspace < num_online_cpus()) {
|
||||
list_add_tail(workspace, idle_workspace);
|
||||
(*num_workspace)++;
|
||||
spin_unlock(workspace_lock);
|
||||
goto wake;
|
||||
}
|
||||
spin_unlock(workspace_lock);
|
||||
|
||||
btrfs_compress_op[idx]->free_workspace(workspace);
|
||||
atomic_dec(alloc_workspace);
|
||||
wake:
|
||||
if (waitqueue_active(workspace_wait))
|
||||
wake_up(workspace_wait);
|
||||
}
|
||||
|
||||
/*
|
||||
* cleanup function for module exit
|
||||
*/
|
||||
static void free_workspaces(void)
|
||||
{
|
||||
struct list_head *workspace;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
|
||||
while (!list_empty(&comp_idle_workspace[i])) {
|
||||
workspace = comp_idle_workspace[i].next;
|
||||
list_del(workspace);
|
||||
btrfs_compress_op[i]->free_workspace(workspace);
|
||||
atomic_dec(&comp_alloc_workspace[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* given an address space and start/len, compress the bytes.
|
||||
*
|
||||
* pages are allocated to hold the compressed result and stored
|
||||
* in 'pages'
|
||||
*
|
||||
* out_pages is used to return the number of pages allocated. There
|
||||
* may be pages allocated even if we return an error
|
||||
*
|
||||
* total_in is used to return the number of bytes actually read. It
|
||||
* may be smaller then len if we had to exit early because we
|
||||
* ran out of room in the pages array or because we cross the
|
||||
* max_out threshold.
|
||||
*
|
||||
* total_out is used to return the total number of compressed bytes
|
||||
*
|
||||
* max_out tells us the max number of bytes that we're allowed to
|
||||
* stuff into pages
|
||||
*/
|
||||
int btrfs_compress_pages(int type, struct address_space *mapping,
|
||||
u64 start, unsigned long len,
|
||||
struct page **pages,
|
||||
unsigned long nr_dest_pages,
|
||||
unsigned long *out_pages,
|
||||
unsigned long *total_in,
|
||||
unsigned long *total_out,
|
||||
unsigned long max_out)
|
||||
{
|
||||
struct list_head *workspace;
|
||||
int ret;
|
||||
|
||||
workspace = find_workspace(type);
|
||||
if (IS_ERR(workspace))
|
||||
return -1;
|
||||
|
||||
ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
|
||||
start, len, pages,
|
||||
nr_dest_pages, out_pages,
|
||||
total_in, total_out,
|
||||
max_out);
|
||||
free_workspace(type, workspace);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* pages_in is an array of pages with compressed data.
|
||||
*
|
||||
* disk_start is the starting logical offset of this array in the file
|
||||
*
|
||||
* bvec is a bio_vec of pages from the file that we want to decompress into
|
||||
*
|
||||
* vcnt is the count of pages in the biovec
|
||||
*
|
||||
* srclen is the number of bytes in pages_in
|
||||
*
|
||||
* The basic idea is that we have a bio that was created by readpages.
|
||||
* The pages in the bio are for the uncompressed data, and they may not
|
||||
* be contiguous. They all correspond to the range of bytes covered by
|
||||
* the compressed extent.
|
||||
*/
|
||||
int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
|
||||
struct bio_vec *bvec, int vcnt, size_t srclen)
|
||||
{
|
||||
struct list_head *workspace;
|
||||
int ret;
|
||||
|
||||
workspace = find_workspace(type);
|
||||
if (IS_ERR(workspace))
|
||||
return -ENOMEM;
|
||||
|
||||
ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
|
||||
disk_start,
|
||||
bvec, vcnt, srclen);
|
||||
free_workspace(type, workspace);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* a less complex decompression routine. Our compressed data fits in a
|
||||
* single page, and we want to read a single page out of it.
|
||||
* start_byte tells us the offset into the compressed data we're interested in
|
||||
*/
|
||||
int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
|
||||
unsigned long start_byte, size_t srclen, size_t destlen)
|
||||
{
|
||||
struct list_head *workspace;
|
||||
int ret;
|
||||
|
||||
workspace = find_workspace(type);
|
||||
if (IS_ERR(workspace))
|
||||
return -ENOMEM;
|
||||
|
||||
ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
|
||||
dest_page, start_byte,
|
||||
srclen, destlen);
|
||||
|
||||
free_workspace(type, workspace);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* module exit hook: release all idle compression workspaces */
void __exit btrfs_exit_compress(void)
{
	free_workspaces();
}
|
||||
|
||||
/*
|
||||
* Copy uncompressed data from working buffer to pages.
|
||||
*
|
||||
* buf_start is the byte offset we're of the start of our workspace buffer.
|
||||
*
|
||||
* total_out is the last byte of the buffer
|
||||
*/
|
||||
int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
|
||||
unsigned long total_out, u64 disk_start,
|
||||
struct bio_vec *bvec, int vcnt,
|
||||
unsigned long *page_index,
|
||||
unsigned long *pg_offset)
|
||||
{
|
||||
unsigned long buf_offset;
|
||||
unsigned long current_buf_start;
|
||||
unsigned long start_byte;
|
||||
unsigned long working_bytes = total_out - buf_start;
|
||||
unsigned long bytes;
|
||||
char *kaddr;
|
||||
struct page *page_out = bvec[*page_index].bv_page;
|
||||
|
||||
/*
|
||||
* start byte is the first byte of the page we're currently
|
||||
* copying into relative to the start of the compressed data.
|
||||
*/
|
||||
start_byte = page_offset(page_out) - disk_start;
|
||||
|
||||
/* we haven't yet hit data corresponding to this page */
|
||||
if (total_out <= start_byte)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* the start of the data we care about is offset into
|
||||
* the middle of our working buffer
|
||||
*/
|
||||
if (total_out > start_byte && buf_start < start_byte) {
|
||||
buf_offset = start_byte - buf_start;
|
||||
working_bytes -= buf_offset;
|
||||
} else {
|
||||
buf_offset = 0;
|
||||
}
|
||||
current_buf_start = buf_start;
|
||||
|
||||
/* copy bytes from the working buffer into the pages */
|
||||
while (working_bytes > 0) {
|
||||
bytes = min(PAGE_CACHE_SIZE - *pg_offset,
|
||||
PAGE_CACHE_SIZE - buf_offset);
|
||||
bytes = min(bytes, working_bytes);
|
||||
kaddr = kmap_atomic(page_out, KM_USER0);
|
||||
memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
|
||||
kunmap_atomic(kaddr, KM_USER0);
|
||||
flush_dcache_page(page_out);
|
||||
|
||||
*pg_offset += bytes;
|
||||
buf_offset += bytes;
|
||||
working_bytes -= bytes;
|
||||
current_buf_start += bytes;
|
||||
|
||||
/* check if we need to pick another page */
|
||||
if (*pg_offset == PAGE_CACHE_SIZE) {
|
||||
(*page_index)++;
|
||||
if (*page_index >= vcnt)
|
||||
return 0;
|
||||
|
||||
page_out = bvec[*page_index].bv_page;
|
||||
*pg_offset = 0;
|
||||
start_byte = page_offset(page_out) - disk_start;
|
||||
|
||||
/*
|
||||
* make sure our new page is covered by this
|
||||
* working buffer
|
||||
*/
|
||||
if (total_out <= start_byte)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* the next page in the biovec might not be adjacent
|
||||
* to the last page, but it might still be found
|
||||
* inside this working buffer. bump our offset pointer
|
||||
*/
|
||||
if (total_out > start_byte &&
|
||||
current_buf_start < start_byte) {
|
||||
buf_offset = start_byte - buf_start;
|
||||
working_bytes = total_out - start_byte;
|
||||
current_buf_start = buf_start + buf_offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -19,24 +19,27 @@
|
|||
#ifndef __BTRFS_COMPRESSION_
|
||||
#define __BTRFS_COMPRESSION_
|
||||
|
||||
int btrfs_zlib_decompress(unsigned char *data_in,
|
||||
struct page *dest_page,
|
||||
unsigned long start_byte,
|
||||
size_t srclen, size_t destlen);
|
||||
int btrfs_zlib_compress_pages(struct address_space *mapping,
|
||||
u64 start, unsigned long len,
|
||||
struct page **pages,
|
||||
unsigned long nr_dest_pages,
|
||||
unsigned long *out_pages,
|
||||
unsigned long *total_in,
|
||||
unsigned long *total_out,
|
||||
unsigned long max_out);
|
||||
int btrfs_zlib_decompress_biovec(struct page **pages_in,
|
||||
u64 disk_start,
|
||||
struct bio_vec *bvec,
|
||||
int vcnt,
|
||||
size_t srclen);
|
||||
void btrfs_zlib_exit(void);
|
||||
int btrfs_init_compress(void);
|
||||
void btrfs_exit_compress(void);
|
||||
|
||||
int btrfs_compress_pages(int type, struct address_space *mapping,
|
||||
u64 start, unsigned long len,
|
||||
struct page **pages,
|
||||
unsigned long nr_dest_pages,
|
||||
unsigned long *out_pages,
|
||||
unsigned long *total_in,
|
||||
unsigned long *total_out,
|
||||
unsigned long max_out);
|
||||
int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
|
||||
struct bio_vec *bvec, int vcnt, size_t srclen);
|
||||
int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
|
||||
unsigned long start_byte, size_t srclen, size_t destlen);
|
||||
int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
|
||||
unsigned long total_out, u64 disk_start,
|
||||
struct bio_vec *bvec, int vcnt,
|
||||
unsigned long *page_index,
|
||||
unsigned long *pg_offset);
|
||||
|
||||
int btrfs_submit_compressed_write(struct inode *inode, u64 start,
|
||||
unsigned long len, u64 disk_start,
|
||||
unsigned long compressed_len,
|
||||
|
@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
|
|||
unsigned long nr_pages);
|
||||
int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, unsigned long bio_flags);
|
||||
|
||||
struct btrfs_compress_op {
|
||||
struct list_head *(*alloc_workspace)(void);
|
||||
|
||||
void (*free_workspace)(struct list_head *workspace);
|
||||
|
||||
int (*compress_pages)(struct list_head *workspace,
|
||||
struct address_space *mapping,
|
||||
u64 start, unsigned long len,
|
||||
struct page **pages,
|
||||
unsigned long nr_dest_pages,
|
||||
unsigned long *out_pages,
|
||||
unsigned long *total_in,
|
||||
unsigned long *total_out,
|
||||
unsigned long max_out);
|
||||
|
||||
int (*decompress_biovec)(struct list_head *workspace,
|
||||
struct page **pages_in,
|
||||
u64 disk_start,
|
||||
struct bio_vec *bvec,
|
||||
int vcnt,
|
||||
size_t srclen);
|
||||
|
||||
int (*decompress)(struct list_head *workspace,
|
||||
unsigned char *data_in,
|
||||
struct page *dest_page,
|
||||
unsigned long start_byte,
|
||||
size_t srclen, size_t destlen);
|
||||
};
|
||||
|
||||
extern struct btrfs_compress_op btrfs_zlib_compress;
|
||||
extern struct btrfs_compress_op btrfs_lzo_compress;
|
||||
|
||||
#endif
|
||||
|
|
|
@ -105,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
|
|||
/* this also releases the path */
|
||||
void btrfs_free_path(struct btrfs_path *p)
|
||||
{
|
||||
if (!p)
|
||||
return;
|
||||
btrfs_release_path(NULL, p);
|
||||
kmem_cache_free(btrfs_path_cachep, p);
|
||||
}
|
||||
|
@ -2514,6 +2516,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
|
|||
btrfs_assert_tree_locked(path->nodes[1]);
|
||||
|
||||
right = read_node_slot(root, upper, slot + 1);
|
||||
if (right == NULL)
|
||||
return 1;
|
||||
|
||||
btrfs_tree_lock(right);
|
||||
btrfs_set_lock_blocking(right);
|
||||
|
||||
|
@ -2764,6 +2769,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
|
|||
btrfs_assert_tree_locked(path->nodes[1]);
|
||||
|
||||
left = read_node_slot(root, path->nodes[1], slot - 1);
|
||||
if (left == NULL)
|
||||
return 1;
|
||||
|
||||
btrfs_tree_lock(left);
|
||||
btrfs_set_lock_blocking(left);
|
||||
|
||||
|
|
|
@ -295,6 +295,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
|
|||
#define BTRFS_FSID_SIZE 16
|
||||
#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
|
||||
#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
|
||||
|
||||
/*
|
||||
* File system states
|
||||
*/
|
||||
|
||||
/* Errors detected */
|
||||
#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2)
|
||||
|
||||
#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
|
||||
#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
|
||||
|
||||
|
@ -399,13 +407,15 @@ struct btrfs_super_block {
|
|||
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
|
||||
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
|
||||
#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
|
||||
#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
|
||||
|
||||
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
|
||||
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
|
||||
#define BTRFS_FEATURE_INCOMPAT_SUPP \
|
||||
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
|
||||
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
|
||||
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
|
||||
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
|
||||
BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
|
||||
|
||||
/*
|
||||
* A leaf is full of items. offset and size tell us where to find
|
||||
|
@ -552,9 +562,11 @@ struct btrfs_timespec {
|
|||
} __attribute__ ((__packed__));
|
||||
|
||||
enum btrfs_compression_type {
|
||||
BTRFS_COMPRESS_NONE = 0,
|
||||
BTRFS_COMPRESS_ZLIB = 1,
|
||||
BTRFS_COMPRESS_LAST = 2,
|
||||
BTRFS_COMPRESS_NONE = 0,
|
||||
BTRFS_COMPRESS_ZLIB = 1,
|
||||
BTRFS_COMPRESS_LZO = 2,
|
||||
BTRFS_COMPRESS_TYPES = 2,
|
||||
BTRFS_COMPRESS_LAST = 3,
|
||||
};
|
||||
|
||||
struct btrfs_inode_item {
|
||||
|
@ -598,6 +610,8 @@ struct btrfs_dir_item {
|
|||
u8 type;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
|
||||
|
||||
struct btrfs_root_item {
|
||||
struct btrfs_inode_item inode;
|
||||
__le64 generation;
|
||||
|
@ -896,7 +910,8 @@ struct btrfs_fs_info {
|
|||
*/
|
||||
u64 last_trans_log_full_commit;
|
||||
u64 open_ioctl_trans;
|
||||
unsigned long mount_opt;
|
||||
unsigned long mount_opt:20;
|
||||
unsigned long compress_type:4;
|
||||
u64 max_inline;
|
||||
u64 alloc_start;
|
||||
struct btrfs_transaction *running_transaction;
|
||||
|
@ -1051,6 +1066,9 @@ struct btrfs_fs_info {
|
|||
unsigned metadata_ratio;
|
||||
|
||||
void *bdev_holder;
|
||||
|
||||
/* filesystem state */
|
||||
u64 fs_state;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -1894,6 +1912,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
|
|||
BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
|
||||
last_snapshot, 64);
|
||||
|
||||
static inline bool btrfs_root_readonly(struct btrfs_root *root)
|
||||
{
|
||||
return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY;
|
||||
}
|
||||
|
||||
/* struct btrfs_super_block */
|
||||
|
||||
BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
|
||||
|
@ -2146,6 +2169,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
|
|||
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, u64 group_start);
|
||||
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
|
||||
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
|
||||
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
|
||||
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
|
||||
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
|
||||
|
@ -2189,6 +2213,12 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
|
|||
int btrfs_set_block_group_rw(struct btrfs_root *root,
|
||||
struct btrfs_block_group_cache *cache);
|
||||
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
|
||||
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
|
||||
int btrfs_error_unpin_extent_range(struct btrfs_root *root,
|
||||
u64 start, u64 end);
|
||||
int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
|
||||
u64 num_bytes);
|
||||
|
||||
/* ctree.c */
|
||||
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
|
||||
int level, int *slot);
|
||||
|
@ -2542,6 +2572,14 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
|
|||
/* super.c */
|
||||
int btrfs_parse_options(struct btrfs_root *root, char *options);
|
||||
int btrfs_sync_fs(struct super_block *sb, int wait);
|
||||
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
|
||||
unsigned int line, int errno);
|
||||
|
||||
/*
 * Report an error through __btrfs_std_error(), tagging it with the
 * calling function and line.  Nothing happens when errno is 0.
 *
 * The errno argument is evaluated exactly once, so it is safe to pass
 * an expression with side effects.
 */
#define btrfs_std_error(fs_info, errno)				\
do {								\
	int __btrfs_errno = (errno);				\
								\
	if (__btrfs_errno)					\
		__btrfs_std_error((fs_info), __func__,		\
				  __LINE__, __btrfs_errno);	\
} while (0)
|
||||
|
||||
/* acl.c */
|
||||
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
|
||||
|
|
|
@ -44,6 +44,20 @@
|
|||
static struct extent_io_ops btree_extent_io_ops;
|
||||
static void end_workqueue_fn(struct btrfs_work *work);
|
||||
static void free_fs_root(struct btrfs_root *root);
|
||||
static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
|
||||
int read_only);
|
||||
static int btrfs_destroy_ordered_operations(struct btrfs_root *root);
|
||||
static int btrfs_destroy_ordered_extents(struct btrfs_root *root);
|
||||
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
||||
struct btrfs_root *root);
|
||||
static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
|
||||
static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
|
||||
static int btrfs_destroy_marked_extents(struct btrfs_root *root,
|
||||
struct extent_io_tree *dirty_pages,
|
||||
int mark);
|
||||
static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
|
||||
struct extent_io_tree *pinned_extents);
|
||||
static int btrfs_cleanup_transaction(struct btrfs_root *root);
|
||||
|
||||
/*
|
||||
* end_io_wq structs are used to do processing in task context when an IO is
|
||||
|
@ -353,6 +367,10 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
|
|||
WARN_ON(len == 0);
|
||||
|
||||
eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
|
||||
if (eb == NULL) {
|
||||
WARN_ON(1);
|
||||
goto out;
|
||||
}
|
||||
ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
|
||||
btrfs_header_generation(eb));
|
||||
BUG_ON(ret);
|
||||
|
@ -427,6 +445,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
|
|||
WARN_ON(len == 0);
|
||||
|
||||
eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
|
||||
if (eb == NULL) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
found_start = btrfs_header_bytenr(eb);
|
||||
if (found_start != start) {
|
||||
|
@ -1145,6 +1167,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
|
|||
}
|
||||
btrfs_free_path(path);
|
||||
if (ret) {
|
||||
kfree(root);
|
||||
if (ret > 0)
|
||||
ret = -ENOENT;
|
||||
return ERR_PTR(ret);
|
||||
|
@ -1713,8 +1736,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
|
|||
fs_info, BTRFS_ROOT_TREE_OBJECTID);
|
||||
|
||||
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
|
||||
if (!bh)
|
||||
if (!bh) {
|
||||
err = -EINVAL;
|
||||
goto fail_iput;
|
||||
}
|
||||
|
||||
memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
|
||||
memcpy(&fs_info->super_for_commit, &fs_info->super_copy,
|
||||
|
@ -1727,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
|
|||
if (!btrfs_super_root(disk_super))
|
||||
goto fail_iput;
|
||||
|
||||
/* check FS state, whether FS is broken. */
|
||||
fs_info->fs_state |= btrfs_super_flags(disk_super);
|
||||
|
||||
btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
|
||||
|
||||
ret = btrfs_parse_options(tree_root, options);
|
||||
if (ret) {
|
||||
err = ret;
|
||||
|
@ -1744,10 +1774,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
|
|||
}
|
||||
|
||||
features = btrfs_super_incompat_flags(disk_super);
|
||||
if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
|
||||
features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
|
||||
btrfs_set_super_incompat_flags(disk_super, features);
|
||||
}
|
||||
features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
|
||||
if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
|
||||
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
|
||||
btrfs_set_super_incompat_flags(disk_super, features);
|
||||
|
||||
features = btrfs_super_compat_ro_flags(disk_super) &
|
||||
~BTRFS_FEATURE_COMPAT_RO_SUPP;
|
||||
|
@ -1957,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
|
|||
btrfs_set_opt(fs_info->mount_opt, SSD);
|
||||
}
|
||||
|
||||
if (btrfs_super_log_root(disk_super) != 0) {
|
||||
/* do not make disk changes in broken FS */
|
||||
if (btrfs_super_log_root(disk_super) != 0 &&
|
||||
!(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
|
||||
u64 bytenr = btrfs_super_log_root(disk_super);
|
||||
|
||||
if (fs_devices->rw_devices == 0) {
|
||||
|
@ -2442,8 +2474,28 @@ int close_ctree(struct btrfs_root *root)
|
|||
smp_mb();
|
||||
|
||||
btrfs_put_block_group_cache(fs_info);
|
||||
|
||||
/*
|
||||
* Here come 2 situations when btrfs is broken to flip readonly:
|
||||
*
|
||||
* 1. when btrfs flips readonly somewhere else before
|
||||
* btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
|
||||
* and btrfs will skip to write sb directly to keep
|
||||
* ERROR state on disk.
|
||||
*
|
||||
* 2. when btrfs flips readonly just in btrfs_commit_super,
|
||||
* and in such case, btrfs cannot write sb via btrfs_commit_super,
|
||||
* and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
|
||||
* btrfs will cleanup all FS resources first and write sb then.
|
||||
*/
|
||||
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
|
||||
ret = btrfs_commit_super(root);
|
||||
ret = btrfs_commit_super(root);
|
||||
if (ret)
|
||||
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
|
||||
}
|
||||
|
||||
if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
|
||||
ret = btrfs_error_commit_super(root);
|
||||
if (ret)
|
||||
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
|
||||
}
|
||||
|
@ -2619,6 +2671,352 @@ out:
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
|
||||
int read_only)
|
||||
{
|
||||
if (read_only)
|
||||
return;
|
||||
|
||||
if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
|
||||
printk(KERN_WARNING "warning: mount fs with errors, "
|
||||
"running btrfsck is recommended\n");
|
||||
}
|
||||
|
||||
int btrfs_error_commit_super(struct btrfs_root *root)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&root->fs_info->cleaner_mutex);
|
||||
btrfs_run_delayed_iputs(root);
|
||||
mutex_unlock(&root->fs_info->cleaner_mutex);
|
||||
|
||||
down_write(&root->fs_info->cleanup_work_sem);
|
||||
up_write(&root->fs_info->cleanup_work_sem);
|
||||
|
||||
/* cleanup FS via transaction */
|
||||
btrfs_cleanup_transaction(root);
|
||||
|
||||
ret = write_ctree_super(NULL, root, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_destroy_ordered_operations(struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_inode *btrfs_inode;
|
||||
struct list_head splice;
|
||||
|
||||
INIT_LIST_HEAD(&splice);
|
||||
|
||||
mutex_lock(&root->fs_info->ordered_operations_mutex);
|
||||
spin_lock(&root->fs_info->ordered_extent_lock);
|
||||
|
||||
list_splice_init(&root->fs_info->ordered_operations, &splice);
|
||||
while (!list_empty(&splice)) {
|
||||
btrfs_inode = list_entry(splice.next, struct btrfs_inode,
|
||||
ordered_operations);
|
||||
|
||||
list_del_init(&btrfs_inode->ordered_operations);
|
||||
|
||||
btrfs_invalidate_inodes(btrfs_inode->root);
|
||||
}
|
||||
|
||||
spin_unlock(&root->fs_info->ordered_extent_lock);
|
||||
mutex_unlock(&root->fs_info->ordered_operations_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btrfs_destroy_ordered_extents(struct btrfs_root *root)
|
||||
{
|
||||
struct list_head splice;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct inode *inode;
|
||||
|
||||
INIT_LIST_HEAD(&splice);
|
||||
|
||||
spin_lock(&root->fs_info->ordered_extent_lock);
|
||||
|
||||
list_splice_init(&root->fs_info->ordered_extents, &splice);
|
||||
while (!list_empty(&splice)) {
|
||||
ordered = list_entry(splice.next, struct btrfs_ordered_extent,
|
||||
root_extent_list);
|
||||
|
||||
list_del_init(&ordered->root_extent_list);
|
||||
atomic_inc(&ordered->refs);
|
||||
|
||||
/* the inode may be getting freed (in sys_unlink path). */
|
||||
inode = igrab(ordered->inode);
|
||||
|
||||
spin_unlock(&root->fs_info->ordered_extent_lock);
|
||||
if (inode)
|
||||
iput(inode);
|
||||
|
||||
atomic_set(&ordered->refs, 1);
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
|
||||
spin_lock(&root->fs_info->ordered_extent_lock);
|
||||
}
|
||||
|
||||
spin_unlock(&root->fs_info->ordered_extent_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
||||
struct btrfs_root *root)
|
||||
{
|
||||
struct rb_node *node;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
struct btrfs_delayed_ref_node *ref;
|
||||
int ret = 0;
|
||||
|
||||
delayed_refs = &trans->delayed_refs;
|
||||
|
||||
spin_lock(&delayed_refs->lock);
|
||||
if (delayed_refs->num_entries == 0) {
|
||||
printk(KERN_INFO "delayed_refs has NO entry\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
node = rb_first(&delayed_refs->root);
|
||||
while (node) {
|
||||
ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
|
||||
node = rb_next(node);
|
||||
|
||||
ref->in_tree = 0;
|
||||
rb_erase(&ref->rb_node, &delayed_refs->root);
|
||||
delayed_refs->num_entries--;
|
||||
|
||||
atomic_set(&ref->refs, 1);
|
||||
if (btrfs_delayed_ref_is_head(ref)) {
|
||||
struct btrfs_delayed_ref_head *head;
|
||||
|
||||
head = btrfs_delayed_node_to_head(ref);
|
||||
mutex_lock(&head->mutex);
|
||||
kfree(head->extent_op);
|
||||
delayed_refs->num_heads--;
|
||||
if (list_empty(&head->cluster))
|
||||
delayed_refs->num_heads_ready--;
|
||||
list_del_init(&head->cluster);
|
||||
mutex_unlock(&head->mutex);
|
||||
}
|
||||
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
btrfs_put_delayed_ref(ref);
|
||||
|
||||
cond_resched();
|
||||
spin_lock(&delayed_refs->lock);
|
||||
}
|
||||
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
|
||||
{
|
||||
struct btrfs_pending_snapshot *snapshot;
|
||||
struct list_head splice;
|
||||
|
||||
INIT_LIST_HEAD(&splice);
|
||||
|
||||
list_splice_init(&t->pending_snapshots, &splice);
|
||||
|
||||
while (!list_empty(&splice)) {
|
||||
snapshot = list_entry(splice.next,
|
||||
struct btrfs_pending_snapshot,
|
||||
list);
|
||||
|
||||
list_del_init(&snapshot->list);
|
||||
|
||||
kfree(snapshot);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_inode *btrfs_inode;
|
||||
struct list_head splice;
|
||||
|
||||
INIT_LIST_HEAD(&splice);
|
||||
|
||||
list_splice_init(&root->fs_info->delalloc_inodes, &splice);
|
||||
|
||||
spin_lock(&root->fs_info->delalloc_lock);
|
||||
|
||||
while (!list_empty(&splice)) {
|
||||
btrfs_inode = list_entry(splice.next, struct btrfs_inode,
|
||||
delalloc_inodes);
|
||||
|
||||
list_del_init(&btrfs_inode->delalloc_inodes);
|
||||
|
||||
btrfs_invalidate_inodes(btrfs_inode->root);
|
||||
}
|
||||
|
||||
spin_unlock(&root->fs_info->delalloc_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btrfs_destroy_marked_extents(struct btrfs_root *root,
|
||||
struct extent_io_tree *dirty_pages,
|
||||
int mark)
|
||||
{
|
||||
int ret;
|
||||
struct page *page;
|
||||
struct inode *btree_inode = root->fs_info->btree_inode;
|
||||
struct extent_buffer *eb;
|
||||
u64 start = 0;
|
||||
u64 end;
|
||||
u64 offset;
|
||||
unsigned long index;
|
||||
|
||||
while (1) {
|
||||
ret = find_first_extent_bit(dirty_pages, start, &start, &end,
|
||||
mark);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
|
||||
while (start <= end) {
|
||||
index = start >> PAGE_CACHE_SHIFT;
|
||||
start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
|
||||
page = find_get_page(btree_inode->i_mapping, index);
|
||||
if (!page)
|
||||
continue;
|
||||
offset = page_offset(page);
|
||||
|
||||
spin_lock(&dirty_pages->buffer_lock);
|
||||
eb = radix_tree_lookup(
|
||||
&(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
|
||||
offset >> PAGE_CACHE_SHIFT);
|
||||
spin_unlock(&dirty_pages->buffer_lock);
|
||||
if (eb) {
|
||||
ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
|
||||
&eb->bflags);
|
||||
atomic_set(&eb->refs, 1);
|
||||
}
|
||||
if (PageWriteback(page))
|
||||
end_page_writeback(page);
|
||||
|
||||
lock_page(page);
|
||||
if (PageDirty(page)) {
|
||||
clear_page_dirty_for_io(page);
|
||||
spin_lock_irq(&page->mapping->tree_lock);
|
||||
radix_tree_tag_clear(&page->mapping->page_tree,
|
||||
page_index(page),
|
||||
PAGECACHE_TAG_DIRTY);
|
||||
spin_unlock_irq(&page->mapping->tree_lock);
|
||||
}
|
||||
|
||||
page->mapping->a_ops->invalidatepage(page, 0);
|
||||
unlock_page(page);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
|
||||
struct extent_io_tree *pinned_extents)
|
||||
{
|
||||
struct extent_io_tree *unpin;
|
||||
u64 start;
|
||||
u64 end;
|
||||
int ret;
|
||||
|
||||
unpin = pinned_extents;
|
||||
while (1) {
|
||||
ret = find_first_extent_bit(unpin, 0, &start, &end,
|
||||
EXTENT_DIRTY);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
/* opt_discard */
|
||||
ret = btrfs_error_discard_extent(root, start, end + 1 - start);
|
||||
|
||||
clear_extent_dirty(unpin, start, end, GFP_NOFS);
|
||||
btrfs_error_unpin_extent_range(root, start, end);
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btrfs_cleanup_transaction(struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_transaction *t;
|
||||
LIST_HEAD(list);
|
||||
|
||||
WARN_ON(1);
|
||||
|
||||
mutex_lock(&root->fs_info->trans_mutex);
|
||||
mutex_lock(&root->fs_info->transaction_kthread_mutex);
|
||||
|
||||
list_splice_init(&root->fs_info->trans_list, &list);
|
||||
while (!list_empty(&list)) {
|
||||
t = list_entry(list.next, struct btrfs_transaction, list);
|
||||
if (!t)
|
||||
break;
|
||||
|
||||
btrfs_destroy_ordered_operations(root);
|
||||
|
||||
btrfs_destroy_ordered_extents(root);
|
||||
|
||||
btrfs_destroy_delayed_refs(t, root);
|
||||
|
||||
btrfs_block_rsv_release(root,
|
||||
&root->fs_info->trans_block_rsv,
|
||||
t->dirty_pages.dirty_bytes);
|
||||
|
||||
/* FIXME: cleanup wait for commit */
|
||||
t->in_commit = 1;
|
||||
t->blocked = 1;
|
||||
if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
|
||||
wake_up(&root->fs_info->transaction_blocked_wait);
|
||||
|
||||
t->blocked = 0;
|
||||
if (waitqueue_active(&root->fs_info->transaction_wait))
|
||||
wake_up(&root->fs_info->transaction_wait);
|
||||
mutex_unlock(&root->fs_info->trans_mutex);
|
||||
|
||||
mutex_lock(&root->fs_info->trans_mutex);
|
||||
t->commit_done = 1;
|
||||
if (waitqueue_active(&t->commit_wait))
|
||||
wake_up(&t->commit_wait);
|
||||
mutex_unlock(&root->fs_info->trans_mutex);
|
||||
|
||||
mutex_lock(&root->fs_info->trans_mutex);
|
||||
|
||||
btrfs_destroy_pending_snapshots(t);
|
||||
|
||||
btrfs_destroy_delalloc_inodes(root);
|
||||
|
||||
spin_lock(&root->fs_info->new_trans_lock);
|
||||
root->fs_info->running_transaction = NULL;
|
||||
spin_unlock(&root->fs_info->new_trans_lock);
|
||||
|
||||
btrfs_destroy_marked_extents(root, &t->dirty_pages,
|
||||
EXTENT_DIRTY);
|
||||
|
||||
btrfs_destroy_pinned_extent(root,
|
||||
root->fs_info->pinned_extents);
|
||||
|
||||
t->use_count = 0;
|
||||
list_del_init(&t->list);
|
||||
memset(t, 0, sizeof(*t));
|
||||
kmem_cache_free(btrfs_transaction_cachep, t);
|
||||
}
|
||||
|
||||
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
|
||||
mutex_unlock(&root->fs_info->trans_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct extent_io_ops btree_extent_io_ops = {
|
||||
.write_cache_pages_lock_hook = btree_lock_page_hook,
|
||||
.readpage_end_io_hook = btree_readpage_end_io_hook,
|
||||
|
|
|
@ -52,6 +52,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_root *root, int max_mirrors);
|
||||
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
|
||||
int btrfs_commit_super(struct btrfs_root *root);
|
||||
int btrfs_error_commit_super(struct btrfs_root *root);
|
||||
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
|
||||
u64 bytenr, u32 blocksize);
|
||||
struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
|
||||
|
|
|
@ -3089,7 +3089,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
|
|||
return btrfs_reduce_alloc_profile(root, flags);
|
||||
}
|
||||
|
||||
static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
|
||||
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
|
||||
{
|
||||
u64 flags;
|
||||
|
||||
|
@ -3161,8 +3161,12 @@ alloc:
|
|||
bytes + 2 * 1024 * 1024,
|
||||
alloc_target, 0);
|
||||
btrfs_end_transaction(trans, root);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret < 0) {
|
||||
if (ret != -ENOSPC)
|
||||
return ret;
|
||||
else
|
||||
goto commit_trans;
|
||||
}
|
||||
|
||||
if (!data_sinfo) {
|
||||
btrfs_set_inode_space_info(root, inode);
|
||||
|
@ -3173,6 +3177,7 @@ alloc:
|
|||
spin_unlock(&data_sinfo->lock);
|
||||
|
||||
/* commit the current transaction and try again */
|
||||
commit_trans:
|
||||
if (!committed && !root->fs_info->open_ioctl_trans) {
|
||||
committed = 1;
|
||||
trans = btrfs_join_transaction(root, 1);
|
||||
|
@ -3721,11 +3726,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
|
|||
return 0;
|
||||
}
|
||||
|
||||
WARN_ON(1);
|
||||
printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
|
||||
block_rsv->size, block_rsv->reserved,
|
||||
block_rsv->freed[0], block_rsv->freed[1]);
|
||||
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
|
@ -7970,13 +7970,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache)
|
|||
|
||||
if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
|
||||
sinfo->bytes_may_use + sinfo->bytes_readonly +
|
||||
cache->reserved_pinned + num_bytes < sinfo->total_bytes) {
|
||||
cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
|
||||
sinfo->bytes_readonly += num_bytes;
|
||||
sinfo->bytes_reserved += cache->reserved_pinned;
|
||||
cache->reserved_pinned = 0;
|
||||
cache->ro = 1;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
spin_unlock(&cache->lock);
|
||||
spin_unlock(&sinfo->lock);
|
||||
return ret;
|
||||
|
@ -8012,6 +8013,62 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* helper to account the unused space of all the readonly block group in the
|
||||
* list. takes mirrors into account.
|
||||
*/
|
||||
static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
|
||||
{
|
||||
struct btrfs_block_group_cache *block_group;
|
||||
u64 free_bytes = 0;
|
||||
int factor;
|
||||
|
||||
list_for_each_entry(block_group, groups_list, list) {
|
||||
spin_lock(&block_group->lock);
|
||||
|
||||
if (!block_group->ro) {
|
||||
spin_unlock(&block_group->lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
|
||||
BTRFS_BLOCK_GROUP_RAID10 |
|
||||
BTRFS_BLOCK_GROUP_DUP))
|
||||
factor = 2;
|
||||
else
|
||||
factor = 1;
|
||||
|
||||
free_bytes += (block_group->key.offset -
|
||||
btrfs_block_group_used(&block_group->item)) *
|
||||
factor;
|
||||
|
||||
spin_unlock(&block_group->lock);
|
||||
}
|
||||
|
||||
return free_bytes;
|
||||
}
|
||||
|
||||
/*
|
||||
* helper to account the unused space of all the readonly block group in the
|
||||
* space_info. takes mirrors into account.
|
||||
*/
|
||||
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
|
||||
{
|
||||
int i;
|
||||
u64 free_bytes = 0;
|
||||
|
||||
spin_lock(&sinfo->lock);
|
||||
|
||||
for(i = 0; i < BTRFS_NR_RAID_TYPES; i++)
|
||||
if (!list_empty(&sinfo->block_groups[i]))
|
||||
free_bytes += __btrfs_get_ro_block_group_free_space(
|
||||
&sinfo->block_groups[i]);
|
||||
|
||||
spin_unlock(&sinfo->lock);
|
||||
|
||||
return free_bytes;
|
||||
}
|
||||
|
||||
int btrfs_set_block_group_rw(struct btrfs_root *root,
|
||||
struct btrfs_block_group_cache *cache)
|
||||
{
|
||||
|
@ -8092,7 +8149,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
|
|||
mutex_lock(&root->fs_info->chunk_mutex);
|
||||
list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
|
||||
u64 min_free = btrfs_block_group_used(&block_group->item);
|
||||
u64 dev_offset, max_avail;
|
||||
u64 dev_offset;
|
||||
|
||||
/*
|
||||
* check to make sure we can actually find a chunk with enough
|
||||
|
@ -8100,7 +8157,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
|
|||
*/
|
||||
if (device->total_bytes > device->bytes_used + min_free) {
|
||||
ret = find_free_dev_extent(NULL, device, min_free,
|
||||
&dev_offset, &max_avail);
|
||||
&dev_offset, NULL);
|
||||
if (!ret)
|
||||
break;
|
||||
ret = -1;
|
||||
|
@ -8584,3 +8641,14 @@ out:
|
|||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Non-static entry point that forwards to unpin_extent_range() with the
 * same @root, @start and @end, and returns its result unchanged.
 */
int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
	return unpin_extent_range(root, start, end);
}
|
||||
|
||||
/*
 * Non-static entry point that forwards to btrfs_discard_extent() for the
 * @num_bytes bytes starting at @bytenr, and returns its result unchanged.
 */
int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
			       u64 num_bytes)
{
	return btrfs_discard_extent(root, bytenr, num_bytes);
}
|
||||
|
|
|
@ -2028,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
|
|||
BUG_ON(extent_map_end(em) <= cur);
|
||||
BUG_ON(end < cur);
|
||||
|
||||
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
|
||||
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
|
||||
this_bio_flag = EXTENT_BIO_COMPRESSED;
|
||||
extent_set_compress_type(&this_bio_flag,
|
||||
em->compress_type);
|
||||
}
|
||||
|
||||
iosize = min(extent_map_end(em) - cur, end - cur + 1);
|
||||
cur_end = min(extent_map_end(em) - 1, end);
|
||||
|
@ -3072,6 +3075,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
|
|||
#endif
|
||||
|
||||
eb = kmem_cache_zalloc(extent_buffer_cache, mask);
|
||||
if (eb == NULL)
|
||||
return NULL;
|
||||
eb->start = start;
|
||||
eb->len = len;
|
||||
spin_lock_init(&eb->lock);
|
||||
|
|
|
@ -20,8 +20,12 @@
|
|||
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
|
||||
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
|
||||
|
||||
/* flags for bio submission */
|
||||
/*
|
||||
* flags for bio submission. The high bits indicate the compression
|
||||
* type for this bio
|
||||
*/
|
||||
#define EXTENT_BIO_COMPRESSED 1
|
||||
#define EXTENT_BIO_FLAG_SHIFT 16
|
||||
|
||||
/* these are bit numbers for test/set bit */
|
||||
#define EXTENT_BUFFER_UPTODATE 0
|
||||
|
@ -135,6 +139,17 @@ struct extent_buffer {
|
|||
wait_queue_head_t lock_wq;
|
||||
};
|
||||
|
||||
static inline void extent_set_compress_type(unsigned long *bio_flags,
|
||||
int compress_type)
|
||||
{
|
||||
*bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT;
|
||||
}
|
||||
|
||||
static inline int extent_compress_type(unsigned long bio_flags)
|
||||
{
|
||||
return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
|
||||
}
|
||||
|
||||
struct extent_map_tree;
|
||||
|
||||
static inline struct extent_state *extent_state_next(struct extent_state *state)
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include <linux/module.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include "ctree.h"
|
||||
#include "extent_map.h"
|
||||
|
||||
|
||||
|
@ -54,6 +55,7 @@ struct extent_map *alloc_extent_map(gfp_t mask)
|
|||
return em;
|
||||
em->in_tree = 0;
|
||||
em->flags = 0;
|
||||
em->compress_type = BTRFS_COMPRESS_NONE;
|
||||
atomic_set(&em->refs, 1);
|
||||
return em;
|
||||
}
|
||||
|
|
|
@ -26,7 +26,8 @@ struct extent_map {
|
|||
unsigned long flags;
|
||||
struct block_device *bdev;
|
||||
atomic_t refs;
|
||||
int in_tree;
|
||||
unsigned int in_tree:1;
|
||||
unsigned int compress_type:4;
|
||||
};
|
||||
|
||||
struct extent_map_tree {
|
||||
|
|
|
@ -225,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
|
|||
|
||||
split->bdev = em->bdev;
|
||||
split->flags = flags;
|
||||
split->compress_type = em->compress_type;
|
||||
ret = add_extent_mapping(em_tree, split);
|
||||
BUG_ON(ret);
|
||||
free_extent_map(split);
|
||||
|
@ -239,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
|
|||
split->len = em->start + em->len - (start + len);
|
||||
split->bdev = em->bdev;
|
||||
split->flags = flags;
|
||||
split->compress_type = em->compress_type;
|
||||
|
||||
if (compressed) {
|
||||
split->block_len = em->block_len;
|
||||
|
@ -891,6 +893,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
|
|||
if (err)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* If BTRFS flips readonly due to some impossible error
|
||||
* (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
|
||||
* although we have opened a file as writable, we have
|
||||
* to stop this write operation to ensure FS consistency.
|
||||
*/
|
||||
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
|
||||
err = -EROFS;
|
||||
goto out;
|
||||
}
|
||||
|
||||
file_update_time(file);
|
||||
BTRFS_I(inode)->sequence++;
|
||||
|
||||
|
|
|
@ -122,10 +122,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
|
|||
size_t cur_size = size;
|
||||
size_t datasize;
|
||||
unsigned long offset;
|
||||
int use_compress = 0;
|
||||
int compress_type = BTRFS_COMPRESS_NONE;
|
||||
|
||||
if (compressed_size && compressed_pages) {
|
||||
use_compress = 1;
|
||||
compress_type = root->fs_info->compress_type;
|
||||
cur_size = compressed_size;
|
||||
}
|
||||
|
||||
|
@ -159,7 +159,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_file_extent_ram_bytes(leaf, ei, size);
|
||||
ptr = btrfs_file_extent_inline_start(ei);
|
||||
|
||||
if (use_compress) {
|
||||
if (compress_type != BTRFS_COMPRESS_NONE) {
|
||||
struct page *cpage;
|
||||
int i = 0;
|
||||
while (compressed_size > 0) {
|
||||
|
@ -176,7 +176,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
|
|||
compressed_size -= cur_size;
|
||||
}
|
||||
btrfs_set_file_extent_compression(leaf, ei,
|
||||
BTRFS_COMPRESS_ZLIB);
|
||||
compress_type);
|
||||
} else {
|
||||
page = find_get_page(inode->i_mapping,
|
||||
start >> PAGE_CACHE_SHIFT);
|
||||
|
@ -263,6 +263,7 @@ struct async_extent {
|
|||
u64 compressed_size;
|
||||
struct page **pages;
|
||||
unsigned long nr_pages;
|
||||
int compress_type;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
|
@ -280,7 +281,8 @@ static noinline int add_async_extent(struct async_cow *cow,
|
|||
u64 start, u64 ram_size,
|
||||
u64 compressed_size,
|
||||
struct page **pages,
|
||||
unsigned long nr_pages)
|
||||
unsigned long nr_pages,
|
||||
int compress_type)
|
||||
{
|
||||
struct async_extent *async_extent;
|
||||
|
||||
|
@ -290,6 +292,7 @@ static noinline int add_async_extent(struct async_cow *cow,
|
|||
async_extent->compressed_size = compressed_size;
|
||||
async_extent->pages = pages;
|
||||
async_extent->nr_pages = nr_pages;
|
||||
async_extent->compress_type = compress_type;
|
||||
list_add_tail(&async_extent->list, &cow->extents);
|
||||
return 0;
|
||||
}
|
||||
|
@ -332,6 +335,7 @@ static noinline int compress_file_range(struct inode *inode,
|
|||
unsigned long max_uncompressed = 128 * 1024;
|
||||
int i;
|
||||
int will_compress;
|
||||
int compress_type = root->fs_info->compress_type;
|
||||
|
||||
actual_end = min_t(u64, isize, end + 1);
|
||||
again:
|
||||
|
@ -381,12 +385,16 @@ again:
|
|||
WARN_ON(pages);
|
||||
pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
|
||||
|
||||
ret = btrfs_zlib_compress_pages(inode->i_mapping, start,
|
||||
total_compressed, pages,
|
||||
nr_pages, &nr_pages_ret,
|
||||
&total_in,
|
||||
&total_compressed,
|
||||
max_compressed);
|
||||
if (BTRFS_I(inode)->force_compress)
|
||||
compress_type = BTRFS_I(inode)->force_compress;
|
||||
|
||||
ret = btrfs_compress_pages(compress_type,
|
||||
inode->i_mapping, start,
|
||||
total_compressed, pages,
|
||||
nr_pages, &nr_pages_ret,
|
||||
&total_in,
|
||||
&total_compressed,
|
||||
max_compressed);
|
||||
|
||||
if (!ret) {
|
||||
unsigned long offset = total_compressed &
|
||||
|
@ -493,7 +501,8 @@ again:
|
|||
* and will submit them to the elevator.
|
||||
*/
|
||||
add_async_extent(async_cow, start, num_bytes,
|
||||
total_compressed, pages, nr_pages_ret);
|
||||
total_compressed, pages, nr_pages_ret,
|
||||
compress_type);
|
||||
|
||||
if (start + num_bytes < end) {
|
||||
start += num_bytes;
|
||||
|
@ -515,7 +524,8 @@ cleanup_and_bail_uncompressed:
|
|||
__set_page_dirty_nobuffers(locked_page);
|
||||
/* unlocked later on in the async handlers */
|
||||
}
|
||||
add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0);
|
||||
add_async_extent(async_cow, start, end - start + 1,
|
||||
0, NULL, 0, BTRFS_COMPRESS_NONE);
|
||||
*num_added += 1;
|
||||
}
|
||||
|
||||
|
@ -640,6 +650,7 @@ retry:
|
|||
em->block_start = ins.objectid;
|
||||
em->block_len = ins.offset;
|
||||
em->bdev = root->fs_info->fs_devices->latest_bdev;
|
||||
em->compress_type = async_extent->compress_type;
|
||||
set_bit(EXTENT_FLAG_PINNED, &em->flags);
|
||||
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
|
||||
|
||||
|
@ -656,11 +667,13 @@ retry:
|
|||
async_extent->ram_size - 1, 0);
|
||||
}
|
||||
|
||||
ret = btrfs_add_ordered_extent(inode, async_extent->start,
|
||||
ins.objectid,
|
||||
async_extent->ram_size,
|
||||
ins.offset,
|
||||
BTRFS_ORDERED_COMPRESSED);
|
||||
ret = btrfs_add_ordered_extent_compress(inode,
|
||||
async_extent->start,
|
||||
ins.objectid,
|
||||
async_extent->ram_size,
|
||||
ins.offset,
|
||||
BTRFS_ORDERED_COMPRESSED,
|
||||
async_extent->compress_type);
|
||||
BUG_ON(ret);
|
||||
|
||||
/*
|
||||
|
@ -1670,7 +1683,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
|
|||
struct btrfs_ordered_extent *ordered_extent = NULL;
|
||||
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
||||
struct extent_state *cached_state = NULL;
|
||||
int compressed = 0;
|
||||
int compress_type = 0;
|
||||
int ret;
|
||||
bool nolock = false;
|
||||
|
||||
|
@ -1711,9 +1724,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
|
|||
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
|
||||
|
||||
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
|
||||
compressed = 1;
|
||||
compress_type = ordered_extent->compress_type;
|
||||
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
|
||||
BUG_ON(compressed);
|
||||
BUG_ON(compress_type);
|
||||
ret = btrfs_mark_extent_written(trans, inode,
|
||||
ordered_extent->file_offset,
|
||||
ordered_extent->file_offset +
|
||||
|
@ -1727,7 +1740,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
|
|||
ordered_extent->disk_len,
|
||||
ordered_extent->len,
|
||||
ordered_extent->len,
|
||||
compressed, 0, 0,
|
||||
compress_type, 0, 0,
|
||||
BTRFS_FILE_EXTENT_REG);
|
||||
unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
|
||||
ordered_extent->file_offset,
|
||||
|
@ -1829,6 +1842,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
|
|||
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
|
||||
logical = em->block_start;
|
||||
failrec->bio_flags = EXTENT_BIO_COMPRESSED;
|
||||
extent_set_compress_type(&failrec->bio_flags,
|
||||
em->compress_type);
|
||||
}
|
||||
failrec->logical = logical;
|
||||
free_extent_map(em);
|
||||
|
@ -3671,8 +3686,12 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
|
|||
static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
int err;
|
||||
|
||||
if (btrfs_root_readonly(root))
|
||||
return -EROFS;
|
||||
|
||||
err = inode_change_ok(inode, attr);
|
||||
if (err)
|
||||
return err;
|
||||
|
@ -4928,8 +4947,10 @@ static noinline int uncompress_inline(struct btrfs_path *path,
|
|||
size_t max_size;
|
||||
unsigned long inline_size;
|
||||
unsigned long ptr;
|
||||
int compress_type;
|
||||
|
||||
WARN_ON(pg_offset != 0);
|
||||
compress_type = btrfs_file_extent_compression(leaf, item);
|
||||
max_size = btrfs_file_extent_ram_bytes(leaf, item);
|
||||
inline_size = btrfs_file_extent_inline_item_len(leaf,
|
||||
btrfs_item_nr(leaf, path->slots[0]));
|
||||
|
@ -4939,8 +4960,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,
|
|||
read_extent_buffer(leaf, tmp, ptr, inline_size);
|
||||
|
||||
max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
|
||||
ret = btrfs_zlib_decompress(tmp, page, extent_offset,
|
||||
inline_size, max_size);
|
||||
ret = btrfs_decompress(compress_type, tmp, page,
|
||||
extent_offset, inline_size, max_size);
|
||||
if (ret) {
|
||||
char *kaddr = kmap_atomic(page, KM_USER0);
|
||||
unsigned long copy_size = min_t(u64,
|
||||
|
@ -4982,7 +5003,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
|
|||
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
|
||||
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
||||
struct btrfs_trans_handle *trans = NULL;
|
||||
int compressed;
|
||||
int compress_type;
|
||||
|
||||
again:
|
||||
read_lock(&em_tree->lock);
|
||||
|
@ -5041,7 +5062,7 @@ again:
|
|||
|
||||
found_type = btrfs_file_extent_type(leaf, item);
|
||||
extent_start = found_key.offset;
|
||||
compressed = btrfs_file_extent_compression(leaf, item);
|
||||
compress_type = btrfs_file_extent_compression(leaf, item);
|
||||
if (found_type == BTRFS_FILE_EXTENT_REG ||
|
||||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
extent_end = extent_start +
|
||||
|
@ -5087,8 +5108,9 @@ again:
|
|||
em->block_start = EXTENT_MAP_HOLE;
|
||||
goto insert;
|
||||
}
|
||||
if (compressed) {
|
||||
if (compress_type != BTRFS_COMPRESS_NONE) {
|
||||
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
|
||||
em->compress_type = compress_type;
|
||||
em->block_start = bytenr;
|
||||
em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
|
||||
item);
|
||||
|
@ -5122,12 +5144,14 @@ again:
|
|||
em->len = (copy_size + root->sectorsize - 1) &
|
||||
~((u64)root->sectorsize - 1);
|
||||
em->orig_start = EXTENT_MAP_INLINE;
|
||||
if (compressed)
|
||||
if (compress_type) {
|
||||
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
|
||||
em->compress_type = compress_type;
|
||||
}
|
||||
ptr = btrfs_file_extent_inline_start(item) + extent_offset;
|
||||
if (create == 0 && !PageUptodate(page)) {
|
||||
if (btrfs_file_extent_compression(leaf, item) ==
|
||||
BTRFS_COMPRESS_ZLIB) {
|
||||
if (btrfs_file_extent_compression(leaf, item) !=
|
||||
BTRFS_COMPRESS_NONE) {
|
||||
ret = uncompress_inline(path, inode, page,
|
||||
pg_offset,
|
||||
extent_offset, item);
|
||||
|
@ -6477,7 +6501,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
|
|||
ei->ordered_data_close = 0;
|
||||
ei->orphan_meta_reserved = 0;
|
||||
ei->dummy_inode = 0;
|
||||
ei->force_compress = 0;
|
||||
ei->force_compress = BTRFS_COMPRESS_NONE;
|
||||
|
||||
inode = &ei->vfs_inode;
|
||||
extent_map_tree_init(&ei->extent_tree, GFP_NOFS);
|
||||
|
@ -7105,6 +7129,10 @@ static int btrfs_set_page_dirty(struct page *page)
|
|||
|
||||
static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
|
||||
{
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
|
||||
if (btrfs_root_readonly(root) && (mask & MAY_WRITE))
|
||||
return -EROFS;
|
||||
if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
|
||||
return -EACCES;
|
||||
return generic_permission(inode, mask, flags, btrfs_check_acl);
|
||||
|
|
220
fs/btrfs/ioctl.c
220
fs/btrfs/ioctl.c
|
@ -147,6 +147,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
|
|||
unsigned int flags, oldflags;
|
||||
int ret;
|
||||
|
||||
if (btrfs_root_readonly(root))
|
||||
return -EROFS;
|
||||
|
||||
if (copy_from_user(&flags, arg, sizeof(flags)))
|
||||
return -EFAULT;
|
||||
|
||||
|
@ -360,7 +363,8 @@ fail:
|
|||
}
|
||||
|
||||
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
|
||||
char *name, int namelen, u64 *async_transid)
|
||||
char *name, int namelen, u64 *async_transid,
|
||||
bool readonly)
|
||||
{
|
||||
struct inode *inode;
|
||||
struct dentry *parent;
|
||||
|
@ -378,6 +382,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
|
|||
btrfs_init_block_rsv(&pending_snapshot->block_rsv);
|
||||
pending_snapshot->dentry = dentry;
|
||||
pending_snapshot->root = root;
|
||||
pending_snapshot->readonly = readonly;
|
||||
|
||||
trans = btrfs_start_transaction(root->fs_info->extent_root, 5);
|
||||
if (IS_ERR(trans)) {
|
||||
|
@ -509,7 +514,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
|
|||
static noinline int btrfs_mksubvol(struct path *parent,
|
||||
char *name, int namelen,
|
||||
struct btrfs_root *snap_src,
|
||||
u64 *async_transid)
|
||||
u64 *async_transid, bool readonly)
|
||||
{
|
||||
struct inode *dir = parent->dentry->d_inode;
|
||||
struct dentry *dentry;
|
||||
|
@ -541,7 +546,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
|
|||
|
||||
if (snap_src) {
|
||||
error = create_snapshot(snap_src, dentry,
|
||||
name, namelen, async_transid);
|
||||
name, namelen, async_transid, readonly);
|
||||
} else {
|
||||
error = create_subvol(BTRFS_I(dir)->root, dentry,
|
||||
name, namelen, async_transid);
|
||||
|
@ -638,9 +643,11 @@ static int btrfs_defrag_file(struct file *file,
|
|||
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct page *page;
|
||||
struct btrfs_super_block *disk_super;
|
||||
unsigned long last_index;
|
||||
unsigned long ra_pages = root->fs_info->bdi.ra_pages;
|
||||
unsigned long total_read = 0;
|
||||
u64 features;
|
||||
u64 page_start;
|
||||
u64 page_end;
|
||||
u64 last_len = 0;
|
||||
|
@ -648,6 +655,14 @@ static int btrfs_defrag_file(struct file *file,
|
|||
u64 defrag_end = 0;
|
||||
unsigned long i;
|
||||
int ret;
|
||||
int compress_type = BTRFS_COMPRESS_ZLIB;
|
||||
|
||||
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
|
||||
if (range->compress_type > BTRFS_COMPRESS_TYPES)
|
||||
return -EINVAL;
|
||||
if (range->compress_type)
|
||||
compress_type = range->compress_type;
|
||||
}
|
||||
|
||||
if (inode->i_size == 0)
|
||||
return 0;
|
||||
|
@ -683,7 +698,7 @@ static int btrfs_defrag_file(struct file *file,
|
|||
total_read++;
|
||||
mutex_lock(&inode->i_mutex);
|
||||
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
|
||||
BTRFS_I(inode)->force_compress = 1;
|
||||
BTRFS_I(inode)->force_compress = compress_type;
|
||||
|
||||
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
|
||||
if (ret)
|
||||
|
@ -781,10 +796,17 @@ loop_unlock:
|
|||
atomic_dec(&root->fs_info->async_submit_draining);
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
BTRFS_I(inode)->force_compress = 0;
|
||||
BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
}
|
||||
|
||||
disk_super = &root->fs_info->super_copy;
|
||||
features = btrfs_super_incompat_flags(disk_super);
|
||||
if (range->compress_type == BTRFS_COMPRESS_LZO) {
|
||||
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
|
||||
btrfs_set_super_incompat_flags(disk_super, features);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_reservations:
|
||||
|
@ -901,7 +923,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
|
|||
char *name,
|
||||
unsigned long fd,
|
||||
int subvol,
|
||||
u64 *transid)
|
||||
u64 *transid,
|
||||
bool readonly)
|
||||
{
|
||||
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
|
||||
struct file *src_file;
|
||||
|
@ -919,7 +942,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
|
|||
|
||||
if (subvol) {
|
||||
ret = btrfs_mksubvol(&file->f_path, name, namelen,
|
||||
NULL, transid);
|
||||
NULL, transid, readonly);
|
||||
} else {
|
||||
struct inode *src_inode;
|
||||
src_file = fget(fd);
|
||||
|
@ -938,7 +961,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
|
|||
}
|
||||
ret = btrfs_mksubvol(&file->f_path, name, namelen,
|
||||
BTRFS_I(src_inode)->root,
|
||||
transid);
|
||||
transid, readonly);
|
||||
fput(src_file);
|
||||
}
|
||||
out:
|
||||
|
@ -946,58 +969,139 @@ out:
|
|||
}
|
||||
|
||||
static noinline int btrfs_ioctl_snap_create(struct file *file,
|
||||
void __user *arg, int subvol,
|
||||
int v2)
|
||||
void __user *arg, int subvol)
|
||||
{
|
||||
struct btrfs_ioctl_vol_args *vol_args = NULL;
|
||||
struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL;
|
||||
char *name;
|
||||
u64 fd;
|
||||
struct btrfs_ioctl_vol_args *vol_args;
|
||||
int ret;
|
||||
|
||||
if (v2) {
|
||||
u64 transid = 0;
|
||||
u64 *ptr = NULL;
|
||||
vol_args = memdup_user(arg, sizeof(*vol_args));
|
||||
if (IS_ERR(vol_args))
|
||||
return PTR_ERR(vol_args);
|
||||
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
|
||||
|
||||
vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2));
|
||||
if (IS_ERR(vol_args_v2))
|
||||
return PTR_ERR(vol_args_v2);
|
||||
ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
|
||||
vol_args->fd, subvol,
|
||||
NULL, false);
|
||||
|
||||
if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
kfree(vol_args);
|
||||
return ret;
|
||||
}
|
||||
|
||||
name = vol_args_v2->name;
|
||||
fd = vol_args_v2->fd;
|
||||
vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
|
||||
static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
|
||||
void __user *arg, int subvol)
|
||||
{
|
||||
struct btrfs_ioctl_vol_args_v2 *vol_args;
|
||||
int ret;
|
||||
u64 transid = 0;
|
||||
u64 *ptr = NULL;
|
||||
bool readonly = false;
|
||||
|
||||
if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC)
|
||||
ptr = &transid;
|
||||
vol_args = memdup_user(arg, sizeof(*vol_args));
|
||||
if (IS_ERR(vol_args))
|
||||
return PTR_ERR(vol_args);
|
||||
vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
|
||||
|
||||
ret = btrfs_ioctl_snap_create_transid(file, name, fd,
|
||||
subvol, ptr);
|
||||
|
||||
if (ret == 0 && ptr &&
|
||||
copy_to_user(arg +
|
||||
offsetof(struct btrfs_ioctl_vol_args_v2,
|
||||
transid), ptr, sizeof(*ptr)))
|
||||
ret = -EFAULT;
|
||||
} else {
|
||||
vol_args = memdup_user(arg, sizeof(*vol_args));
|
||||
if (IS_ERR(vol_args))
|
||||
return PTR_ERR(vol_args);
|
||||
name = vol_args->name;
|
||||
fd = vol_args->fd;
|
||||
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
|
||||
|
||||
ret = btrfs_ioctl_snap_create_transid(file, name, fd,
|
||||
subvol, NULL);
|
||||
if (vol_args->flags &
|
||||
~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
|
||||
ptr = &transid;
|
||||
if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
|
||||
readonly = true;
|
||||
|
||||
ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
|
||||
vol_args->fd, subvol,
|
||||
ptr, readonly);
|
||||
|
||||
if (ret == 0 && ptr &&
|
||||
copy_to_user(arg +
|
||||
offsetof(struct btrfs_ioctl_vol_args_v2,
|
||||
transid), ptr, sizeof(*ptr)))
|
||||
ret = -EFAULT;
|
||||
out:
|
||||
kfree(vol_args);
|
||||
kfree(vol_args_v2);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
|
||||
void __user *arg)
|
||||
{
|
||||
struct inode *inode = fdentry(file)->d_inode;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
int ret = 0;
|
||||
u64 flags = 0;
|
||||
|
||||
if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID)
|
||||
return -EINVAL;
|
||||
|
||||
down_read(&root->fs_info->subvol_sem);
|
||||
if (btrfs_root_readonly(root))
|
||||
flags |= BTRFS_SUBVOL_RDONLY;
|
||||
up_read(&root->fs_info->subvol_sem);
|
||||
|
||||
if (copy_to_user(arg, &flags, sizeof(flags)))
|
||||
ret = -EFAULT;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
|
||||
void __user *arg)
|
||||
{
|
||||
struct inode *inode = fdentry(file)->d_inode;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct btrfs_trans_handle *trans;
|
||||
u64 root_flags;
|
||||
u64 flags;
|
||||
int ret = 0;
|
||||
|
||||
if (root->fs_info->sb->s_flags & MS_RDONLY)
|
||||
return -EROFS;
|
||||
|
||||
if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID)
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&flags, arg, sizeof(flags)))
|
||||
return -EFAULT;
|
||||
|
||||
if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
|
||||
return -EINVAL;
|
||||
|
||||
if (flags & ~BTRFS_SUBVOL_RDONLY)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
down_write(&root->fs_info->subvol_sem);
|
||||
|
||||
/* nothing to do */
|
||||
if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
|
||||
goto out;
|
||||
|
||||
root_flags = btrfs_root_flags(&root->root_item);
|
||||
if (flags & BTRFS_SUBVOL_RDONLY)
|
||||
btrfs_set_root_flags(&root->root_item,
|
||||
root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
|
||||
else
|
||||
btrfs_set_root_flags(&root->root_item,
|
||||
root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
|
||||
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
goto out_reset;
|
||||
}
|
||||
|
||||
ret = btrfs_update_root(trans, root,
|
||||
&root->root_key, &root->root_item);
|
||||
|
||||
btrfs_commit_transaction(trans, root);
|
||||
out_reset:
|
||||
if (ret)
|
||||
btrfs_set_root_flags(&root->root_item, root_flags);
|
||||
out:
|
||||
up_write(&root->fs_info->subvol_sem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1509,6 +1613,9 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
|
|||
struct btrfs_ioctl_defrag_range_args *range;
|
||||
int ret;
|
||||
|
||||
if (btrfs_root_readonly(root))
|
||||
return -EROFS;
|
||||
|
||||
ret = mnt_want_write(file->f_path.mnt);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -1637,6 +1744,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
|
|||
if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
|
||||
return -EINVAL;
|
||||
|
||||
if (btrfs_root_readonly(root))
|
||||
return -EROFS;
|
||||
|
||||
ret = mnt_want_write(file->f_path.mnt);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -1958,6 +2068,10 @@ static long btrfs_ioctl_trans_start(struct file *file)
|
|||
if (file->private_data)
|
||||
goto out;
|
||||
|
||||
ret = -EROFS;
|
||||
if (btrfs_root_readonly(root))
|
||||
goto out;
|
||||
|
||||
ret = mnt_want_write(file->f_path.mnt);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@ -2257,13 +2371,17 @@ long btrfs_ioctl(struct file *file, unsigned int
|
|||
case FS_IOC_GETVERSION:
|
||||
return btrfs_ioctl_getversion(file, argp);
|
||||
case BTRFS_IOC_SNAP_CREATE:
|
||||
return btrfs_ioctl_snap_create(file, argp, 0, 0);
|
||||
return btrfs_ioctl_snap_create(file, argp, 0);
|
||||
case BTRFS_IOC_SNAP_CREATE_V2:
|
||||
return btrfs_ioctl_snap_create(file, argp, 0, 1);
|
||||
return btrfs_ioctl_snap_create_v2(file, argp, 0);
|
||||
case BTRFS_IOC_SUBVOL_CREATE:
|
||||
return btrfs_ioctl_snap_create(file, argp, 1, 0);
|
||||
return btrfs_ioctl_snap_create(file, argp, 1);
|
||||
case BTRFS_IOC_SNAP_DESTROY:
|
||||
return btrfs_ioctl_snap_destroy(file, argp);
|
||||
case BTRFS_IOC_SUBVOL_GETFLAGS:
|
||||
return btrfs_ioctl_subvol_getflags(file, argp);
|
||||
case BTRFS_IOC_SUBVOL_SETFLAGS:
|
||||
return btrfs_ioctl_subvol_setflags(file, argp);
|
||||
case BTRFS_IOC_DEFAULT_SUBVOL:
|
||||
return btrfs_ioctl_default_subvol(file, argp);
|
||||
case BTRFS_IOC_DEFRAG:
|
||||
|
|
|
@ -31,6 +31,7 @@ struct btrfs_ioctl_vol_args {
|
|||
};
|
||||
|
||||
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
|
||||
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
|
||||
|
||||
#define BTRFS_SUBVOL_NAME_MAX 4039
|
||||
struct btrfs_ioctl_vol_args_v2 {
|
||||
|
@ -133,8 +134,15 @@ struct btrfs_ioctl_defrag_range_args {
|
|||
*/
|
||||
__u32 extent_thresh;
|
||||
|
||||
/*
|
||||
* which compression method to use if turning on compression
|
||||
* for this defrag operation. If unspecified, zlib will
|
||||
* be used
|
||||
*/
|
||||
__u32 compress_type;
|
||||
|
||||
/* spare for later */
|
||||
__u32 unused[5];
|
||||
__u32 unused[4];
|
||||
};
|
||||
|
||||
struct btrfs_ioctl_space_info {
|
||||
|
@ -193,4 +201,6 @@ struct btrfs_ioctl_space_args {
|
|||
#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
|
||||
#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
|
||||
struct btrfs_ioctl_vol_args_v2)
|
||||
#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
|
||||
#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,420 @@
|
|||
/*
|
||||
* Copyright (C) 2008 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/lzo.h>
|
||||
#include "compression.h"
|
||||
|
||||
#define LZO_LEN 4
|
||||
|
||||
/* Per-user scratch state for the LZO compress/decompress routines below. */
struct workspace {
|
||||
void *mem; /* work memory handed to lzo1x_1_compress() */
|
||||
void *buf; /* where decompressed data goes */
|
||||
void *cbuf; /* where compressed data goes */
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
static void lzo_free_workspace(struct list_head *ws)
|
||||
{
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
|
||||
vfree(workspace->buf);
|
||||
vfree(workspace->cbuf);
|
||||
vfree(workspace->mem);
|
||||
kfree(workspace);
|
||||
}
|
||||
|
||||
static struct list_head *lzo_alloc_workspace(void)
|
||||
{
|
||||
struct workspace *workspace;
|
||||
|
||||
workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
|
||||
if (!workspace)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
workspace->mem = vmalloc(LZO1X_MEM_COMPRESS);
|
||||
workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
|
||||
workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
|
||||
if (!workspace->mem || !workspace->buf || !workspace->cbuf)
|
||||
goto fail;
|
||||
|
||||
INIT_LIST_HEAD(&workspace->list);
|
||||
|
||||
return &workspace->list;
|
||||
fail:
|
||||
lzo_free_workspace(&workspace->list);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
static inline void write_compress_length(char *buf, size_t len)
|
||||
{
|
||||
__le32 dlen;
|
||||
|
||||
dlen = cpu_to_le32(len);
|
||||
memcpy(buf, &dlen, LZO_LEN);
|
||||
}
|
||||
|
||||
static inline size_t read_compress_length(char *buf)
|
||||
{
|
||||
__le32 dlen;
|
||||
|
||||
memcpy(&dlen, buf, LZO_LEN);
|
||||
return le32_to_cpu(dlen);
|
||||
}
|
||||
|
||||
/*
 * Compress @len bytes of @mapping, starting at file offset @start, one input
 * page at a time with lzo1x_1_compress().
 *
 * Output layout: the first LZO_LEN bytes of pages[0] hold the total output
 * size; every compressed chunk is preceded by its own LZO_LEN-byte length.
 * A chunk header is never split across pages: if fewer than LZO_LEN bytes
 * remain in the current output page they are zero-filled and skipped.
 *
 * @pages / @nr_dest_pages: array that receives the newly allocated output
 *                          pages, and its capacity
 * @out_pages:              set to the number of output pages allocated, on
 *                          every exit path
 * @total_in / @total_out:  zeroed on entry; filled with the bytes consumed
 *                          and produced only on the success path
 * @max_out:                the loop stops once the output exceeds this
 *
 * Returns 0 on success, -ENOMEM if an output page cannot be allocated, and
 * -1 if the compressor fails or @nr_dest_pages is exhausted.
 */
static int lzo_compress_pages(struct list_head *ws,
|
||||
struct address_space *mapping,
|
||||
u64 start, unsigned long len,
|
||||
struct page **pages,
|
||||
unsigned long nr_dest_pages,
|
||||
unsigned long *out_pages,
|
||||
unsigned long *total_in,
|
||||
unsigned long *total_out,
|
||||
unsigned long max_out)
|
||||
{
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
int ret = 0;
|
||||
char *data_in;
|
||||
char *cpage_out;
|
||||
int nr_pages = 0;
|
||||
struct page *in_page = NULL;
|
||||
struct page *out_page = NULL;
|
||||
unsigned long bytes_left;
|
||||
|
||||
size_t in_len;
|
||||
size_t out_len;
|
||||
char *buf;
|
||||
unsigned long tot_in = 0;
|
||||
unsigned long tot_out = 0;
|
||||
unsigned long pg_bytes_left;
|
||||
unsigned long out_offset;
|
||||
unsigned long bytes;
|
||||
|
||||
*out_pages = 0;
|
||||
*total_out = 0;
|
||||
*total_in = 0;
|
||||
|
||||
/* NOTE(review): the find_get_page() result is handed to kmap() unchecked; presumably the caller guarantees the page is cached - confirm */
in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
|
||||
data_in = kmap(in_page);
|
||||
|
||||
/*
|
||||
* store the size of all chunks of compressed data in
|
||||
* the first 4 bytes
|
||||
*/
|
||||
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
|
||||
if (out_page == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
cpage_out = kmap(out_page);
|
||||
/* leave room for the total-size header that is filled in at the end */
out_offset = LZO_LEN;
|
||||
tot_out = LZO_LEN;
|
||||
pages[0] = out_page;
|
||||
nr_pages = 1;
|
||||
pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
|
||||
|
||||
/* compress at most one page of data each time */
|
||||
in_len = min(len, PAGE_CACHE_SIZE);
|
||||
while (tot_in < len) {
|
||||
/* the compressed chunk lands in workspace->cbuf, not directly in a page */
ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
|
||||
&out_len, workspace->mem);
|
||||
if (ret != LZO_E_OK) {
|
||||
printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
|
||||
ret);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* store the size of this chunk of compressed data */
|
||||
write_compress_length(cpage_out + out_offset, out_len);
|
||||
tot_out += LZO_LEN;
|
||||
out_offset += LZO_LEN;
|
||||
pg_bytes_left -= LZO_LEN;
|
||||
|
||||
tot_in += in_len;
|
||||
tot_out += out_len;
|
||||
|
||||
/* copy bytes from the working buffer into the pages */
|
||||
buf = workspace->cbuf;
|
||||
while (out_len) {
|
||||
bytes = min_t(unsigned long, pg_bytes_left, out_len);
|
||||
|
||||
memcpy(cpage_out + out_offset, buf, bytes);
|
||||
|
||||
out_len -= bytes;
|
||||
pg_bytes_left -= bytes;
|
||||
buf += bytes;
|
||||
out_offset += bytes;
|
||||
|
||||
/*
|
||||
* we need another page for writing out.
|
||||
*
|
||||
* Note if there's less than 4 bytes left, we just
|
||||
* skip to a new page.
|
||||
*/
|
||||
if ((out_len == 0 && pg_bytes_left < LZO_LEN) ||
|
||||
pg_bytes_left == 0) {
|
||||
/* the zero padding is counted as output so offsets stay page-relative */
if (pg_bytes_left) {
|
||||
memset(cpage_out + out_offset, 0,
|
||||
pg_bytes_left);
|
||||
tot_out += pg_bytes_left;
|
||||
}
|
||||
|
||||
/* we're done, don't allocate new page */
|
||||
if (out_len == 0 && tot_in >= len)
|
||||
break;
|
||||
|
||||
kunmap(out_page);
|
||||
/* out_page is cleared so the exit path does not kunmap() it a second time */
if (nr_pages == nr_dest_pages) {
|
||||
out_page = NULL;
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
|
||||
if (out_page == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
cpage_out = kmap(out_page);
|
||||
pages[nr_pages++] = out_page;
|
||||
|
||||
pg_bytes_left = PAGE_CACHE_SIZE;
|
||||
out_offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* we're making it bigger, give up */
|
||||
/* NOTE(review): on this path ret is still the LZO_E_OK from above and the total_in/total_out outputs keep the 0 written at entry; presumably callers treat that as "not compressed" - confirm */
if (tot_in > 8192 && tot_in < tot_out)
|
||||
goto out;
|
||||
|
||||
/* we're all done */
|
||||
if (tot_in >= len)
|
||||
break;
|
||||
|
||||
if (tot_out > max_out)
|
||||
break;
|
||||
|
||||
/* advance to the next input page */
bytes_left = len - tot_in;
|
||||
kunmap(in_page);
|
||||
page_cache_release(in_page);
|
||||
|
||||
start += PAGE_CACHE_SIZE;
|
||||
in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
|
||||
data_in = kmap(in_page);
|
||||
in_len = min(bytes_left, PAGE_CACHE_SIZE);
|
||||
}
|
||||
|
||||
/* output grew past the input: bail out without reporting totals (ret is left as it was) */
if (tot_out > tot_in)
|
||||
goto out;
|
||||
|
||||
/* store the size of all chunks of compressed data */
|
||||
cpage_out = kmap(pages[0]);
|
||||
write_compress_length(cpage_out, tot_out);
|
||||
|
||||
kunmap(pages[0]);
|
||||
|
||||
ret = 0;
|
||||
*total_out = tot_out;
|
||||
*total_in = tot_in;
|
||||
out:
|
||||
/* report the allocated pages even on failure; this function does not free them itself */
*out_pages = nr_pages;
|
||||
if (out_page)
|
||||
kunmap(out_page);
|
||||
|
||||
if (in_page) {
|
||||
kunmap(in_page);
|
||||
page_cache_release(in_page);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Decompress an LZO stream that is spread over the pages in @pages_in and
 * pass each decompressed chunk to btrfs_decompress_buf2page() together with
 * @disk_start, @bvec and @vcnt.
 *
 * Stream layout: an LZO_LEN-byte total length, then chunks that are each
 * prefixed with an LZO_LEN-byte chunk length.  A chunk that fits in the rest
 * of the current input page is decompressed straight from that page; one
 * that crosses a page boundary is first gathered into workspace->cbuf.
 *
 * @srclen: size of the compressed input; it bounds both the number of input
 *          pages and the total length read from the stream
 *
 * Returns -1 if a chunk fails to decompress or the input pages run out;
 * otherwise the status of the last lzo1x_decompress_safe() call (or 0 if no
 * chunk was processed).
 */
static int lzo_decompress_biovec(struct list_head *ws,
|
||||
struct page **pages_in,
|
||||
u64 disk_start,
|
||||
struct bio_vec *bvec,
|
||||
int vcnt,
|
||||
size_t srclen)
|
||||
{
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
int ret = 0, ret2;
|
||||
char *data_in;
|
||||
unsigned long page_in_index = 0;
|
||||
unsigned long page_out_index = 0;
|
||||
unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
|
||||
PAGE_CACHE_SIZE;
|
||||
unsigned long buf_start;
|
||||
unsigned long buf_offset = 0;
|
||||
unsigned long bytes;
|
||||
unsigned long working_bytes;
|
||||
unsigned long pg_offset;
|
||||
|
||||
size_t in_len;
|
||||
size_t out_len;
|
||||
unsigned long in_offset;
|
||||
unsigned long in_page_bytes_left;
|
||||
unsigned long tot_in;
|
||||
unsigned long tot_out;
|
||||
unsigned long tot_len;
|
||||
char *buf;
|
||||
|
||||
data_in = kmap(pages_in[0]);
|
||||
tot_len = read_compress_length(data_in);
|
||||
|
||||
tot_in = LZO_LEN;
|
||||
in_offset = LZO_LEN;
|
||||
/* never trust the stored total beyond the input that was actually supplied */
tot_len = min_t(size_t, srclen, tot_len);
|
||||
in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
|
||||
|
||||
tot_out = 0;
|
||||
pg_offset = 0;
|
||||
|
||||
while (tot_in < tot_len) {
|
||||
/* NOTE(review): in_len comes straight from the stream and is not checked against the size of workspace->cbuf before the copy loop below - confirm it is bounded elsewhere */
in_len = read_compress_length(data_in + in_offset);
|
||||
in_page_bytes_left -= LZO_LEN;
|
||||
in_offset += LZO_LEN;
|
||||
tot_in += LZO_LEN;
|
||||
|
||||
tot_in += in_len;
|
||||
working_bytes = in_len;
|
||||
|
||||
/* fast path: avoid using the working buffer */
|
||||
/* the goto enters the copy loop after its memcpy() to share the offset bookkeeping; working_bytes then drops to 0 so the loop ends after that pass */
/* NOTE(review): on this path buf points into the mapped input page; if the page-advance branch below then kunmap()s that page, buf is used afterwards by the decompressor - confirm this is safe */
if (in_page_bytes_left >= in_len) {
|
||||
buf = data_in + in_offset;
|
||||
bytes = in_len;
|
||||
goto cont;
|
||||
}
|
||||
|
||||
/* copy bytes from the pages into the working buffer */
|
||||
buf = workspace->cbuf;
|
||||
buf_offset = 0;
|
||||
while (working_bytes) {
|
||||
bytes = min(working_bytes, in_page_bytes_left);
|
||||
|
||||
memcpy(buf + buf_offset, data_in + in_offset, bytes);
|
||||
buf_offset += bytes;
|
||||
cont:
|
||||
working_bytes -= bytes;
|
||||
in_page_bytes_left -= bytes;
|
||||
in_offset += bytes;
|
||||
|
||||
/* check if we need to pick another page */
|
||||
if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN)
|
||||
|| in_page_bytes_left == 0) {
|
||||
/* account for the tail bytes of the page that are skipped (fewer than LZO_LEN remain) */
tot_in += in_page_bytes_left;
|
||||
|
||||
if (working_bytes == 0 && tot_in >= tot_len)
|
||||
break;
|
||||
|
||||
kunmap(pages_in[page_in_index]);
|
||||
page_in_index++;
|
||||
/* data_in is cleared so the exit path skips its kunmap() */
if (page_in_index >= total_pages_in) {
|
||||
ret = -1;
|
||||
data_in = NULL;
|
||||
goto done;
|
||||
}
|
||||
data_in = kmap(pages_in[page_in_index]);
|
||||
|
||||
in_page_bytes_left = PAGE_CACHE_SIZE;
|
||||
in_offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* out_len is in/out: capacity of workspace->buf going in, bytes produced coming out */
out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
|
||||
ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
|
||||
&out_len);
|
||||
if (ret != LZO_E_OK) {
|
||||
printk(KERN_WARNING "btrfs decompress failed\n");
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* [buf_start, tot_out) is the range of decompressed output this chunk covers */
buf_start = tot_out;
|
||||
tot_out += out_len;
|
||||
|
||||
ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
|
||||
tot_out, disk_start,
|
||||
bvec, vcnt,
|
||||
&page_out_index, &pg_offset);
|
||||
/* a 0 return stops the loop; presumably it means no more output is wanted - confirm against btrfs_decompress_buf2page() */
if (ret2 == 0)
|
||||
break;
|
||||
}
|
||||
done:
|
||||
if (data_in)
|
||||
kunmap(pages_in[page_in_index]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
|
||||
struct page *dest_page,
|
||||
unsigned long start_byte,
|
||||
size_t srclen, size_t destlen)
|
||||
{
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
size_t in_len;
|
||||
size_t out_len;
|
||||
size_t tot_len;
|
||||
int ret = 0;
|
||||
char *kaddr;
|
||||
unsigned long bytes;
|
||||
|
||||
BUG_ON(srclen < LZO_LEN);
|
||||
|
||||
tot_len = read_compress_length(data_in);
|
||||
data_in += LZO_LEN;
|
||||
|
||||
in_len = read_compress_length(data_in);
|
||||
data_in += LZO_LEN;
|
||||
|
||||
out_len = PAGE_CACHE_SIZE;
|
||||
ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
|
||||
if (ret != LZO_E_OK) {
|
||||
printk(KERN_WARNING "btrfs decompress failed!\n");
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (out_len < start_byte) {
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
bytes = min_t(unsigned long, destlen, out_len - start_byte);
|
||||
|
||||
kaddr = kmap_atomic(dest_page, KM_USER0);
|
||||
memcpy(kaddr, workspace->buf + start_byte, bytes);
|
||||
kunmap_atomic(kaddr, KM_USER0);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Operations table that exposes the LZO routines defined in this file. */
struct btrfs_compress_op btrfs_lzo_compress = {
|
||||
.alloc_workspace = lzo_alloc_workspace,
|
||||
.free_workspace = lzo_free_workspace,
|
||||
.compress_pages = lzo_compress_pages,
|
||||
.decompress_biovec = lzo_decompress_biovec,
|
||||
.decompress = lzo_decompress,
|
||||
};
|
|
@ -172,7 +172,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
|
|||
*/
|
||||
static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
||||
u64 start, u64 len, u64 disk_len,
|
||||
int type, int dio)
|
||||
int type, int dio, int compress_type)
|
||||
{
|
||||
struct btrfs_ordered_inode_tree *tree;
|
||||
struct rb_node *node;
|
||||
|
@ -189,6 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
|||
entry->disk_len = disk_len;
|
||||
entry->bytes_left = len;
|
||||
entry->inode = inode;
|
||||
entry->compress_type = compress_type;
|
||||
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
|
||||
set_bit(type, &entry->flags);
|
||||
|
||||
|
@ -220,14 +221,25 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
|||
u64 start, u64 len, u64 disk_len, int type)
|
||||
{
|
||||
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
|
||||
disk_len, type, 0);
|
||||
disk_len, type, 0,
|
||||
BTRFS_COMPRESS_NONE);
|
||||
}
|
||||
|
||||
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
|
||||
u64 start, u64 len, u64 disk_len, int type)
|
||||
{
|
||||
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
|
||||
disk_len, type, 1);
|
||||
disk_len, type, 1,
|
||||
BTRFS_COMPRESS_NONE);
|
||||
}
|
||||
|
||||
/*
 * Variant of btrfs_add_ordered_extent() that also records which compression
 * algorithm (@compress_type) the extent uses.  The dio argument of
 * __btrfs_add_ordered_extent() is passed as 0.
 *
 * Returns whatever __btrfs_add_ordered_extent() returns.
 */
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
				      u64 start, u64 len, u64 disk_len,
				      int type, int compress_type)
{
	int err;

	err = __btrfs_add_ordered_extent(inode, file_offset, start, len,
					 disk_len, type, 0, compress_type);
	return err;
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -68,7 +68,7 @@ struct btrfs_ordered_sum {
|
|||
|
||||
#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
|
||||
|
||||
#define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */
|
||||
#define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */
|
||||
|
||||
#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */
|
||||
|
||||
|
@ -93,6 +93,9 @@ struct btrfs_ordered_extent {
|
|||
/* flags (described above) */
|
||||
unsigned long flags;
|
||||
|
||||
/* compression algorithm */
|
||||
int compress_type;
|
||||
|
||||
/* reference count */
|
||||
atomic_t refs;
|
||||
|
||||
|
@ -148,6 +151,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
|||
u64 start, u64 len, u64 disk_len, int type);
|
||||
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
|
||||
u64 start, u64 len, u64 disk_len, int type);
|
||||
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
|
||||
u64 start, u64 len, u64 disk_len,
|
||||
int type, int compress_type);
|
||||
int btrfs_add_ordered_sum(struct inode *inode,
|
||||
struct btrfs_ordered_extent *entry,
|
||||
struct btrfs_ordered_sum *sum);
|
||||
|
|
281
fs/btrfs/super.c
281
fs/btrfs/super.c
|
@ -54,6 +54,90 @@
|
|||
|
||||
static const struct super_operations btrfs_super_ops;
|
||||
|
||||
static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
|
||||
char nbuf[16])
|
||||
{
|
||||
char *errstr = NULL;
|
||||
|
||||
switch (errno) {
|
||||
case -EIO:
|
||||
errstr = "IO failure";
|
||||
break;
|
||||
case -ENOMEM:
|
||||
errstr = "Out of memory";
|
||||
break;
|
||||
case -EROFS:
|
||||
errstr = "Readonly filesystem";
|
||||
break;
|
||||
default:
|
||||
if (nbuf) {
|
||||
if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
|
||||
errstr = nbuf;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return errstr;
|
||||
}
|
||||
|
||||
/*
 * Mark @fs_info as having hit an error by storing BTRFS_SUPER_FLAG_ERROR in
 * fs_info->fs_state.
 * NOTE(review): this is a plain assignment, not an OR, so any other bits in
 * fs_state are overwritten - confirm that is intended.
 */
static void __save_error_info(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
/*
|
||||
* today we only save the error info into ram. Long term we'll
|
||||
* also send it down to the disk
|
||||
*/
|
||||
fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
|
||||
}
|
||||
|
||||
/*
 * Record the error state for @fs_info; thin wrapper around
 * __save_error_info().
 *
 * NOTE: the write_super work is done at umount time rather than here, in
 * order to avoid a deadlock, because umount holds all the locks.
 */
|
||||
static void save_error_info(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
__save_error_info(fs_info);
|
||||
}
|
||||
|
||||
/* btrfs handle error by forcing the filesystem readonly */
|
||||
static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct super_block *sb = fs_info->sb;
|
||||
|
||||
if (sb->s_flags & MS_RDONLY)
|
||||
return;
|
||||
|
||||
if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
|
||||
sb->s_flags |= MS_RDONLY;
|
||||
printk(KERN_INFO "btrfs is forced readonly\n");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* __btrfs_std_error decodes expected errors from the caller and
|
||||
* invokes the approciate error response.
|
||||
*/
|
||||
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
|
||||
unsigned int line, int errno)
|
||||
{
|
||||
struct super_block *sb = fs_info->sb;
|
||||
char nbuf[16];
|
||||
const char *errstr;
|
||||
|
||||
/*
|
||||
* Special case: if the error is EROFS, and we're already
|
||||
* under MS_RDONLY, then it is safe here.
|
||||
*/
|
||||
if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
|
||||
return;
|
||||
|
||||
errstr = btrfs_decode_error(fs_info, errno, nbuf);
|
||||
printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
|
||||
sb->s_id, function, line, errstr);
|
||||
save_error_info(fs_info);
|
||||
|
||||
btrfs_handle_error(fs_info);
|
||||
}
|
||||
|
||||
static void btrfs_put_super(struct super_block *sb)
|
||||
{
|
||||
struct btrfs_root *root = btrfs_sb(sb);
|
||||
|
@ -69,9 +153,9 @@ enum {
|
|||
Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
|
||||
Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
|
||||
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
|
||||
Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
|
||||
Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
|
||||
Opt_user_subvol_rm_allowed,
|
||||
Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
|
||||
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
|
||||
Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
|
||||
};
|
||||
|
||||
static match_table_t tokens = {
|
||||
|
@ -86,7 +170,9 @@ static match_table_t tokens = {
|
|||
{Opt_alloc_start, "alloc_start=%s"},
|
||||
{Opt_thread_pool, "thread_pool=%d"},
|
||||
{Opt_compress, "compress"},
|
||||
{Opt_compress_type, "compress=%s"},
|
||||
{Opt_compress_force, "compress-force"},
|
||||
{Opt_compress_force_type, "compress-force=%s"},
|
||||
{Opt_ssd, "ssd"},
|
||||
{Opt_ssd_spread, "ssd_spread"},
|
||||
{Opt_nossd, "nossd"},
|
||||
|
@ -112,6 +198,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
|
|||
char *p, *num, *orig;
|
||||
int intarg;
|
||||
int ret = 0;
|
||||
char *compress_type;
|
||||
bool compress_force = false;
|
||||
|
||||
if (!options)
|
||||
return 0;
|
||||
|
@ -154,14 +242,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
|
|||
btrfs_set_opt(info->mount_opt, NODATACOW);
|
||||
btrfs_set_opt(info->mount_opt, NODATASUM);
|
||||
break;
|
||||
case Opt_compress:
|
||||
printk(KERN_INFO "btrfs: use compression\n");
|
||||
btrfs_set_opt(info->mount_opt, COMPRESS);
|
||||
break;
|
||||
case Opt_compress_force:
|
||||
printk(KERN_INFO "btrfs: forcing compression\n");
|
||||
btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
|
||||
case Opt_compress_force_type:
|
||||
compress_force = true;
|
||||
case Opt_compress:
|
||||
case Opt_compress_type:
|
||||
if (token == Opt_compress ||
|
||||
token == Opt_compress_force ||
|
||||
strcmp(args[0].from, "zlib") == 0) {
|
||||
compress_type = "zlib";
|
||||
info->compress_type = BTRFS_COMPRESS_ZLIB;
|
||||
} else if (strcmp(args[0].from, "lzo") == 0) {
|
||||
compress_type = "lzo";
|
||||
info->compress_type = BTRFS_COMPRESS_LZO;
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
btrfs_set_opt(info->mount_opt, COMPRESS);
|
||||
if (compress_force) {
|
||||
btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
|
||||
pr_info("btrfs: force %s compression\n",
|
||||
compress_type);
|
||||
} else
|
||||
pr_info("btrfs: use %s compression\n",
|
||||
compress_type);
|
||||
break;
|
||||
case Opt_ssd:
|
||||
printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
|
||||
|
@ -753,6 +859,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The helper to calc the free space on the devices that can be used to store
|
||||
* file data.
|
||||
*/
|
||||
static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_device_info *devices_info;
|
||||
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
|
||||
struct btrfs_device *device;
|
||||
u64 skip_space;
|
||||
u64 type;
|
||||
u64 avail_space;
|
||||
u64 used_space;
|
||||
u64 min_stripe_size;
|
||||
int min_stripes = 1;
|
||||
int i = 0, nr_devices;
|
||||
int ret;
|
||||
|
||||
nr_devices = fs_info->fs_devices->rw_devices;
|
||||
BUG_ON(!nr_devices);
|
||||
|
||||
devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
|
||||
GFP_NOFS);
|
||||
if (!devices_info)
|
||||
return -ENOMEM;
|
||||
|
||||
/* calc min stripe number for data space alloction */
|
||||
type = btrfs_get_alloc_profile(root, 1);
|
||||
if (type & BTRFS_BLOCK_GROUP_RAID0)
|
||||
min_stripes = 2;
|
||||
else if (type & BTRFS_BLOCK_GROUP_RAID1)
|
||||
min_stripes = 2;
|
||||
else if (type & BTRFS_BLOCK_GROUP_RAID10)
|
||||
min_stripes = 4;
|
||||
|
||||
if (type & BTRFS_BLOCK_GROUP_DUP)
|
||||
min_stripe_size = 2 * BTRFS_STRIPE_LEN;
|
||||
else
|
||||
min_stripe_size = BTRFS_STRIPE_LEN;
|
||||
|
||||
list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
|
||||
if (!device->in_fs_metadata)
|
||||
continue;
|
||||
|
||||
avail_space = device->total_bytes - device->bytes_used;
|
||||
|
||||
/* align with stripe_len */
|
||||
do_div(avail_space, BTRFS_STRIPE_LEN);
|
||||
avail_space *= BTRFS_STRIPE_LEN;
|
||||
|
||||
/*
|
||||
* In order to avoid overwritting the superblock on the drive,
|
||||
* btrfs starts at an offset of at least 1MB when doing chunk
|
||||
* allocation.
|
||||
*/
|
||||
skip_space = 1024 * 1024;
|
||||
|
||||
/* user can set the offset in fs_info->alloc_start. */
|
||||
if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
|
||||
device->total_bytes)
|
||||
skip_space = max(fs_info->alloc_start, skip_space);
|
||||
|
||||
/*
|
||||
* btrfs can not use the free space in [0, skip_space - 1],
|
||||
* we must subtract it from the total. In order to implement
|
||||
* it, we account the used space in this range first.
|
||||
*/
|
||||
ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
|
||||
&used_space);
|
||||
if (ret) {
|
||||
kfree(devices_info);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* calc the free space in [0, skip_space - 1] */
|
||||
skip_space -= used_space;
|
||||
|
||||
/*
|
||||
* we can use the free space in [0, skip_space - 1], subtract
|
||||
* it from the total.
|
||||
*/
|
||||
if (avail_space && avail_space >= skip_space)
|
||||
avail_space -= skip_space;
|
||||
else
|
||||
avail_space = 0;
|
||||
|
||||
if (avail_space < min_stripe_size)
|
||||
continue;
|
||||
|
||||
devices_info[i].dev = device;
|
||||
devices_info[i].max_avail = avail_space;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
nr_devices = i;
|
||||
|
||||
btrfs_descending_sort_devices(devices_info, nr_devices);
|
||||
|
||||
i = nr_devices - 1;
|
||||
avail_space = 0;
|
||||
while (nr_devices >= min_stripes) {
|
||||
if (devices_info[i].max_avail >= min_stripe_size) {
|
||||
int j;
|
||||
u64 alloc_size;
|
||||
|
||||
avail_space += devices_info[i].max_avail * min_stripes;
|
||||
alloc_size = devices_info[i].max_avail;
|
||||
for (j = i + 1 - min_stripes; j <= i; j++)
|
||||
devices_info[j].max_avail -= alloc_size;
|
||||
}
|
||||
i--;
|
||||
nr_devices--;
|
||||
}
|
||||
|
||||
kfree(devices_info);
|
||||
*free_bytes = avail_space;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
{
|
||||
struct btrfs_root *root = btrfs_sb(dentry->d_sb);
|
||||
|
@ -760,17 +987,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
|||
struct list_head *head = &root->fs_info->space_info;
|
||||
struct btrfs_space_info *found;
|
||||
u64 total_used = 0;
|
||||
u64 total_used_data = 0;
|
||||
u64 total_free_data = 0;
|
||||
int bits = dentry->d_sb->s_blocksize_bits;
|
||||
__be32 *fsid = (__be32 *)root->fs_info->fsid;
|
||||
int ret;
|
||||
|
||||
/* holding chunk_mutex to avoid allocating new chunks */
|
||||
mutex_lock(&root->fs_info->chunk_mutex);
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(found, head, list) {
|
||||
if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
|
||||
BTRFS_BLOCK_GROUP_SYSTEM))
|
||||
total_used_data += found->disk_total;
|
||||
else
|
||||
total_used_data += found->disk_used;
|
||||
if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
|
||||
total_free_data += found->disk_total - found->disk_used;
|
||||
total_free_data -=
|
||||
btrfs_account_ro_block_groups_free_space(found);
|
||||
}
|
||||
|
||||
total_used += found->disk_used;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
@ -778,9 +1009,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
|||
buf->f_namelen = BTRFS_NAME_LEN;
|
||||
buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
|
||||
buf->f_bfree = buf->f_blocks - (total_used >> bits);
|
||||
buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
|
||||
buf->f_bsize = dentry->d_sb->s_blocksize;
|
||||
buf->f_type = BTRFS_SUPER_MAGIC;
|
||||
buf->f_bavail = total_free_data;
|
||||
ret = btrfs_calc_avail_data_space(root, &total_free_data);
|
||||
if (ret) {
|
||||
mutex_unlock(&root->fs_info->chunk_mutex);
|
||||
return ret;
|
||||
}
|
||||
buf->f_bavail += total_free_data;
|
||||
buf->f_bavail = buf->f_bavail >> bits;
|
||||
mutex_unlock(&root->fs_info->chunk_mutex);
|
||||
|
||||
/* We treat it as constant endianness (it doesn't matter _which_)
|
||||
because we want the fsid to come out the same whether mounted
|
||||
|
@ -897,10 +1136,14 @@ static int __init init_btrfs_fs(void)
|
|||
if (err)
|
||||
return err;
|
||||
|
||||
err = btrfs_init_cachep();
|
||||
err = btrfs_init_compress();
|
||||
if (err)
|
||||
goto free_sysfs;
|
||||
|
||||
err = btrfs_init_cachep();
|
||||
if (err)
|
||||
goto free_compress;
|
||||
|
||||
err = extent_io_init();
|
||||
if (err)
|
||||
goto free_cachep;
|
||||
|
@ -928,6 +1171,8 @@ free_extent_io:
|
|||
extent_io_exit();
|
||||
free_cachep:
|
||||
btrfs_destroy_cachep();
|
||||
free_compress:
|
||||
btrfs_exit_compress();
|
||||
free_sysfs:
|
||||
btrfs_exit_sysfs();
|
||||
return err;
|
||||
|
@ -942,7 +1187,7 @@ static void __exit exit_btrfs_fs(void)
|
|||
unregister_filesystem(&btrfs_fs_type);
|
||||
btrfs_exit_sysfs();
|
||||
btrfs_cleanup_fs_uuids();
|
||||
btrfs_zlib_exit();
|
||||
btrfs_exit_compress();
|
||||
}
|
||||
|
||||
module_init(init_btrfs_fs)
|
||||
|
|
|
@ -181,6 +181,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
|
|||
struct btrfs_trans_handle *h;
|
||||
struct btrfs_transaction *cur_trans;
|
||||
int ret;
|
||||
|
||||
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
|
||||
return ERR_PTR(-EROFS);
|
||||
again:
|
||||
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
|
||||
if (!h)
|
||||
|
@ -910,6 +913,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
|||
u64 to_reserve = 0;
|
||||
u64 index = 0;
|
||||
u64 objectid;
|
||||
u64 root_flags;
|
||||
|
||||
new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
|
||||
if (!new_root_item) {
|
||||
|
@ -967,6 +971,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
|||
btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
|
||||
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
|
||||
|
||||
root_flags = btrfs_root_flags(new_root_item);
|
||||
if (pending->readonly)
|
||||
root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
|
||||
else
|
||||
root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
|
||||
btrfs_set_root_flags(new_root_item, root_flags);
|
||||
|
||||
old = btrfs_lock_root_node(root);
|
||||
btrfs_cow_block(trans, root, old, NULL, 0, &old);
|
||||
btrfs_set_lock_blocking(old);
|
||||
|
|
|
@ -62,6 +62,7 @@ struct btrfs_pending_snapshot {
|
|||
struct btrfs_block_rsv block_rsv;
|
||||
/* extra metadata reservation for relocation */
|
||||
int error;
|
||||
bool readonly;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <linux/blkdev.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/iocontext.h>
|
||||
#include <linux/capability.h>
|
||||
#include <asm/div64.h>
|
||||
#include "compat.h"
|
||||
#include "ctree.h"
|
||||
|
@ -600,8 +601,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
|
|||
set_blocksize(bdev, 4096);
|
||||
|
||||
bh = btrfs_read_dev_super(bdev);
|
||||
if (!bh)
|
||||
if (!bh) {
|
||||
ret = -EINVAL;
|
||||
goto error_close;
|
||||
}
|
||||
|
||||
disk_super = (struct btrfs_super_block *)bh->b_data;
|
||||
devid = btrfs_stack_device_id(&disk_super->dev_item);
|
||||
|
@ -703,7 +706,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
|
|||
goto error_close;
|
||||
bh = btrfs_read_dev_super(bdev);
|
||||
if (!bh) {
|
||||
ret = -EIO;
|
||||
ret = -EINVAL;
|
||||
goto error_close;
|
||||
}
|
||||
disk_super = (struct btrfs_super_block *)bh->b_data;
|
||||
|
@ -729,59 +732,42 @@ error:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* this uses a pretty simple search, the expectation is that it is
|
||||
* called very infrequently and that a given device has a small number
|
||||
* of extents
|
||||
*/
|
||||
int find_free_dev_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_device *device, u64 num_bytes,
|
||||
u64 *start, u64 *max_avail)
|
||||
/* helper to account the used device space in the range */
|
||||
int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
|
||||
u64 end, u64 *length)
|
||||
{
|
||||
struct btrfs_key key;
|
||||
struct btrfs_root *root = device->dev_root;
|
||||
struct btrfs_dev_extent *dev_extent = NULL;
|
||||
struct btrfs_dev_extent *dev_extent;
|
||||
struct btrfs_path *path;
|
||||
u64 hole_size = 0;
|
||||
u64 last_byte = 0;
|
||||
u64 search_start = 0;
|
||||
u64 search_end = device->total_bytes;
|
||||
u64 extent_end;
|
||||
int ret;
|
||||
int slot = 0;
|
||||
int start_found;
|
||||
int slot;
|
||||
struct extent_buffer *l;
|
||||
|
||||
*length = 0;
|
||||
|
||||
if (start >= device->total_bytes)
|
||||
return 0;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
path->reada = 2;
|
||||
start_found = 0;
|
||||
|
||||
/* FIXME use last free of some kind */
|
||||
|
||||
/* we don't want to overwrite the superblock on the drive,
|
||||
* so we make sure to start at an offset of at least 1MB
|
||||
*/
|
||||
search_start = max((u64)1024 * 1024, search_start);
|
||||
|
||||
if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
|
||||
search_start = max(root->fs_info->alloc_start, search_start);
|
||||
|
||||
key.objectid = device->devid;
|
||||
key.offset = search_start;
|
||||
key.offset = start;
|
||||
key.type = BTRFS_DEV_EXTENT_KEY;
|
||||
ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
|
||||
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
goto out;
|
||||
if (ret > 0) {
|
||||
ret = btrfs_previous_item(root, path, key.objectid, key.type);
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
if (ret > 0)
|
||||
start_found = 1;
|
||||
goto out;
|
||||
}
|
||||
l = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(l, &key, path->slots[0]);
|
||||
|
||||
while (1) {
|
||||
l = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
|
@ -790,24 +776,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
|
|||
if (ret == 0)
|
||||
continue;
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
no_more_items:
|
||||
if (!start_found) {
|
||||
if (search_start >= search_end) {
|
||||
ret = -ENOSPC;
|
||||
goto error;
|
||||
}
|
||||
*start = search_start;
|
||||
start_found = 1;
|
||||
goto check_pending;
|
||||
}
|
||||
*start = last_byte > search_start ?
|
||||
last_byte : search_start;
|
||||
if (search_end <= *start) {
|
||||
ret = -ENOSPC;
|
||||
goto error;
|
||||
}
|
||||
goto check_pending;
|
||||
goto out;
|
||||
|
||||
break;
|
||||
}
|
||||
btrfs_item_key_to_cpu(l, &key, slot);
|
||||
|
||||
|
@ -815,48 +786,187 @@ no_more_items:
|
|||
goto next;
|
||||
|
||||
if (key.objectid > device->devid)
|
||||
goto no_more_items;
|
||||
break;
|
||||
|
||||
if (key.offset >= search_start && key.offset > last_byte &&
|
||||
start_found) {
|
||||
if (last_byte < search_start)
|
||||
last_byte = search_start;
|
||||
hole_size = key.offset - last_byte;
|
||||
|
||||
if (hole_size > *max_avail)
|
||||
*max_avail = hole_size;
|
||||
|
||||
if (key.offset > last_byte &&
|
||||
hole_size >= num_bytes) {
|
||||
*start = last_byte;
|
||||
goto check_pending;
|
||||
}
|
||||
}
|
||||
if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
|
||||
goto next;
|
||||
|
||||
start_found = 1;
|
||||
dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
|
||||
last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
|
||||
extent_end = key.offset + btrfs_dev_extent_length(l,
|
||||
dev_extent);
|
||||
if (key.offset <= start && extent_end > end) {
|
||||
*length = end - start + 1;
|
||||
break;
|
||||
} else if (key.offset <= start && extent_end > start)
|
||||
*length += extent_end - start;
|
||||
else if (key.offset > start && extent_end <= end)
|
||||
*length += extent_end - key.offset;
|
||||
else if (key.offset > start && key.offset <= end) {
|
||||
*length += end - key.offset + 1;
|
||||
break;
|
||||
} else if (key.offset > end)
|
||||
break;
|
||||
|
||||
next:
|
||||
path->slots[0]++;
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* find_free_dev_extent - find free space in the specified device
|
||||
* @trans: transaction handler
|
||||
* @device: the device which we search the free space in
|
||||
* @num_bytes: the size of the free space that we need
|
||||
* @start: store the start of the free space.
|
||||
* @len: the size of the free space that we find, or the size of the max
|
||||
* free space if we don't find suitable free space
|
||||
*
|
||||
* this uses a pretty simple search, the expectation is that it is
|
||||
* called very infrequently and that a given device has a small number
|
||||
* of extents
|
||||
*
|
||||
* @start is used to store the start of the free space if we find. But if we
|
||||
* don't find suitable free space, it will be used to store the start position
|
||||
* of the max free space.
|
||||
*
|
||||
* @len is used to store the size of the free space that we find.
|
||||
* But if we don't find suitable free space, it is used to store the size of
|
||||
* the max free space.
|
||||
*/
|
||||
int find_free_dev_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_device *device, u64 num_bytes,
|
||||
u64 *start, u64 *len)
|
||||
{
|
||||
struct btrfs_key key;
|
||||
struct btrfs_root *root = device->dev_root;
|
||||
struct btrfs_dev_extent *dev_extent;
|
||||
struct btrfs_path *path;
|
||||
u64 hole_size;
|
||||
u64 max_hole_start;
|
||||
u64 max_hole_size;
|
||||
u64 extent_end;
|
||||
u64 search_start;
|
||||
u64 search_end = device->total_bytes;
|
||||
int ret;
|
||||
int slot;
|
||||
struct extent_buffer *l;
|
||||
|
||||
/* FIXME use last free of some kind */
|
||||
|
||||
/* we don't want to overwrite the superblock on the drive,
|
||||
* so we make sure to start at an offset of at least 1MB
|
||||
*/
|
||||
search_start = 1024 * 1024;
|
||||
|
||||
if (root->fs_info->alloc_start + num_bytes <= search_end)
|
||||
search_start = max(root->fs_info->alloc_start, search_start);
|
||||
|
||||
max_hole_start = search_start;
|
||||
max_hole_size = 0;
|
||||
|
||||
if (search_start >= search_end) {
|
||||
ret = -ENOSPC;
|
||||
goto error;
|
||||
}
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path) {
|
||||
ret = -ENOMEM;
|
||||
goto error;
|
||||
}
|
||||
path->reada = 2;
|
||||
|
||||
key.objectid = device->devid;
|
||||
key.offset = search_start;
|
||||
key.type = BTRFS_DEV_EXTENT_KEY;
|
||||
|
||||
ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret > 0) {
|
||||
ret = btrfs_previous_item(root, path, key.objectid, key.type);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
l = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
if (slot >= btrfs_header_nritems(l)) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret == 0)
|
||||
continue;
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
break;
|
||||
}
|
||||
btrfs_item_key_to_cpu(l, &key, slot);
|
||||
|
||||
if (key.objectid < device->devid)
|
||||
goto next;
|
||||
|
||||
if (key.objectid > device->devid)
|
||||
break;
|
||||
|
||||
if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
|
||||
goto next;
|
||||
|
||||
if (key.offset > search_start) {
|
||||
hole_size = key.offset - search_start;
|
||||
|
||||
if (hole_size > max_hole_size) {
|
||||
max_hole_start = search_start;
|
||||
max_hole_size = hole_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this free space is greater than what we need,
|
||||
* it must be the max free space that we have found
|
||||
* until now, so max_hole_start must point to the start
|
||||
* of this free space and the length of this free space
|
||||
* is stored in max_hole_size. Thus, we return
|
||||
* max_hole_start and max_hole_size and go back to the
|
||||
* caller.
|
||||
*/
|
||||
if (hole_size >= num_bytes) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
|
||||
extent_end = key.offset + btrfs_dev_extent_length(l,
|
||||
dev_extent);
|
||||
if (extent_end > search_start)
|
||||
search_start = extent_end;
|
||||
next:
|
||||
path->slots[0]++;
|
||||
cond_resched();
|
||||
}
|
||||
check_pending:
|
||||
/* we have to make sure we didn't find an extent that has already
|
||||
* been allocated by the map tree or the original allocation
|
||||
*/
|
||||
BUG_ON(*start < search_start);
|
||||
|
||||
if (*start + num_bytes > search_end) {
|
||||
ret = -ENOSPC;
|
||||
goto error;
|
||||
hole_size = search_end- search_start;
|
||||
if (hole_size > max_hole_size) {
|
||||
max_hole_start = search_start;
|
||||
max_hole_size = hole_size;
|
||||
}
|
||||
/* check for pending inserts here */
|
||||
ret = 0;
|
||||
|
||||
error:
|
||||
/* See above. */
|
||||
if (hole_size < num_bytes)
|
||||
ret = -ENOSPC;
|
||||
else
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
error:
|
||||
*start = max_hole_start;
|
||||
if (len)
|
||||
*len = max_hole_size;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1196,7 +1306,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
|
|||
set_blocksize(bdev, 4096);
|
||||
bh = btrfs_read_dev_super(bdev);
|
||||
if (!bh) {
|
||||
ret = -EIO;
|
||||
ret = -EINVAL;
|
||||
goto error_close;
|
||||
}
|
||||
disk_super = (struct btrfs_super_block *)bh->b_data;
|
||||
|
@ -1916,6 +2026,9 @@ int btrfs_balance(struct btrfs_root *dev_root)
|
|||
if (dev_root->fs_info->sb->s_flags & MS_RDONLY)
|
||||
return -EROFS;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
mutex_lock(&dev_root->fs_info->volume_mutex);
|
||||
dev_root = dev_root->fs_info->dev_root;
|
||||
|
||||
|
@ -2154,66 +2267,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size,
|
|||
return calc_size * num_stripes;
|
||||
}
|
||||
|
||||
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *extent_root,
|
||||
struct map_lookup **map_ret,
|
||||
u64 *num_bytes, u64 *stripe_size,
|
||||
u64 start, u64 type)
|
||||
/* Used to sort the devices by max_avail(descending sort) */
|
||||
int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
|
||||
{
|
||||
struct btrfs_fs_info *info = extent_root->fs_info;
|
||||
struct btrfs_device *device = NULL;
|
||||
struct btrfs_fs_devices *fs_devices = info->fs_devices;
|
||||
struct list_head *cur;
|
||||
struct map_lookup *map = NULL;
|
||||
struct extent_map_tree *em_tree;
|
||||
struct extent_map *em;
|
||||
struct list_head private_devs;
|
||||
int min_stripe_size = 1 * 1024 * 1024;
|
||||
u64 calc_size = 1024 * 1024 * 1024;
|
||||
u64 max_chunk_size = calc_size;
|
||||
u64 min_free;
|
||||
u64 avail;
|
||||
u64 max_avail = 0;
|
||||
u64 dev_offset;
|
||||
int num_stripes = 1;
|
||||
int min_stripes = 1;
|
||||
int sub_stripes = 0;
|
||||
int looped = 0;
|
||||
int ret;
|
||||
int index;
|
||||
int stripe_len = 64 * 1024;
|
||||
if (((struct btrfs_device_info *)dev_info1)->max_avail >
|
||||
((struct btrfs_device_info *)dev_info2)->max_avail)
|
||||
return -1;
|
||||
else if (((struct btrfs_device_info *)dev_info1)->max_avail <
|
||||
((struct btrfs_device_info *)dev_info2)->max_avail)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
|
||||
(type & BTRFS_BLOCK_GROUP_DUP)) {
|
||||
WARN_ON(1);
|
||||
type &= ~BTRFS_BLOCK_GROUP_DUP;
|
||||
}
|
||||
if (list_empty(&fs_devices->alloc_list))
|
||||
return -ENOSPC;
|
||||
static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type,
|
||||
int *num_stripes, int *min_stripes,
|
||||
int *sub_stripes)
|
||||
{
|
||||
*num_stripes = 1;
|
||||
*min_stripes = 1;
|
||||
*sub_stripes = 0;
|
||||
|
||||
if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
|
||||
num_stripes = fs_devices->rw_devices;
|
||||
min_stripes = 2;
|
||||
*num_stripes = fs_devices->rw_devices;
|
||||
*min_stripes = 2;
|
||||
}
|
||||
if (type & (BTRFS_BLOCK_GROUP_DUP)) {
|
||||
num_stripes = 2;
|
||||
min_stripes = 2;
|
||||
*num_stripes = 2;
|
||||
*min_stripes = 2;
|
||||
}
|
||||
if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
|
||||
if (fs_devices->rw_devices < 2)
|
||||
return -ENOSPC;
|
||||
num_stripes = 2;
|
||||
min_stripes = 2;
|
||||
*num_stripes = 2;
|
||||
*min_stripes = 2;
|
||||
}
|
||||
if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
|
||||
num_stripes = fs_devices->rw_devices;
|
||||
if (num_stripes < 4)
|
||||
*num_stripes = fs_devices->rw_devices;
|
||||
if (*num_stripes < 4)
|
||||
return -ENOSPC;
|
||||
num_stripes &= ~(u32)1;
|
||||
sub_stripes = 2;
|
||||
min_stripes = 4;
|
||||
*num_stripes &= ~(u32)1;
|
||||
*sub_stripes = 2;
|
||||
*min_stripes = 4;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices,
|
||||
u64 proposed_size, u64 type,
|
||||
int num_stripes, int small_stripe)
|
||||
{
|
||||
int min_stripe_size = 1 * 1024 * 1024;
|
||||
u64 calc_size = proposed_size;
|
||||
u64 max_chunk_size = calc_size;
|
||||
int ncopies = 1;
|
||||
|
||||
if (type & (BTRFS_BLOCK_GROUP_RAID1 |
|
||||
BTRFS_BLOCK_GROUP_DUP |
|
||||
BTRFS_BLOCK_GROUP_RAID10))
|
||||
ncopies = 2;
|
||||
|
||||
if (type & BTRFS_BLOCK_GROUP_DATA) {
|
||||
max_chunk_size = 10 * calc_size;
|
||||
min_stripe_size = 64 * 1024 * 1024;
|
||||
|
@ -2230,51 +2344,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
|
|||
max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
|
||||
max_chunk_size);
|
||||
|
||||
again:
|
||||
max_avail = 0;
|
||||
if (!map || map->num_stripes != num_stripes) {
|
||||
kfree(map);
|
||||
map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
|
||||
if (!map)
|
||||
return -ENOMEM;
|
||||
map->num_stripes = num_stripes;
|
||||
}
|
||||
|
||||
if (calc_size * num_stripes > max_chunk_size) {
|
||||
calc_size = max_chunk_size;
|
||||
if (calc_size * num_stripes > max_chunk_size * ncopies) {
|
||||
calc_size = max_chunk_size * ncopies;
|
||||
do_div(calc_size, num_stripes);
|
||||
do_div(calc_size, stripe_len);
|
||||
calc_size *= stripe_len;
|
||||
do_div(calc_size, BTRFS_STRIPE_LEN);
|
||||
calc_size *= BTRFS_STRIPE_LEN;
|
||||
}
|
||||
|
||||
/* we don't want tiny stripes */
|
||||
if (!looped)
|
||||
if (!small_stripe)
|
||||
calc_size = max_t(u64, min_stripe_size, calc_size);
|
||||
|
||||
/*
|
||||
* we're about to do_div by the stripe_len so lets make sure
|
||||
* we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure
|
||||
* we end up with something bigger than a stripe
|
||||
*/
|
||||
calc_size = max_t(u64, calc_size, stripe_len * 4);
|
||||
calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN);
|
||||
|
||||
do_div(calc_size, stripe_len);
|
||||
calc_size *= stripe_len;
|
||||
do_div(calc_size, BTRFS_STRIPE_LEN);
|
||||
calc_size *= BTRFS_STRIPE_LEN;
|
||||
|
||||
return calc_size;
|
||||
}
|
||||
|
||||
static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map,
|
||||
int num_stripes)
|
||||
{
|
||||
struct map_lookup *new;
|
||||
size_t len = map_lookup_size(num_stripes);
|
||||
|
||||
BUG_ON(map->num_stripes < num_stripes);
|
||||
|
||||
if (map->num_stripes == num_stripes)
|
||||
return map;
|
||||
|
||||
new = kmalloc(len, GFP_NOFS);
|
||||
if (!new) {
|
||||
/* just change map->num_stripes */
|
||||
map->num_stripes = num_stripes;
|
||||
return map;
|
||||
}
|
||||
|
||||
memcpy(new, map, len);
|
||||
new->num_stripes = num_stripes;
|
||||
kfree(map);
|
||||
return new;
|
||||
}
|
||||
|
||||
/*
|
||||
* helper to allocate device space from btrfs_device_info, in which we stored
|
||||
* max free space information of every device. It is used when we can not
|
||||
* allocate chunks by default size.
|
||||
*
|
||||
* By this helper, we can allocate a new chunk as larger as possible.
|
||||
*/
|
||||
static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device_info *devices,
|
||||
int nr_device, u64 type,
|
||||
struct map_lookup **map_lookup,
|
||||
int min_stripes, u64 *stripe_size)
|
||||
{
|
||||
int i, index, sort_again = 0;
|
||||
int min_devices = min_stripes;
|
||||
u64 max_avail, min_free;
|
||||
struct map_lookup *map = *map_lookup;
|
||||
int ret;
|
||||
|
||||
if (nr_device < min_stripes)
|
||||
return -ENOSPC;
|
||||
|
||||
btrfs_descending_sort_devices(devices, nr_device);
|
||||
|
||||
max_avail = devices[0].max_avail;
|
||||
if (!max_avail)
|
||||
return -ENOSPC;
|
||||
|
||||
for (i = 0; i < nr_device; i++) {
|
||||
/*
|
||||
* if dev_offset = 0, it means the free space of this device
|
||||
* is less than what we need, and we didn't search max avail
|
||||
* extent on this device, so do it now.
|
||||
*/
|
||||
if (!devices[i].dev_offset) {
|
||||
ret = find_free_dev_extent(trans, devices[i].dev,
|
||||
max_avail,
|
||||
&devices[i].dev_offset,
|
||||
&devices[i].max_avail);
|
||||
if (ret != 0 && ret != -ENOSPC)
|
||||
return ret;
|
||||
sort_again = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* we update the max avail free extent of each devices, sort again */
|
||||
if (sort_again)
|
||||
btrfs_descending_sort_devices(devices, nr_device);
|
||||
|
||||
if (type & BTRFS_BLOCK_GROUP_DUP)
|
||||
min_devices = 1;
|
||||
|
||||
if (!devices[min_devices - 1].max_avail)
|
||||
return -ENOSPC;
|
||||
|
||||
max_avail = devices[min_devices - 1].max_avail;
|
||||
if (type & BTRFS_BLOCK_GROUP_DUP)
|
||||
do_div(max_avail, 2);
|
||||
|
||||
max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type,
|
||||
min_stripes, 1);
|
||||
if (type & BTRFS_BLOCK_GROUP_DUP)
|
||||
min_free = max_avail * 2;
|
||||
else
|
||||
min_free = max_avail;
|
||||
|
||||
if (min_free > devices[min_devices - 1].max_avail)
|
||||
return -ENOSPC;
|
||||
|
||||
map = __shrink_map_lookup_stripes(map, min_stripes);
|
||||
*stripe_size = max_avail;
|
||||
|
||||
index = 0;
|
||||
for (i = 0; i < min_stripes; i++) {
|
||||
map->stripes[i].dev = devices[index].dev;
|
||||
map->stripes[i].physical = devices[index].dev_offset;
|
||||
if (type & BTRFS_BLOCK_GROUP_DUP) {
|
||||
i++;
|
||||
map->stripes[i].dev = devices[index].dev;
|
||||
map->stripes[i].physical = devices[index].dev_offset +
|
||||
max_avail;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
*map_lookup = map;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *extent_root,
|
||||
struct map_lookup **map_ret,
|
||||
u64 *num_bytes, u64 *stripe_size,
|
||||
u64 start, u64 type)
|
||||
{
|
||||
struct btrfs_fs_info *info = extent_root->fs_info;
|
||||
struct btrfs_device *device = NULL;
|
||||
struct btrfs_fs_devices *fs_devices = info->fs_devices;
|
||||
struct list_head *cur;
|
||||
struct map_lookup *map;
|
||||
struct extent_map_tree *em_tree;
|
||||
struct extent_map *em;
|
||||
struct btrfs_device_info *devices_info;
|
||||
struct list_head private_devs;
|
||||
u64 calc_size = 1024 * 1024 * 1024;
|
||||
u64 min_free;
|
||||
u64 avail;
|
||||
u64 dev_offset;
|
||||
int num_stripes;
|
||||
int min_stripes;
|
||||
int sub_stripes;
|
||||
int min_devices; /* the min number of devices we need */
|
||||
int i;
|
||||
int ret;
|
||||
int index;
|
||||
|
||||
if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
|
||||
(type & BTRFS_BLOCK_GROUP_DUP)) {
|
||||
WARN_ON(1);
|
||||
type &= ~BTRFS_BLOCK_GROUP_DUP;
|
||||
}
|
||||
if (list_empty(&fs_devices->alloc_list))
|
||||
return -ENOSPC;
|
||||
|
||||
ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes,
|
||||
&min_stripes, &sub_stripes);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
|
||||
GFP_NOFS);
|
||||
if (!devices_info)
|
||||
return -ENOMEM;
|
||||
|
||||
map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
|
||||
if (!map) {
|
||||
ret = -ENOMEM;
|
||||
goto error;
|
||||
}
|
||||
map->num_stripes = num_stripes;
|
||||
|
||||
cur = fs_devices->alloc_list.next;
|
||||
index = 0;
|
||||
i = 0;
|
||||
|
||||
if (type & BTRFS_BLOCK_GROUP_DUP)
|
||||
calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type,
|
||||
num_stripes, 0);
|
||||
|
||||
if (type & BTRFS_BLOCK_GROUP_DUP) {
|
||||
min_free = calc_size * 2;
|
||||
else
|
||||
min_devices = 1;
|
||||
} else {
|
||||
min_free = calc_size;
|
||||
|
||||
/*
|
||||
* we add 1MB because we never use the first 1MB of the device, unless
|
||||
* we've looped, then we are likely allocating the maximum amount of
|
||||
* space left already
|
||||
*/
|
||||
if (!looped)
|
||||
min_free += 1024 * 1024;
|
||||
min_devices = min_stripes;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&private_devs);
|
||||
while (index < num_stripes) {
|
||||
|
@ -2287,27 +2559,39 @@ again:
|
|||
cur = cur->next;
|
||||
|
||||
if (device->in_fs_metadata && avail >= min_free) {
|
||||
ret = find_free_dev_extent(trans, device,
|
||||
min_free, &dev_offset,
|
||||
&max_avail);
|
||||
ret = find_free_dev_extent(trans, device, min_free,
|
||||
&devices_info[i].dev_offset,
|
||||
&devices_info[i].max_avail);
|
||||
if (ret == 0) {
|
||||
list_move_tail(&device->dev_alloc_list,
|
||||
&private_devs);
|
||||
map->stripes[index].dev = device;
|
||||
map->stripes[index].physical = dev_offset;
|
||||
map->stripes[index].physical =
|
||||
devices_info[i].dev_offset;
|
||||
index++;
|
||||
if (type & BTRFS_BLOCK_GROUP_DUP) {
|
||||
map->stripes[index].dev = device;
|
||||
map->stripes[index].physical =
|
||||
dev_offset + calc_size;
|
||||
devices_info[i].dev_offset +
|
||||
calc_size;
|
||||
index++;
|
||||
}
|
||||
}
|
||||
} else if (device->in_fs_metadata && avail > max_avail)
|
||||
max_avail = avail;
|
||||
} else if (ret != -ENOSPC)
|
||||
goto error;
|
||||
|
||||
devices_info[i].dev = device;
|
||||
i++;
|
||||
} else if (device->in_fs_metadata &&
|
||||
avail >= BTRFS_STRIPE_LEN) {
|
||||
devices_info[i].dev = device;
|
||||
devices_info[i].max_avail = avail;
|
||||
i++;
|
||||
}
|
||||
|
||||
if (cur == &fs_devices->alloc_list)
|
||||
break;
|
||||
}
|
||||
|
||||
list_splice(&private_devs, &fs_devices->alloc_list);
|
||||
if (index < num_stripes) {
|
||||
if (index >= min_stripes) {
|
||||
|
@ -2316,34 +2600,36 @@ again:
|
|||
num_stripes /= sub_stripes;
|
||||
num_stripes *= sub_stripes;
|
||||
}
|
||||
looped = 1;
|
||||
goto again;
|
||||
|
||||
map = __shrink_map_lookup_stripes(map, num_stripes);
|
||||
} else if (i >= min_devices) {
|
||||
ret = __btrfs_alloc_tiny_space(trans, fs_devices,
|
||||
devices_info, i, type,
|
||||
&map, min_stripes,
|
||||
&calc_size);
|
||||
if (ret)
|
||||
goto error;
|
||||
} else {
|
||||
ret = -ENOSPC;
|
||||
goto error;
|
||||
}
|
||||
if (!looped && max_avail > 0) {
|
||||
looped = 1;
|
||||
calc_size = max_avail;
|
||||
goto again;
|
||||
}
|
||||
kfree(map);
|
||||
return -ENOSPC;
|
||||
}
|
||||
map->sector_size = extent_root->sectorsize;
|
||||
map->stripe_len = stripe_len;
|
||||
map->io_align = stripe_len;
|
||||
map->io_width = stripe_len;
|
||||
map->stripe_len = BTRFS_STRIPE_LEN;
|
||||
map->io_align = BTRFS_STRIPE_LEN;
|
||||
map->io_width = BTRFS_STRIPE_LEN;
|
||||
map->type = type;
|
||||
map->num_stripes = num_stripes;
|
||||
map->sub_stripes = sub_stripes;
|
||||
|
||||
*map_ret = map;
|
||||
*stripe_size = calc_size;
|
||||
*num_bytes = chunk_bytes_by_type(type, calc_size,
|
||||
num_stripes, sub_stripes);
|
||||
map->num_stripes, sub_stripes);
|
||||
|
||||
em = alloc_extent_map(GFP_NOFS);
|
||||
if (!em) {
|
||||
kfree(map);
|
||||
return -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto error;
|
||||
}
|
||||
em->bdev = (struct block_device *)map;
|
||||
em->start = start;
|
||||
|
@ -2376,7 +2662,13 @@ again:
|
|||
index++;
|
||||
}
|
||||
|
||||
kfree(devices_info);
|
||||
return 0;
|
||||
|
||||
error:
|
||||
kfree(map);
|
||||
kfree(devices_info);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
|
||||
|
|
|
@ -20,8 +20,11 @@
|
|||
#define __BTRFS_VOLUMES_
|
||||
|
||||
#include <linux/bio.h>
|
||||
#include <linux/sort.h>
|
||||
#include "async-thread.h"
|
||||
|
||||
#define BTRFS_STRIPE_LEN (64 * 1024)
|
||||
|
||||
struct buffer_head;
|
||||
struct btrfs_pending_bios {
|
||||
struct bio *head;
|
||||
|
@ -136,6 +139,30 @@ struct btrfs_multi_bio {
|
|||
struct btrfs_bio_stripe stripes[];
|
||||
};
|
||||
|
||||
/*
 * Per-device record used while picking devices for a new chunk; arrays of
 * these are sorted by max_avail.
 */
struct btrfs_device_info {
|
||||
/* the device this record describes */
struct btrfs_device *dev;
|
||||
/*
 * start of the free space found on the device by find_free_dev_extent();
 * 0 means the device has not been searched yet
 */
u64 dev_offset;
|
||||
/* amount of free space known for the device; this is the sort key */
u64 max_avail;
|
||||
};
|
||||
|
||||
/* Used to sort the devices by max_avail(descending sort) */
|
||||
int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2);
|
||||
|
||||
/*
|
||||
* sort the devices by max_avail, in which max free extent size of each device
|
||||
* is stored.(Descending Sort)
|
||||
*/
|
||||
static inline void btrfs_descending_sort_devices(
|
||||
struct btrfs_device_info *devices,
|
||||
size_t nr_devices)
|
||||
{
|
||||
sort(devices, nr_devices, sizeof(struct btrfs_device_info),
|
||||
btrfs_cmp_device_free_bytes, NULL);
|
||||
}
|
||||
|
||||
int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
|
||||
u64 end, u64 *length);
|
||||
|
||||
#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
|
||||
(sizeof(struct btrfs_bio_stripe) * (n)))
|
||||
|
||||
|
|
|
@ -316,6 +316,15 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
|
|||
int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
|
||||
size_t size, int flags)
|
||||
{
|
||||
struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
|
||||
|
||||
/*
|
||||
* The permission on security.* and system.* is not checked
|
||||
* in permission().
|
||||
*/
|
||||
if (btrfs_root_readonly(root))
|
||||
return -EROFS;
|
||||
|
||||
/*
|
||||
* If this is a request for a synthetic attribute in the system.*
|
||||
* namespace use the generic infrastructure to resolve a handler
|
||||
|
@ -336,6 +345,15 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
|
|||
|
||||
int btrfs_removexattr(struct dentry *dentry, const char *name)
|
||||
{
|
||||
struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
|
||||
|
||||
/*
|
||||
* The permission on security.* and system.* is not checked
|
||||
* in permission().
|
||||
*/
|
||||
if (btrfs_root_readonly(root))
|
||||
return -EROFS;
|
||||
|
||||
/*
|
||||
* If this is a request for a synthetic attribute in the system.*
|
||||
* namespace use the generic infrastructure to resolve a handler
|
||||
|
|
371
fs/btrfs/zlib.c
371
fs/btrfs/zlib.c
|
@ -32,15 +32,6 @@
|
|||
#include <linux/bio.h>
|
||||
#include "compression.h"
|
||||
|
||||
/* Plan: call deflate() with avail_in == *sourcelen,
|
||||
avail_out = *dstlen - 12 and flush == Z_FINISH.
|
||||
If it doesn't manage to finish, call it again with
|
||||
avail_in == 0 and avail_out set to the remaining 12
|
||||
bytes for it to clean up.
|
||||
Q: Is 12 bytes sufficient?
|
||||
*/
|
||||
#define STREAM_END_SPACE 12
|
||||
|
||||
struct workspace {
|
||||
z_stream inf_strm;
|
||||
z_stream def_strm;
|
||||
|
@ -48,152 +39,51 @@ struct workspace {
|
|||
struct list_head list;
|
||||
};
|
||||
|
||||
static LIST_HEAD(idle_workspace);
|
||||
static DEFINE_SPINLOCK(workspace_lock);
|
||||
static unsigned long num_workspace;
|
||||
static atomic_t alloc_workspace = ATOMIC_INIT(0);
|
||||
static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
|
||||
|
||||
/*
|
||||
* this finds an available zlib workspace or allocates a new one
|
||||
* NULL or an ERR_PTR is returned if things go bad.
|
||||
*/
|
||||
static struct workspace *find_zlib_workspace(void)
|
||||
static void zlib_free_workspace(struct list_head *ws)
|
||||
{
|
||||
struct workspace *workspace;
|
||||
int ret;
|
||||
int cpus = num_online_cpus();
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
|
||||
again:
|
||||
spin_lock(&workspace_lock);
|
||||
if (!list_empty(&idle_workspace)) {
|
||||
workspace = list_entry(idle_workspace.next, struct workspace,
|
||||
list);
|
||||
list_del(&workspace->list);
|
||||
num_workspace--;
|
||||
spin_unlock(&workspace_lock);
|
||||
return workspace;
|
||||
|
||||
}
|
||||
spin_unlock(&workspace_lock);
|
||||
if (atomic_read(&alloc_workspace) > cpus) {
|
||||
DEFINE_WAIT(wait);
|
||||
prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
|
||||
if (atomic_read(&alloc_workspace) > cpus)
|
||||
schedule();
|
||||
finish_wait(&workspace_wait, &wait);
|
||||
goto again;
|
||||
}
|
||||
atomic_inc(&alloc_workspace);
|
||||
workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
|
||||
if (!workspace) {
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
|
||||
if (!workspace->def_strm.workspace) {
|
||||
ret = -ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
|
||||
if (!workspace->inf_strm.workspace) {
|
||||
ret = -ENOMEM;
|
||||
goto fail_inflate;
|
||||
}
|
||||
workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
|
||||
if (!workspace->buf) {
|
||||
ret = -ENOMEM;
|
||||
goto fail_kmalloc;
|
||||
}
|
||||
return workspace;
|
||||
|
||||
fail_kmalloc:
|
||||
vfree(workspace->inf_strm.workspace);
|
||||
fail_inflate:
|
||||
vfree(workspace->def_strm.workspace);
|
||||
fail:
|
||||
kfree(workspace);
|
||||
atomic_dec(&alloc_workspace);
|
||||
wake_up(&workspace_wait);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* put a workspace struct back on the list or free it if we have enough
|
||||
* idle ones sitting around
|
||||
*/
|
||||
static int free_workspace(struct workspace *workspace)
|
||||
{
|
||||
spin_lock(&workspace_lock);
|
||||
if (num_workspace < num_online_cpus()) {
|
||||
list_add_tail(&workspace->list, &idle_workspace);
|
||||
num_workspace++;
|
||||
spin_unlock(&workspace_lock);
|
||||
if (waitqueue_active(&workspace_wait))
|
||||
wake_up(&workspace_wait);
|
||||
return 0;
|
||||
}
|
||||
spin_unlock(&workspace_lock);
|
||||
vfree(workspace->def_strm.workspace);
|
||||
vfree(workspace->inf_strm.workspace);
|
||||
kfree(workspace->buf);
|
||||
kfree(workspace);
|
||||
|
||||
atomic_dec(&alloc_workspace);
|
||||
if (waitqueue_active(&workspace_wait))
|
||||
wake_up(&workspace_wait);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* cleanup function for module exit
|
||||
*/
|
||||
static void free_workspaces(void)
|
||||
static struct list_head *zlib_alloc_workspace(void)
|
||||
{
|
||||
struct workspace *workspace;
|
||||
while (!list_empty(&idle_workspace)) {
|
||||
workspace = list_entry(idle_workspace.next, struct workspace,
|
||||
list);
|
||||
list_del(&workspace->list);
|
||||
vfree(workspace->def_strm.workspace);
|
||||
vfree(workspace->inf_strm.workspace);
|
||||
kfree(workspace->buf);
|
||||
kfree(workspace);
|
||||
atomic_dec(&alloc_workspace);
|
||||
}
|
||||
|
||||
workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
|
||||
if (!workspace)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
|
||||
workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
|
||||
workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
|
||||
if (!workspace->def_strm.workspace ||
|
||||
!workspace->inf_strm.workspace || !workspace->buf)
|
||||
goto fail;
|
||||
|
||||
INIT_LIST_HEAD(&workspace->list);
|
||||
|
||||
return &workspace->list;
|
||||
fail:
|
||||
zlib_free_workspace(&workspace->list);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/*
|
||||
* given an address space and start/len, compress the bytes.
|
||||
*
|
||||
* pages are allocated to hold the compressed result and stored
|
||||
* in 'pages'
|
||||
*
|
||||
* out_pages is used to return the number of pages allocated. There
|
||||
* may be pages allocated even if we return an error
|
||||
*
|
||||
* total_in is used to return the number of bytes actually read. It
|
||||
* may be smaller than len if we had to exit early because we
|
||||
* ran out of room in the pages array or because we cross the
|
||||
* max_out threshold.
|
||||
*
|
||||
* total_out is used to return the total number of compressed bytes
|
||||
*
|
||||
* max_out tells us the max number of bytes that we're allowed to
|
||||
* stuff into pages
|
||||
*/
|
||||
int btrfs_zlib_compress_pages(struct address_space *mapping,
|
||||
u64 start, unsigned long len,
|
||||
struct page **pages,
|
||||
unsigned long nr_dest_pages,
|
||||
unsigned long *out_pages,
|
||||
unsigned long *total_in,
|
||||
unsigned long *total_out,
|
||||
unsigned long max_out)
|
||||
static int zlib_compress_pages(struct list_head *ws,
|
||||
struct address_space *mapping,
|
||||
u64 start, unsigned long len,
|
||||
struct page **pages,
|
||||
unsigned long nr_dest_pages,
|
||||
unsigned long *out_pages,
|
||||
unsigned long *total_in,
|
||||
unsigned long *total_out,
|
||||
unsigned long max_out)
|
||||
{
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
int ret;
|
||||
struct workspace *workspace;
|
||||
char *data_in;
|
||||
char *cpage_out;
|
||||
int nr_pages = 0;
|
||||
|
@ -205,10 +95,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
|
|||
*total_out = 0;
|
||||
*total_in = 0;
|
||||
|
||||
workspace = find_zlib_workspace();
|
||||
if (IS_ERR(workspace))
|
||||
return -1;
|
||||
|
||||
if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
|
||||
printk(KERN_WARNING "deflateInit failed\n");
|
||||
ret = -1;
|
||||
|
@ -222,6 +108,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
|
|||
data_in = kmap(in_page);
|
||||
|
||||
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
|
||||
if (out_page == NULL) {
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
cpage_out = kmap(out_page);
|
||||
pages[0] = out_page;
|
||||
nr_pages = 1;
|
||||
|
@ -260,6 +150,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
|
|||
goto out;
|
||||
}
|
||||
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
|
||||
if (out_page == NULL) {
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
cpage_out = kmap(out_page);
|
||||
pages[nr_pages] = out_page;
|
||||
nr_pages++;
|
||||
|
@ -314,55 +208,26 @@ out:
|
|||
kunmap(in_page);
|
||||
page_cache_release(in_page);
|
||||
}
|
||||
free_workspace(workspace);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* pages_in is an array of pages with compressed data.
|
||||
*
|
||||
* disk_start is the starting logical offset of this array in the file
|
||||
*
|
||||
* bvec is a bio_vec of pages from the file that we want to decompress into
|
||||
*
|
||||
* vcnt is the count of pages in the biovec
|
||||
*
|
||||
* srclen is the number of bytes in pages_in
|
||||
*
|
||||
* The basic idea is that we have a bio that was created by readpages.
|
||||
* The pages in the bio are for the uncompressed data, and they may not
|
||||
* be contiguous. They all correspond to the range of bytes covered by
|
||||
* the compressed extent.
|
||||
*/
|
||||
int btrfs_zlib_decompress_biovec(struct page **pages_in,
|
||||
u64 disk_start,
|
||||
struct bio_vec *bvec,
|
||||
int vcnt,
|
||||
size_t srclen)
|
||||
static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
|
||||
u64 disk_start,
|
||||
struct bio_vec *bvec,
|
||||
int vcnt,
|
||||
size_t srclen)
|
||||
{
|
||||
int ret = 0;
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
int ret = 0, ret2;
|
||||
int wbits = MAX_WBITS;
|
||||
struct workspace *workspace;
|
||||
char *data_in;
|
||||
size_t total_out = 0;
|
||||
unsigned long page_bytes_left;
|
||||
unsigned long page_in_index = 0;
|
||||
unsigned long page_out_index = 0;
|
||||
struct page *page_out;
|
||||
unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
|
||||
PAGE_CACHE_SIZE;
|
||||
unsigned long buf_start;
|
||||
unsigned long buf_offset;
|
||||
unsigned long bytes;
|
||||
unsigned long working_bytes;
|
||||
unsigned long pg_offset;
|
||||
unsigned long start_byte;
|
||||
unsigned long current_buf_start;
|
||||
char *kaddr;
|
||||
|
||||
workspace = find_zlib_workspace();
|
||||
if (IS_ERR(workspace))
|
||||
return -ENOMEM;
|
||||
|
||||
data_in = kmap(pages_in[page_in_index]);
|
||||
workspace->inf_strm.next_in = data_in;
|
||||
|
@ -372,8 +237,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in,
|
|||
workspace->inf_strm.total_out = 0;
|
||||
workspace->inf_strm.next_out = workspace->buf;
|
||||
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
|
||||
page_out = bvec[page_out_index].bv_page;
|
||||
page_bytes_left = PAGE_CACHE_SIZE;
|
||||
pg_offset = 0;
|
||||
|
||||
/* If it's deflate, and it's got no preset dictionary, then
|
||||
|
@ -389,107 +252,29 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in,
|
|||
|
||||
if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
|
||||
printk(KERN_WARNING "inflateInit failed\n");
|
||||
ret = -1;
|
||||
goto out;
|
||||
return -1;
|
||||
}
|
||||
while (workspace->inf_strm.total_in < srclen) {
|
||||
ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
|
||||
if (ret != Z_OK && ret != Z_STREAM_END)
|
||||
break;
|
||||
/*
|
||||
* buf start is the byte offset of the start of
|
||||
* our workspace buffer
|
||||
*/
|
||||
buf_start = total_out;
|
||||
|
||||
/* total_out is the last byte of the workspace buffer */
|
||||
buf_start = total_out;
|
||||
total_out = workspace->inf_strm.total_out;
|
||||
|
||||
working_bytes = total_out - buf_start;
|
||||
|
||||
/*
|
||||
* start byte is the first byte of the page we're currently
|
||||
* copying into relative to the start of the compressed data.
|
||||
*/
|
||||
start_byte = page_offset(page_out) - disk_start;
|
||||
|
||||
if (working_bytes == 0) {
|
||||
/* we didn't make progress in this inflate
|
||||
* call, we're done
|
||||
*/
|
||||
if (ret != Z_STREAM_END)
|
||||
ret = -1;
|
||||
/* we didn't make progress in this inflate call, we're done */
|
||||
if (buf_start == total_out)
|
||||
break;
|
||||
|
||||
ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
|
||||
total_out, disk_start,
|
||||
bvec, vcnt,
|
||||
&page_out_index, &pg_offset);
|
||||
if (ret2 == 0) {
|
||||
ret = 0;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* we haven't yet hit data corresponding to this page */
|
||||
if (total_out <= start_byte)
|
||||
goto next;
|
||||
|
||||
/*
|
||||
* the start of the data we care about is offset into
|
||||
* the middle of our working buffer
|
||||
*/
|
||||
if (total_out > start_byte && buf_start < start_byte) {
|
||||
buf_offset = start_byte - buf_start;
|
||||
working_bytes -= buf_offset;
|
||||
} else {
|
||||
buf_offset = 0;
|
||||
}
|
||||
current_buf_start = buf_start;
|
||||
|
||||
/* copy bytes from the working buffer into the pages */
|
||||
while (working_bytes > 0) {
|
||||
bytes = min(PAGE_CACHE_SIZE - pg_offset,
|
||||
PAGE_CACHE_SIZE - buf_offset);
|
||||
bytes = min(bytes, working_bytes);
|
||||
kaddr = kmap_atomic(page_out, KM_USER0);
|
||||
memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
|
||||
bytes);
|
||||
kunmap_atomic(kaddr, KM_USER0);
|
||||
flush_dcache_page(page_out);
|
||||
|
||||
pg_offset += bytes;
|
||||
page_bytes_left -= bytes;
|
||||
buf_offset += bytes;
|
||||
working_bytes -= bytes;
|
||||
current_buf_start += bytes;
|
||||
|
||||
/* check if we need to pick another page */
|
||||
if (page_bytes_left == 0) {
|
||||
page_out_index++;
|
||||
if (page_out_index >= vcnt) {
|
||||
ret = 0;
|
||||
goto done;
|
||||
}
|
||||
|
||||
page_out = bvec[page_out_index].bv_page;
|
||||
pg_offset = 0;
|
||||
page_bytes_left = PAGE_CACHE_SIZE;
|
||||
start_byte = page_offset(page_out) - disk_start;
|
||||
|
||||
/*
|
||||
* make sure our new page is covered by this
|
||||
* working buffer
|
||||
*/
|
||||
if (total_out <= start_byte)
|
||||
goto next;
|
||||
|
||||
/* the next page in the biovec might not
|
||||
* be adjacent to the last page, but it
|
||||
* might still be found inside this working
|
||||
* buffer. bump our offset pointer
|
||||
*/
|
||||
if (total_out > start_byte &&
|
||||
current_buf_start < start_byte) {
|
||||
buf_offset = start_byte - buf_start;
|
||||
working_bytes = total_out - start_byte;
|
||||
current_buf_start = buf_start +
|
||||
buf_offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
next:
|
||||
workspace->inf_strm.next_out = workspace->buf;
|
||||
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
|
||||
|
||||
|
@ -516,35 +301,21 @@ done:
|
|||
zlib_inflateEnd(&workspace->inf_strm);
|
||||
if (data_in)
|
||||
kunmap(pages_in[page_in_index]);
|
||||
out:
|
||||
free_workspace(workspace);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* a less complex decompression routine. Our compressed data fits in a
|
||||
* single page, and we want to read a single page out of it.
|
||||
* start_byte tells us the offset into the compressed data we're interested in
|
||||
*/
|
||||
int btrfs_zlib_decompress(unsigned char *data_in,
|
||||
struct page *dest_page,
|
||||
unsigned long start_byte,
|
||||
size_t srclen, size_t destlen)
|
||||
static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
|
||||
struct page *dest_page,
|
||||
unsigned long start_byte,
|
||||
size_t srclen, size_t destlen)
|
||||
{
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
int ret = 0;
|
||||
int wbits = MAX_WBITS;
|
||||
struct workspace *workspace;
|
||||
unsigned long bytes_left = destlen;
|
||||
unsigned long total_out = 0;
|
||||
char *kaddr;
|
||||
|
||||
if (destlen > PAGE_CACHE_SIZE)
|
||||
return -ENOMEM;
|
||||
|
||||
workspace = find_zlib_workspace();
|
||||
if (IS_ERR(workspace))
|
||||
return -ENOMEM;
|
||||
|
||||
workspace->inf_strm.next_in = data_in;
|
||||
workspace->inf_strm.avail_in = srclen;
|
||||
workspace->inf_strm.total_in = 0;
|
||||
|
@ -565,8 +336,7 @@ int btrfs_zlib_decompress(unsigned char *data_in,
|
|||
|
||||
if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
|
||||
printk(KERN_WARNING "inflateInit failed\n");
|
||||
ret = -1;
|
||||
goto out;
|
||||
return -1;
|
||||
}
|
||||
|
||||
while (bytes_left > 0) {
|
||||
|
@ -616,12 +386,13 @@ next:
|
|||
ret = 0;
|
||||
|
||||
zlib_inflateEnd(&workspace->inf_strm);
|
||||
out:
|
||||
free_workspace(workspace);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void btrfs_zlib_exit(void)
|
||||
{
|
||||
free_workspaces();
|
||||
}
|
||||
/*
 * zlib entry in the btrfs_compress_op callback interface: each slot points
 * at the matching static zlib_* function.
 */
struct btrfs_compress_op btrfs_zlib_compress = {
|
||||
.alloc_workspace = zlib_alloc_workspace,
|
||||
.free_workspace = zlib_free_workspace,
|
||||
.compress_pages = zlib_compress_pages,
|
||||
.decompress_biovec = zlib_decompress_biovec,
|
||||
.decompress = zlib_decompress,
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue